Files
esl-reader/plugins/merriam-webster.py
Christopher T. Johnson 46580b75ea Checkpoint, not working
2024-04-05 11:53:52 -04:00

295 lines
7.6 KiB
Python

from trycast import trycast
import json
import re
from typing import Any, Literal, NamedTuple, NotRequired, TypedDict, cast
from PyQt6.QtCore import QEventLoop, QUrl, Qt
from PyQt6.QtGui import QColor, QFont
from PyQt6.QtNetwork import QNetworkRequest
from lib.words import Word
from lib.definition import Line, Fragment
# Registration record for this plugin: the short source tag and the
# human-readable name.
registration = dict(source='mw', name='Merriam-Webster')

# URL template for a lookup; fetch() fills in {word} and {key} with str.format.
API = "https://www.dictionaryapi.com/api/v3/references/collegiate/json/{word}?key={key}"

# NOTE(review): the API key is hard-coded in the source file; consider loading
# it from configuration instead.
key = "51d9df34-ee13-489e-8656-478c215e846c"
class TextTuple(NamedTuple):
    """Two-field tuple: a type tag followed by its text."""
    type_: str  # tag value; expected to be 'text'
    text: str
class TTuple(NamedTuple):
    """Two-field tuple: a type tag followed by its text."""
    type_: str  # tag value; expected to be 't'
    text: str
class VerbalIllustration(TypedDict):
    """Dict shape of one item in a ``VerbalIllustrationTuple``; both keys are required."""
    t: str   # NOTE(review): presumably the illustration text -- confirm
    aq: str  # NOTE(review): presumably the quote attribution -- confirm
class VerbalIllustrationTuple(NamedTuple):
    """Two-field tuple: a type tag followed by a list of ``VerbalIllustration`` dicts."""
    type_: str  # tag value; expected to be 'vis'
    data: list[VerbalIllustration]
class Sound(TypedDict):
    """Dict shape of the ``sound`` value of a ``Pronunciation``; all keys are required."""
    audio: str  # base file name; soundUrl() builds the media URL from it
    ref: str
    stat: str
class Pronunciation(TypedDict):
    """Dict shape of one item of a ``prs`` list.

    Only ``mw`` is required.  The remaining keys are optional: ``do_prs``
    tests for each of them with ``in`` before reading it, so declaring them
    as required would make ``trycast`` reject any pronunciation that lacks
    one of them.
    """
    mw: str                     # written pronunciation; read unconditionally by do_prs
    l: NotRequired[str]         # label rendered before the pronunciation
    l2: NotRequired[str]        # label rendered after the pronunciation
    pun: NotRequired[str]       # separator between a label and the pronunciation
    sound: NotRequired[Sound]   # audio reference, turned into a URL by soundUrl()
class Meta(TypedDict):
    """Dict shape of the ``meta`` value of an ``Entry``; all keys are required."""
    id: str  # entry id; getDef() uses it to pick the entries that belong together
    uuid: str
    sort: str
    src: str
    section: str
    stems: list[str]
    offensive: bool
class HeadWordInfo(TypedDict):
    """Dict shape of the ``hwi`` value of an ``Entry``.

    ``hw`` is always present.  ``prs`` is optional (getFirstSound() checks
    for it with ``in``), so it is declared ``NotRequired``; otherwise
    ``trycast`` would reject every headword that has no pronunciations.
    """
    hw: str  # headword; '*' marks the break points inside the word
    prs: NotRequired[list[Pronunciation]]
class HeadWord(TypedDict):
    """Dict shape of one item of an entry's ``ahws`` (alternate headwords) list.

    Only ``hw`` is required.  ``prs`` and ``psl`` are optional in the
    Merriam-Webster JSON format; with them declared as required,
    ``trycast(list[HeadWord], ...)`` in getDef() would fail for an alternate
    headword that carries only ``hw``.
    """
    hw: str  # alternate headword; '*' marks the break points inside the word
    prs: NotRequired[list[Pronunciation]]
    psl: NotRequired[str]
class Variant(TypedDict):
    """Dict shape of one item of a ``vrs`` list; all keys are required as declared."""
    va: str
    vl: str
    prs: list[Pronunciation]
    spl: str
class Inflection(TypedDict):
    """Dict shape of one item of an ``ins`` list; all keys are required as declared."""
    # NOTE(review): the key is spelled ``if_`` here, presumably because ``if``
    # is a Python keyword.  A dict whose key is literally 'if' will not match
    # this declaration; the functional TypedDict syntax (as used for ``Entry``)
    # can declare such a key -- confirm and convert if needed.
    if_: str
    ifc: str
    il: str
    prs: list[Pronunciation]
    spl: str
class DividedSense(TypedDict):
    """Dict shape of the ``sdsense`` value of a ``Sense``; all keys are required as declared."""
    sd: str
    et: list[str] # Not full
    ins: list[Inflection]
    lbs: list[str]
    prs: list[Pronunciation]
    sgram: str
    sls: list[str]
    vrs: list[Variant]
class BioGraphicalNameWrap(TypedDict):
    """Dict shape of a biographical-name wrapper; all keys are required as declared."""
    pname: str
    sname: str
    altname: str
    prs: list[Pronunciation]
class CalledAlsoTarget(TypedDict):
    """Dict shape of one item of the ``cats`` list of a ``CalledAlso``; all keys are required as declared."""
    cat: str
    catref: str
    pn: str
    prs: list[Pronunciation]
    psl: str
class CalledAlso(TypedDict):
    """Dict shape of a called-also note: an intro string plus its list of targets."""
    intro: str
    cats: list[CalledAlsoTarget]
class RunInWrap(TypedDict):
    """Dict shape of a run-in wrapper; all keys are required as declared."""
    rie: str
    prs: list[Pronunciation]
    text: str
    vrs: list[Variant]
class Sense(TypedDict):
    """Dict shape of the ``sense`` / ``sen`` values of a ``SenseSequence``.

    Declared as a ``TypedDict`` like every other shape in this module: as a
    plain class it could never match a dict decoded from JSON, even though
    ``SenseSequence`` uses it as the type of dict values.

    Only ``dt`` is required; the other keys are optional in the
    Merriam-Webster JSON format.
    """
    dt: list[str] # not full
    et: NotRequired[list[str]] # not full
    ins: NotRequired[list[Inflection]]
    lbs: NotRequired[list[str]]
    prs: NotRequired[list[Pronunciation]]
    sdsense: NotRequired[DividedSense]
    sgram: NotRequired[str]
    sls: NotRequired[list[str]]
    sn: NotRequired[str]
    vrs: NotRequired[list[Variant]]
class SenseSequence(TypedDict):
    """Dict shape of one item of a ``Definition``'s ``sseq`` list; both keys are required as declared."""
    # NOTE(review): declaring both keys as required means a dict carrying only
    # one of them will not match -- confirm against real responses.
    sense: Sense
    sen: Sense
class Definition(TypedDict):
    """Dict shape of one item of an ``Entry``'s ``def`` list; both keys are required as declared."""
    sseq: list[SenseSequence]
    vd: str
class EntryX(TypedDict):
    """Class-syntax variant of the entry shape.

    The definitions key is spelled ``def_`` here because ``def`` is a Python
    keyword; ``Entry`` declares the same shape with the literal key ``'def'``
    using the functional TypedDict syntax.  Unlike ``Entry``, ``fl`` is
    required here.

    NOTE(review): no function in this module references ``EntryX``; it looks
    superseded by ``Entry`` -- confirm before removing.
    """
    meta: Meta
    hom: NotRequired[str]
    hwi: HeadWordInfo
    ahws: NotRequired[list[HeadWord]]
    vrs: NotRequired[list[Variant]]
    fl: str
    def_: list[Definition]
# Dict shape of one entry of a looked-up word, as consumed by getFirstSound()
# and getDef().  The functional TypedDict syntax is used because the key 'def'
# is a Python keyword and cannot be written as a class attribute.
Entry = TypedDict(
    'Entry',
    {
        'meta': Meta,                         # getDef() reads meta['id']
        'hom': NotRequired[str],              # presence triggers the "N of M" badge in getDef()
        'hwi': HeadWordInfo,                  # headword and its pronunciations
        'ahws': NotRequired[list[HeadWord]],  # alternate headwords
        'vrs': NotRequired[list[Variant]],
        'fl': NotRequired[str],               # label shown after the headword by getDef()
        'def': list[Definition],
    }
)
def fetch(word:str) -> dict[str, Any]:
    """Look up ``word`` through the Merriam-Webster API and return the result.

    The request is sent through ``Word._nam`` with a 3000 ms transfer
    timeout, and the call blocks in a local ``QEventLoop`` until the reply's
    ``finished`` signal fires.

    Returns a dict with the keys ``'word'`` (the argument as given),
    ``'source'`` (always ``'mw'``) and ``'definition'`` (the decoded JSON
    body).

    Raises ``json.JSONDecodeError`` when the body is not valid JSON, for
    example when it is empty.
    """
    # Local import: only this function needs it.
    from urllib.parse import quote

    request = QNetworkRequest()
    # Percent-encode the word so that characters such as '/', '?' or '#'
    # cannot change the structure of the request URL.
    url = QUrl(API.format(word=quote(word, safe=''), key=key))
    request.setUrl(url)
    request.setTransferTimeout(3000)
    reply = Word._nam.get(request)
    assert reply is not None
    loop = QEventLoop()
    reply.finished.connect(loop.quit)
    # Do not enter the loop if the reply is already complete; nothing would
    # ever wake it up again.
    if not reply.isFinished():
        loop.exec()
    content = reply.readAll()
    # The reply object is owned by the caller; release it once the event
    # loop gets control back, whether or not decoding below succeeds.
    reply.deleteLater()
    data = json.loads(content.data().decode('utf-8'))
    return {
        'word': word,
        'source': 'mw',
        'definition': data,
    }
def soundUrl(sound:Sound, fmt='ogg') -> QUrl:
    """Build the media URL for the audio file named by ``sound['audio']``.

    The URL is ``<base>/<fmt>/<subdir>/<audio>.<fmt>``.  ``subdir`` is
    ``bix`` or ``gg`` when the file name starts with that prefix, the first
    character when it is an ASCII letter, and ``number`` otherwise.
    """
    name = sound['audio']
    prefix = re.match(r"(bix|gg|[a-zA-Z])", name)
    subdir = prefix.group(1) if prefix else "number"
    root = f"https://media.merriam-webster.com/audio/prons/en/us/{fmt}"
    return QUrl(f"{root}/{subdir}/{name}.{fmt}")
def getFirstSound(definition: list[Entry]) -> QUrl:
    """Return the URL of the first playable pronunciation in ``definition``.

    Every value of every entry is tested against the ``HeadWordInfo`` shape;
    for those that match, the ``prs`` list is scanned in order and the first
    ``sound`` that yields a valid URL wins.  An empty ``QUrl`` is returned
    when nothing is found.
    """
    # Places that can hold pronunciations:
    # ahws, cats, dros, hwi, ins, ri, sdsense, sen, sense, uros, vrs
    for entry in definition:
        for value in entry.values():
            info = trycast(HeadWordInfo, value)
            if info is None or 'prs' not in info:
                continue
            for pron in info['prs']:
                if 'sound' not in pron:
                    continue
                candidate = soundUrl(pron['sound'])
                if candidate.isValid():
                    return candidate
    return QUrl()
def do_prs(prs: list[Pronunciation]) -> list[Fragment]:
    """Turn a list of pronunciation dicts into display fragments.

    For each pronunciation, in order: an optional leading label (``l``
    followed by the separator), the written pronunciation ``mw`` with the
    audio URL attached when a ``sound`` is present, and an optional trailing
    label (the separator followed by ``l2``).  The separator is the ``pun``
    value, or a single space when ``pun`` is absent.  Every fragment uses
    the label font and the subdued colour.
    """
    labelFont = trycast(QFont, Word._resources['fonts']['label'])
    assert labelFont is not None
    # The link colour is looked up and validated here but not used below.
    link = trycast(QColor, Word._resources['colors']['link'])
    assert link is not None
    subdued = trycast(QColor, Word._resources['colors']['subdued'])
    assert subdued is not None

    result: list[Fragment] = []
    for pron in prs:
        separator = pron['pun'] if 'pun' in pron else ' '
        if 'l' in pron:
            result.append(Fragment(pron['l'] + separator, labelFont, color=subdued))
        written = Fragment(pron['mw'], labelFont, color=subdued)
        if 'sound' in pron:
            written.setAudio(soundUrl(pron['sound']))
        result.append(written)
        if 'l2' in pron:
            result.append(Fragment(separator + pron['l2'], labelFont, color=subdued))
    return result
def getDef(definition: list[Entry]) -> list[Line]:
    """Build the display lines for the entries of a looked-up word.

    Only entries that share the first entry's base id are rendered.  The
    base id is ``meta['id']`` with any ``:N`` suffix removed, compared
    case-insensitively.  Each such entry contributes two lines:

    1. a header line: the headword with its ``*`` break markers removed,
       any alternate headwords (``ahws``), an "N of M" badge when the entry
       has a ``hom`` key, and the label ``fl`` when present;
    2. a pronunciation line: the headword with ``*`` shown as a middle dot,
       followed by the fragments ``do_prs`` builds from ``hwi['prs']``
       (no fragments when ``prs`` is absent).

    Returns an empty list when ``definition`` is empty.

    NOTE(review): the ``def`` part of each entry is not rendered yet.
    """
    lines: list[Line] = []
    if not definition:
        return lines
    #
    # Pull the fonts for ease of use
    #
    headerFont = trycast(QFont, Word._resources['fonts']['header'])
    assert headerFont is not None
    textFont = trycast(QFont, Word._resources['fonts']['text'])
    assert textFont is not None
    labelFont = trycast(QFont, Word._resources['fonts']['label'])
    assert labelFont is not None
    #
    # Pull the colors for ease of use
    #
    baseColor = trycast(QColor, Word._resources['colors']['base'])
    assert baseColor is not None
    subduedColor = trycast(QColor, Word._resources['colors']['subdued'])
    assert subduedColor is not None

    def baseId(entry: Entry) -> str:
        """Return the entry id without its ':N' suffix, lower-cased."""
        return entry['meta']['id'].split(':')[0].lower()

    #
    # Select the entries that belong to the same word as the first one.
    # Both sides of the comparison use the base id, so "word:1" and
    # "word:2" are grouped together.
    #
    wanted = baseId(definition[0])
    matching = [entry for entry in definition if baseId(entry) == wanted]
    entries = len(matching)
    # Count from 1 over the selected entries only, so the badge reads
    # "1 of 3", "2 of 3", ...
    for count, entry in enumerate(matching, start=1):
        #
        # Create the First line from the hwi, [ahws] and fl
        #
        line = Line()
        hwi = trycast(HeadWordInfo, entry['hwi'])
        assert hwi is not None
        hw = hwi['hw'].replace('*', '')
        line.addFragment(Fragment(hw, headerFont, color=baseColor))
        if 'ahws' in entry:
            ahws = trycast(list[HeadWord], entry['ahws'])
            assert ahws is not None
            for ahw in ahws:
                hw = ahw['hw'].replace('*', '')
                line.addFragment(Fragment(', ' + hw, headerFont, color=baseColor))
        if 'hom' in entry:
            frag = Fragment(f"{count} of {entries} ", textFont, color=baseColor)
            frag.setBackground(QColor(Qt.GlobalColor.gray))
            line.addFragment(frag)
        if 'fl' in entry:
            line.addFragment(Fragment(entry['fl'], labelFont, color=baseColor))
        lines.append(line)
        #
        # Next is the pronunciation.
        # While 'prs' is optional, the headword is not, so the line always
        # starts with the headword and the pronunciations follow if any.
        #
        line = Line()
        hw = hwi['hw'].replace('*', '\u00b7')
        line.addFragment(Fragment(hw + ' ', textFont, color=subduedColor))
        for frag in do_prs(hwi.get('prs', [])):
            line.addFragment(frag)
        lines.append(line)
    return lines