Files
esl-reader/plugins/merriam-webster.py
Christopher T. Johnson ad5904f3ae checkpoint
2024-04-09 11:45:56 -04:00

408 lines
12 KiB
Python

from PyQt6.QtGui import QColor
from trycast import trycast
import json
import re
from typing import Any, NamedTuple, NotRequired, TypedDict
from PyQt6.QtCore import QEventLoop, QUrl, Qt
from PyQt6.QtNetwork import QNetworkRequest
from lib.utils import Resources
from lib.definition import Line, Fragment
# Registration record for this plugin: the short source id ('mw', the same
# value fetch() stamps on its results) and a human-readable name.
# Presumably read by the application's plugin loader — confirm.
registration = {
    'source': 'mw',
    'name': 'Merriam-Webster',
}
# URL template for a lookup; fetch() fills in {word} and {key}.
API = "https://www.dictionaryapi.com/api/v3/references/collegiate/json/{word}?key={key}"
# NOTE(review): the API key is hard-coded in source. Consider loading it from
# configuration or the environment so it is not committed with the code.
key = "51d9df34-ee13-489e-8656-478c215e846c"
class TextTuple(NamedTuple):
    """A (tag, text) record whose tag is expected to be 'text'."""
    type_: str  # 'text'
    text: str
class TTuple(NamedTuple):
    """A (tag, text) record whose tag is expected to be 't'."""
    type_: str  # 't'
    text: str
class VerbalIllustration(TypedDict):
    """One verbal illustration (example usage) attached to a definition."""
    t: str
    # NOTE(review): declared as a required string; confirm against the API
    # whether 'aq' is always present and whether it is really a plain string.
    aq: str
class Sound(TypedDict):
    """Audio reference attached to a pronunciation."""
    audio: str  # base file name; soundUrl() builds the media URL from it
    ref: str
    stat: str
class Pronunciation(TypedDict):
    """One pronunciation of a headword, as rendered by do_prs()."""
    mw: str  # the written pronunciation text that is displayed
    l: NotRequired[str]  # label shown before the pronunciation
    l2: NotRequired[str]  # label shown after the pronunciation
    pun: NotRequired[str]  # separator between label and pronunciation (do_prs uses ' ' if absent)
    sound: NotRequired[Sound]  # when present the pronunciation becomes a playable link
class Meta(TypedDict):
    """Metadata block of a dictionary entry."""
    # getDef() lower-cases this and keeps the part before the first ':' to
    # decide which entries belong to the same headword.
    id: str
    uuid: str
    sort: str
    src: str
    section: str
    stems: list[str]
    offensive: bool
class HeadWordInfo(TypedDict):
    """Headword of an entry plus its optional pronunciations."""
    hw: str  # may contain '*' markers; getDef() strips them or shows them as a middle dot
    prs: NotRequired[list[Pronunciation]]
class HeadWord(TypedDict):
    """An alternate headword (an element of an entry's 'ahws' list)."""
    hw: str  # may contain '*' markers, which getDef() strips before display
    prs: NotRequired[list[Pronunciation]]
    psl: NotRequired[str]
class Variant(TypedDict):
    """One variant spelling/styling of a headword (an element of 'vrs').

    Only the variant text itself is guaranteed by the API; the label,
    pronunciations and sense-specific label are optional, so they are
    declared NotRequired to let real payloads validate.
    """
    va: str  # the variant text
    vl: NotRequired[str]  # variant label
    prs: NotRequired[list[Pronunciation]]
    spl: NotRequired[str]
# An inflection of a headword (an element of 'ins').
#
# The functional TypedDict form is used because the API's key for the
# inflected form is 'if', a Python keyword that cannot be written as a class
# attribute. Every member is optional in the API, hence NotRequired.
Inflection = TypedDict(
    'Inflection',
    {
        'if': NotRequired[str],    # the inflected form
        'ifc': NotRequired[str],   # inflection cutback
        'il': NotRequired[str],    # inflection label
        'prs': NotRequired[list[Pronunciation]],
        'spl': NotRequired[str],
    },
)
class DividedSense(TypedDict):
    """A divided sense ('sdsense') nested inside a Sense.

    Only the sense divider is guaranteed; the remaining members are optional
    in the API and are therefore NotRequired so real payloads validate.
    """
    sd: str  # the sense divider text
    dt: NotRequired[list[list[Any]]]  # defining text, same shape as Sense.dt
    et: NotRequired[list[list[Any]]]  # list of [tag, content] elements
    ins: NotRequired[list[Inflection]]
    lbs: NotRequired[list[str]]
    prs: NotRequired[list[Pronunciation]]
    sgram: NotRequired[str]
    sls: NotRequired[list[str]]
    vrs: NotRequired[list[Variant]]
class BioGraphicalNameWrap(TypedDict):
    """Name parts and pronunciations of a biographical entry.

    NOTE(review): every key is declared required; confirm against the API
    whether all of them are always present.
    """
    pname: str
    sname: str
    altname: str
    prs: list[Pronunciation]
class CalledAlsoTarget(TypedDict):
    """One target listed in a CalledAlso note.

    NOTE(review): every key is declared required; confirm against the API
    whether all of them are always present.
    """
    cat: str
    catref: str
    pn: str
    prs: list[Pronunciation]
    psl: str
class CalledAlso(TypedDict):
    """A "called also" note: introductory text plus its list of targets."""
    intro: str
    cats: list[CalledAlsoTarget]
class RunInWrap(TypedDict):
    """A run-in entry with its pronunciations, text and variants.

    NOTE(review): every key is declared required; confirm against the API
    whether all of them are always present.
    """
    rie: str
    prs: list[Pronunciation]
    text: str
    vrs: list[Variant]
class Sense(TypedDict):
    """A single sense of a definition.

    'dt' (the defining text) is the only required member. Both 'dt' and 'et'
    are lists whose elements are themselves ``[tag, content]`` lists.
    """
    dt: list[list]  # not full
    et: NotRequired[list[list[Any]]]
    ins: NotRequired[list[Inflection]]
    lbs: NotRequired[list[str]]
    prs: NotRequired[list[Pronunciation]]
    sdsense: NotRequired[DividedSense]
    sgram: NotRequired[str]
    sls: NotRequired[list[str]]
    sn: NotRequired[str]
    vrs: NotRequired[list[Variant]]
class Definition(TypedDict):
    """One element of an entry's 'def' list, rendered by do_def()."""
    sseq: list[list[list[Any]]]  # sense sequence; required, walked by do_sseq()
    vd: NotRequired[str]  # optional divider text shown on its own line before the senses
class Pair(TypedDict):
    """A tagged payload produced by make_pairs(): a type tag and its object."""
    objType: str  # tag that do_sseq()/do_pseq() dispatch on ('sense', 'pseq', 'bs', 'sen', ...)
    # NOTE(review): 'bs' and 'sen' payloads are presumably plain dicts that are
    # not complete Senses; if so they match none of the alternatives below and
    # the trycast in make_pairs() fails for them — confirm.
    obj: list[Sense]|Sense|str|list[VerbalIllustration]|list[Any]
# One dictionary entry as consumed by getDef() and getFirstSound().
# The functional TypedDict form is needed because the key 'def' is a Python
# keyword and cannot be declared as a class attribute.
Entry = TypedDict(
    'Entry',
    {
        'meta': Meta,
        'hom': NotRequired[str],
        'hwi': HeadWordInfo,
        'ahws': NotRequired[list[HeadWord]],
        'vrs': NotRequired[list[Variant]],
        'fl': NotRequired[str],  # label shown after the headword by getDef()
        'def': list[Definition],
    }
)
class WordType(TypedDict):
    """Result record returned by fetch()."""
    word: str  # the word that was looked up
    source: str  # id of the plugin that produced the record ('mw' here)
    # NOTE(review): fetch() stores the decoded JSON body here, while getDef()
    # takes a list of entries — confirm whether dict[str, Any] is the right
    # annotation for this plugin's payload.
    definition: dict[str, Any]
def make_pairs(src: list[Any]) -> list[Pair]:
    """Group a flat ``[tag, obj, tag, obj, ...]`` list into Pair records.

    Consecutive elements are taken two at a time; a trailing unpaired element
    is ignored. Each candidate is validated with trycast and an
    AssertionError is raised if it does not conform to Pair.
    """
    pairs: list[Pair] = []
    cursor = iter(src)
    # Zipping one iterator with itself yields non-overlapping pairs.
    for tag, payload in zip(cursor, cursor):
        candidate = trycast(Pair, {'objType': tag, 'obj': payload})
        assert candidate is not None
        pairs.append(candidate)
    return pairs
def fetch(word:str) -> WordType:
    """Look up *word* with the Merriam-Webster API and return the raw result.

    The request is issued through ``Resources.nam`` and this function blocks
    in a local QEventLoop until the reply finishes (transfer timeout 3000 ms).

    Args:
        word: The word to look up.

    Returns:
        A WordType whose 'definition' is the decoded JSON response body.

    Raises:
        AssertionError: If the network manager returns no reply object.
        json.JSONDecodeError: If the response body is not valid JSON (for
            example, an empty body after a network failure).
    """
    from urllib.parse import quote

    request = QNetworkRequest()
    # Percent-encode the word so characters such as '/', '?' or '#' cannot
    # change the structure of the request URL.
    url = QUrl(API.format(word=quote(word, safe=''), key=key))
    request.setUrl(url)
    request.setTransferTimeout(3000)
    reply = Resources.nam.get(request)
    assert reply is not None
    loop = QEventLoop()
    reply.finished.connect(loop.quit)
    loop.exec()
    try:
        content = reply.readAll()
        data = json.loads(content.data().decode('utf-8'))
    finally:
        # The caller owns the reply; without this every lookup leaves a
        # finished QNetworkReply alive for the lifetime of the manager.
        reply.deleteLater()
    return {
        'word': word,
        'source': 'mw',
        'definition': data,
    }
def soundUrl(sound:Sound, fmt='ogg') -> QUrl:
    """Build the media URL for a pronunciation's Sound record.

    The sub-directory is 'bix' or 'gg' when the audio name starts with one of
    those, otherwise the name's first ASCII letter, otherwise 'number'.
    *fmt* selects both the format directory and the file extension.
    """
    audio = sound['audio']
    prefix = re.match(r"(bix|gg|[a-zA-Z])", audio)
    subdir = prefix.group(1) if prefix else "number"
    return QUrl(
        f"https://media.merriam-webster.com/audio/prons/en/us/{fmt}"
        f"/{subdir}/{audio}.{fmt}"
    )
def getFirstSound(definition: list[Entry]) -> QUrl:
    """Return the URL of the first playable pronunciation in *definition*.

    Every value of every entry that validates as a HeadWordInfo is searched
    for a pronunciation carrying a 'sound'; the first one that yields a valid
    URL wins. An empty (invalid) QUrl is returned when none is found.
    """
    # Keys noted by the original author as places of interest:
    # ahws, cats, dros, hwi, ins, ri, sdsense, sen, sense, uros, vrs
    for entry in definition:
        for value in entry.values():
            candidate = trycast(HeadWordInfo, value)
            if candidate is None or 'prs' not in candidate:
                continue
            for pron in candidate['prs']:
                if 'sound' not in pron:
                    continue
                url = soundUrl(pron['sound'])
                if url.isValid():
                    return url
    return QUrl()
def do_prs(hwi: HeadWordInfo) -> list[Fragment]:
    """Render the pronunciations of a headword as display fragments.

    For each pronunciation: an optional leading label ('l'), the written
    pronunciation ('mw'), and an optional trailing label ('l2'). Labels are
    joined to the pronunciation with 'pun' (a single space when absent). A
    pronunciation that has a 'sound' gets its audio URL attached and is drawn
    in the link color; everything else uses the subdued color.

    Returns an empty list when the headword has no 'prs'.
    """
    res = Resources()
    labelFont = res.labelFont
    link = res.linkColor
    subdued = res.subduedColor
    if 'prs' not in hwi:
        return []

    out: list[Fragment] = []
    for pron in hwi['prs']:
        joiner = pron.get('pun', ' ')
        if 'l' in pron:
            out.append(Fragment(pron['l'] + joiner, labelFont, color=subdued))
        spoken = Fragment(pron['mw'], labelFont, color=subdued)
        if 'sound' in pron:
            spoken.setAudio(soundUrl(pron['sound']))
            spoken.setColor(link)
        out.append(spoken)
        if 'l2' in pron:
            out.append(Fragment(joiner + pron['l2'], labelFont, color=subdued))
    return out
def do_sense(sense: Sense|None) -> tuple[list[Fragment], list[Line]]:
    """Render the defining text of one sense.

    Args:
        sense: The sense to render, or None (e.g. a payload that failed
            validation), in which case nothing is rendered.

    Returns:
        A ``(fragments, lines)`` tuple. Fragments are meant to be appended to
        the caller's current line (none are produced yet); lines holds one
        Line per 'text' element of the sense's 'dt'. Other 'dt' element kinds
        are currently ignored.

    Raises:
        Exception: If a 'dt' element is not a ``[tag, content]`` pair with a
            string tag.
    """
    if sense is None:
        return ([], [])
    lines: list[Line] = []
    frags: list[Fragment] = []
    r = Resources()
    # 'dt' is already a list of [tag, content] pairs, so it is walked element
    # by element; pairing up adjacent elements would glue two unrelated
    # entries together and drop a lone one entirely.
    for element in sense['dt']:
        if len(element) != 2 or not isinstance(element[0], str):
            raise Exception(f"Malformed dt element {element!r}")
        tag, content = element
        if tag == 'text':
            line = Line()
            line.addFragment(
                Fragment(content, r.textFont, color=r.baseColor)
            )
            lines.append(line)
    return (frags, lines)
def do_pseq(outer: int,
            inner: int,
            pseq: list[list[Any]] | None) -> tuple[list[Fragment], list[Line]]:
    """Render a 'pseq' payload (a nested run of senses).

    Args:
        outer: Index of the enclosing numbered group (currently unused).
        inner: Index of the enclosing lettered item (currently unused).
        pseq: The payload: a list whose elements are ``[tag, object]`` lists,
            where tag is 'bs' or 'sense'.

    Returns:
        The concatenated ``(fragments, lines)`` of every contained sense.

    Raises:
        AssertionError: If *pseq* is None.
        Exception: If an element carries a tag other than 'bs' or 'sense'.
    """
    assert pseq is not None
    lines: list[Line] = []
    frags: list[Fragment] = []
    for entry in pseq:
        cursor = iter(entry)
        # Take the entry's members two at a time: (tag, object).
        for objType, obj in zip(cursor, cursor):
            if objType == 'bs':
                # A 'bs' object does not hold the sense fields itself; the
                # actual sense sits one level down under its 'sense' key.
                wrapped = obj.get('sense') if isinstance(obj, dict) else None
                sense = trycast(Sense, wrapped)
            elif objType == 'sense':
                sense = trycast(Sense, obj)
            else:
                raise Exception(f"Unknown object type {objType}")
            (newFrags, newLines) = do_sense(sense)
            frags += newFrags
            lines += newLines
    return (frags, lines)


def do_sseq(sseq:list[list[list[Any]]]) -> list[Line]:
    """Render a definition's sense sequence.

    Each outer element is a numbered group (1, 2, ...), and each of its
    members is a lettered ``[tag, object]`` item (a, b, ...). 'sense' and
    'pseq' items are rendered; 'sen' and 'bs' are not implemented yet.

    Raises:
        Exception: For 'sen', 'bs' and unknown tags.
        AssertionError: From make_pairs() if an item does not validate.
    """
    lines: list[Line] = []
    r = Resources()
    for outer, item_o in enumerate(sseq):
        line = Line()
        line.addFragment(
            Fragment(str(outer+1), r.boldFont, color=r.baseColor)
        )
        for inner, item_i in enumerate(item_o):
            line.addFragment(
                Fragment(chr(ord('a')+inner), r.boldFont, color=r.baseColor)
            )
            for pair in make_pairs(item_i):
                objType = pair['objType']
                if objType == 'sense':
                    (frags, newlines) = do_sense(trycast(Sense, pair['obj']))
                elif objType == 'sen':
                    raise Exception(f"sen unimplimented")
                elif objType == 'pseq':
                    # Hand the raw [tag, object] list over as-is: it is not a
                    # list of Pair dicts, so casting it to one always failed.
                    payload = pair['obj']
                    (frags, newlines) = do_pseq(
                        outer,
                        inner,
                        payload if isinstance(payload, list) else None,
                    )
                elif objType == 'bs':
                    raise Exception(f"bs unimplimented")
                else:
                    raise Exception(f"Unknown object[{objType}] for \n{json.dumps(pair['obj'],indent=2)}")
                for frag in frags:
                    line.addFragment(frag)
                # NOTE(review): `line` is shared by the whole numbered group,
                # so it is appended once per lettered item and accumulates
                # every letter — confirm whether a fresh Line per item is
                # what is wanted here.
                lines.append(line)
                lines += newlines
    return lines
def do_def(entry: Definition) -> list[Line]:
    """Render one element of an entry's 'def' list.

    When the optional 'vd' text is present it is emitted first on its own
    line (italic font, link color); the rendered sense sequence follows.

    Raises:
        AssertionError: If *entry* does not validate as a Definition.
    """
    res = Resources()
    assert trycast(Definition, entry) is not None

    heading: list[Line] = []
    if 'vd' in entry:
        divider = Line()
        divider.addFragment(
            Fragment(entry['vd'], res.italicFont, color = res.linkColor)
        )
        heading.append(divider)

    # 'sseq' is a required key, so it can be indexed directly.
    return heading + do_sseq(entry['sseq'])
def _base_id(entry: Entry) -> str:
    """Return the entry's meta id, lower-cased, up to the first ':'."""
    return entry['meta']['id'].lower().split(':')[0]


def getDef(definition: list[Entry]) -> list[Line]:
    """Render the entries for the looked-up headword as display lines.

    Only entries whose base id (see _base_id) equals that of the first entry
    are rendered. For each such entry the output is:

    1. a header line: headword, any alternate headwords, an "n of m" badge
       when more than one entry matches, and the 'fl' label (numbered when
       the same label occurs on several matching entries);
    2. a pronunciation line (syllable-marked headword plus pronunciations),
       omitted when it would be empty;
    3. the lines of every definition in the entry's 'def' list.

    Args:
        definition: The entries returned for one lookup.

    Returns:
        The rendered lines, or an empty list when *definition* is empty.

    Raises:
        AssertionError: If an entry's 'hwi', 'ahws' or 'def' fails validation.
    """
    lines:list[Line] = []
    if not definition:
        return lines
    r = Resources()
    #
    # Pull the fonts for ease of use
    #
    headerFont = r.headerFont
    textFont = r.textFont
    labelFont = r.labelFont
    #
    # Pull the colors for ease of use
    #
    baseColor = r.baseColor
    subduedColor = r.subduedColor
    #
    # Select the entries for this headword once, so the "n of m" badge counts
    # positions among them rather than positions in the full result list.
    #
    target = _base_id(definition[0])
    matching = [entry for entry in definition if _base_id(entry) == target]
    entries = len(matching)
    #
    # How often each label occurs, and how many have been shown so far
    #
    uses: dict[str,int] = {}
    for entry in matching:
        if 'fl' in entry:
            uses[entry['fl']] = uses.get(entry['fl'], 0) + 1
    used: dict[str, int] = dict.fromkeys(uses, 0)
    for position, entry in enumerate(matching, start=1):
        #
        # Create the First line from the hwi, [ahws] and fl
        #
        line = Line()
        hwi = trycast(HeadWordInfo, entry['hwi'])
        assert hwi is not None
        line.addFragment(
            Fragment(hwi['hw'].replace('*', ''), headerFont, color=baseColor)
        )
        if 'ahws' in entry:
            ahws = trycast(list[HeadWord], entry['ahws'])
            assert ahws is not None
            for ahw in ahws:
                hw = ahw['hw'].replace('*', '')
                line.addFragment(Fragment(', ' + hw, headerFont, color=baseColor))
        if entries > 1:
            frag = Fragment(f" {position} of {entries} ", textFont, color= subduedColor)
            frag.setBackground(QColor(Qt.GlobalColor.gray))
            line.addFragment(frag)
        if 'fl' in entry:
            text = entry['fl']
            used[text] += 1
            if uses[text] > 1:
                text += f' ({used[text]})'
            line.addFragment(Fragment(text, labelFont, color=baseColor))
        lines.append(line)
        #
        # Next is the pronunciation.
        # While 'prs' is optional, the headword is not. This gets us what we want.
        #
        line = Line()
        if '*' in hwi['hw']:
            # Show the '*' markers as middle dots.
            hw = hwi['hw'].replace('*', '\u00b7')
            line.addFragment(Fragment(hw + ' ', textFont, color=subduedColor))
        for frag in do_prs(hwi):
            line.addFragment(frag)
        if len(line.getLine()) > 0:
            lines.append(line)
        defines = trycast(list[Definition], entry['def'])
        assert defines is not None
        for define in defines:
            lines += do_def(define)
    return lines