# 408 lines, 12 KiB, Python
import json
import logging
import os
import re
from typing import Any, NamedTuple, NotRequired, TypedDict
from urllib.parse import quote

from PyQt6.QtCore import QEventLoop, QUrl, Qt
from PyQt6.QtGui import QColor
from PyQt6.QtNetwork import QNetworkRequest
from trycast import trycast

from lib.definition import Line, Fragment
from lib.utils import Resources
|
|
|
|
# Registration record for this dictionary source.  'source' is the same short
# identifier that fetch() stores in its result; 'name' is presumably the
# human-readable label shown to the user (verify against the plug-in loader).
registration = {
    'source': 'mw',
    'name': 'Merriam-Webster',
}

# Request URL template; fetch() substitutes {word} and {key}.
API = "https://www.dictionaryapi.com/api/v3/references/collegiate/json/{word}?key={key}"

# API key sent with every request.
# NOTE(review): a credential committed to source control can be harvested and
# revoked.  MW_API_KEY in the environment takes precedence when it is set to a
# non-empty value; the historical key remains only as a fallback so existing
# installations keep working.
key = os.environ.get("MW_API_KEY") or "51d9df34-ee13-489e-8656-478c215e846c"
|
|
|
|
class TextTuple(NamedTuple):
    """A (tag, text) record whose tag is expected to be ``'text'``."""

    type_: str  # 'text'
    text: str
|
|
class TTuple(NamedTuple):
    """A (tag, text) record whose tag is expected to be ``'t'``."""

    type_: str  # 't'
    text: str
|
|
class VerbalIllustration(TypedDict):
|
|
t: str
|
|
aq: str
|
|
|
|
class Sound(TypedDict):
|
|
audio: str
|
|
ref: str
|
|
stat: str
|
|
|
|
class Pronunciation(TypedDict):
    """One written pronunciation, optionally with labels and audio.

    Field use as seen in do_prs: ``l`` is emitted before the pronunciation and
    ``l2`` after it, each joined with ``pun`` (a single space when ``pun`` is
    absent); ``sound`` turns the pronunciation into an audio link.
    """

    mw: str  # the written pronunciation; do_prs reads it unconditionally
    l: NotRequired[str]
    l2: NotRequired[str]
    pun: NotRequired[str]
    sound: NotRequired[Sound]
|
|
|
|
class Meta(TypedDict):
    """The ``meta`` member of an entry.

    Only ``id`` is read in this module: getDef lower-cases it and keeps the
    part before the first ':' to decide which entries belong to the looked-up
    headword.  The meaning of the remaining fields is not established by this
    file; see the Merriam-Webster JSON documentation.
    """

    id: str
    uuid: str
    sort: str
    src: str
    section: str
    stems: list[str]
    offensive: bool
|
|
|
|
class HeadWordInfo(TypedDict):
    """The ``hwi`` member of an entry: headword plus optional pronunciations."""

    # Headword; may contain '*' marks, which getDef strips for the header
    # line and shows as a middle dot on the pronunciation line.
    hw: str
    prs: NotRequired[list[Pronunciation]]
|
|
|
|
class HeadWord(TypedDict):
    """An alternate headword, as found in an entry's ``ahws`` list."""

    hw: str  # may contain '*' marks, which getDef strips before display
    prs: NotRequired[list[Pronunciation]]
    psl: NotRequired[str]  # not read in this module
|
|
|
|
class Variant(TypedDict):
    """A variant spelling of a headword or sense.

    Only ``va`` is mandatory.  With every key required, a variant that lacks
    a label or pronunciation would fail trycast validation and take the whole
    enclosing Sense with it.
    NOTE(review): optionality follows the Merriam-Webster JSON documentation.
    """

    va: str
    vl: NotRequired[str]
    prs: NotRequired[list[Pronunciation]]
    spl: NotRequired[str]
|
|
|
|
# An inflected form attached to an entry or sense.
#
# Declared with the functional syntax (as Entry is) because the JSON key is
# the Python keyword "if".  A class attribute named ``if_`` can never match
# that key, so every sense carrying inflections would fail validation.
# NOTE(review): all members are optional per the Merriam-Webster JSON
# documentation; confirm against live responses.
Inflection = TypedDict(
    'Inflection',
    {
        'if': NotRequired[str],
        'ifc': NotRequired[str],
        'il': NotRequired[str],
        'prs': NotRequired[list[Pronunciation]],
        'spl': NotRequired[str],
    },
)
|
|
|
|
class DividedSense(TypedDict):
    """The ``sdsense`` member of a Sense.

    Only ``sd`` is mandatory.  With every key required, a Sense whose
    ``sdsense`` lacked any one of them would fail trycast validation and be
    dropped by do_sense.
    NOTE(review): optionality and the ``dt`` member follow the
    Merriam-Webster JSON documentation; confirm against live responses.
    """

    sd: str
    dt: NotRequired[list[list]]
    et: NotRequired[list[Any]]  # Not full
    ins: NotRequired[list[Inflection]]
    lbs: NotRequired[list[str]]
    prs: NotRequired[list[Pronunciation]]
    sgram: NotRequired[str]
    sls: NotRequired[list[str]]
    vrs: NotRequired[list[Variant]]
|
|
|
|
class BioGraphicalNameWrap(TypedDict):
    """Name parts of a biographical entry.

    Not referenced elsewhere in this module.
    NOTE(review): every key is declared required here, unlike Pronunciation;
    confirm whether real responses always carry all four.
    """

    pname: str
    sname: str
    altname: str
    prs: list[Pronunciation]
|
|
|
|
class CalledAlsoTarget(TypedDict):
    """One target listed in a CalledAlso note.

    NOTE(review): every key is declared required here; confirm whether real
    responses always carry all five.
    """

    cat: str
    catref: str
    pn: str
    prs: list[Pronunciation]
    psl: str
|
|
|
|
class CalledAlso(TypedDict):
    """A "called also" note: introductory text plus its targets.

    Not referenced elsewhere in this module.
    """

    intro: str
    cats: list[CalledAlsoTarget]
|
|
|
|
class RunInWrap(TypedDict):
    """A run-in entry word with its pronunciations, text and variants.

    Not referenced elsewhere in this module.
    NOTE(review): every key is declared required here; confirm whether real
    responses always carry all four.
    """

    rie: str
    prs: list[Pronunciation]
    text: str
    vrs: list[Variant]
|
|
|
|
class Sense(TypedDict):
    """A single sense of a definition.

    do_sense reads ``dt`` (the defining text) and ``sn`` (the sense number);
    the other members are validated but not yet rendered.
    """

    dt: list[list]  # not full
    # Widened from list[str]: a nested-list etymology would otherwise fail
    # trycast validation and silently drop the sense.
    # NOTE(review): confirm the exact shape against the API documentation.
    et: NotRequired[list[Any]]
    ins: NotRequired[list[Inflection]]
    lbs: NotRequired[list[str]]
    prs: NotRequired[list[Pronunciation]]
    sdsense: NotRequired[DividedSense]
    sgram: NotRequired[str]
    sls: NotRequired[list[str]]
    sn: NotRequired[str]
    vrs: NotRequired[list[Variant]]
|
|
|
|
class Definition(TypedDict):
|
|
sseq: list[list[list[Any]]]
|
|
vd: NotRequired[str]
|
|
|
|
class Pair(TypedDict):
    """A (tag, object) pair produced by make_pairs."""

    objType: str  # tag such as 'sense', 'pseq', 'bs' or 'sen'
    # Payload for the tag.  ``dict[str, Any]`` is included so that wrapper
    # objects which are not themselves a Sense still validate instead of
    # tripping make_pairs.
    # NOTE(review): a 'bs' payload is presumably a {"sense": {...}} wrapper;
    # confirm against the API documentation.
    obj: list[Sense] | Sense | str | list[VerbalIllustration] | list[Any] | dict[str, Any]
|
|
|
|
# One dictionary entry as returned by the API.  The functional TypedDict
# syntax is needed because the key 'def' is a Python keyword and cannot be
# written as a class attribute.
Entry = TypedDict(
    'Entry',
    {
        'meta': Meta,
        'hom': NotRequired[str],
        'hwi': HeadWordInfo,
        'ahws': NotRequired[list[HeadWord]],
        'vrs': NotRequired[list[Variant]],
        'fl': NotRequired[str],  # functional label; getDef shows it on the header line
        'def': list[Definition],
    },
)
|
|
class WordType(TypedDict):
|
|
word: str
|
|
source: str
|
|
definition: dict[str, Any]
|
|
|
|
def make_pairs(src: list[Any]) -> list[Pair]:
    """Group a flat ``[tag, obj, tag, obj, ...]`` list into Pair records.

    Args:
        src: Flat list alternating tags and their objects.  A trailing
            element without a partner is ignored.

    Returns:
        One Pair per consecutive (tag, obj) couple, in order.

    Raises:
        ValueError: If a couple does not validate as a Pair.
    """
    result: list[Pair] = []
    # Even positions hold the tags, odd positions the matching objects.
    for obj_type, obj in zip(src[0::2], src[1::2]):
        pair = trycast(Pair, {'objType': obj_type, 'obj': obj})
        if pair is None:
            # An explicit raise survives ``python -O``, unlike an assert.
            raise ValueError(f"Malformed pair with tag {obj_type!r}")
        result.append(pair)
    return result
|
|
|
|
def fetch(word: str) -> WordType:
    """Look up *word* through the dictionary API and return the decoded reply.

    The call blocks in a local event loop until the network reply finishes;
    the request carries a 3000 ms transfer timeout.

    Args:
        word: The headword to look up.

    Returns:
        A WordType with the word, the source id 'mw' and the decoded JSON.

    Raises:
        json.JSONDecodeError: If the reply body is not valid JSON (for
            example when it is empty).
    """
    request = QNetworkRequest()
    # Percent-encode the headword so that spaces, '/', '?' or '#' inside it
    # cannot change the path or query of the request URL.
    request.setUrl(QUrl(API.format(word=quote(word, safe=''), key=key)))
    request.setTransferTimeout(3000)
    reply = Resources.nam.get(request)
    assert reply is not None
    try:
        loop = QEventLoop()
        reply.finished.connect(loop.quit)
        # Do not enter the loop if the reply has already completed; its
        # 'finished' signal would never arrive to stop it.
        if not reply.isFinished():
            loop.exec()
        payload = reply.readAll().data()
    finally:
        # The caller of QNetworkAccessManager.get() owns the reply object.
        reply.deleteLater()
    data = json.loads(payload.decode('utf-8'))
    return {
        'word': word,
        'source': 'mw',
        'definition': data,
    }
|
|
|
|
def soundUrl(sound: Sound, fmt: str = 'ogg') -> QUrl:
    """Build the media URL for a pronunciation's Sound record.

    Args:
        sound: Record whose 'audio' member is the base file name.
        fmt: Audio format; used both as a path component and as the file
            extension.

    Returns:
        ``<base>/<fmt>/<subdir>/<audio>.<fmt>`` where the sub-directory is
        'bix' or 'gg' when the name starts with that prefix, otherwise the
        first character when it is an ASCII letter, otherwise 'number'.
    """
    base = f"https://media.merriam-webster.com/audio/prons/en/us/{fmt}"
    audio = sound['audio']
    # Alternation order matters: the two special prefixes are tried before
    # the single-letter fallback.
    m = re.match(r"(bix|gg|[a-zA-Z])", audio)
    subdir = m.group(1) if m else "number"
    return QUrl(base + f"/{subdir}/" + audio + f".{fmt}")
|
|
|
|
def getFirstSound(definition: list[Entry]) -> QUrl:
    """Return the URL of the first playable pronunciation in *definition*.

    Every value of every entry is tested; those that validate as a
    HeadWordInfo have their pronunciations scanned in order.

    Returns:
        The first valid sound URL found, or an empty (invalid) QUrl.
    """
    # ahws, cats, dros, hwi, ins, ri, sdsense, sen, sense, uros, vrs
    for entry in definition:
        for value in entry.values():
            hwi = trycast(HeadWordInfo, value)
            if hwi is None:
                continue
            for pr in hwi.get('prs', []):
                if 'sound' not in pr:
                    continue
                url = soundUrl(pr['sound'])
                if url.isValid():
                    return url
    return QUrl()
|
|
|
|
def do_prs(hwi: HeadWordInfo) -> list[Fragment]:
    """Turn the pronunciations of a headword into display fragments.

    For each pronunciation, in order: an optional leading label ('l'
    followed by the separator), the written pronunciation itself, and an
    optional trailing label (the separator followed by 'l2').  The separator
    is 'pun' when given, otherwise one space.  A pronunciation with a sound
    gets its audio URL attached and is drawn in the link colour; everything
    else uses the subdued colour.

    Returns:
        The fragments in display order; empty when *hwi* has no 'prs'.
    """
    r = Resources()
    font = r.labelFont
    linkColor = r.linkColor
    subduedColor = r.subduedColor

    frags: list[Fragment] = []
    for pr in hwi.get('prs', []):
        pun = pr.get('pun', ' ')
        if 'l' in pr:
            frags.append(Fragment(pr['l'] + pun, font, color=subduedColor))
        frag = Fragment(pr['mw'], font, color=subduedColor)
        if 'sound' in pr:
            frag.setAudio(soundUrl(pr['sound']))
            frag.setColor(linkColor)
        frags.append(frag)
        if 'l2' in pr:
            frags.append(Fragment(pun + pr['l2'], font, color=subduedColor))
    return frags
|
|
|
|
def do_sense(sense: Sense | None) -> tuple[list[Fragment], list[Line]]:
    """Render the defining text of one sense.

    Args:
        sense: A validated Sense, or None when validation failed upstream.

    Returns:
        ``(fragments, lines)``.  The fragment list is currently always
        empty.  Each 'text' item of ``sense['dt']`` becomes one Line holding
        a single Fragment in the text font and base colour; other item kinds
        are not rendered yet.  None yields two empty lists.
    """
    if sense is None:
        return ([], [])
    lines: list[Line] = []
    frags: list[Fragment] = []
    r = Resources()

    logger = logging.getLogger(__name__)
    if logger.isEnabledFor(logging.DEBUG):
        # Former debug print, now silent unless DEBUG logging is enabled.
        logger.debug('%s\n\n%s', sense.get('sn', ''), json.dumps(sense['dt'], indent=2))

    dt = sense['dt']
    if all(isinstance(item, (list, tuple)) for item in dt):
        # The shape Sense declares: every element is itself a
        # [kind, payload] list.  Pairing up consecutive elements, as was
        # done before, can never yield a (str, payload) couple for it.
        items = [(item[0], item[1]) for item in dt if len(item) >= 2]
    else:
        # Flat [kind, payload, kind, payload, ...] form, kept for callers
        # that bypass validation.
        items = list(zip(dt[0::2], dt[1::2]))

    for kind, payload in items:
        if kind == 'text' and isinstance(payload, str):
            line = Line()
            line.addFragment(Fragment(payload, r.textFont, color=r.baseColor))
            lines.append(line)
    return (frags, lines)
|
|
|
|
def do_pseq(outer: int,
            inner: int,
            pseq: list[list[Any]] | None) -> tuple[list[Fragment], list[Line]]:
    """Render a parenthesised sense sequence.

    Args:
        outer: Index of the enclosing outer item (currently unused).
        inner: Index of the enclosing inner item (currently unused).
        pseq: List of flat ``[tag, obj, ...]`` lists, or None when
            validation failed upstream.

    Returns:
        The concatenated ``(fragments, lines)`` of every 'bs' and 'sense'
        member, in order.  None yields two empty lists, mirroring how
        do_sense treats a sense that failed validation.

    Raises:
        Exception: If a member carries any tag other than 'bs' or 'sense'.
    """
    if pseq is None:
        return ([], [])
    lines: list[Line] = []
    frags: list[Fragment] = []
    for entry in pseq:
        for pair in make_pairs(entry):
            obj = pair['obj']
            if pair['objType'] == 'bs':
                # NOTE(review): a 'bs' payload presumably wraps its sense as
                # {"sense": {...}}; unwrap it when it is not a sense itself.
                if isinstance(obj, dict) and 'dt' not in obj and 'sense' in obj:
                    obj = obj['sense']
            elif pair['objType'] != 'sense':
                raise Exception(f"Unknown object type {pair['objType']}")
            (newFrags, newLines) = do_sense(trycast(Sense, obj))
            frags += newFrags
            lines += newLines
    return (frags, lines)
|
|
|
|
def do_sseq(sseq: list[list[list[Any]]]) -> list[Line]:
    """Render a sense sequence into display lines.

    Each inner item gets its own Line, labelled with a letter ('a', 'b',
    ...); the first inner item of an outer item is also prefixed with the
    1-based outer number.  Fragments returned for the item are added to that
    Line, and any Lines returned for it follow directly after.

    Args:
        sseq: Outer items -> inner items -> flat ``[tag, obj, ...]`` lists.

    Returns:
        The Lines in display order.

    Raises:
        NotImplementedError: For 'sen' and 'bs' members, which are not
            rendered yet.
        Exception: For any other unrecognised tag.
    """
    lines: list[Line] = []
    r = Resources()
    for outer, item_o in enumerate(sseq):
        for inner, item_i in enumerate(item_o):
            # A fresh Line per inner item.  Re-using one Line for the whole
            # outer item put the same object in the result once per
            # sub-sense.
            line = Line()
            if inner == 0:
                line.addFragment(
                    Fragment(str(outer + 1), r.boldFont, color=r.baseColor)
                )
            line.addFragment(
                Fragment(chr(ord('a') + inner), r.boldFont, color=r.baseColor)
            )
            pairs = make_pairs(item_i)
            trailing: list[Line] = []
            for pair in pairs:
                objType = pair['objType']
                if objType == 'sense':
                    (frags, newlines) = do_sense(trycast(Sense, pair['obj']))
                elif objType == 'sen':
                    raise NotImplementedError("sen unimplimented")
                elif objType == 'pseq':
                    # Members are flat [tag, obj] lists, not Pair dicts, so
                    # they are validated as plain lists; do_pseq pairs them.
                    pseq = trycast(list[list[Any]], pair['obj'])
                    # Argument order follows do_pseq(outer, inner, pseq).
                    (frags, newlines) = do_pseq(outer, inner, pseq)
                elif objType == 'bs':
                    raise NotImplementedError("bs unimplimented")
                else:
                    raise Exception(f"Unknown object[{objType}] for \n{json.dumps(pair['obj'],indent=2)}")
                for frag in frags:
                    line.addFragment(frag)
                trailing += newlines
            if pairs:
                lines.append(line)
                lines += trailing
    return lines
|
|
|
|
def do_def(entry: Definition) -> list[Line]:
    """Render one element of an entry's ``def`` list.

    Args:
        entry: The definition section; it must validate as a Definition.

    Returns:
        An optional Line for the 'vd' text (italic font, link colour)
        followed by the Lines produced by do_sseq for the sense sequence.
    """
    r = Resources()
    lines: list[Line] = []
    assert trycast(Definition, entry) is not None
    if 'vd' in entry:
        divider = Line()
        divider.addFragment(
            Fragment(entry['vd'], r.italicFont, color=r.linkColor)
        )
        lines.append(divider)
    #
    # sseq is required
    #
    lines += do_sseq(entry['sseq'])
    return lines
|
|
|
|
def getDef(definition: list[Entry]) -> list[Line]:
    """Render the entries for the looked-up headword into display lines.

    Only entries whose id (lower-cased, up to the first ':') equals that of
    the first entry are rendered.  Each produces:

    * a header Line: headword and alternate headwords with '*' removed, an
      "n of m" badge when several entries match, and the functional label
      (numbered when the same label occurs more than once);
    * a pronunciation Line, when there is anything to show: the headword
      with '*' shown as a middle dot, followed by the do_prs fragments;
    * the Lines of every definition section, via do_def.

    Args:
        definition: Decoded API response.

    Returns:
        The Lines in display order; empty when there is nothing to render.
    """
    # NOTE(review): the response may presumably be empty, or a list of
    # suggestion strings when the word is unknown; neither contains an entry.
    if not definition or not isinstance(definition[0], dict):
        return []

    r = Resources()
    lines: list[Line] = []
    #
    # Pull the fonts for ease of use
    #
    headerFont = r.headerFont
    textFont = r.textFont
    labelFont = r.labelFont
    #
    # Pull the colors for ease of use
    #
    baseColor = r.baseColor
    subduedColor = r.subduedColor

    def base_id(entry: Entry) -> str:
        """Return the entry id without its ':n' suffix, lower-cased."""
        return entry['meta']['id'].lower().split(':')[0]

    #
    # No need to figure it out each time it is used
    #
    wanted = base_id(definition[0])
    matching = [entry for entry in definition if base_id(entry) == wanted]
    entries = len(matching)

    # How often each functional label occurs among the matching entries.
    uses: dict[str, int] = {}
    for entry in matching:
        if 'fl' in entry:
            uses[entry['fl']] = uses.get(entry['fl'], 0) + 1
    used: dict[str, int] = dict.fromkeys(uses, 0)

    # Number the matching entries themselves, so the badge stays within
    # "1..entries" even when other entries sit in between.
    for position, entry in enumerate(matching, start=1):
        #
        # Create the First line from the hwi, [ahws] and fl
        #
        line = Line()
        hwi = trycast(HeadWordInfo, entry['hwi'])
        assert hwi is not None
        line.addFragment(
            Fragment(hwi['hw'].replace('*', ''), headerFont, color=baseColor)
        )
        if 'ahws' in entry:
            ahws = trycast(list[HeadWord], entry['ahws'])
            assert ahws is not None
            for ahw in ahws:
                alt = ahw['hw'].replace('*', '')
                line.addFragment(Fragment(', ' + alt, headerFont, color=baseColor))
        if entries > 1:
            frag = Fragment(f" {position} of {entries} ", textFont, color=subduedColor)
            frag.setBackground(QColor(Qt.GlobalColor.gray))
            line.addFragment(frag)
        if 'fl' in entry:
            label = entry['fl']
            used[label] += 1
            text = label
            if uses[label] > 1:
                text += f' ({used[label]})'
            line.addFragment(Fragment(text, labelFont, color=baseColor))
        lines.append(line)

        #
        # Next is the pronunciation.
        # While 'prs' is optional, the headword is not. This gets us what we want.
        #
        line = Line()
        if '*' in hwi['hw']:
            hw = hwi['hw'].replace('*', '\u00b7')
            line.addFragment(Fragment(hw + ' ', textFont, color=subduedColor))
        for frag in do_prs(hwi):
            line.addFragment(frag)
        if len(line.getLine()) > 0:
            lines.append(line)

        defines = trycast(list[Definition], entry['def'])
        assert defines is not None
        for define in defines:
            lines += do_def(define)
    return lines
|