"""Merriam-Webster dictionary source.

Fetches an entry from the Merriam-Webster Collegiate JSON API, converts the
API's ``[tag, payload]`` arrays into ``Pair`` dictionaries and renders the
result as a list of ``Line`` objects made of ``Fragment`` objects.
"""
import json
import re
from typing import Any, Literal, NotRequired, TypedDict, cast

from PyQt6.QtCore import QEventLoop, QUrl, Qt
from PyQt6.QtGui import QColor, QFont
from PyQt6.QtNetwork import QNetworkRequest
from trycast import trycast

from lib.utils import Resources
from lib.definition import Line, Fragment

registration = {
    'source': 'mw',
    'name': 'Merriam-Webster',
    'language': 'en-us',
}

API = "https://www.dictionaryapi.com/api/v3/references/collegiate/json/{word}?key={key}"
# NOTE(review): the API key is hard-coded in source; consider loading it from
# configuration instead of committing it.
key = "51d9df34-ee13-489e-8656-478c215e846c"


class Meta(TypedDict):
    """Shape of the ``meta`` member of a ``Definition``."""
    id: str
    uuid: str
    sort: str
    src: str
    section: str
    stems: list[str]
    offensive: bool


class Sound(TypedDict):
    """Audio reference; ``audio`` is the base file name used by soundUrl()."""
    audio: str
    ref: str
    stat: str


class Pronunciation(TypedDict):
    """One entry of a ``prs`` list, as consumed by do_prs()."""
    mw: str
    l: NotRequired[str]       # label rendered before the pronunciation
    l2: NotRequired[str]      # label rendered after the pronunciation
    pun: NotRequired[str]     # punctuation joining label and pronunciation
    sound: NotRequired[Sound]


class SubSource(TypedDict):
    source: NotRequired[str]
    aqdate: NotRequired[str]


class AttributionOfQuote(TypedDict):
    auth: NotRequired[str]
    source: NotRequired[str]
    aqdate: NotRequired[str]
    subsource: NotRequired[SubSource]


class VerbalIllustration(TypedDict):
    """One entry of a ``vis`` list: text ``t`` plus optional attribution."""
    t: str
    aq: NotRequired[AttributionOfQuote]


class HeadWordInformation(TypedDict):
    hw: str
    prs: NotRequired[list[Pronunciation]]


class AlternanteHeadword(TypedDict):
    hw: str
    psl: NotRequired[str]


class Variant(TypedDict):
    va: str
    vl: NotRequired[str]
    prs: NotRequired[list[Pronunciation]]
    spl: NotRequired[str]


# Functional syntax is required because 'if' is a Python keyword.
Inflection = TypedDict('Inflection', {
    'if': NotRequired[str],
    'ifc': NotRequired[str],
    'il': NotRequired[str],
    'prs': NotRequired[list[Pronunciation]],
    'spl': NotRequired[str]
})


class CrossReferenceTarget(TypedDict):
    cxl: str
    cxr: NotRequired[str]
    cxt: str
    cxn: NotRequired[str]


class CognateCrossRef(TypedDict):
    cxl: str
    cxtis: list[CrossReferenceTarget]


class Pair(TypedDict):
    """A ``[tag, payload]`` array rewritten as a dictionary by make_pairs()."""
    objType: str
    obj: Any


class DividedSense(TypedDict):
    sd: str
    dt: list[list[Pair]]
    et: NotRequired[list[Pair]]
    ins: NotRequired[list[Inflection]]
    lbs: NotRequired[list[str]]
    prs: NotRequired[list[Pronunciation]]
    sgram: NotRequired[str]
    sls: NotRequired[list[str]]
    vrs: NotRequired[list[Variant]]


class Sense(TypedDict):
    dt: list[list[Pair]]
    et: NotRequired[list[Pair]]
    ins: NotRequired[list[Inflection]]
    lbs: NotRequired[list[str]]
    prs: NotRequired[list[Pronunciation]]
    sdsense: NotRequired[DividedSense]
    sgram: NotRequired[str]
    sls: NotRequired[list[str]]
    sn: NotRequired[str]
    vrs: NotRequired[list[Variant]]


class TruncatedSense(Sense):
    pass


class BindingSubstitutePair(TypedDict):
    objType: Literal['bs']
    obj: Sense


class SensePair(TypedDict):
    objType: Literal['sense']
    obj: Sense


class DefinitionSection(TypedDict):
    vd: NotRequired[str]
    sls: NotRequired[list[str]]
    sseq: Any  # list[list[Pair]]


# Functional syntax is required because 'def' is a Python keyword.
Definition = TypedDict('Definition', {
    'meta': Meta,
    'hom': NotRequired[int],
    'hwi': HeadWordInformation,
    'ahws': NotRequired[list[AlternanteHeadword]],
    'vrs': NotRequired[list[Variant]],
    # getDef() treats the functional label as optional, so the schema must
    # not reject entries that lack it.
    'fl': NotRequired[str],
    'lbs': NotRequired[list[str]],
    'sls': NotRequired[list[str]],
    'ins': NotRequired[list[Inflection]],
    'cxs': NotRequired[list[CognateCrossRef]],
    'def': list[DefinitionSection],
})


def make_pairs(src: list[Any]) -> list[Pair]:
    """Turn a flat ``[tag, payload, tag, payload, ...]`` list into Pairs.

    Items are consumed two at a time; a trailing unpaired item is ignored.
    Payloads that are lists are accepted as-is; every other pair is validated
    with trycast and an AssertionError is raised (after printing the offending
    pair) when validation fails.
    """
    result: list[Pair] = []
    # Feeding the same iterator to zip() twice yields consecutive items
    # as (tag, payload) tuples.
    items = iter(src)
    for obj_type, payload in zip(items, items):
        candidate = {
            'objType': obj_type,
            'obj': payload,
        }
        if isinstance(payload, list):
            result.append(cast(Pair, candidate))
            continue
        checked = trycast(Pair, candidate)
        if checked is None:
            print(obj_type, type(payload), json.dumps(payload, indent=2))
        assert checked is not None
        result.append(checked)
    return result


# Tags whose ``[tag, payload]`` arrays restructure() converts into Pairs.
Elements = [
    'dt', 'sen', 'bs', 'pseq', 'snot', 't', 'text', 'vis', 'sens', 'uns',
    'sense'
]

# Dictionary keys whose values are plain data (e.g. lists of strings) and
# must never be read as tagged arrays.  Without this, a string list that
# happens to start with a tag name -- such as meta.stems == ["text", "texts"]
# -- would be turned into Pairs and fail validation in getDef().
# 'shortdef' is presumably a list of plain strings too; nothing in this
# module reads it.
_PLAIN_KEYS = frozenset({'meta', 'lbs', 'sls', 'shortdef'})


def restructure(obj: Any) -> Any:
    """Recursively rewrite tagged arrays inside *obj* as Pair dictionaries.

    A list whose first item is a string found in ``Elements`` is converted
    with make_pairs() and its payloads are restructured in turn.  Other lists
    and dictionaries are rebuilt with their members restructured; values
    stored under ``_PLAIN_KEYS`` are passed through untouched.  Anything else
    is returned unchanged.
    """
    if isinstance(obj, list):
        if not obj:
            return []
        if isinstance(obj[0], str) and obj[0] in Elements:
            pairs = make_pairs(obj)
            for pair in pairs:
                payload = pair['obj']
                if isinstance(payload, list):
                    pair['obj'] = [restructure(item) for item in payload]
                elif isinstance(payload, dict):
                    pair['obj'] = {
                        k: restructure(v) for k, v in payload.items()
                    }
            return pairs
        return [restructure(item) for item in obj]
    if isinstance(obj, dict):
        return {
            k: (v if k in _PLAIN_KEYS else restructure(v))
            for k, v in obj.items()
        }
    return obj


class WordType(TypedDict):
    word: str
    source: str
    definition: Any


def fetch(word: str) -> WordType:
    """Download the raw API response for *word*.

    The request is issued through ``Resources.nam`` and this function blocks
    in a local event loop until the reply signals ``finished``.

    Returns:
        A WordType whose ``definition`` is the decoded JSON document.

    Raises:
        json.JSONDecodeError: when the reply body is not valid JSON (for
            example an empty body).
    """
    request = QNetworkRequest()
    request.setUrl(QUrl(API.format(word=word, key=key)))
    request.setTransferTimeout(3000)
    reply = Resources.nam.get(request)
    assert reply is not None
    try:
        # Do not enter the loop for a reply that already finished: its
        # finished signal would never arrive and the loop would not exit.
        if not reply.isFinished():
            loop = QEventLoop()
            reply.finished.connect(loop.quit)
            loop.exec()
        content = reply.readAll()
    finally:
        # The caller owns the reply object; release it once it has been read.
        reply.deleteLater()
    data = json.loads(content.data().decode('utf-8'))
    return {
        'word': word,
        'source': 'mw',
        'definition': data,
    }


def soundUrl(sound: Sound, fmt='ogg') -> QUrl:
    """Create a URL from a PRS structure.

    The sub-directory is ``bix`` or ``gg`` when the audio name starts with
    one of those, the first character when it is an ASCII letter, and
    ``number`` otherwise.
    """
    base = f"https://media.merriam-webster.com/audio/prons/en/us/{fmt}"
    audio = sound['audio']
    m = re.match(r"(bix|gg|[a-zA-Z])", audio)
    subdir = m.group(1) if m else "number"
    return QUrl(f"{base}/{subdir}/{audio}.{fmt}")


def getFirstSound(definition: Any) -> QUrl:
    """Return the URL of the first pronunciation sound in *definition*.

    Every dictionary-valued member of every entry is inspected for a ``prs``
    list.  Non-dictionary entries and members (strings, integers such as
    ``hom``, lists) are skipped.  An empty QUrl is returned when no valid
    sound URL is found.
    """
    # ahws, cats, dros, hwi, ins, ri, sdsense, sen, sense, uros, vrs
    for entry in definition:
        if not isinstance(entry, dict):
            continue
        for value in entry.values():
            # Only a dictionary can carry 'prs'; testing "'prs' in value" on
            # an int raises TypeError and on a str is a substring test.
            if not isinstance(value, dict) or 'prs' not in value:
                continue
            for pr in value['prs']:
                if 'sound' in pr:
                    url = soundUrl(pr['sound'])
                    if url.isValid():
                        return url
    return QUrl()


def do_prs(prs: list[Pronunciation] | None) -> list[Fragment]:
    """Render a pronunciation list as fragments.

    Each pronunciation yields an optional leading label (``l``), the ``mw``
    text (linked to its audio when ``sound`` is present), a single space and
    an optional trailing label (``l2``).

    Raises:
        AssertionError: when *prs* is None.
    """
    assert prs is not None
    r = Resources()
    font = r.labelFont
    linkColor = r.linkColor
    subduedColor = r.subduedColor
    frags: list[Fragment] = []
    for pr in prs:
        pun = pr['pun'] if 'pun' in pr else ' '
        if 'l' in pr:
            frags.append(
                Fragment(pr['l'] + pun, r.italicFont, color=subduedColor)
            )
        frag = Fragment(pr['mw'], font, color=subduedColor)
        if 'sound' in pr:
            frag.setAudio(soundUrl(pr['sound']))
            frag.setColor(linkColor)
        frags.append(frag)
        frags.append(Fragment(' ', r.phonicFont))
        if 'l2' in pr:
            frags.append(
                Fragment(pun + pr['l2'], font, color=subduedColor)
            )
    return frags


def do_aq(aq: AttributionOfQuote | None) -> list[Line]:
    """Render a quote attribution; currently produces no lines.

    Raises:
        AssertionError: when *aq* is None.
    """
    assert aq is not None
    return []


def do_vis(vis: list[VerbalIllustration] | None, indent=0) -> list[Line]:
    """Render verbal illustrations, one line each, in the subdued color.

    *indent* is applied to each fragment only when it is greater than zero.

    Raises:
        AssertionError: when *vis* (or a contained ``aq``) fails validation.
    """
    assert vis is not None
    r = Resources()
    lines: list[Line] = []
    for vi in vis:
        frag = Fragment(vi['t'], r.textFont, color=r.subduedColor)
        if indent > 0:
            frag.setIndent(indent)
        line = Line()
        line.addFragment(frag)
        lines.append(line)
        if 'aq' in vi:
            lines += do_aq(trycast(AttributionOfQuote, vi['aq']))
    return lines


def do_uns(uns: list[list[list[Pair]]] | None,
           indent: int) -> tuple[list[Fragment], list[Line]]:
    """Render usage notes.

    ``text`` pairs become fragments prefixed with an arrow, ``vis`` pairs
    become lines via do_vis(); other pair types except ``ri`` are ignored.

    Raises:
        AssertionError: when *uns* is None.
        NotImplementedError: when an ``ri`` pair is encountered.
    """
    assert uns is not None
    r = Resources()
    frags: list[Fragment] = []
    lines: list[Line] = []
    for note in uns:
        for entry in note:
            for pair in entry:
                kind = pair['objType']
                if kind == 'text':
                    frag = Fragment('\u2192 ' + pair['obj'], r.textFont,
                                    color=r.baseColor)
                    frag.setIndent(indent)
                    frags.append(frag)
                elif kind == 'vis':
                    lines += do_vis(
                        trycast(list[VerbalIllustration], pair['obj']),
                        indent)
                elif kind == 'ri':
                    raise NotImplementedError("NO ri")
    return (frags, lines)


def do_dt(dt: list[list[Pair]] | None,
          indent: int) -> tuple[list[Fragment], list[Line]]:
    """Render defining text.

    A ``text`` pair is returned as a fragment when it is the very first pair
    and as a line of its own otherwise; ``vis`` and ``uns`` pairs are
    delegated to do_vis() and do_uns().

    Raises:
        AssertionError: when *dt* is None.
        NotImplementedError: for any other pair type (the pair is printed
            first).
    """
    assert dt is not None
    r = Resources()
    frags: list[Fragment] = []
    lines: list[Line] = []
    first = True
    for entry in dt:
        for pair in entry:
            kind = pair['objType']
            if kind == 'text':
                frag = Fragment(pair['obj'], r.textFont, color=r.baseColor)
                frag.setIndent(indent)
                if first:
                    frags.append(frag)
                else:
                    line = Line()
                    line.addFragment(frag)
                    lines.append(line)
            elif kind == 'vis':
                lines += do_vis(
                    trycast(list[VerbalIllustration], pair['obj']), indent)
            elif kind == 'uns':
                (newFrags, newLines) = do_uns(
                    trycast(list[list[list[Pair]]], pair['obj']), indent)
                frags += newFrags
                lines += newLines
            else:
                print(json.dumps(pair, indent=2))
                raise NotImplementedError(
                    f"Unknown or unimplimented element {pair['objType']}")
            first = False
    return (frags, lines)


def do_sense(sense: Sense | None,
             indent: int = 3) -> tuple[list[Fragment], list[Line]]:
    """Render one sense and return (fragments, lines).

    Handles ``dt``, ``sdsense`` and ``sls``; ``sn`` is skipped.  A None
    *sense* yields two empty lists.

    Raises:
        NotImplementedError: for any other key (key and value are printed
            first).
        AssertionError: when a member fails validation.
    """
    if sense is None:
        return ([], [])
    r = Resources()
    lines: list[Line] = []
    frags: list[Fragment] = []
    for k, v in sense.items():
        if k == 'sn':
            continue
        if k == 'dt':
            (newFrags, newLines) = do_dt(
                trycast(list[list[Pair]], sense['dt']), indent)
            frags += newFrags
            lines += newLines
        elif k == 'sdsense':
            # XXX - This needs to expand to handle et, ins, lbs, prs, sgram,
            # sls, vrs
            sdsense = trycast(DividedSense, v)
            assert sdsense is not None
            frag = Fragment(sdsense['sd'] + ' ', r.italicFont,
                            color=r.baseColor)
            frag.setIndent(indent)
            line = Line()
            line.addFragment(frag)
            (newFrags, newLines) = do_dt(
                trycast(list[list[Pair]], sdsense['dt']), indent=indent)
            line.addFragment(newFrags)
            lines.append(line)
            lines += newLines
        elif k == 'sls':
            labels = trycast(list[str], v)
            assert labels is not None
            frag = Fragment(", ".join(labels) + ' ', r.boldFont,
                            color=r.subduedColor)
            frag.setIndent(indent)
            frag.setBackground(r.subduedBackground)
            frags.append(frag)
        else:
            print(k, v)
            raise NotImplementedError(
                f"Unknown or unimplimented element {k}")
    return (frags, lines)


def do_pseq(inner: int, outer: int,
            pseq: list[Any]) -> tuple[list[Fragment], list[Line]]:
    """Render a parenthesized sense sequence and return (fragments, lines).

    ``sense`` pairs are numbered "(1)", "(2)", ...  Until the first pair has
    been rendered, output goes to the returned fragments (so the caller can
    continue its current line); afterwards each numbered sense starts a line
    of its own.  *inner* and *outer* are accepted but not used.

    Raises:
        NotImplementedError: for pair types other than ``bs`` and ``sense``.
    """
    r = Resources()
    lines: list[Line] = []
    frags: list[Fragment] = []
    indent = 3  # XXX - Should this be a parameter passed in?
    count = 1
    newLine = False
    for entry in pseq:
        for pair in entry:
            kind = pair['objType']
            if kind == 'bs':
                (newFrags, newLines) = do_sense(
                    trycast(Sense, pair['obj']['sense']), indent=indent)
                frags += newFrags
                lines += newLines
                newLine = True
            elif kind == 'sense':
                number = Fragment(f"({count})", r.textFont, color=r.baseColor)
                number.setIndent(indent)
                (newFrags, newLines) = do_sense(
                    trycast(Sense, pair['obj']), indent=indent + 1)
                if newLine:
                    line = Line()
                    line.addFragment(number)
                    line.addFragment(newFrags)
                    lines.append(line)
                else:
                    frags.append(number)
                    frags += newFrags
                    newLine = True
                lines += newLines
                count += 1
            else:
                raise NotImplementedError(
                    f"Unknown object type {pair['objType']}")
    return (frags, lines)


def do_sseq(sseq: list[list[list[Pair]]]) -> list[Line]:
    """Render a sense sequence as numbered (and lettered) lines.

    Each outer item is numbered from 1; when an outer item has more than one
    inner item, the inner items are lettered from 'a'.

    Raises:
        NotImplementedError: for ``sen`` pairs and unknown pair types.
    """
    r = Resources()
    lines: list[Line] = []
    for outer, item_o in enumerate(sseq):
        line = Line()
        frag = Fragment(str(outer + 1), r.boldFont, color=r.baseColor)
        frag.setIndent(1)
        line.addFragment(frag)
        for inner, item_i in enumerate(item_o):
            indent = 2
            if len(item_o) > 1:
                frag = Fragment(chr(ord('a') + inner), r.boldFont,
                                color=r.baseColor)
                frag.setIndent(2)
                line.addFragment(frag)
                indent = 3
            for pair in item_i:
                objType = pair['objType']
                if objType == 'sense':
                    (frags, newlines) = do_sense(
                        trycast(Sense, pair['obj']), indent=indent)
                elif objType == 'sen':
                    raise NotImplementedError("sen unimplimented")
                elif objType == 'pseq':
                    (frags, newlines) = do_pseq(inner, outer, pair['obj'])
                elif objType == 'bs':
                    (frags, newlines) = do_sense(
                        trycast(Sense, pair['obj']['sense']), indent=indent)
                else:
                    raise NotImplementedError(
                        f"Unknown object[{objType}] for \n"
                        f"{json.dumps(pair['obj'], indent=2)}")
                # Finish the current line with the returned fragments, then
                # start a fresh one for whatever follows.
                line.addFragment(frags)
                lines.append(line)
                line = Line()
                lines += newlines
    return lines


def do_ins(inflections: list[Inflection] | None) -> list[Fragment]:
    """Render inflections as fragments.

    Each inflection shows ``ifc`` when present, otherwise ``if``.  An
    inflection label (``il``) is rendered in italics *before* the inflection
    that carries it; inflections without a label are separated from the
    previous one by "; ".  (Per the Merriam-Webster JSON format the label
    belongs to the inflection it is attached to, e.g.
    ``[{"if": "a"}, {"il": "or", "if": "b"}]`` reads "a or b".)

    Raises:
        AssertionError: when *inflections* is None.
        ValueError: when an inflection has neither ``if`` nor ``ifc``.
        NotImplementedError: when an inflection carries ``spl``.
    """
    assert inflections is not None
    r = Resources()
    frags: list[Fragment] = []
    for index, inflection in enumerate(inflections):
        if 'ifc' in inflection:
            text = inflection['ifc']
        elif 'if' in inflection:
            text = inflection['if']
        else:
            raise ValueError(f"Missing 'if' or 'ifc' in {inflection}")
        if 'il' in inflection:
            lead = ' ' if index > 0 else ''
            frags.append(Fragment(lead + inflection['il'] + ' ',
                                  font=r.italicFont, color=r.baseColor))
        elif index > 0:
            frags.append(Fragment('; ', font=r.boldFont, color=r.baseColor))
        frags.append(Fragment(text, r.boldFont, color=r.baseColor))
        if 'prs' in inflection:
            frags += do_prs(trycast(list[Pronunciation], inflection['prs']))
        if 'spl' in inflection:
            raise NotImplementedError(
                f"We haven't implimented 'spl' for inflection: {inflection}")
    return frags


def do_ets(ets: list[list[Pair]] | None) -> list[Line]:
    """Render etymology pairs, one line per pair.

    Raises:
        AssertionError: when *ets* is None.
        NotImplementedError: for pair types other than ``text`` and
            ``et_snote``.
    """
    # NOTE(review): 'et_snote' is not listed in Elements, so restructure()
    # does not turn such arrays into Pairs -- confirm that branch is
    # reachable.
    assert ets is not None
    r = Resources()
    lines: list[Line] = []
    for et in ets:
        for pair in et:
            kind = pair['objType']
            if kind == 'text':
                text = pair['obj']
            elif kind == 'et_snote':
                text = 'Note: ' + pair['obj']
            else:
                raise NotImplementedError(
                    f"Unknown key {pair['objType']} in et")
            line = Line()
            line.addFragment(Fragment(text, r.textFont, color=r.baseColor))
            lines.append(line)
    return lines


def do_def(entry: DefinitionSection) -> list[Line]:
    """Render one definition section: optional verb divider, then senses.

    Raises:
        AssertionError: when *entry* is None.
        KeyError: when *entry* has no ``sseq``.
    """
    assert entry is not None
    r = Resources()
    lines: list[Line] = []
    if 'vd' in entry:
        line = Line()
        line.addFragment(
            Fragment(entry['vd'], r.italicFont, color=r.linkColor)
        )
        lines.append(line)
    #
    # sseq is required
    #
    lines += do_sseq(entry['sseq'])
    return lines


# Top-level entry keys getDef() knows about; anything else is reported.
_KNOWN_ENTRY_KEYS = frozenset({
    'meta', 'hom', 'hwi', 'ahws', 'fl', 'def', 'ins', 'prs', 'et', 'date',
    'shortdef',
})


def getDef(defines: Any) -> list[Line]:
    """Render a raw API response as display lines.

    Only entries whose ``meta.id`` (lower-cased, text before the first ':')
    matches that of the first entry are rendered.  For each one the output
    holds a header line (headword, alternate headwords, "N of M" when there
    are several entries, functional label), a pronunciation line, an optional
    inflection line and the definition sections.  Etymologies are collected
    and appended at the end under an "Etymology" heading.

    Also registers parseText() with ``Line.setParseText``.

    Returns:
        The rendered lines; an empty list when there are no entries.

    Raises:
        AssertionError: when the restructured data does not validate as
            ``list[Definition]``.
    """
    Line.setParseText(parseText)
    workList = trycast(list[Definition], restructure(defines))
    assert workList is not None
    if not workList:
        return []
    r = Resources()
    lines: list[Line] = []

    def base_id(item: Definition) -> str:
        return item['meta']['id'].lower().split(':')[0]

    #
    # No need to figure it out each time it is used
    #
    primaryId = base_id(workList[0])
    entries = 0
    # How many primary entries carry each functional label.
    uses: dict[str, int] = {}
    for entry in workList:
        if base_id(entry) != primaryId:
            continue
        entries += 1
        if 'fl' in entry:
            uses[entry['fl']] = uses.get(entry['fl'], 0) + 1
    # How many entries with each functional label have been rendered so far.
    used: dict[str, int] = dict.fromkeys(uses, 0)

    ets: list[Line] = []
    shown = 0
    for work in workList:
        #
        # Skip entries which are not part of the primary definition
        #
        if base_id(work) != primaryId:
            continue
        # Number by rendered entries, not by list position, so skipped
        # entries cannot produce labels such as "3 of 2".
        shown += 1
        #
        # Create the First line from the hwi, [ahws] and fl
        #
        line = Line()
        hwi = trycast(HeadWordInformation, work['hwi'])
        assert hwi is not None
        headword = re.sub(r'\*', '', hwi['hw'])
        line.addFragment(Fragment(headword, r.headerFont, color=r.baseColor))
        if 'ahws' in work:
            ahws = trycast(list[AlternanteHeadword], work['ahws'])
            assert ahws is not None
            for ahw in ahws:
                alt = re.sub(r'\*', '', ahw['hw'])
                line.addFragment(
                    Fragment(', ' + alt, r.headerFont, color=r.baseColor))
        if entries > 1:
            frag = Fragment(f" {shown} of {entries} ", r.textFont,
                            color=r.subduedColor)
            frag.setBackground(r.subduedBackground)
            line.addFragment(frag)
        if 'fl' in work:
            text = work['fl']
            used[text] += 1
            if uses[text] > 1:
                text += f' ({used[text]})'
            line.addFragment(Fragment(text, r.labelFont, color=r.baseColor))
        lines.append(line)

        #
        # Next is the pronunciation.
        # While 'prs' is optional, the headword is not. This gets us what we
        # want.
        #
        line = Line()
        if hwi['hw'].find('*') >= 0:
            syllables = re.sub(r'\*', '\u00b7', hwi['hw'])
            line.addFragment(
                Fragment(syllables + ' ', r.textFont, color=r.subduedColor))
        if 'prs' in hwi:
            line.addFragment(do_prs(trycast(list[Pronunciation], hwi['prs'])))
        lines.append(line)

        if 'ins' in work:
            line = Line()
            line.addFragment(
                do_ins(trycast(list[Inflection], work['ins'])))
            lines.append(line)

        sections = trycast(list[DefinitionSection], work['def'])
        assert sections is not None
        for section in sections:
            try:
                lines += do_def(section)
            except NotImplementedError as e:
                print(e)

        if 'et' in work:
            # Entries without a functional label fall back to the headword
            # as the etymology sub-heading.
            if 'fl' in work:
                heading = f"{work['fl']} ({used[work['fl']]})"
            else:
                heading = headword
            line = Line()
            line.addFragment(
                Fragment(heading, r.labelFont, color=r.baseColor)
            )
            ets.append(line)
            ets += do_ets(trycast(list[list[Pair]], work['et']))

        for k in work.keys():
            if k not in _KNOWN_ENTRY_KEYS:
                #raise NotImplementedError(f"Unknown key {k} in work")
                print(f"Unknown key {k} in work")

    if ets:
        line = Line()
        line.addFragment(
            Fragment('Etymology', r.labelFont, color=r.baseColor)
        )
        lines.append(line)
        lines += ets
    return lines


# Tokens that open a span closed by a matching "{/token}".
_BLOCK_TOKENS = frozenset({
    "b", "inf", "it", "sc", "sup", "phrase", "parahw", "gloss", "qword",
    "wi", "dx", "dx_def", "dx_ety", "ma",
})
# Self-contained "{token|field|...}" cross-reference tokens.
_LINK_TOKENS = frozenset({
    "a_link", "d_link", "dxt", "et_link", "i_link", "mat", "sx",
})
# Self-contained tokens replaced by a single character.
_QUOTE_TOKENS = {"ldquo": "\u201c", "rdquo": "\u201d"}


def parseText(frag: Fragment) -> list[Fragment]:
    """Split *frag* into fragments according to its ``{...}`` markup tokens.

    *frag* itself is mutated and reused as the "remaining text" fragment.
    Parsing stops at the end of the text, at a closing token ``{/...}`` (left
    in place for the recursive caller to verify), or at a ``{`` that has no
    closing ``}`` (kept as literal text); in each case the remainder is the
    last fragment returned.  Fragments flagged ``asis()`` are returned
    unchanged.

    Raises:
        NotImplementedError: for unknown tokens or a block token without its
            matching close.
    """
    org = frag.text()
    if frag.asis():
        return [frag]
    #
    # Get the fonts we might need.
    # We can't use Resources() because we don't know the original font.
    #
    textFont = QFont(frag.font())
    textFont.setWeight(QFont.Weight.Normal)
    textFont.setItalic(False)
    textFont.setCapitalization(QFont.Capitalization.MixedCase)
    boldFont = QFont(textFont)
    boldFont.setBold(True)
    italicFont = QFont(textFont)
    italicFont.setItalic(True)
    smallCapsFont = QFont(textFont)
    smallCapsFont.setCapitalization(QFont.Capitalization.SmallCaps)
    scriptFont = QFont(textFont)
    # NOTE(review): pixelSize() is presumably -1 for fonts sized in points,
    # which makes this a request for size 0 -- confirm the intended sizing.
    scriptFont.setPixelSize(int(scriptFont.pixelSize()/4))
    boldItalicFont = QFont(boldFont)
    boldItalicFont.setItalic(True)
    boldSmallCapsFont = QFont(smallCapsFont)
    boldSmallCapsFont.setBold(True)
    capsFont = QFont(textFont)
    capsFont.setCapitalization(QFont.Capitalization.AllUppercase)
    #
    # Default color:
    #
    baseColor = frag.color()
    r = Resources()
    results: list[Fragment] = []
    while True:
        text = frag.text()
        start = text.find("{")
        if start < 0:
            results.append(frag)
            return results
        if start > 0:
            # Emit the plain text before the token as its own fragment.
            newFrag = Fragment(frag)
            newFrag.setText(text[:start])
            results.append(newFrag)
            frag.setText(text[start:])
            continue
        #
        # Start == 0
        #

        #
        # If the token is an end-token, return now.
        #
        if text.startswith("{/"):
            results.append(frag)
            return results
        #
        # extract this token
        #
        end = text.find("}")
        if end < 0:
            # Unterminated token: nothing can be consumed, so keep the rest
            # as literal text instead of re-parsing it forever.
            results.append(frag)
            return results
        token = text[1:end]
        frag.setText(text[end + 1:])
        oldFont = QFont(frag.font())

        if token == "bc":
            newFrag = Fragment(": ", boldFont, color=baseColor)
            newFrag.setIndent(frag.indent())
            results.append(newFrag)
            continue

        if token in _QUOTE_TOKENS:
            # The character becomes ordinary text at the head of the
            # remainder and is emitted by the next iteration.
            frag.setText(_QUOTE_TOKENS[token] + frag.text())
            continue

        if token in _BLOCK_TOKENS:
            if token == "b":
                frag.setFont(boldFont)
            elif token in ["it", "qword", "wi"]:
                frag.setFont(italicFont)
            elif token == "sc":
                frag.setFont(smallCapsFont)
            elif token in ["inf", "sup"]:
                frag.setFont(scriptFont)
            elif token == "phrase":
                frag.setFont(boldItalicFont)
            elif token == "parahw":
                frag.setFont(boldSmallCapsFont)
            elif token == "gloss":
                frag.setText("[" + frag.text())
            elif token in ["dx", "dx_ety"]:
                frag.setText("\u2014" + frag.text())
            elif token == "ma":
                frag.setText("\u2014 more at " + frag.text())
            elif token == "dx_def":
                frag.setText("(" + frag.text())
            else:
                raise NotImplementedError(f"Unknown block marker: {token}")
            # Parse the span's content; the recursion hands back the
            # remainder (which must start with our close token) as its last
            # fragment.
            results += parseText(frag)
            frag = results.pop()
            frag.setFont(oldFont)
            text = frag.text()
            if not text.startswith("{/" + token + "}"):
                raise NotImplementedError(
                    f"No matching close for {token} in {org}"
                )
            if token == "gloss":
                results[-1].setText(results[-1].text() + "]")
            elif token == "dx_def":
                results[-1].setText(results[-1].text() + ")")
            end = text.find("}")
            frag.setText(text[end + 1:])
            continue
        #
        # These are codes that include all information within the token
        #
        fields = token.split("|")
        token = fields[0]
        if token in _LINK_TOKENS:
            # Pad so that omitted trailing fields read as empty strings
            # instead of raising IndexError.
            fields += [""] * (4 - len(fields))
            htext = fields[1]
            oldFont = QFont(frag.font())
            target = "word"
            if token == "a_link":
                wref = fields[1]
            elif token in ["d_link", "et_link", "mat", "sx", "i_link"]:
                wref = fields[1] if fields[2] == "" else fields[2]
                if token == "i_link":
                    frag.setFont(italicFont)
                elif token == "sx":
                    frag.setFont(capsFont)
            else:  # token == "dxt"
                if fields[3] == "illustration":
                    wref = fields[2]
                    target = "article"
                elif fields[3] == "table":
                    wref = fields[2]
                    target = "table"
                elif fields[3] != "":
                    wref = fields[3]
                    target = "sense"
                else:
                    wref = fields[1]
                    target = "word"
            newFrag = Fragment(frag)
            newFrag.setText(htext)
            newFrag.setWRef(wref)
            newFrag.setTarget(target)
            newFrag.setColor(r.linkColor)
            results.append(newFrag)
            frag.setFont(oldFont)
            continue
        raise NotImplementedError(
            f"Unable to locate a known token {token} in {org}"
        )