esl-reader/plugins/merriam-webster.py

from PyQt6.QtGui import QColor
from trycast import trycast
import json
import re
from typing import Any, NamedTuple, NotRequired, TypedDict

from PyQt6.QtCore import QEventLoop, QUrl, Qt
from PyQt6.QtNetwork import QNetworkRequest
from lib.utils import Resources
from lib.definition import Line, Fragment

# Plugin registration record.  'source' is the same short tag that fetch()
# stamps on every result it returns.
# NOTE(review): presumably consumed by a plugin loader that is not visible in
# this file — confirm which keys it expects.
registration = {
    'source': 'mw',
    'name': 'Merriam-Webster',
}

# URL template for a single lookup; fetch() fills in {word} and {key}.
API = "https://www.dictionaryapi.com/api/v3/references/collegiate/json/{word}?key={key}"
# NOTE(review): the API key is committed in source.  Consider loading it from
# configuration or the environment so it can be rotated without a code change.
key = "51d9df34-ee13-489e-8656-478c215e846c"

class TextTuple(NamedTuple):
    """Two-field (tag, text) record.

    NOTE(review): not referenced anywhere else in the visible source.
    """
    type_: str # tag; per the original note the expected value is 'text'
    text: str
class TTuple(NamedTuple):
    """Two-field (tag, text) record.

    NOTE(review): not referenced anywhere else in the visible source.
    """
    type_: str # tag; per the original note the expected value is 't'
    text: str
class VerbalIllustration(TypedDict):
    t: str
    aq: str

class Sound(TypedDict):
    """Audio descriptor attached to a pronunciation.

    soundUrl() reads only 'audio' (the base file name) to build the media URL;
    'ref' and 'stat' are declared but not read in the visible source.
    """
    audio: str
    ref: str
    stat: str

class Pronunciation(TypedDict):
    """One pronunciation record from a 'prs' list.

    do_prs() renders 'mw' as the pronunciation text, places 'l' before it and
    'l2' after it, uses 'pun' as the separator between a label and the text
    (a single space when absent), and attaches 'sound' as playable audio.
    """
    mw: str
    l: NotRequired[str]
    l2: NotRequired[str]
    pun: NotRequired[str]
    sound: NotRequired[Sound]

class Meta(TypedDict):
    """The 'meta' member of an Entry.

    getDef() reads only 'id': the lower-cased text before the first ':' is
    used to decide which entries belong to the looked-up headword.  The other
    keys are declared but not read in the visible source.
    """
    id: str
    uuid: str
    sort: str
    src: str
    section: str
    stems: list[str]
    offensive: bool

class HeadWordInfo(TypedDict):
    """The 'hwi' member of an Entry: the headword and, optionally, its pronunciations.

    getDef() strips the '*' marks from 'hw' for the heading and shows them as
    U+00B7 middle dots on the pronunciation line; do_prs() renders 'prs'.
    """
    hw: str
    prs: NotRequired[list[Pronunciation]]

class HeadWord(TypedDict):
    """Element of Entry['ahws'] (alternate headwords).

    getDef() appends each 'hw' (with '*' removed) to the heading line,
    comma-separated.  'prs' and 'psl' are declared but not read in the
    visible source.
    """
    hw: str
    prs: NotRequired[list[Pronunciation]]
    psl: NotRequired[str]

class Variant(TypedDict):
    """One variant spelling from a ``vrs`` list.

    Only ``va`` (the variant itself) is always present.  The label,
    pronunciations and plural label are optional in API responses, so they
    must not be required here: a missing required key makes validation of the
    enclosing structure (e.g. a Sense) fail.
    """
    va: str
    vl: NotRequired[str]
    prs: NotRequired[list[Pronunciation]]
    spl: NotRequired[str]

# One inflection from an ``ins`` list.
#
# Declared with the functional syntax because the inflection itself is sent
# under the key 'if', which is a Python keyword and cannot be written as a
# class attribute (a field named ``if_`` never matches the JSON).  Every
# member is optional in API responses.
Inflection = TypedDict(
    'Inflection',
    {
        'if': NotRequired[str],
        'ifc': NotRequired[str],
        'il': NotRequired[str],
        'prs': NotRequired[list[Pronunciation]],
        'spl': NotRequired[str],
    }
)

class DividedSense(TypedDict):
    """The ``sdsense`` member of a Sense (a sense introduced by a divider).

    Only ``sd`` (the divider text, e.g. "also") is required; the remaining
    members are optional in API responses.  The declaration is not exhaustive.
    """
    sd: str
    # Etymology content is a list of [tag, value] lists rather than plain
    # strings, so the element type is left open.
    et: NotRequired[list[Any]]
    ins: NotRequired[list[Inflection]]
    lbs: NotRequired[list[str]]
    prs: NotRequired[list[Pronunciation]]
    sgram: NotRequired[str]
    sls: NotRequired[list[str]]
    vrs: NotRequired[list[Variant]]

class BioGraphicalNameWrap(TypedDict):
    """Name parts plus pronunciations for a biographical entry.

    NOTE(review): not referenced elsewhere in the visible source, and every
    key is declared required — confirm against real API data before using it
    for validation.
    """
    pname: str
    sname: str
    altname: str
    prs: list[Pronunciation]

class CalledAlsoTarget(TypedDict):
    """Element of CalledAlso['cats'].

    NOTE(review): not read anywhere in the visible source, and every key is
    declared required — confirm against real API data before using it for
    validation.
    """
    cat: str
    catref: str
    pn: str
    prs: list[Pronunciation]
    psl: str

class CalledAlso(TypedDict):
    """An introductory text plus a list of CalledAlsoTarget items.

    NOTE(review): not referenced elsewhere in the visible source.
    """
    intro: str
    cats: list[CalledAlsoTarget]

class RunInWrap(TypedDict):
    """Run-in entry word with its pronunciations, text and variants.

    NOTE(review): not referenced elsewhere in the visible source, and every
    key is declared required — confirm against real API data before using it
    for validation.
    """
    rie: str
    prs: list[Pronunciation]
    text: str
    vrs: list[Variant]

class Sense(TypedDict):
    """A single sense object (the payload of a "sense" item in ``sseq``).

    Only ``dt`` (the defining text) is required.  do_sense() reads ``dt`` and
    ``sn``; the other members are declared so that validation accepts them.
    The declaration is not exhaustive.
    """
    dt: list[list] # each element is a [tag, content] list; not full
    # Etymology content is a list of [tag, value] lists rather than plain
    # strings; typing it as list[str] would reject every sense that has one.
    et: NotRequired[list[Any]]
    ins: NotRequired[list[Inflection]]
    lbs: NotRequired[list[str]]
    prs: NotRequired[list[Pronunciation]]
    sdsense: NotRequired[DividedSense]
    sgram: NotRequired[str]
    sls: NotRequired[list[str]]
    sn: NotRequired[str]
    vrs: NotRequired[list[Variant]]

class Definition(TypedDict):
    """Element of Entry['def'].

    'sseq' is walked by do_sseq(); 'vd', when present, is rendered by do_def()
    as an italic line ahead of the senses.
    """
    sseq: list[list[list[Any]]]
    vd: NotRequired[str]

class Pair(TypedDict):
    """A (tag, object) pair as produced by make_pairs().

    ``objType`` is the tag ('sense', 'pseq', 'bs', 'sen', ...) and ``obj`` its
    payload.  The trailing ``dict[str, Any]`` member admits object payloads
    that are not a full Sense — for example the ``{"sense": {...}}`` wrapper
    of a 'bs' item or a 'sen' object without ``dt`` — so that they reach the
    tag dispatch instead of failing validation inside make_pairs().
    """
    objType: str
    obj: list[Sense]|Sense|str|list[VerbalIllustration]|list[Any]|dict[str, Any]

# One dictionary entry of an API response.
#
# Declared with the functional syntax because 'def' is a Python keyword and
# cannot be written as a class attribute.  getDef() reads 'meta', 'hwi',
# 'ahws', 'fl' and 'def'.
# NOTE(review): 'def' is declared required here — confirm that every entry
# the API returns actually carries it.
Entry = TypedDict(
    'Entry',
    {
        'meta': Meta,
        'hom': NotRequired[str],
        'hwi': HeadWordInfo,
        'ahws': NotRequired[list[HeadWord]],
        'vrs': NotRequired[list[Variant]],
        'fl': NotRequired[str],
        'def': list[Definition],
    }
)
class WordType(TypedDict):
    """Result record returned by fetch(): the word, the source tag and the decoded JSON.

    NOTE(review): fetch() stores whatever json.loads() returns in
    'definition', while getDef()/getFirstSound() take a list[Entry]; the
    dict[str, Any] annotation looks inaccurate — confirm and adjust.
    """
    word: str
    source: str
    definition: dict[str, Any]

def make_pairs(src: list[Any]) -> list[Pair]:
    """Group a flat ``[tag, object, tag, object, ...]`` list into Pair dicts.

    Elements are consumed two at a time; a trailing element without a partner
    is ignored.  Every pair is validated against Pair.

    Raises:
        AssertionError: if a pair does not validate as a Pair.
    """
    collected: list[Pair] = []
    # zip() over the same iterator twice yields consecutive, non-overlapping
    # (tag, object) tuples.
    walker = iter(src)
    for tag, payload in zip(walker, walker):
        checked = trycast(Pair, {'objType': tag, 'obj': payload})
        assert checked is not None
        collected.append(checked)
    return collected

def fetch(word:str) ->  WordType:
    """Look up ``word`` with the Merriam-Webster API and return the decoded reply.

    The call blocks in a local event loop until the network reply has
    finished (the transfer timeout is 3000 ms).

    Args:
        word: the word to look up; it is percent-encoded before being placed
            in the URL path, so spaces, '/', '?' or '#' cannot alter the
            request.

    Returns:
        A WordType whose 'definition' holds the decoded JSON body.

    Raises:
        json.JSONDecodeError: if the body is not valid JSON (for example an
            empty body after a failed or timed-out request).
    """
    from urllib.parse import quote

    request = QNetworkRequest()
    url = QUrl(API.format(word=quote(word, safe=''), key=key))
    request.setUrl(url)
    request.setTransferTimeout(3000)
    reply = Resources.nam.get(request)
    assert reply is not None
    loop = QEventLoop()
    reply.finished.connect(loop.quit)
    loop.exec()
    content = reply.readAll()
    # The caller of get() owns the reply; release it once the body has been
    # copied out so replies do not accumulate for the life of the manager.
    reply.deleteLater()
    data = json.loads(content.data().decode('utf-8'))
    return {
        'word': word,
        'source': 'mw',
        'definition': data,
    }

def soundUrl(sound:Sound, fmt='ogg') -> QUrl:
    """Build the media URL for a Sound record.

    The subdirectory is "bix" or "gg" when the audio name starts with one of
    those, otherwise the first character when it is an ASCII letter, and
    "number" for anything else.  ``fmt`` is used both as a path component and
    as the file extension.
    """
    base = f"https://media.merriam-webster.com/audio/prons/en/us/{fmt}"
    name = sound['audio']
    prefix = re.match(r"(bix|gg|[a-zA-Z])", name)
    subdir = prefix.group(1) if prefix else "number"
    return QUrl(f"{base}/{subdir}/{name}.{fmt}")

def getFirstSound(definition: list[Entry]) -> QUrl:
    """Return the URL of the first playable pronunciation, or an invalid QUrl.

    Every top-level value of every entry that validates as a HeadWordInfo is
    searched for a pronunciation with a 'sound'.  Items that are not entry
    objects are skipped: for an unknown word the API answers with a list of
    suggestion strings, which have no values to search.
    """
    #  ahws, cats, dros, hwi, ins, ri, sdsense, sen, sense, uros, vrs
    for entry in definition:
        if not isinstance(entry, dict):
            continue
        for value in entry.values():
            hwi = trycast(HeadWordInfo, value)
            if hwi is None or 'prs' not in hwi:
                continue
            for pr in hwi['prs']:
                if 'sound' not in pr:
                    continue
                url = soundUrl(pr['sound'])
                if url.isValid():
                    return url
    return QUrl()

def do_prs(hwi: HeadWordInfo) -> list[Fragment]:
    """Turn the pronunciations of a headword into display fragments.

    For each pronunciation, in order: an optional leading label ('l' followed
    by the separator), the pronunciation text 'mw', and an optional trailing
    label (the separator followed by 'l2').  The separator is 'pun' when
    given, otherwise a single space.  A pronunciation with a 'sound' gets the
    audio URL attached and is drawn in the link colour instead of the subdued
    colour.

    Returns an empty list when the headword has no 'prs'.
    """
    res = Resources()
    labelFont = res.labelFont
    playable = res.linkColor
    muted = res.subduedColor
    out: list[Fragment] = []

    if 'prs' not in hwi:
        return []
    for pron in hwi['prs']:
        sep = pron['pun'] if 'pun' in pron else ' '
        if 'l' in pron:
            out.append(Fragment(pron['l'] + sep, labelFont, color=muted))
        spoken = Fragment(pron['mw'], labelFont, color=muted)
        if 'sound' in pron:
            spoken.setAudio(soundUrl(pron['sound']))
            spoken.setColor(playable)
        out.append(spoken)
        if 'l2' in pron:
            out.append(Fragment(sep + pron['l2'], labelFont, color=muted))
    return out

def do_sense(sense: Sense|None) -> tuple[list[Fragment], list[Line]]:
    """Render the defining text of one sense.

    ``sense['dt']`` is a list whose elements are themselves ``[tag, content]``
    lists (Sense declares it as ``list[list]``), so each element is unpacked
    on its own rather than pairing neighbouring elements.  Only 'text' items
    are rendered, one Line each; other tags are skipped for now.

    Args:
        sense: a validated Sense, or None when validation failed upstream.

    Returns:
        ``(fragments, lines)``.  The fragment list is currently always empty;
        it is kept so callers can merge fragments into their heading line.
        ``([], [])`` is returned for None.
    """
    if sense is None:
        return ([],[])
    lines: list[Line] = []
    frags: list[Fragment] = []
    r = Resources()
    for item in sense['dt']:
        # Ignore anything that is not a well-formed [tag, content] pair.
        if not isinstance(item, (list, tuple)) or len(item) != 2:
            continue
        tag, content = item
        if tag == 'text' and isinstance(content, str):
            line = Line()
            line.addFragment(
                Fragment(content, r.textFont, color=r.baseColor)
            )
            lines.append(line)
    return (frags, lines)

def do_pseq(outer: int,
            inner: int,
            pseq: list[list[Pair]]| None ) -> tuple[list[Fragment], list[Line]]:
    """Render a parenthesized sense sequence ("pseq").

    Each element of ``pseq`` is a flat ``[tag, object]`` list.  A 'sense'
    object is handed to do_sense() as-is.  A 'bs' (binding substitute) object
    carries its sense under a "sense" key, so it is unwrapped first; casting
    the wrapper itself to Sense can never succeed because it has no 'dt'.

    Args:
        outer: index of the enclosing numbered group (not used here).
        inner: index of the enclosing lettered item (not used here).
        pseq: the payload of the 'pseq' item.

    Returns:
        ``(fragments, lines)`` accumulated over all contained senses.

    Raises:
        AssertionError: if ``pseq`` is None.
        Exception: for a tag other than 'bs' or 'sense'.
    """
    assert pseq is not None
    lines: list[Line] = []
    frags: list[Fragment] = []
    for entry in pseq:
        # Walk the flat list two elements at a time.  The pairs are unpacked
        # directly so that the 'bs' wrapper object does not have to satisfy
        # Pair's declared object shapes.
        walker = iter(entry)
        for objType, obj in zip(walker, walker):
            if objType == 'bs':
                if isinstance(obj, dict) and 'sense' in obj:
                    obj = obj['sense']
            elif objType != 'sense':
                raise Exception(f"Unknown object type {objType}")
            (newFrags, newLines) = do_sense(trycast(Sense, obj))
            frags += newFrags
            lines += newLines
    return (frags, lines)

def do_sseq(sseq:list[list[list[Pair]]]) -> list[Line]:
    """Render a sense sequence as numbered, lettered lines.

    ``sseq`` is a list of numbered groups; each group is a list of items and
    each item is a flat ``[tag, object]`` list.  Every item gets its own
    heading Line carrying the letter ('a', 'b', ...), with the group number
    in front of the first item of the group.  Fragments returned for the item
    are added to that heading, and the item's own lines follow it.

    A fresh heading is created per item: appending one shared Line for every
    item would put the same, ever-growing object into the result repeatedly.

    Raises:
        Exception: for 'sen' and 'bs' items (not implemented) and for
            unknown tags.
        AssertionError: from make_pairs()/do_pseq() when an item does not
            validate.
    """
    lines: list[Line] = []
    r = Resources()
    for outer, item_o in enumerate(sseq):
        for inner, item_i in enumerate(item_o):
            heading = Line()
            if inner == 0:
                heading.addFragment(
                    Fragment(str(outer+1), r.boldFont, color=r.baseColor)
                )
            heading.addFragment(
                Fragment(chr(ord('a')+inner), r.boldFont, color=r.baseColor)
            )
            body: list[Line] = []
            pairs = make_pairs(item_i)
            for pair in pairs:
                objType = pair['objType']
                if objType == 'sense':
                    (frags, newlines) = do_sense(trycast(Sense, pair['obj']))
                elif objType == 'sen':
                    raise Exception(f"sen unimplimented")
                elif objType == 'pseq':
                    # The payload is a list of flat [tag, object] lists, not a
                    # list of Pair dicts, so it is validated as such.
                    (frags, newlines) = do_pseq(
                        outer, inner, trycast(list[list[Any]], pair['obj'])
                    )
                elif objType == 'bs':
                    raise Exception(f"bs unimplimented")
                else:
                    raise Exception(f"Unknown object[{objType}] for \n{json.dumps(pair['obj'],indent=2)}")
                for frag in frags:
                    heading.addFragment(frag)
                body += newlines
            # Nothing is emitted for an item that produced no pairs.
            if pairs:
                lines.append(heading)
                lines += body
    return lines

def do_def(entry: Definition) -> list[Line]:
    """Render one Definition: an optional verb-divider line, then its senses.

    Raises:
        AssertionError: if ``entry`` does not validate as a Definition.
    """
    r = Resources()
    rendered: list[Line] = []
    assert trycast(Definition, entry) is not None
    if 'vd' in entry:
        divider = Line()
        divider.addFragment(
            Fragment(entry['vd'], r.italicFont, color = r.linkColor)
        )
        rendered.append(divider)
    #
    # sseq is required
    #
    rendered.extend(do_sseq(entry['sseq']))
    return rendered

def getDef(definition: list[Entry]) -> list[Line]:
    """Render the entries for the looked-up headword as display lines.

    Only entries whose ``meta.id`` (lower-cased, text before the first ':')
    equals that of the first entry are rendered.  Each one produces:

    * a heading line: headword, alternate headwords, an "n of m" badge when
      more than one entry matches, and the functional label ('fl'), numbered
      when the same label occurs on several matching entries;
    * a pronunciation line, when there is anything to show;
    * the lines of each of its definitions.

    Items that are not entry objects are ignored, and an empty list is
    returned when none remain: for an unknown word the API answers with a
    list of suggestion strings rather than entries.  An entry without a 'def'
    member contributes only its heading and pronunciation lines.

    Raises:
        AssertionError: if 'hwi', 'ahws' or 'def' of a rendered entry does not
            validate against its declared type.
    """
    r = Resources()
    lines:list[Line] = []
    #
    # Pull the fonts for ease of use
    #
    headerFont = r.headerFont
    textFont = r.textFont
    labelFont = r.labelFont
    #
    # Pull the colors for ease of use
    #
    baseColor = r.baseColor
    subduedColor = r.subduedColor

    def baseId(entry) -> str:
        # Headword part of meta.id, without what follows the first ':'.
        return entry['meta']['id'].lower().split(':')[0]

    candidates = [e for e in definition if isinstance(e, dict)]
    if not candidates:
        return lines
    wanted = baseId(candidates[0])
    matching = [e for e in candidates if baseId(e) == wanted]
    total = len(matching)

    #
    # How often each functional label occurs among the matching entries
    #
    uses: dict[str,int] = {}
    for entry in matching:
        if 'fl' in entry:
            uses[entry['fl']] = uses.get(entry['fl'], 0) + 1
    used: dict[str, int] = dict.fromkeys(uses, 0)

    # Positions count matching entries only, so the badge can never read
    # "3 of 2" when unrelated entries sit between matching ones.
    for position, entry in enumerate(matching, start=1):
        #
        # Create the First line from the hwi, [ahws] and fl
        #
        line = Line()
        hwi = trycast(HeadWordInfo, entry['hwi'])
        assert hwi is not None
        line.addFragment(
            Fragment(hwi['hw'].replace('*', ''), headerFont, color=baseColor)
        )
        if 'ahws' in entry:
            ahws = trycast(list[HeadWord], entry['ahws'])
            assert ahws is not None
            for ahw in ahws:
                alt = ahw['hw'].replace('*', '')
                line.addFragment(Fragment(', ' + alt, headerFont, color=baseColor))
        if total > 1:
            frag = Fragment(f" {position} of {total} ", textFont, color= subduedColor)
            frag.setBackground(QColor(Qt.GlobalColor.gray))
            line.addFragment(frag)
        if 'fl' in entry:
            label = entry['fl']
            used[label] += 1
            text = label
            if uses[label] > 1:
                text += f' ({used[label]})'
            line.addFragment(Fragment(text, labelFont, color=baseColor))
        lines.append(line)

        #
        # Next is the pronunciation.
        # While 'prs' is optional, the headword is not.  This gets us what we want.
        #
        line = Line()
        if '*' in hwi['hw']:
            # Show the syllable breaks as middle dots.
            dotted = hwi['hw'].replace('*', '\u00b7')
            line.addFragment(Fragment(dotted + ' ', textFont, color=subduedColor))
        for frag in do_prs(hwi):
            line.addFragment(frag)
        if len(line.getLine()) > 0:
            lines.append(line)
        defines = trycast(list[Definition], entry.get('def', []))
        assert defines is not None
        for define in defines:
            lines += do_def(define)
    return lines