# esl-reader/plugins/merriam-webster.py

from trycast import trycast
import json
import re
from typing import Any, Literal, NamedTuple, NotRequired, TypedDict, cast

from PyQt6.QtCore import QEventLoop, QUrl, Qt
from PyQt6.QtGui import QColor, QFont
from PyQt6.QtNetwork import QNetworkRequest
from lib.words import Word
from lib.definition import Line, Fragment

# Descriptor for this plugin: short source tag plus display name.
# (presumably consumed by the plugin loader -- verify against the caller)
registration = dict(source='mw', name='Merriam-Webster')

# Request template; fetch() fills in {word} and {key}.
API = (
    "https://www.dictionaryapi.com/api/v3/references/collegiate/json/"
    "{word}?key={key}"
)
# NOTE(review): the API key is committed in source; consider loading it from
# configuration instead.
key = "51d9df34-ee13-489e-8656-478c215e846c"

class TextTuple(NamedTuple):
    """Two-element (tag, text) pair whose tag is expected to be 'text'.

    Not referenced by the functions in the part of the file shown here.
    """
    type_: str # tag; the author's note gives its value as 'text'
    text: str  # string payload paired with the tag
class TTuple(NamedTuple):
    """Two-element (tag, text) pair whose tag is expected to be 't'.

    Not referenced by the functions in the part of the file shown here.
    """
    type_: str # tag; the author's note gives its value as 't'
    text: str  # string payload paired with the tag
class VerbalIllustration(TypedDict):
    """Item type of the list held in VerbalIllustrationTuple.data."""
    t: str   # not read by the code shown here; presumably the example text -- TODO confirm
    aq: str  # not read by the code shown here; meaning not established by this file

class VerbalIllustrationTuple(NamedTuple):
    """Two-element (tag, data) pair whose tag is expected to be 'vis'.

    Not referenced by the functions in the part of the file shown here.
    """
    type_: str # tag; the author's note gives its value as 'vis'
    data: list[VerbalIllustration]  # the illustrations carried by this pair

class Sound(TypedDict):
    """Audio descriptor stored under a pronunciation's 'sound' key.

    Only 'audio' is read by the code in this file (see soundUrl()).
    """
    audio: str  # file name stem; soundUrl() picks the sub-directory from its prefix and appends the extension
    ref: str    # not read by the code shown here
    stat: str   # not read by the code shown here

class Pronunciation(TypedDict):
    """One item of a 'prs' list.

    do_prs() and getFirstSound() test for 'l', 'l2', 'pun' and 'sound' before
    reading them, so those keys are declared NotRequired.  Declaring them as
    required made trycast() reject any pronunciation that lacked one of
    them, and with it the enclosing HeadWordInfo.
    """
    mw: str                   # pronunciation text; do_prs() reads it unconditionally
    l: NotRequired[str]       # label rendered before 'mw' by do_prs()
    l2: NotRequired[str]      # label rendered after 'mw' by do_prs()
    pun: NotRequired[str]     # separator placed between a label and 'mw'; do_prs() falls back to ' '
    sound: NotRequired[Sound] # audio descriptor; turned into a URL by soundUrl()


class Meta(TypedDict):
    """Type of the 'meta' member of an entry (see Entry / EntryX)."""
    id: str  # read by getDef() to decide which entries to render
    # The remaining keys are not read by the code shown here.
    uuid: str
    sort: str
    src: str
    section: str
    stems: list[str]
    offensive: bool

class HeadWordInfo(TypedDict):
    """Type of an entry's 'hwi' member: the headword and its pronunciations.

    'prs' is NotRequired because getFirstSound() tests ``'prs' in hwi`` and
    getDef() documents it as optional.  While it was declared as required,
    trycast(HeadWordInfo, ...) returned None for any 'hwi' without
    pronunciations.
    """
    hw: str  # headword; getDef() strips or replaces its '*' markers for display
    prs: NotRequired[list[Pronunciation]]  # pronunciations, when present

class HeadWord(TypedDict):
    """Item type of an entry's 'ahws' list (alternate headwords).

    getDef() validates 'ahws' with trycast(list[HeadWord], ...) and asserts on
    the result, but only reads 'hw'.  'prs' is NotRequired for consistency
    with HeadWordInfo; 'psl' is NotRequired on the assumption that the API
    may omit it (NOTE(review): confirm against the Merriam-Webster JSON
    documentation).
    """
    hw: str  # alternate headword; getDef() removes its '*' markers for display
    prs: NotRequired[list[Pronunciation]]  # pronunciations, when present
    psl: NotRequired[str]  # not read by the code shown here

class Variant(TypedDict):
    """Item type of the 'vrs' lists declared on Entry, DividedSense, RunInWrap and Sense.

    None of its keys are read by the functions shown in this file.
    """
    va: str
    vl: str
    prs: list[Pronunciation]  # same item type that do_prs() renders
    spl: str

class Inflection(TypedDict):
    """Item type of the 'ins' lists declared on DividedSense and Sense.

    None of its keys are read by the functions shown in this file.
    """
    # NOTE(review): the trailing underscore suggests the real key is 'if', which
    # is a Python keyword.  As declared, this key is literally 'if_'; if the
    # data uses 'if', the functional TypedDict form (as used for Entry) is
    # needed -- confirm against the API documentation.
    if_: str
    ifc: str
    il: str
    prs: list[Pronunciation]  # same item type that do_prs() renders
    spl: str

class DividedSense(TypedDict):
    """Type of Sense.sdsense.

    None of its keys are read by the functions shown in this file.
    """
    sd: str
    et: list[str] # author's note: "Not full" -- presumably the element type is a simplification; TODO confirm
    ins: list[Inflection]
    lbs: list[str]
    prs: list[Pronunciation]
    sgram: str
    sls: list[str]
    vrs: list[Variant]

class BioGraphicalNameWrap(TypedDict):
    """Name record with pronunciations.

    Not referenced by any other declaration or function in the part of the
    file shown here.
    """
    pname: str
    sname: str
    altname: str
    prs: list[Pronunciation]

class CalledAlsoTarget(TypedDict):
    """Item type of CalledAlso.cats.

    None of its keys are read by the functions shown in this file.
    """
    cat: str
    catref: str
    pn: str
    prs: list[Pronunciation]
    psl: str

class CalledAlso(TypedDict):
    """An introductory string plus a list of CalledAlsoTarget items.

    Not referenced by any other declaration or function in the part of the
    file shown here.
    """
    intro: str
    cats: list[CalledAlsoTarget]

class RunInWrap(TypedDict):
    """Record combining a 'rie' string, pronunciations, text and variants.

    Not referenced by any other declaration or function in the part of the
    file shown here.
    """
    rie: str
    prs: list[Pronunciation]
    text: str
    vrs: list[Variant]

class Sense(TypedDict):
    """Type of the 'sense' and 'sen' members of SenseSequence.

    Declared as a TypedDict like every other schema type in this file.  As a
    plain class it could never match a dict produced by json.loads(), so any
    structural check that reached it (e.g. through SenseSequence) would fail.

    None of its keys are read by the functions shown in this file.
    """
    dt: list[str] # author's note: "not full" -- presumably the element type is a simplification; TODO confirm
    et: list[str] # author's note: "not full" -- see above
    ins: list[Inflection]
    lbs: list[str]
    prs: list[Pronunciation]
    sdsense: DividedSense
    sgram: str
    sls: list[str]
    sn: str
    vrs: list[Variant]

class SenseSequence(TypedDict):
    """Item type of Definition.sseq.

    Both keys are declared as required.
    NOTE(review): confirm that real data always carries both 'sense' and 'sen'.
    """
    sense: Sense
    sen: Sense

class Definition(TypedDict):
    """Item type of an entry's 'def' list (see Entry; 'def_' on EntryX).

    None of its keys are read by the functions shown in this file.
    """
    sseq: list[SenseSequence]
    vd: str

class EntryX(TypedDict):
    """Class-syntax variant of Entry.

    Differs from Entry in two ways: the definitions key is spelled 'def_'
    ('def' is a Python keyword and cannot be a class attribute name), and
    'fl' is required here but NotRequired on Entry.  Not referenced by the
    functions in the part of the file shown here.
    """
    meta: Meta
    hom: NotRequired[str]
    hwi: HeadWordInfo
    ahws: NotRequired[list[HeadWord]]
    vrs: NotRequired[list[Variant]]
    fl: str
    def_: list[Definition]
# One dictionary entry, as consumed by getFirstSound() and getDef().
# The functional TypedDict form is used so the key can be spelled 'def',
# which is a Python keyword and therefore not usable in class syntax.
Entry = TypedDict(
    'Entry',
    {
        'meta': Meta,                        # getDef() reads meta['id']
        'hom': NotRequired[str],             # getDef() tests for its presence
        'hwi': HeadWordInfo,                 # headword and pronunciations
        'ahws': NotRequired[list[HeadWord]], # alternate headwords, rendered after the headword
        'vrs': NotRequired[list[Variant]],   # not read by the code shown here
        'fl': NotRequired[str],              # label rendered on the first line by getDef()
        'def': list[Definition],             # not read by the code shown here
    }
)

def fetch(word:str) ->  dict[str, Any]:
    """Download the dictionary data for ``word`` and wrap it in a result dict.

    The request is issued through ``Word._nam`` and this function blocks in a
    local event loop until the reply reports that it has finished.  The
    transfer timeout is set to 3000.

    Args:
        word: The word to look up.  It is percent-encoded before being placed
            in the URL.

    Returns:
        ``{'word': word, 'source': 'mw', 'definition': <decoded JSON>}``.

    Raises:
        json.JSONDecodeError: If the reply body is not valid JSON (which
            includes an empty body).
        AssertionError: If the network manager returns no reply object.
    """
    # Local import: the module's import block does not provide urllib.
    from urllib.parse import quote

    request = QNetworkRequest()
    # Encode the word so characters such as '/', '?', '#' or '&' cannot change
    # the structure of the request URL.
    url = QUrl(API.format(word=quote(word, safe=''), key=key))
    request.setUrl(url)
    request.setTransferTimeout(3000)
    reply = Word._nam.get(request)
    assert reply is not None
    try:
        loop = QEventLoop()
        reply.finished.connect(loop.quit)
        loop.exec()
        content = reply.readAll()
    finally:
        # The reply object is owned by the caller; schedule it for deletion
        # so repeated look-ups do not accumulate finished replies.
        reply.deleteLater()
    data = json.loads(content.data().decode('utf-8'))
    return {
        'word': word,
        'source': 'mw',
        'definition': data,
    }

def soundUrl(sound:Sound, fmt='ogg') -> QUrl:
    """Build the media URL for a Sound descriptor.

    The sub-directory is 'bix' or 'gg' when the audio name starts with that
    prefix, otherwise the first letter of the name; names that do not start
    with a letter go under 'number'.  ``fmt`` is used both as a path segment
    and as the file extension.
    """
    base = f"https://media.merriam-webster.com/audio/prons/en/us/{fmt}"
    audio = sound['audio']
    prefix = re.match(r"(bix|gg|[a-zA-Z])", audio)
    folder = f"/{prefix.group(1)}/" if prefix else "/number/"
    return QUrl(base + folder + audio + f".{fmt}")

def getFirstSound(definition: list[Entry]) -> QUrl:
    """Return the URL of the first valid pronunciation audio in ``definition``.

    Every value of every entry is tested against the HeadWordInfo shape; for
    those that match, the 'prs' items are scanned in order and the first one
    whose 'sound' yields a valid URL wins.  An empty QUrl is returned when
    nothing is found.
    """
    # Keys under which pronunciations may occur (author's list):
    #  ahws, cats, dros, hwi, ins, ri, sdsense, sen, sense, uros, vrs
    for entry in definition:
        # Lazy generator: each value is checked only when the loop reaches it.
        candidates = (trycast(HeadWordInfo, value) for value in entry.values())
        for info in candidates:
            if info is None or 'prs' not in info:
                continue
            for pron in info['prs']:
                if 'sound' not in pron:
                    continue
                found = soundUrl(pron['sound'])
                if found.isValid():
                    return found
    return QUrl()

def do_prs(prs: list[Pronunciation]) -> list[Fragment]:
    """Turn a 'prs' list into display fragments.

    For each pronunciation, in order:

    * when 'l' is present, a fragment with the label followed by the separator;
    * a fragment with the 'mw' text, carrying the audio URL when 'sound' is
      present;
    * when 'l2' is present, a fragment with the separator followed by the label.

    The separator is the item's 'pun' value, or a single space when absent.
    All fragments use the 'label' font and the 'subdued' colour.

    Args:
        prs: The pronunciations to render.

    Returns:
        The fragments in display order (empty for an empty ``prs``).

    Raises:
        AssertionError: If the 'label' font or 'subdued' colour resource is
            missing or of the wrong type.
    """
    font = trycast(QFont, Word._resources['fonts']['label'])
    assert font is not None
    subduedColor = trycast(QColor, Word._resources['colors']['subdued'])
    assert subduedColor is not None

    frags: list[Fragment] = []
    for pr in prs:
        pun = pr.get('pun', ' ')
        if 'l' in pr:
            frags.append(
                Fragment(pr['l'] + pun, font, color=subduedColor)
            )
        frag = Fragment(pr['mw'], font, color=subduedColor)
        if 'sound' in pr:
            frag.setAudio(soundUrl(pr['sound']))
        frags.append(frag)
        if 'l2' in pr:
            frags.append(
                Fragment(pun + pr['l2'], font, color=subduedColor)
            )
    return frags

def getDef(definition: list[Entry]) -> list[Line]:
    """Build the display lines for the entries that belong to the first entry's headword.

    Entries are selected by comparing the part of ``meta['id']`` before the
    first ':' (case-insensitively) with that of ``definition[0]``.  Each
    selected entry contributes two lines:

    1. the headword, any alternate headwords ('ahws'), an "N of M" badge when
       the entry carries 'hom', and the 'fl' label when present;
    2. the headword with '*' shown as a middle dot, followed by the fragments
       that do_prs() produces for ``hwi['prs']`` (if any).

    The senses under 'def' are not rendered yet.

    Args:
        definition: The parsed dictionary entries.

    Returns:
        The lines in display order; an empty list when ``definition`` is empty.

    Raises:
        AssertionError: If a font or colour resource is missing or of the
            wrong type, or if 'hwi' / 'ahws' does not match its declared shape.
    """
    lines: list[Line] = []
    if not definition:
        return lines
    #
    # Pull the fonts for ease of use
    #
    headerFont = trycast(QFont, Word._resources['fonts']['header'])
    assert headerFont is not None
    textFont = trycast(QFont, Word._resources['fonts']['text'])
    assert textFont is not None
    labelFont = trycast(QFont, Word._resources['fonts']['label'])
    assert labelFont is not None
    #
    # Pull the colors for ease of use
    #
    baseColor = trycast(QColor, Word._resources['colors']['base'])
    assert baseColor is not None
    subduedColor = trycast(QColor, Word._resources['colors']['subdued'])
    assert subduedColor is not None

    #
    # Select the entries once.  Both sides of the comparison are reduced to the
    # text before the first ':' so that ids differing only in their ':suffix'
    # are treated as the same headword.
    #
    wanted = definition[0]['meta']['id'].split(':')[0].lower()
    matching = [
        entry for entry in definition
        if entry['meta']['id'].split(':')[0].lower() == wanted
    ]
    entries = len(matching)
    for count, entry in enumerate(matching, start=1):
        #
        # Create the First line from the hwi, [ahws] and fl
        #
        line = Line()
        hwi = trycast(HeadWordInfo, entry['hwi'])
        assert hwi is not None
        hw = hwi['hw'].replace('*', '')
        line.addFragment(Fragment(hw, headerFont, color=baseColor))
        if 'ahws' in entry:
            ahws = trycast(list[HeadWord], entry['ahws'])
            assert ahws is not None
            for ahw in ahws:
                hw = ahw['hw'].replace('*', '')
                line.addFragment(Fragment(', ' + hw, headerFont, color=baseColor))
        if 'hom' in entry:
            # NOTE(review): the original left this branch empty and the badge
            # construction unfinished; the condition and the text colour used
            # here are a reconstruction -- confirm the intended design.
            frag = Fragment(f"{count} of {entries} ", textFont, color=baseColor)
            frag.setBackground(QColor(Qt.GlobalColor.gray))
            line.addFragment(frag)
        if 'fl' in entry:
            # 'fl' is declared NotRequired on Entry, so only render it when present.
            line.addFragment(Fragment(entry['fl'], labelFont, color=baseColor))
        lines.append(line)

        #
        # Next is the pronunciation.
        # 'prs' is optional but the headword is not, so the line always
        # starts with the headword.
        #
        line = Line()
        hw = hwi['hw'].replace('*', '\u00b7')
        line.addFragment(Fragment(hw + ' ', textFont, color=subduedColor))
        for frag in do_prs(hwi.get('prs', [])):
            line.addFragment(frag)
        lines.append(line)

        # TODO: render the senses from entry['def'].
    return lines