# esl-reader/plugins/merriam-webster.py

import json
import re
from typing import Any, Literal, NotRequired, TypedDict, cast

from PyQt6.QtCore import QEventLoop, QUrl
from PyQt6.QtGui import QFont, QFontDatabase, QTextCharFormat, QTextLayout
from PyQt6.QtNetwork import QNetworkRequest
from trycast import trycast

from lib.definition import Fragment, Line
from lib.utils import Resources

# Plugin registration record: source id, display name and language tag.
# NOTE(review): presumably consumed by the host application's plugin loader —
# confirm against the loader code.
registration = {
    "source": "mw",
    "name": "Merriam-Webster",
    "language": "en-us",
}

# URL template for a single-word lookup; fetch() fills in {word} and {key}.
API = "https://www.dictionaryapi.com/api/v3/references/collegiate/json/{word}?key={key}"
# NOTE(review): the API key is hard-coded in source. Consider loading it from
# configuration or the environment so it is not committed with the code.
key = "51d9df34-ee13-489e-8656-478c215e846c"


class Meta(TypedDict):
    """Typed shape of an entry's ``meta`` object.

    Within this file only ``id`` is read: getDef() lower-cases it and keeps
    the part before ``":"`` to decide which entries belong together.
    """

    id: str
    uuid: str
    sort: str
    src: str
    section: str
    stems: list[str]
    offensive: bool


class Sound(TypedDict):
    """Audio reference attached to a pronunciation.

    soundUrl() builds the playback URL from ``audio``; ``ref`` and ``stat``
    are not read anywhere in this file.
    """

    audio: str
    ref: str
    stat: str


class Pronunciation(TypedDict):
    """One pronunciation item as rendered by do_prs().

    ``mw`` is the pronunciation text itself; ``l`` is written before it and
    ``l2`` after it, with ``pun`` used as the separator (a space when
    absent).  ``sound`` turns the pronunciation into an audio link.
    """

    mw: str
    l: NotRequired[str]
    l2: NotRequired[str]
    pun: NotRequired[str]
    sound: NotRequired[Sound]


class SubSource(TypedDict):
    """Nested source of a quote attribution; do_aq() appends both fields."""

    source: NotRequired[str]
    aqdate: NotRequired[str]


class AttributionOfQuote(TypedDict):
    """Attribution of a quoted example (the ``aq`` object).

    do_aq() renders the present fields in the order author, source, date,
    then the sub-source's source and date.
    """

    auth: NotRequired[str]
    source: NotRequired[str]
    aqdate: NotRequired[str]
    subsource: NotRequired[SubSource]


class VerbalIllustration(TypedDict):
    """One example item of a ``vis`` list: text ``t`` plus optional ``aq``."""

    t: str
    aq: NotRequired[AttributionOfQuote]


class HeadWordInformation(TypedDict):
    """The ``hwi`` object of an entry.

    getDef() strips ``*`` from ``hw`` for the header line and replaces it
    with a middle dot on the pronunciation line.
    """

    hw: str
    prs: NotRequired[list[Pronunciation]]


class AlternanteHeadword(TypedDict):
    """One item of an entry's ``ahws`` list; only ``hw`` is read (getDef()).

    NOTE(review): the class name is misspelled ("Alternante"); renaming it
    would need every reference updated.
    """

    hw: str
    psl: NotRequired[str]


class Variant(TypedDict):
    """One item of a ``vrs`` list as rendered by do_vrs().

    ``va`` is the variant text (bold); ``vl`` and ``spl`` are written before
    it when present, and ``prs`` after it.
    """

    va: str
    vl: NotRequired[str]
    prs: NotRequired[list[Pronunciation]]
    spl: NotRequired[str]


# One item of an ``ins`` list as rendered by do_ins().  Declared with the
# functional TypedDict syntax because "if" is a Python keyword and cannot be
# a class attribute name.  do_ins() prefers "ifc" over "if", uses "il" as the
# separator before the next inflection, and does not support "spl".
Inflection = TypedDict(
    "Inflection",
    {
        "if": NotRequired[str],
        "ifc": NotRequired[str],
        "il": NotRequired[str],
        "prs": NotRequired[list[Pronunciation]],
        "spl": NotRequired[str],
    },
)


class CrossReferenceTarget(TypedDict):
    """Item type of ``CognateCrossRef.cxtis``.

    Not read by any function in this file.
    NOTE(review): field meanings come from the upstream API — confirm there.
    """

    cxl: str
    cxr: NotRequired[str]
    cxt: str
    cxn: NotRequired[str]


class CognateCrossRef(TypedDict):
    """Item type of an entry's ``cxs`` list (see ``Definition``).

    Declared for typing only; getDef() currently rejects entries that carry
    a ``cxs`` key as unknown.
    """

    cxl: str
    cxtis: list[CrossReferenceTarget]


class Pair(TypedDict):
    """A tagged value produced by make_pairs().

    ``objType`` is the tag that the ``do_*`` renderers dispatch on and
    ``obj`` is the payload belonging to that tag.
    """

    objType: str
    obj: Any


class DividedSense(TypedDict):
    """The ``sdsense`` object of a sense.

    do_sense() renders only ``sd`` (italic lead-in) and ``dt``; the other
    fields are declared but not yet handled there.
    """

    sd: str
    dt: list[list[Pair]]
    et: NotRequired[list[Pair]]
    ins: NotRequired[list[Inflection]]
    lbs: NotRequired[list[str]]
    prs: NotRequired[list[Pronunciation]]
    sgram: NotRequired[str]
    sls: NotRequired[list[str]]
    vrs: NotRequired[list[Variant]]


class Sense(TypedDict):
    """Payload of a ``sense`` pair, after restructure().

    do_sense() handles ``dt``, ``sdsense`` and ``sls``, skips ``sn`` and
    ``lbs``, and raises NotImplementedError for every other key.
    """

    dt: list[list[Pair]]
    et: NotRequired[list[Pair]]
    ins: NotRequired[list[Inflection]]
    lbs: NotRequired[list[str]]
    prs: NotRequired[list[Pronunciation]]
    sdsense: NotRequired[DividedSense]
    sgram: NotRequired[str]
    sls: NotRequired[list[str]]
    sn: NotRequired[str]
    vrs: NotRequired[list[Variant]]


class TruncatedSense(Sense):
    """Same fields as ``Sense``; adds nothing and is unused in this file.

    NOTE(review): presumably meant for the API's "sen" element, which
    do_sseq() does not implement yet — confirm.
    """

    pass


class BindingSubstitutePair(TypedDict):
    """A ``Pair`` specialised to the ``"bs"`` tag.

    NOTE(review): do_pseq() reads the sense from ``pair["obj"]["sense"]``,
    i.e. one level deeper than ``obj: Sense`` declares — confirm which
    shape is correct.
    """

    objType: Literal["bs"]
    obj: Sense


class SensePair(TypedDict):
    """A ``Pair`` specialised to the ``"sense"`` tag, whose payload is a ``Sense``."""

    objType: Literal["sense"]
    obj: Sense


class DefinitionSection(TypedDict):
    """One item of an entry's ``def`` list, rendered by do_def().

    ``vd`` (when present) becomes an italic line; ``sseq`` is handed to
    do_sseq().  ``sls`` is declared but not read by do_def().
    """

    vd: NotRequired[str]
    sls: NotRequired[list[str]]
    sseq: Any  # list[list[Pair]]

class UndefinedRunOn(TypedDict):
    """One item of an entry's ``uros`` list, rendered by do_uros().

    do_uros() reads ``ure`` (with ``*`` removed), ``prs``, ``fl`` and
    ``utxt``; the remaining fields are declared but not rendered there.
    """

    ure: str
    fl: str
    utxt: NotRequired[list[list[Pair]]]
    ins: NotRequired[list[Inflection]]
    lbs: NotRequired[list[str]]
    prs: NotRequired[list[Pronunciation]]
    sls: NotRequired[list[str]]
    vrs: NotRequired[list[Variant]]


# One item of an entry's ``dros`` list, rendered by do_dros().  Declared with
# the functional TypedDict syntax because "def" is a Python keyword.
# do_dros() handles "drp", "def" and "et" and raises NotImplementedError for
# any other key that is present.
DefinedRunOn = TypedDict(
    "DefinedRunOn",
    {
        "drp": str,
        "def": list[DefinitionSection],
        "et": NotRequired[list[Pair]],
        "lbs": NotRequired[list[str]],
        "prs": NotRequired[list[Pronunciation]],
        "sls": NotRequired[list[str]],
        "vrs": NotRequired[list[Variant]]
    }
)

# Shape of one top-level entry of the API response.  Functional TypedDict
# syntax is needed because "def" is a Python keyword.
# NOTE(review): "fl" is declared required here, but getDef() tests for its
# presence before using it — confirm whether it can really be absent.
Definition = TypedDict(
    "Definition",
    {
        "ahws": NotRequired[list[AlternanteHeadword]],
        "cxs": NotRequired[list[CognateCrossRef]],
        "date": NotRequired[str],
        "def": list[DefinitionSection],
        "dros": NotRequired[Any],
        "et": NotRequired[list[Pair]],
        "fl": str,
        "hom": NotRequired[int],
        "hwi": HeadWordInformation,
        "ins": NotRequired[list[Inflection]],
        "lbs": NotRequired[list[str]],
        "meta": Meta,
        "shortdef": NotRequired[list[str]],
        "sls": NotRequired[list[str]],
        "syns": NotRequired[Any],
        "uros": NotRequired[Any],
        "vrs": NotRequired[list[Variant]],
    },
)


def make_pairs(src: list[Any]) -> list[Pair]:
    """Fold a flat ``[type, obj, type, obj, ...]`` list into ``Pair`` dicts.

    Elements are consumed two at a time; a trailing unpaired element is
    dropped.  A pair whose payload is a list is accepted as-is.  Any other
    pair must validate against ``Pair`` via ``trycast``; when it does not,
    the offending pair is printed and an ``AssertionError`` is raised.
    """
    pairs: list[Pair] = []
    for obj_type, payload in zip(src[0::2], src[1::2]):
        candidate = {"objType": obj_type, "obj": payload}
        if isinstance(payload, list):
            # List payloads are restructured later by the caller, so they
            # are not validated here.
            pairs.append(cast(Pair, candidate))
            continue
        checked = trycast(Pair, candidate)
        if checked is None:
            print(obj_type, type(payload), json.dumps(payload, indent=2))
        assert checked is not None
        pairs.append(checked)
    return pairs


# Tags that mark a list as a flat "type, obj" sequence: restructure() converts
# a list into Pair dicts (via make_pairs) when its first item is one of these
# strings.
Elements = [
    "dt",
    "sen",
    "bs",
    "pseq",
    "snot",
    "t",
    "text",
    "vis",
    "sens",
    "uns",
    "sense",
]


def restructure(obj: Any) -> Any:
    """Return a copy of ``obj`` with tagged lists turned into ``Pair`` dicts.

    Dicts and lists are rebuilt recursively; every other value is returned
    unchanged.  A non-empty list whose first item is a string found in
    ``Elements`` is passed through make_pairs(), and each resulting pair's
    list or dict payload is restructured in turn.

    NOTE(review): the tag test looks only at the first item, so a plain
    list of strings that happens to start with one of the ``Elements`` tags
    is also converted into pairs — confirm this cannot happen in practice.
    """
    if isinstance(obj, dict):
        return {name: restructure(value) for name, value in obj.items()}
    if not isinstance(obj, list):
        return obj
    if not obj:
        return []

    head = obj[0]
    if not (isinstance(head, str) and head in Elements):
        return [restructure(item) for item in obj]

    converted = []
    for pair in make_pairs(obj):
        payload = pair["obj"]
        if isinstance(payload, list):
            pair["obj"] = [restructure(item) for item in payload]
        elif isinstance(payload, dict):
            pair["obj"] = {
                name: restructure(value) for name, value in payload.items()
            }
        converted.append(pair)
    return converted


class WordType(TypedDict):
    """Result of fetch(): the looked-up word, the source id and the decoded JSON."""

    word: str
    source: str
    definition: Any


def fetch(word: str) -> WordType:
    """Look ``word`` up in the Merriam-Webster API and return the decoded JSON.

    The request goes through the shared ``Resources.nam`` network manager
    with a 3000 ms transfer timeout, and this function blocks in a local
    event loop until the reply's ``finished`` signal fires.

    Args:
        word: The word to look up.  It is percent-encoded before being
            placed in the URL path, so characters such as ``/``, ``?`` or
            ``#`` cannot change the shape of the request.

    Returns:
        A ``WordType`` dict whose ``definition`` is the decoded response.

    Raises:
        json.JSONDecodeError: If the body is not valid JSON (for example an
            empty body after a network failure).
    """
    from urllib.parse import quote

    request = QNetworkRequest()
    request.setUrl(QUrl(API.format(word=quote(word, safe=""), key=key)))
    request.setTransferTimeout(3000)
    reply = Resources.nam.get(request)
    assert reply is not None
    loop = QEventLoop()
    reply.finished.connect(loop.quit)
    loop.exec()
    try:
        content = reply.readAll()
        data = json.loads(content.data().decode("utf-8"))
    finally:
        # The caller owns the reply object; without this every lookup
        # leaks one QNetworkReply.
        reply.deleteLater()
    return {
        "word": word,
        "source": "mw",
        "definition": data,
    }


def soundUrl(sound: Sound, fmt="ogg") -> QUrl:
    """Build the ``audio://`` URL for a pronunciation's sound record.

    The sub-directory is ``bix`` or ``gg`` when the audio name starts with
    that prefix, otherwise the first character when it is an ASCII letter,
    and ``number`` for anything else.  ``fmt`` is used both as a path
    segment and as the file extension.
    """
    audio = sound["audio"]
    prefix = re.match(r"(bix|gg|[a-zA-Z])", audio)
    subdir = prefix.group(1) if prefix else "number"
    return QUrl(
        f"audio://media.merriam-webster.com/audio/prons/en/us/{fmt}"
        f"/{subdir}/{audio}.{fmt}"
    )


def getFirstSound(definition: Any) -> QUrl:
    """Return the URL of the first playable pronunciation in ``definition``.

    Each entry's top-level values are scanned in order; a value is searched
    only when it is a dict carrying a ``prs`` list (in practice ``hwi``).
    Pronunciations nested inside list values (``ins``, ``vrs``, ``uros``,
    ...) are not searched.

    Non-dict entries and non-dict values (such as the integer ``hom``) are
    skipped rather than raising.

    Returns:
        The first valid sound URL, or an empty ``QUrl`` when none is found.
    """
    for entry in definition:
        if not isinstance(entry, dict):
            continue
        for value in entry.values():
            # Only dict values can carry a "prs" list; testing `"prs" in x`
            # on an int raises TypeError and on a str is a substring test.
            if not isinstance(value, dict):
                continue
            for pr in value.get("prs", ()):
                if isinstance(pr, dict) and "sound" in pr:
                    url = soundUrl(pr["sound"])
                    if url.isValid():
                        return url
    return QUrl()


def do_prs(frag: Fragment, prs: list[Pronunciation] | None) -> None:
    """Append every pronunciation in ``prs`` to ``frag``.

    For each item the optional ``l`` label is written first (followed by
    ``pun``, or a space when absent), then the ``mw`` text plus a trailing
    space, then the optional ``l2`` label preceded by ``pun``.  When the
    item has a ``sound`` the ``mw`` text is formatted as a link to the
    audio URL.  Diagnostic output is printed for each text run and, at the
    end, for each format range on the fragment's layout.
    """
    assert prs is not None
    r = Resources()

    for pr in prs:
        pun = pr.get("pun", " ")
        if "l" in pr:
            frag.addText(pr["l"] + pun, r.subduedItalicFormat)

        if "sound" in pr:
            # Copy the shared format so the anchor settings stay local.
            fmt = QTextCharFormat(r.phonticFormat)
            fmt.setAnchor(True)
            fmt.setAnchorHref(soundUrl(pr["sound"]).toString())
            fmt.setForeground(r.linkColor)
        else:
            fmt = r.phonticFormat
        text = pr["mw"] + ' '

        print(f"text: {text}, length: {len(text)}")
        frag.addText(text, fmt)
        if "l2" in pr:
            frag.addText(pun + pr["l2"], r.subduedLabelFormat)

    text = frag.layout().text()
    for fmt in frag.layout().formats():
        print(f"start: {fmt.start}, length: {fmt.length}, text: \"{text[fmt.start:fmt.start+fmt.length]}\"")


def do_aq(aq: AttributionOfQuote | None) -> Line:
    """Render a quote attribution as a single subdued line.

    The present parts are written in the order author, source, date,
    sub-source source, sub-source date.  The author is followed by ``", "``
    and every part after the source is preceded by ``", "``.
    """
    assert aq is not None
    r = Resources()
    frag = Fragment()

    pieces: list[str] = []
    if 'auth' in aq:
        pieces.append(aq['auth']+', ')
    if 'source' in aq:
        pieces.append(aq['source'])
    if 'aqdate' in aq:
        pieces.append(', '+aq['aqdate'])
    if 'subsource' in aq:
        ss = trycast(SubSource, aq['subsource'])
        assert ss is not None
        for field in ('source', 'aqdate'):
            if field in ss:
                pieces.append(', '+ss[field])

    # One addText call per piece, all in the same format.
    for piece in pieces:
        frag.addText(piece, r.subduedFormat)

    line = Line()
    line.addFragment(frag)
    return line


def do_vis(vis: list[VerbalIllustration] | None, indent=0) -> list[Line]:
    """Render examples: one subdued line per item plus its attribution.

    Each item's ``t`` text becomes its own line, indented by ``indent``
    when that is positive.  An item with an ``aq`` object is followed by
    the line produced by do_aq().
    """
    assert vis is not None
    r = Resources()
    lines: list[Line] = []
    for vi in vis:
        example = Fragment()
        example.addText(vi['t'], r.subduedFormat)
        if indent > 0:
            example.setIndent(indent)
        exampleLine = Line()
        exampleLine.addFragment(example)
        lines.append(exampleLine)
        if "aq" not in vi:
            continue
        lines.append(do_aq(trycast(AttributionOfQuote, vi["aq"])))
    return lines


def do_uns(
    uns: list[list[list[Pair]]] | None, indent: int
) -> tuple[list[Fragment], list[Line]]:
    assert uns is not None
    r = Resources()
    frags: list[Fragment] = []
    lines: list[Line] = []
    for note in uns:
        for entry in note:
            for pair in entry:
                if pair["objType"] == "text":
                    frag = Fragment(
                        "\u2192 " + pair["obj"], r.textFont, color=r.baseColor
                    )
                    frag.setIndent(indent)
                    frags.append(frag)
                elif pair["objType"] == "vis":
                    lines += do_vis(
                        trycast(list[VerbalIllustration], pair["obj"]), indent
                    )
                elif pair["objType"] == "ri":
                    raise NotImplementedError("NO ri")
    return (frags, lines)


def do_dt(frag: Fragment, dt: list[list[Pair]] | None, indent: int) -> list[Line]:
    """Render defining text into ``frag`` and follow-up lines.

    ``text`` pairs seen in the first entry before any example are appended
    to the caller's ``frag`` (which also receives ``indent``).  Every later
    ``text`` pair gets a line of its own, and ``vis`` pairs are rendered
    through do_vis().

    Args:
        frag: Fragment that receives the leading definition text.
        dt: Restructured ``dt`` value; must not be None.
        indent: Indent applied to ``frag`` and to generated lines.

    Returns:
        The lines that follow ``frag``, in display order.

    Raises:
        NotImplementedError: For ``uns`` pairs and for unknown tags.
    """
    assert dt is not None
    lines: list[Line] = []
    r = Resources()
    first = True
    for entry in dt:
        for pair in entry:
            if pair["objType"] == "text":
                if first:
                    frag.setIndent(indent)
                    frag.addText(pair["obj"], r.textFormat)
                else:
                    line = Line()
                    f = Fragment()
                    f.setIndent(indent)
                    f.addText(pair["obj"], r.textFormat)
                    # The new fragment goes on the new line; adding the
                    # caller's `frag` here dropped this text.
                    line.addFragment(f)
                    lines.append(line)
            elif pair["objType"] == "vis":
                first = False
                lines += do_vis(
                    trycast(list[VerbalIllustration], pair["obj"]), indent
                )
            elif pair["objType"] == "uns":
                # Usage notes yield fragments as well as lines, and this
                # function has no way to return fragments yet.
                raise NotImplementedError("uns")
            else:
                print(json.dumps(pair, indent=2))
                raise NotImplementedError(
                    f"Unknown or unimplimented element {pair['objType']}"
                )
        first = False
    return lines


def do_sense(
        sense: Sense | None, indent: int = 3
) -> tuple[Fragment, list[Line]]:
    """Render one sense into a leading fragment plus follow-up lines.

    The first ``dt`` or ``sdsense`` block supplies the returned fragment,
    which the caller places on the current line.  Every later block is
    emitted as a line of its own, followed by the lines its defining text
    produced.  ``sls`` labels are appended to the fragment being built;
    ``sn`` and ``lbs`` are skipped.

    NOTE(review): labels that arrive after the last ``dt``/``sdsense``
    block land in a fragment that is only returned when no block was seen;
    otherwise they are not displayed.

    Returns:
        ``(first_fragment, lines)``.

    Raises:
        NotImplementedError: For any other key present on the sense.
    """
    assert sense is not None
    lines: list[Line] = []
    r = Resources()
    firstFrag: Fragment | None = None
    frag = Fragment()

    def emit(block: Fragment, blockLines: list[Line]) -> None:
        # The first block becomes the returned fragment; later blocks
        # each become a line ahead of their own follow-up lines.
        nonlocal firstFrag
        if firstFrag is None:
            firstFrag = block
        else:
            line = Line()
            line.addFragment(block)
            lines.append(line)
        lines.extend(blockLines)

    for k, v in sense.items():
        if k == "sn":
            continue
        elif k == "dt":
            newLines = do_dt(frag, trycast(list[list[Pair]], v), indent)
            emit(frag, newLines)
            frag = Fragment()
        elif k == "sdsense":
            # XXX - This needs to expand to handle et, ins, lbs, prs, sgram, sls, vrs
            sdsense = trycast(DividedSense, v)
            assert sdsense is not None
            frag = Fragment()
            frag.setIndent(indent)
            frag.addText(sdsense["sd"] + ' ', r.italicFormat)
            newLines = do_dt(
                frag, trycast(list[list[Pair]], sdsense["dt"]), indent=indent
            )
            emit(frag, newLines)
            frag = Fragment()
        elif k == "sls":
            labels = trycast(list[str], v)
            assert labels is not None
            frag.addText(", ".join(labels) + " ", r.boldOnSFormat)
        elif k == "lbs":
            pass
        else:
            print(k, v)
            raise NotImplementedError(f"Unknown or unimplimented element {k}")

    if firstFrag is None:
        # No defining block at all: hand back whatever was collected.
        firstFrag = frag
    return (firstFrag, lines)


def do_pseq(
    inner: int, outer: int, pseq: list[Any]
) -> tuple[list[Fragment], list[Line]]:
    """Render a parenthesized sense sequence.

    ``bs`` pairs are rendered through do_sense() and their fragment is
    appended to the returned fragments.  ``sense`` pairs are numbered
    "(1)", "(2)", ...: the first thing rendered stays in the returned
    fragments so the caller can put it on the current line, and each later
    numbered sense becomes a line of its own.

    Args:
        inner: Unused; kept for interface compatibility.
        outer: Unused; kept for interface compatibility.
        pseq: Restructured payload of a ``pseq`` pair.

    Returns:
        ``(fragments, lines)``.

    Raises:
        NotImplementedError: For any tag other than ``bs`` or ``sense``.
    """
    lines: list[Line] = []
    frags: list[Fragment] = []
    indent = 3  # XXX - Should this be a parameter passed in?
    count = 1
    r = Resources()
    newLine = False
    for entry in pseq:
        for pair in entry:
            if pair["objType"] == "bs":
                sense = pair["obj"]["sense"]
                (frag, newLines) = do_sense(
                    trycast(Sense, sense), indent=indent
                )
                frags.append(frag)
                lines += newLines
                newLine = True
            elif pair["objType"] == "sense":
                sn = Fragment()
                sn.addText(f"({count})", r.textFormat)
                sn.setIndent(indent)
                (frag, newLines) = do_sense(
                    trycast(Sense, pair["obj"]), indent=indent + 1
                )
                if newLine:
                    line = Line()
                    line.addFragment(sn)
                    line.addFragment(frag)
                    # This line was previously built and then discarded,
                    # so later numbered senses lost their text.
                    lines.append(line)
                else:
                    # Nothing rendered yet, so `frags` is still empty.
                    frags = [sn, frag]
                    newLine = True
                lines += newLines
                count += 1
            else:
                raise NotImplementedError(
                    f"Unknown object type {pair['objType']}"
                )
    return (frags, lines)


def do_sseq(sseq: list[list[list[Pair]]]) -> list[Line]:
    """Render a sense sequence into numbered (and lettered) lines.

    Each outer item starts with its 1-based number in bold.  When an outer
    item has more than one inner item, each inner item is also prefixed
    with a letter ("a", "b", ...) and its senses are indented one level
    deeper.  A line is completed each time a ``sense`` or ``pseq`` pair has
    been rendered.

    NOTE(review): the ``pseq`` branch passes a *list* of fragments to
    ``Line.addFragment`` — confirm that method accepts a list.

    Raises:
        NotImplementedError: For ``sen`` and ``bs`` pairs and unknown tags.
    """
    lines: list[Line] = []
    r = Resources()
    line = Line()
    for outer, item_o in enumerate(sseq):
        frag = Fragment()
        frag.setIndent(1)
        frag.addText(str(outer + 1), r.boldFormat)
        line.addFragment(frag)
        lettered = len(item_o) > 1
        for inner, item_i in enumerate(item_o):
            indent = 2
            if lettered:
                frag = Fragment()
                frag.addText(chr(ord("a") + inner), r.boldFormat)
                frag.setIndent(2)
                line.addFragment(frag)
                indent = 3
            for pair in item_i:
                objType = pair["objType"]
                if objType == "sense":
                    sense = trycast(Sense, pair["obj"])
                    (frag, newlines) = do_sense(sense, indent=indent)
                    line.addFragment(frag)
                    lines.append(line)
                    line = Line()
                    lines += newlines
                elif objType == "pseq":
                    (frags, newlines) = do_pseq(inner, outer, pair["obj"])
                    line.addFragment(frags)
                    lines.append(line)
                    line = Line()
                    lines += newlines
                elif objType == "sen":
                    raise NotImplementedError("sen unimplimented")
                elif objType == "bs":
                    # Not supported at this level yet (do_pseq handles
                    # "bs" inside a parenthesized sequence).
                    raise NotImplementedError("bs")
                else:
                    raise NotImplementedError(
                        f"Unknown object[{objType}] for \n{json.dumps(pair['obj'],indent=2)}"
                    )
    return lines


def do_ins(frag: Fragment, inflections: list[Inflection] | None) -> None:
    """Append the inflections to ``frag``.

    Each inflection's ``ifc`` (or, failing that, ``if``) text is written in
    bold with ``*`` replaced by a middle dot, followed by its
    pronunciations.  Consecutive inflections are separated by ``"; "``
    (bold), or by the previous inflection's ``il`` label padded with
    spaces (italic).

    Raises:
        ValueError: If an inflection has neither ``if`` nor ``ifc``.
        NotImplementedError: If an inflection carries ``spl``.
    """
    assert inflections is not None
    r = Resources()
    sep = ""
    for inflection in inflections:
        if sep:
            sepFormat = r.boldFormat if sep == "; " else r.italicFormat
            frag.addText(sep, sepFormat)

        for field in ("ifc", "if"):
            if field in inflection:
                text = inflection[field]
                break
        else:
            raise ValueError(f"Missing 'if' or 'ifc' in {inflection}")

        frag.addText(re.sub(r'\*', '\u00b7', text), r.boldFormat)

        # Separator to write before the *next* inflection.
        sep = " " + inflection["il"] + " " if "il" in inflection else "; "

        if "prs" in inflection:
            do_prs(frag, trycast(list[Pronunciation], inflection["prs"]))
        if "spl" in inflection:
            raise NotImplementedError(
                f"We haven't implimented 'spl' for inflection: {inflection}"
            )


def do_ets(ets: list[list[Pair]] | None) -> list[Line]:
    """Render etymology pairs, one line per pair.

    ``text`` pairs are shown as they are; ``et_snote`` pairs are shown with
    a ``"Note: "`` prefix.

    Raises:
        NotImplementedError: For any other tag.
    """
    assert ets is not None
    r = Resources()
    lines: list[Line] = []
    for et in ets:
        for pair in et:
            kind = pair["objType"]
            if kind == "text":
                body = pair['obj']
            elif kind == "et_snote":
                body = f"Note: {pair['obj']}"
            else:
                raise NotImplementedError(
                    f"Unknown key {pair['objType']} in et"
                )
            line = Line()
            frag = Fragment('', r.textFont)
            frag.addText(body, r.textFormat)
            line.addFragment(frag)
            lines.append(line)
    return lines


def do_def(entry: DefinitionSection) -> list[Line]:
    """Render one definition section.

    The optional ``vd`` text becomes an italic line of its own; the lines
    produced by do_sseq() for the mandatory ``sseq`` follow it.
    """
    assert entry is not None
    r = Resources()
    lines: list[Line] = []
    if "vd" in entry:
        divider = Fragment()
        divider.addText(entry["vd"], r.italicFormat)
        dividerLine = Line()
        dividerLine.addFragment(divider)
        lines.append(dividerLine)
    # "sseq" is a required key, so it is read without a presence check.
    lines.extend(do_sseq(entry["sseq"]))
    return lines

def do_vrs(vrs: list[Variant]|None) -> Line:
    """Render the variants of an entry on one line.

    The line starts with ``"variants: "``.  For each variant the optional
    ``vl`` and ``spl`` labels come first, then the ``va`` text in bold,
    then its pronunciations surrounded by spaces.

    Returns:
        A line holding one fragment with all variants, or an empty line
        when ``vrs`` is empty.
    """
    assert vrs is not None
    r = Resources()
    line = Line()
    frag = Fragment()
    frag.addText('variants: ', r.sOnSFormat)
    for var in vrs:
        if 'vl' in var:
            frag.addText(var['vl']+' ', r.italicFormat)
        if 'spl' in var:
            frag.addText(var['spl']+' ', r.sOnSFormat)
        frag.addText(var['va'], r.boldFormat)
        if 'prs' in var:
            frag.addText(' ')
            do_prs(frag, trycast(list[Pronunciation], var['prs']))
            frag.addText(' ')
    # Add the shared fragment once.  Adding it inside the loop put the same
    # fragment on the line once per variant.
    if vrs:
        line.addFragment(frag)
    return line

def do_dros(dros: list[DefinedRunOn]|None) -> list[Line]:
    """Render defined run-on phrases.

    Each run-on contributes a bold line with its ``drp`` phrase, then the
    lines of every section in ``def``, then the lines of ``et`` if present.

    Raises:
        NotImplementedError: If a run-on has any key other than ``drp``,
            ``def`` or ``et``.
    """
    assert dros is not None
    r = Resources()
    lines: list[Line] = []
    for dro in dros:
        phrase = Fragment()
        phrase.addText(dro["drp"], r.boldFormat)
        phraseLine = Line()
        phraseLine.addFragment(phrase)
        lines.append(phraseLine)

        for entry in dro['def']:
            lines += do_def(entry)

        for k, v in dro.items():
            if k in ('drp', 'def'):
                continue
            if k != 'et':
                raise NotImplementedError(f"Key of {k}")
            lines += do_ets(trycast(list[list[Pair]], v))
    return lines

def do_uros(uros: list[UndefinedRunOn]|None) -> list[Line]:
    """Render undefined run-on entries.

    Each run-on yields a line with its ``ure`` word (``*`` removed), its
    pronunciations and its ``fl`` label.  ``vis`` and ``uns`` pairs inside
    ``utxt`` add further lines; other tags in ``utxt`` are ignored.
    """
    assert uros is not None
    r = Resources()
    lines: list[Line] = []
    for uro in uros:
        head = Fragment()
        head.addText(re.sub(r'\*', '', uro['ure']), r.labelFormat)
        if 'prs' in uro:
            do_prs(head, uro['prs'])
        head.addText(' '+uro['fl'],r.textFormat) # r.linkFormat
        headLine = Line()
        headLine.addFragment(head)
        lines.append(headLine)

        for entry in uro.get('utxt', []):
            for pair in entry:
                kind = pair['objType']
                if kind == 'vis':
                    examples = trycast(list[VerbalIllustration], pair['obj'])
                    lines += do_vis(examples)
                elif kind == 'uns':
                    notes = trycast(list[list[list[Pair]]],pair['obj'])
                    (noteFrags, noteLines) = do_uns(notes, 0)
                    noteLine = Line()
                    noteLine.addFragment(noteFrags)
                    lines.append(noteLine)
                    lines += noteLines
    return lines
def getDef(defines: Any) -> list[Line]:
    """Render a decoded API response into display lines.

    Only entries whose ``meta.id`` (lower-cased, text before ``":"``)
    equals that of the first entry are rendered.  For each such entry the
    output is: a header line (headword, alternate headwords, "N of M" when
    several entries match, functional label), the syllabified headword with
    pronunciations, variants, inflections, the definition sections and the
    undefined run-ons.  Defined run-ons from all entries are collected
    under one "Phrases" heading and etymologies under one "Etymology"
    heading, both placed after the entries.

    Args:
        defines: Decoded JSON response; it is passed through restructure()
            first.

    Returns:
        The lines in display order.

    Raises:
        IndexError / TypeError / KeyError: If the response is empty or its
            first item is not an entry object with ``meta.id``.
        AssertionError: If a part of an entry fails ``trycast`` validation.
        NotImplementedError: If an entry carries a key this renderer does
            not handle, or a nested renderer meets an unsupported element.
    """
    Line.setParseText(parseText)
    workList = restructure(defines)
    r = Resources()
    lines: list[Line] = []

    #
    # Work out once which entries belong to the primary definition, and
    # how often each functional label occurs among them.
    #
    primaryId = workList[0]["meta"]["id"].lower().split(":")[0]
    entries = 0
    uses: dict[str, int] = {}
    for entry in workList:
        if entry["meta"]["id"].lower().split(":")[0] != primaryId:
            continue
        entries += 1
        if "fl" in entry:
            uses[entry["fl"]] = uses.get(entry["fl"], 0) + 1
    used: dict[str, int] = dict.fromkeys(uses, 0)

    handledKeys = {
        "meta",
        "hom",
        "hwi",
        "ahws",
        "fl",
        "def",
        "ins",
        "prs",
        "et",
        "date",
        "shortdef",
        "vrs",
        "dros",
        "uros",
    }

    ets: list[Line] = []
    phrases: list[Line] = []
    shown = 0
    for work in workList:
        #
        # Skip entries which are not part of the primary definition
        #
        if work["meta"]["id"].lower().split(":")[0] != primaryId:
            continue
        # Count only rendered entries so "N of M" never exceeds M.
        shown += 1

        #
        # Create the first line from the hwi, [ahws] and fl
        #
        line = Line()
        frag = Fragment()
        hwi = trycast(HeadWordInformation, work["hwi"])
        assert hwi is not None
        hw = re.sub(r"\*", "", hwi["hw"])
        frag.addText(hw, r.headerFormat)
        if "ahws" in work:
            ahws = trycast(list[AlternanteHeadword], work["ahws"])
            assert ahws is not None
            for ahw in ahws:
                hw = re.sub(r"\*", "", ahw["hw"])
                frag.addText(", " + hw)
        if entries > 1:
            frag.addText(f" {shown} of {entries} ", r.sOnSFormat)
        if "fl" in work:
            text = work["fl"]
            used[text] += 1
            if uses[text] > 1:
                text += f" ({used[text]})"
            frag.addText(text, r.labelFormat)
        line.addFragment(frag)
        lines.append(line)

        #
        # Next is the pronunciation.
        # While 'prs' is optional, the headword is not.  A syllabified
        # headword without 'prs' stays in `frag` and is emitted together
        # with the inflections, if there are any.
        #
        line = Line()
        frag = Fragment()
        if hwi["hw"].find("*") >= 0:
            hw = re.sub(r"\*", "\u00b7", hwi["hw"])
            frag.addText(hw + " ", r.subduedFormat)
        if "prs" in hwi:
            do_prs(frag, trycast(list[Pronunciation], hwi["prs"]))
            line.addFragment(frag)
            lines.append(line)
            line = Line()
            frag = Fragment()
        if 'vrs' in work:
            lines.append(do_vrs(trycast(list[Variant], work['vrs'])))
        if "ins" in work:
            inflections = trycast(list[Inflection], work["ins"])
            do_ins(frag, inflections)
            line.addFragment(frag)
            lines.append(line)
            line = Line()
            frag = Fragment()

        defines = trycast(list[DefinitionSection], work["def"])
        assert defines is not None
        for define in defines:
            lines += do_def(define)

        if "uros" in work:
            uros = trycast(list[UndefinedRunOn], work['uros'])
            lines += do_uros(uros)
        if "dros" in work:
            dros = trycast(list[DefinedRunOn], work["dros"])
            if len(phrases) < 1:
                frag = Fragment()
                frag.addText("Phrases", r.labelFormat)
                line = Line()
                line.addFragment(frag)
                phrases.append(line)
            phrases += do_dros(dros)
        if "et" in work:
            if "fl" in work:
                # Label the etymology with the entry it belongs to.  This
                # line used to be built and then discarded.
                line = Line()
                frag = Fragment('', r.textFont)
                frag.addText(
                    f"{work['fl']} ({used[work['fl']]})", r.labelFormat
                )
                line.addFragment(frag)
                ets.append(line)
            ets += do_ets(trycast(list[list[Pair]], work["et"]))

        # Fail loudly on keys nothing above knows how to render.
        for k in work.keys():
            if k not in handledKeys:
                raise NotImplementedError(f"Unknown key {k} in work")

    if len(phrases) > 0:
        lines += phrases
    if len(ets) > 0:
        line = Line()
        line.addFragment(Fragment("Etymology", r.labelFont, color=r.baseColor))
        lines.append(line)
        lines += ets
    return lines

def replaceCode(code:str) -> tuple[str, QTextCharFormat]:
    """Translate a self-contained ``{...}`` token into text and a format.

    ``code`` is the token body without braces.  ``bc`` becomes a bold
    ``": "`` and ``ldquo`` / ``rdquo`` become curly quotes.  Every other
    supported token is a link written as ``token|text|target|...``; it
    yields its text with an underlined anchor format in the link colour:

    * ``a_link``: ``auto://<text>``
    * ``d_link``, ``mat``, ``sx``, ``i_link``: ``word:///<target or text>``
      (``i_link`` is italic, ``sx`` is small caps)
    * ``et_link``: ``etymology:///<target or text>``
    * ``dxt``: ``article:///`` or ``table:///`` plus the target when the
      fourth field is ``illustration`` or ``table``; ``sense:///`` plus the
      fourth field when it is otherwise non-empty; else ``word:///<text>``

    Missing trailing fields are treated as empty strings.

    Raises:
        NotImplementedError: For any other token.
    """
    r = Resources()
    fmt = QTextCharFormat()
    if code == 'bc':
        fmt.setFontWeight(QFont.Weight.Bold)
        return (': ', fmt)
    elif code == 'ldquo':
        return ('\u201c', fmt)
    elif code == 'rdquo':
        return ('\u201d', fmt)

    fields = code.split('|')
    # Pad so optional trailing fields read as '' instead of raising.
    fields += [''] * (4 - len(fields))
    token, text, target, extra = fields[:4]

    if token == 'a_link':
        href = 'auto://' + text
    elif token in ('d_link', 'mat', 'sx', 'i_link'):
        href = 'word:///' + (target or text)
    elif token == 'et_link':
        href = 'etymology:///' + (target or text)
    elif token == 'dxt':
        if extra == 'illustration':
            href = 'article:///' + target
        elif extra == 'table':
            href = 'table:///' + target
        elif extra != '':
            href = 'sense:///' + extra
        else:
            href = 'word:///' + text
    else:
        raise NotImplementedError(f"Token {code} not implimented")

    fmt.setAnchor(True)
    fmt.setAnchorHref(href)
    fmt.setForeground(r.linkColor)
    fmt.setFontUnderline(True)
    fmt.setUnderlineColor(r.linkColor)
    if token == 'i_link':
        fmt.setFontItalic(True)
    elif token == 'sx':
        fmt.setFontCapitalization(QFont.Capitalization.SmallCaps)
    return (text, fmt)

def markup(offset: int, text:str) -> tuple[str, list[QTextLayout.FormatRange]]:
    close = text.find('}')
    code = text[1:close]
    text = text[close+1:-(close+2)]
    fmt = QTextCharFormat()
    if code == 'b':
        fmt.setFontWeight(QFont.Weight.Bold)
    elif code == 'inf':
        fmt.setVerticalAlignment(QTextCharFormat.VerticalAlignment.AlignSubScript)
    elif code == 'it':
        fmt.setFontItalic(True)
    elif code == 'sc':
        fmt.setFontCapitalization(QFont.Capitalization.SmallCaps)
    fr = QTextLayout.FormatRange()
    fr.start = offset
    fr.length = len(text)
    fr.format = fmt
    return (text, [fr,])

def parseText(frag: Fragment) -> QTextLayout:
    """Expand ``{...}`` markup in a fragment's text into formats.

    The text is scanned left to right.  Self-contained tokens (``bc``,
    ``ldquo``, ``rdquo`` and the link tokens) are replaced through
    replaceCode(); paired tags ``{x}...{/x}`` are stripped through
    markup().  Each replacement adds a format range to the layout's
    formats.  An opening brace with no closing brace ends the scan, and a
    tag with no matching closing tag is left in the text untouched.

    NOTE(review): format ranges that were already on the layout are not
    shifted when the text gets shorter, and markup nested inside a paired
    tag is not rescanned.

    Returns:
        The fragment's layout, with its formats and text updated.
    """
    layout = frag.layout()
    text = layout.text()
    formats = layout.formats()
    REPLACE_TEXT = (
        'bc', 'ldquo', 'rdquo', 'a_link', 'd_link', 'dxt', 'et_link',
        'i_link', 'mat', 'sx',
    )
    pos = 0
    while True:
        start = text.find('{', pos)
        if start < 0:
            break
        close = text.find('}', start + 1)
        if close < 0:
            # Unterminated token: leave the rest of the text alone.
            break
        code = text[start+1:close]
        if code == '':
            pos = close + 1
            continue

        if code.startswith(REPLACE_TEXT):
            (repl, tfmt) = replaceCode(code)
            text = text[:start] + repl + text[close+1:]
            fmt = QTextLayout.FormatRange()
            fmt.format = tfmt
            fmt.start = start
            fmt.length = len(repl)
            formats.append(fmt)
            pos = start + len(repl)
            continue

        needle = f'{{/{code}}}'
        needleAt = text.find(needle, start)
        if needleAt < 0:
            # No closing tag: skip the opening tag instead of slicing
            # with a bogus end index.
            pos = close + 1
            continue
        codeEnd = needleAt + len(needle)
        (repl, frs) = markup(start, text[start:codeEnd])
        formats += frs
        text = text[:start] + repl + text[codeEnd:]
        pos = start + len(repl)

    layout.setFormats(formats)
    layout.setText(text)
    return layout