# Listing metadata: 999 lines, 30 KiB, Python
import json
|
|
import re
|
|
from typing import Any, Literal, NotRequired, TypedDict, cast
|
|
|
|
from PyQt6.QtCore import QEventLoop, QUrl
|
|
from PyQt6.QtGui import QFont, QFontDatabase, QTextCharFormat, QTextLayout
|
|
from PyQt6.QtNetwork import QNetworkRequest
|
|
from trycast import trycast
|
|
|
|
from lib.definition import Fragment, Line
|
|
from lib.utils import Resources
|
|
|
|
# Describes this dictionary backend; "source" is the same tag that fetch()
# stamps on every result it returns.
registration = {
    "source": "mw",
    "name": "Merriam-Webster",
    "language": "en-us",
}

# Lookup URL template; fetch() fills in ``word`` and ``key``.
API = "https://www.dictionaryapi.com/api/v3/references/collegiate/json/{word}?key={key}"
# NOTE(review): this API key is committed in source. Consider loading it from
# configuration or the environment instead.
key = "51d9df34-ee13-489e-8656-478c215e846c"
|
|
|
|
|
|
class Meta(TypedDict):
|
|
id: str
|
|
uuid: str
|
|
sort: str
|
|
src: str
|
|
section: str
|
|
stems: list[str]
|
|
offensive: bool
|
|
|
|
|
|
class Sound(TypedDict):
|
|
audio: str
|
|
ref: str
|
|
stat: str
|
|
|
|
|
|
class Pronunciation(TypedDict):
    """One element of a ``prs`` list, rendered by do_prs()."""

    mw: str  # written pronunciation text
    l: NotRequired[str]  # label rendered before the pronunciation
    l2: NotRequired[str]  # label rendered after the pronunciation
    pun: NotRequired[str]  # separator used next to the labels (default " ")
    sound: NotRequired[Sound]  # when present the pronunciation becomes a link
|
|
|
|
|
|
class SubSource(TypedDict):
|
|
source: NotRequired[str]
|
|
aqdate: NotRequired[str]
|
|
|
|
|
|
class AttributionOfQuote(TypedDict):
    """The ``aq`` object of a verbal illustration, rendered by do_aq()."""

    auth: NotRequired[str]
    source: NotRequired[str]
    aqdate: NotRequired[str]
    subsource: NotRequired[SubSource]
|
|
|
|
|
|
class VerbalIllustration(TypedDict):
    """One element of a ``vis`` list, rendered by do_vis()."""

    t: str  # the illustration text
    aq: NotRequired[AttributionOfQuote]  # optional attribution line
|
|
|
|
|
|
class HeadWordInformation(TypedDict):
    """The ``hwi`` object of an entry."""

    # Headword; '*' marks break points that getDef() strips or shows as a dot.
    hw: str
    prs: NotRequired[list[Pronunciation]]
|
|
|
|
|
|
class AlternanteHeadword(TypedDict):
|
|
hw: str
|
|
psl: NotRequired[str]
|
|
|
|
|
|
class Variant(TypedDict):
    """One element of a ``vrs`` list, rendered by do_vrs()."""

    va: str  # the variant spelling
    vl: NotRequired[str]  # label rendered before the variant
    prs: NotRequired[list[Pronunciation]]
    spl: NotRequired[str]
|
|
|
|
|
|
# One element of an ``ins`` list, rendered by do_ins().  The functional
# TypedDict form is required because "if" is a Python keyword.
Inflection = TypedDict(
    "Inflection",
    {
        "if": NotRequired[str],  # the inflected form
        "ifc": NotRequired[str],  # alternative form; do_ins() prefers it over "if"
        "il": NotRequired[str],  # label text
        "prs": NotRequired[list[Pronunciation]],
        "spl": NotRequired[str],  # not rendered yet; do_ins() raises on it
    },
)
|
|
|
|
|
|
class CrossReferenceTarget(TypedDict):
|
|
cxl: str
|
|
cxr: NotRequired[str]
|
|
cxt: str
|
|
cxn: NotRequired[str]
|
|
|
|
|
|
class CognateCrossRef(TypedDict):
    """One element of an entry's ``cxs`` list."""

    cxl: str
    cxtis: list[CrossReferenceTarget]
|
|
|
|
|
|
class Pair(TypedDict):
|
|
objType: str
|
|
obj: Any
|
|
|
|
|
|
class DividedSense(TypedDict):
    """The ``sdsense`` object nested inside a sense (see do_sense())."""

    sd: str  # divider text rendered in italics before the definition
    dt: list[list[Pair]]  # defining text, rendered by do_dt()
    et: NotRequired[list[Pair]]
    ins: NotRequired[list[Inflection]]
    lbs: NotRequired[list[str]]
    prs: NotRequired[list[Pronunciation]]
    sgram: NotRequired[str]
    sls: NotRequired[list[str]]
    vrs: NotRequired[list[Variant]]
|
|
|
|
|
|
class Sense(TypedDict):
    """The payload of a ``sense`` pair, rendered by do_sense()."""

    dt: list[list[Pair]]  # defining text, rendered by do_dt()
    et: NotRequired[list[Pair]]
    ins: NotRequired[list[Inflection]]
    lbs: NotRequired[list[str]]
    prs: NotRequired[list[Pronunciation]]
    sdsense: NotRequired[DividedSense]
    sgram: NotRequired[str]
    sls: NotRequired[list[str]]  # labels joined with ", " ahead of the text
    sn: NotRequired[str]  # sense number; do_sense() skips it
    vrs: NotRequired[list[Variant]]
|
|
|
|
|
|
class TruncatedSense(Sense):
    """A sense with the same keys as Sense; declared as a distinct type."""
|
|
|
|
|
|
class BindingSubstitutePair(TypedDict):
    """A Pair whose ``objType`` is ``"bs"``."""

    objType: Literal["bs"]
    # NOTE(review): do_pseq() reads pair["obj"]["sense"] for "bs" pairs, so the
    # payload looks like a wrapper around a Sense rather than a Sense - confirm.
    obj: Sense
|
|
|
|
|
|
class SensePair(TypedDict):
    """A Pair whose ``objType`` is ``"sense"`` and whose payload is a Sense."""

    objType: Literal["sense"]
    obj: Sense
|
|
|
|
|
|
class DefinitionSection(TypedDict):
|
|
vd: NotRequired[str]
|
|
sls: NotRequired[list[str]]
|
|
sseq: Any # list[list[Pair]]
|
|
|
|
class UndefinedRunOn(TypedDict):
    """One element of an entry's ``uros`` list, rendered by do_uros()."""

    ure: str  # the run-on word; '*' break marks are stripped for display
    fl: str  # functional label rendered after the word
    utxt: NotRequired[list[list[Pair]]]  # "vis" / "uns" pairs
    ins: NotRequired[list[Inflection]]
    lbs: NotRequired[list[str]]
    prs: NotRequired[list[Pronunciation]]
    sls: NotRequired[list[str]]
    vrs: NotRequired[list[Variant]]
|
|
|
|
|
|
# One element of an entry's ``dros`` list, rendered by do_dros().  The
# functional TypedDict form is required because "def" is a Python keyword.
DefinedRunOn = TypedDict(
    "DefinedRunOn",
    {
        "drp": str,  # the phrase, rendered in bold
        "def": list[DefinitionSection],
        "et": NotRequired[list[Pair]],
        "lbs": NotRequired[list[str]],
        "prs": NotRequired[list[Pronunciation]],
        "sls": NotRequired[list[str]],
        "vrs": NotRequired[list[Variant]],
    },
)
|
|
|
|
# A complete dictionary entry as consumed by getDef().  The functional
# TypedDict form is required because "def" is a Python keyword.
Definition = TypedDict(
    "Definition",
    {
        "ahws": NotRequired[list[AlternanteHeadword]],
        "cxs": NotRequired[list[CognateCrossRef]],
        "date": NotRequired[str],
        "def": list[DefinitionSection],
        "dros": NotRequired[Any],
        "et": NotRequired[list[Pair]],
        "fl": str,
        "hom": NotRequired[int],
        "hwi": HeadWordInformation,
        "ins": NotRequired[list[Inflection]],
        "lbs": NotRequired[list[str]],
        "meta": Meta,
        "shortdef": NotRequired[list[str]],
        "sls": NotRequired[list[str]],
        "syns": NotRequired[Any],
        "uros": NotRequired[Any],
        "vrs": NotRequired[list[Variant]],
    },
)
|
|
|
|
|
|
def make_pairs(src: list[Any]) -> list[Pair]:
    """Group a flat ``[type, payload, type, payload, ...]`` list into Pairs.

    Items are consumed two at a time; a trailing unpaired item is ignored.
    List payloads are accepted without validation, everything else is checked
    against the ``Pair`` shape with ``trycast``.

    Raises:
        ValueError: if a non-list payload does not form a valid ``Pair``.
    """
    result: list[Pair] = []
    # Passing the *same* iterator twice makes zip() take two items per step.
    walker = iter(src)
    for obj_type, obj in zip(walker, walker):
        candidate = {"objType": obj_type, "obj": obj}
        if isinstance(obj, list):
            result.append(cast(Pair, candidate))
            continue
        pair = trycast(Pair, candidate)
        if pair is None:
            # Raise instead of assert: asserts vanish under ``python -O`` and a
            # None would then be appended to the result.
            raise ValueError(
                f"Cannot build a Pair from objType={obj_type!r} "
                f"with payload of type {type(obj).__name__}"
            )
        result.append(pair)
    return result
|
|
|
|
|
|
# Element names that restructure() recognises: a list whose first item is one
# of these strings is converted into Pairs by make_pairs().
Elements = [
    "dt",
    "sen",
    "bs",
    "pseq",
    "snot",
    "t",
    "text",
    "vis",
    "sens",
    "uns",
    "sense",
]
|
|
|
|
|
|
def restructure(obj: Any) -> Any:
    """Return a copy of decoded JSON with ``[type, payload]`` lists as Pairs.

    Dicts and lists are rebuilt recursively; other values are returned as-is.
    A non-empty list whose first item is a string found in ``Elements`` is
    handed to make_pairs(); each resulting payload is then restructured one
    level down (list payloads item by item, dict payloads value by value).
    """
    if isinstance(obj, dict):
        return {k: restructure(v) for k, v in obj.items()}
    if not isinstance(obj, list):
        return obj
    if not obj:
        return []
    if isinstance(obj[0], str) and obj[0] in Elements:
        pairs = make_pairs(obj)
        for pair in pairs:
            payload = pair["obj"]
            if isinstance(payload, list):
                # Item by item on purpose: the payload list itself is never
                # re-tested against Elements, only its members are.
                pair["obj"] = [restructure(item) for item in payload]
            elif isinstance(payload, dict):
                pair["obj"] = {k: restructure(v) for k, v in payload.items()}
        return pairs
    return [restructure(v) for v in obj]
|
|
|
|
|
|
class WordType(TypedDict):
|
|
word: str
|
|
source: str
|
|
definition: Any
|
|
|
|
|
|
def fetch(word: str) -> WordType:
    """Look up *word* through the web API and return the decoded response.

    The call blocks in a local event loop until the network reply finishes
    (the request has a 3 second transfer timeout).

    Raises:
        json.JSONDecodeError: if the response body is not valid JSON, which
            includes an empty body after a failed request.
    """
    from urllib.parse import quote

    # Percent-encode the word so spaces, '/', '?' and '#' cannot change the
    # path or query of the request URL.
    url = QUrl(API.format(word=quote(word, safe=""), key=key))
    request = QNetworkRequest()
    request.setUrl(url)
    request.setTransferTimeout(3000)
    reply = Resources.nam.get(request)
    assert reply is not None
    loop = QEventLoop()
    reply.finished.connect(loop.quit)
    # Entering the loop after the reply has already finished would never return.
    if not reply.isFinished():
        loop.exec()
    try:
        content = reply.readAll()
        payload = content.data().decode("utf-8")
    finally:
        # The caller of QNetworkAccessManager.get() owns the reply object.
        reply.deleteLater()
    data = json.loads(payload)
    return {
        "word": word,
        "source": "mw",
        "definition": data,
    }
|
|
|
|
|
|
def soundUrl(sound: Sound, fmt="ogg") -> QUrl:
    """Build the ``audio://`` URL for a pronunciation's ``sound`` object.

    The sub-directory is "bix" or "gg" when the file name starts with that
    prefix, otherwise the first character when it is an ASCII letter, and
    "number" for anything else.
    """
    audio = sound["audio"]
    prefix = re.match(r"(bix|gg|[a-zA-Z])", audio)
    subdir = prefix.group(1) if prefix else "number"
    base = f"audio://media.merriam-webster.com/audio/prons/en/us/{fmt}"
    return QUrl(f"{base}/{subdir}/{audio}.{fmt}")
|
|
|
|
|
|
def getFirstSound(definition: Any) -> QUrl:
    """Return the URL of the first playable pronunciation in *definition*.

    Only objects stored directly under an entry (for example ``hwi``) are
    inspected.  An empty ``QUrl`` is returned when nothing is found.
    """
    # Other places that may carry "prs" and are not searched here:
    # ahws, cats, dros, ins, ri, sdsense, sen, sense, uros, vrs
    for entry in definition:
        # Skip anything that is not an entry object (e.g. plain strings).
        if not isinstance(entry, dict):
            continue
        for value in entry.values():
            # Only dicts can hold "prs"; ``"prs" in 1`` would raise TypeError
            # and on a string it would be a substring test.
            if not isinstance(value, dict) or "prs" not in value:
                continue
            for pr in value["prs"]:
                if "sound" in pr:
                    url = soundUrl(pr["sound"])
                    if url.isValid():
                        return url
    return QUrl()
|
|
|
|
|
|
def do_prs(frag: Fragment, prs: list[Pronunciation] | None) -> None:
    """Append the pronunciations in *prs* to *frag*.

    For each pronunciation: the optional ``l`` label, the ``mw`` text (as a
    link to the recording when ``sound`` is present), then the optional ``l2``
    label.  ``pun`` (default one space) separates the labels from the text.
    """
    assert prs is not None
    r = Resources()

    for pr in prs:
        pun = pr.get("pun", " ")
        if "l" in pr:
            frag.addText(pr["l"] + pun, r.subduedItalicFormat)
        fmt = r.phonticFormat
        if "sound" in pr:
            # Copy the shared format before turning it into a link.
            fmt = QTextCharFormat(r.phonticFormat)
            fmt.setAnchor(True)
            fmt.setAnchorHref(soundUrl(pr["sound"]).toString())
            fmt.setForeground(r.linkColor)
        frag.addText(pr["mw"] + " ", fmt)
        if "l2" in pr:
            frag.addText(pun + pr["l2"], r.subduedLabelFormat)
|
|
|
|
|
|
def do_aq(aq: AttributionOfQuote | None) -> Line:
    """Render a quote attribution as a single subdued line.

    The parts present (author, source, date, then the sub-source's source and
    date) are joined with ", ", so a missing part never leaves a dangling or
    doubled comma.
    """
    assert aq is not None
    r = Resources()
    parts = [aq[name] for name in ("auth", "source", "aqdate") if name in aq]
    if "subsource" in aq:
        ss = trycast(SubSource, aq["subsource"])
        assert ss is not None
        parts += [ss[name] for name in ("source", "aqdate") if name in ss]
    frag = Fragment()
    if parts:
        frag.addText(", ".join(parts), r.subduedFormat)
    line = Line()
    line.addFragment(frag)
    return line
|
|
|
|
|
|
def do_vis(vis: list[VerbalIllustration] | None, indent=0) -> list[Line]:
    """Render verbal illustrations, one line each.

    An illustration with an ``aq`` attribution gets an extra line from do_aq()
    directly after it.  *indent* is applied only when greater than zero.
    """
    assert vis is not None
    r = Resources()
    lines: list[Line] = []
    for vi in vis:
        frag = Fragment()
        frag.addText(vi["t"], r.subduedFormat)
        if indent > 0:
            frag.setIndent(indent)
        line = Line()
        line.addFragment(frag)
        lines.append(line)
        if "aq" in vi:
            lines.append(do_aq(trycast(AttributionOfQuote, vi["aq"])))
    return lines
|
|
|
|
|
|
def do_uns(
    uns: list[list[list[Pair]]] | None, indent: int
) -> tuple[list[Fragment], list[Line]]:
    """Render usage notes.

    Returns ``(frags, lines)``: one arrow-prefixed fragment per "text" pair
    and the lines produced by do_vis() for every "vis" pair.  Other pair types
    are ignored, except "ri".

    Raises:
        NotImplementedError: for "ri" pairs.
    """
    assert uns is not None
    r = Resources()
    frags: list[Fragment] = []
    lines: list[Line] = []
    for note in uns:
        for entry in note:
            for pair in entry:
                kind = pair["objType"]
                if kind == "text":
                    frag = Fragment(
                        "\u2192 " + pair["obj"], r.textFont, color=r.baseColor
                    )
                    frag.setIndent(indent)
                    frags.append(frag)
                elif kind == "vis":
                    lines += do_vis(
                        trycast(list[VerbalIllustration], pair["obj"]), indent
                    )
                elif kind == "ri":
                    raise NotImplementedError("NO ri")
    return (frags, lines)
|
|
|
|
|
|
def do_dt(frag, dt: list[list[Pair]] | None, indent: int) -> list[Line]:
    """Render defining text.

    If the very first pair is "text" it is appended to the caller's *frag*;
    every later "text" pair gets a line of its own.  "vis" pairs are rendered
    by do_vis().  Returns the additional lines in order.

    Raises:
        NotImplementedError: for "uns" and for any unknown pair type.
    """
    assert dt is not None
    lines: list[Line] = []
    r = Resources()
    first = True
    for entry in dt:
        for pair in entry:
            kind = pair["objType"]
            if kind == "text":
                if first:
                    frag.setIndent(indent)
                    frag.addText(pair["obj"], r.textFormat)
                else:
                    extra = Fragment()
                    extra.setIndent(indent)
                    extra.addText(pair["obj"], r.textFormat)
                    line = Line()
                    # Add the fragment just built, not the caller's ``frag``.
                    line.addFragment(extra)
                    lines.append(line)
            elif kind == "vis":
                lines += do_vis(
                    trycast(list[VerbalIllustration], pair["obj"]), indent
                )
            elif kind == "uns":
                # Usage notes inside defining text are not rendered yet.
                raise NotImplementedError("uns")
            else:
                print(json.dumps(pair, indent=2))
                raise NotImplementedError(
                    f"Unknown or unimplimented element {pair['objType']}"
                )
            first = False
    return lines
|
|
|
|
|
|
def do_sense(
    sense: Sense | None, indent: int = 3
) -> tuple[Fragment, list[Line]]:
    """Render one sense.

    Returns ``(firstFrag, lines)``: the fragment holding the start of the
    sense (for the caller to place on the current line) and the lines that
    follow it.  ``sls`` labels are added to the fragment being built; the
    first of ``dt`` / ``sdsense`` becomes ``firstFrag`` and a later one gets
    a line of its own.

    Raises:
        NotImplementedError: for keys other than sn, dt, sdsense, sls, lbs.
    """
    assert sense is not None
    lines: list[Line] = []
    r = Resources()
    firstFrag: Fragment | None = None
    frag = Fragment()
    for k, v in sense.items():
        if k in ("sn", "lbs"):
            continue
        if k == "sls":
            labels = trycast(list[str], v)
            assert labels is not None
            frag.addText(", ".join(labels) + " ", r.boldOnSFormat)
            continue
        if k == "dt":
            textFrag = frag
            dt = sense["dt"]
        elif k == "sdsense":
            # XXX - This needs to expand to handle et, ins, lbs, prs, sgram, sls, vrs
            sdsense = trycast(DividedSense, v)
            assert sdsense is not None
            textFrag = Fragment()
            textFrag.setIndent(indent)
            textFrag.addText(sdsense["sd"] + " ", r.italicFormat)
            dt = sdsense["dt"]
        else:
            print(k, v)
            raise NotImplementedError(f"Unknown or unimplimented element {k}")

        newLines = do_dt(textFrag, trycast(list[list[Pair]], dt), indent)
        if firstFrag is None:
            firstFrag = textFrag
        else:
            # A later block must not replace the first fragment; it goes on
            # its own line instead.
            line = Line()
            line.addFragment(textFrag)
            lines.append(line)
        frag = Fragment()
        lines += newLines
    if firstFrag is None:
        # No dt/sdsense seen: hand back whatever was collected.
        firstFrag = frag
    return (firstFrag, lines)
|
|
|
|
|
|
def do_pseq(
    inner: int, outer: int, pseq: list[Any]
) -> tuple[list[Fragment], list[Line]]:
    """Render a parenthesized sense sequence.

    Returns ``(frags, lines)``: the fragments for the caller's current line
    and the lines that follow.  "sense" pairs are numbered "(1)", "(2)", ...;
    the first item shares the caller's line, later ones get their own line.
    *inner* and *outer* are accepted but not used.

    Raises:
        NotImplementedError: for pair types other than "bs" and "sense".
    """
    lines: list[Line] = []
    frags: list[Fragment] = []
    indent = 3  # XXX - Should this be a parameter passed in?
    count = 1
    r = Resources()
    newLine = False
    for entry in pseq:
        for pair in entry:
            kind = pair["objType"]
            if kind == "bs":
                sense = pair["obj"]["sense"]
                (frag, newLines) = do_sense(trycast(Sense, sense), indent=indent)
                frags.append(frag)
                lines += newLines
                newLine = True
            elif kind == "sense":
                sn = Fragment()
                sn.addText(f"({count})", r.textFormat)
                sn.setIndent(indent)
                (frag, newLines) = do_sense(
                    trycast(Sense, pair["obj"]), indent=indent + 1
                )
                if newLine:
                    line = Line()
                    line.addFragment(sn)
                    line.addFragment(frag)
                    # Without this append the numbered sense would be dropped.
                    lines.append(line)
                else:
                    frags = [sn, frag]
                    newLine = True
                lines += newLines
                count += 1
            else:
                raise NotImplementedError(
                    f"Unknown object type {pair['objType']}"
                )
    return (frags, lines)
|
|
|
|
|
|
def do_sseq(sseq: list[list[list[Pair]]]) -> list[Line]:
    """Render a sense sequence as numbered (and, if needed, lettered) lines.

    Each outer item gets a bold number; when it holds more than one inner
    item, each inner item also gets a bold letter starting at "a".

    Raises:
        NotImplementedError: for "sen", "bs" and unknown pair types.
    """
    lines: list[Line] = []
    r = Resources()
    line = Line()
    for outer, item_o in enumerate(sseq):
        frag = Fragment()
        frag.setIndent(1)
        frag.addText(str(outer + 1), r.boldFormat)
        line.addFragment(frag)
        lettered = len(item_o) > 1
        indent = 3 if lettered else 2
        for inner, item_i in enumerate(item_o):
            if lettered:
                frag = Fragment()
                frag.addText(chr(ord("a") + inner), r.boldFormat)
                frag.setIndent(2)
                line.addFragment(frag)
            for pair in item_i:
                objType = pair["objType"]
                if objType == "sense":
                    sense = trycast(Sense, pair["obj"])
                    (frag, newlines) = do_sense(sense, indent=indent)
                    line.addFragment(frag)
                    lines.append(line)
                    line = Line()
                    lines += newlines
                elif objType == "sen":
                    raise NotImplementedError("sen unimplimented")
                elif objType == "pseq":
                    (frags, newlines) = do_pseq(inner, outer, pair["obj"])
                    line.addFragment(frags)
                    lines.append(line)
                    line = Line()
                    lines += newlines
                elif objType == "bs":
                    # Not rendered at this level yet (do_pseq() handles "bs"
                    # inside a parenthesized sequence).
                    raise NotImplementedError("bs")
                else:
                    raise NotImplementedError(
                        f"Unknown object[{objType}] for \n{json.dumps(pair['obj'],indent=2)}"
                    )
    return lines
|
|
|
|
|
|
def do_ins(frag: Fragment, inflections: list[Inflection] | None) -> None:
    """Append the inflections to *frag*.

    Each inflection is rendered in bold with '*' shown as a middle dot.  An
    inflection's ``il`` label (e.g. "or", "also", "plural") is rendered in
    italics directly before that inflection; inflections without a label are
    separated from the previous one by "; ".

    Raises:
        ValueError: if an inflection has neither "if" nor "ifc".
        NotImplementedError: if an inflection carries "spl".
    """
    assert inflections is not None
    r = Resources()
    for index, inflection in enumerate(inflections):
        if "ifc" in inflection:
            text = inflection["ifc"]
        elif "if" in inflection:
            text = inflection["if"]
        else:
            raise ValueError(f"Missing 'if' or 'ifc' in {inflection}")

        if "il" in inflection:
            # The label belongs to *this* inflection, so it is written before
            # it; it also takes the place of the "; " separator.
            lead = " " if index else ""
            frag.addText(f"{lead}{inflection['il']} ", r.italicFormat)
        elif index:
            frag.addText("; ", r.boldFormat)

        frag.addText(text.replace("*", "\u00b7"), r.boldFormat)
        if "prs" in inflection:
            do_prs(frag, trycast(list[Pronunciation], inflection["prs"]))
        if "spl" in inflection:
            raise NotImplementedError(
                f"We haven't implimented 'spl' for inflection: {inflection}"
            )
|
|
|
|
|
|
def do_ets(ets: list[list[Pair]] | None) -> list[Line]:
    """Render etymology pairs, one line per pair.

    "text" pairs are shown as they are; "et_snote" pairs are prefixed with
    "Note: ".

    Raises:
        NotImplementedError: for any other pair type.
    """
    assert ets is not None
    r = Resources()
    lines: list[Line] = []
    for et in ets:
        for pair in et:
            kind = pair["objType"]
            if kind == "text":
                text = pair["obj"]
            elif kind == "et_snote":
                text = f"Note: {pair['obj']}"
            else:
                raise NotImplementedError(
                    f"Unknown key {pair['objType']} in et"
                )
            frag = Fragment('', r.textFont)
            frag.addText(text, r.textFormat)
            line = Line()
            line.addFragment(frag)
            lines.append(line)
    return lines
|
|
|
|
|
|
def do_def(entry: DefinitionSection) -> list[Line]:
    """Render one definition section.

    An optional ``vd`` divider is shown in italics on its own line, followed
    by the lines do_sseq() produces for the section's sense sequence.
    """
    assert entry is not None
    r = Resources()
    lines: list[Line] = []
    if "vd" in entry:
        frag = Fragment()
        frag.addText(entry["vd"], r.italicFormat)
        line = Line()
        line.addFragment(frag)
        lines.append(line)
    # sseq is required
    lines += do_sseq(entry["sseq"])
    return lines
|
|
|
|
def do_vrs(vrs: list[Variant]|None) -> Line:
    """Render all variants on one line that starts with "variants: ".

    Per variant: optional ``vl`` label (italic), optional ``spl``, the variant
    itself in bold, optional pronunciations, then a trailing space.
    """
    assert vrs is not None
    r = Resources()
    frag = Fragment()
    frag.addText('variants: ', r.sOnSFormat)
    for var in vrs:
        if 'vl' in var:
            frag.addText(var['vl']+' ', r.italicFormat)
        if 'spl' in var:
            frag.addText(var['spl']+' ', r.sOnSFormat)
        frag.addText(var['va'], r.boldFormat)
        if 'prs' in var:
            frag.addText(' ')
            do_prs(frag, trycast(list[Pronunciation], var['prs']))
        frag.addText(' ')
    line = Line()
    line.addFragment(frag)
    return line
|
|
|
|
def do_dros(dros: list[DefinedRunOn]|None) -> list[Line]:
    """Render defined run-on phrases.

    Each phrase is shown in bold on its own line, followed by its definition
    sections and then, if present, its etymology.

    Raises:
        NotImplementedError: for keys other than "drp", "def" and "et".
    """
    assert dros is not None
    r = Resources()
    lines: list[Line] = []
    for dro in dros:
        frag = Fragment()
        frag.addText(dro["drp"], r.boldFormat)
        line = Line()
        line.addFragment(frag)
        lines.append(line)
        for entry in dro['def']:
            lines += do_def(entry)
        for k, v in dro.items():
            if k in ('drp', 'def'):
                continue
            if k == 'et':
                lines += do_ets(trycast(list[list[Pair]], v))
            else:
                raise NotImplementedError(f"Key of {k}")
    return lines
|
|
|
|
def do_uros(uros: list[UndefinedRunOn]|None) -> list[Line]:
    """Render undefined run-on entries.

    Each run-on gets a line with the word ('*' removed), its pronunciations if
    any, and its functional label.  "vis" and "uns" pairs in ``utxt`` add
    further lines; other pair types are ignored.
    """
    assert uros is not None
    r = Resources()
    lines: list[Line] = []
    for uro in uros:
        frag = Fragment()
        frag.addText(uro['ure'].replace('*', ''), r.labelFormat)
        if 'prs' in uro:
            do_prs(frag, uro['prs'])
        frag.addText(' '+uro['fl'], r.textFormat)  # r.linkFormat
        line = Line()
        line.addFragment(frag)
        lines.append(line)
        for entry in uro.get('utxt', []):
            for pair in entry:
                if pair['objType'] == 'vis':
                    lines += do_vis(trycast(list[VerbalIllustration], pair['obj']))
                elif pair['objType'] == 'uns':
                    (newFrags, newLines) = do_uns(
                        trycast(list[list[list[Pair]]], pair['obj']), 0
                    )
                    line = Line()
                    line.addFragment(newFrags)
                    lines.append(line)
                    lines += newLines
    return lines
|
|
def _entryId(entry: Any) -> str:
    """Return the part of an entry's ``meta.id`` before ':', lower-cased."""
    return entry["meta"]["id"].lower().split(":")[0]


def getDef(defines: Any) -> list[Line]:
    """Convert a decoded API response into display lines.

    Only entries whose id matches the first entry's id are rendered.  For each
    one: headword line, pronunciation line, variants, inflections, definition
    sections and undefined run-ons.  Defined run-ons are collected under a
    "Phrases" heading and etymologies under an "Etymology" heading, both
    placed after all entries.

    Returns an empty list when the response holds no entry objects.

    Raises:
        NotImplementedError: when an entry contains a key, or a nested
            element, that has no renderer yet.
    """
    Line.setParseText(parseText)
    workList = restructure(defines)
    # workList = trycast(list[Definition], workList)
    # assert workList is not None
    r = Resources()
    lines: list[Line] = []

    # Nothing to render when the list is empty or holds no entry objects.
    if not workList or not isinstance(workList[0], dict):
        return lines

    # Work out once which entries belong to the primary definition and how
    # often each functional label occurs among them.
    primaryId = _entryId(workList[0])
    primary = [
        entry
        for entry in workList
        if isinstance(entry, dict) and _entryId(entry) == primaryId
    ]
    entries = len(primary)
    uses: dict[str, int] = {}
    for entry in primary:
        if "fl" in entry:
            uses[entry["fl"]] = uses.get(entry["fl"], 0) + 1
    used: dict[str, int] = dict.fromkeys(uses, 0)

    # Keys that are rendered or deliberately ignored; anything else is
    # reported as not implemented.
    handled = {
        "meta",
        "hom",
        "hwi",
        "ahws",
        "fl",
        "def",
        "ins",
        "prs",
        "et",
        "date",
        "shortdef",
        "vrs",
        "dros",
        "uros",
    }

    ets: list[Line] = []
    phrases: list[Line] = []
    # Number only the rendered entries so "n of m" stays within 1..m.
    for position, work in enumerate(primary, start=1):
        #
        # Create the first line from the hwi, [ahws] and fl
        #
        line = Line()
        frag = Fragment()
        hwi = trycast(HeadWordInformation, work["hwi"])
        assert hwi is not None
        hw = re.sub(r"\*", "", hwi["hw"])
        frag.addText(hw, r.headerFormat)
        if "ahws" in work:
            ahws = trycast(list[AlternanteHeadword], work["ahws"])
            assert ahws is not None
            for ahw in ahws:
                hw = re.sub(r"\*", "", ahw["hw"])
                frag.addText(", " + hw)
        if entries > 1:
            frag.addText(f" {position} of {entries} ", r.sOnSFormat)
        if "fl" in work:
            text = work["fl"]
            used[text] += 1
            if uses[text] > 1:
                text += f" ({used[text]})"
            frag.addText(text, r.labelFormat)
        line.addFragment(frag)
        lines.append(line)

        #
        # Next is the pronunciation.
        # While 'prs' is optional, the headword is not. This gets us what we want.
        #
        line = Line()
        frag = Fragment()
        if hwi["hw"].find("*") >= 0:
            hw = re.sub(r"\*", "\u00b7", hwi["hw"])
            frag.addText(hw + " ", r.subduedFormat)
        if "prs" in hwi:
            do_prs(frag, trycast(list[Pronunciation], hwi["prs"]))
        line.addFragment(frag)
        lines.append(line)

        if "vrs" in work:
            lines.append(do_vrs(trycast(list[Variant], work["vrs"])))
        if "ins" in work:
            line = Line()
            frag = Fragment()
            inflections = trycast(list[Inflection], work["ins"])
            do_ins(frag, inflections)
            line.addFragment(frag)
            lines.append(line)

        sections = trycast(list[DefinitionSection], work["def"])
        assert sections is not None
        for section in sections:
            lines += do_def(section)

        if "uros" in work:
            uros = trycast(list[UndefinedRunOn], work["uros"])
            lines += do_uros(uros)

        if "dros" in work:
            dros = trycast(list[DefinedRunOn], work["dros"])
            if len(phrases) < 1:
                frag = Fragment()
                frag.addText("Phrases", r.labelFormat)
                line = Line()
                line.addFragment(frag)
                phrases.append(line)
            phrases += do_dros(dros)

        if "et" in work:
            if "fl" in work:
                # Head the etymology with its functional label; this line
                # has to be added to ``ets`` to show up in the output.
                line = Line()
                frag = Fragment('', r.textFont)
                frag.addText(f"{work['fl']} ({used[work['fl']]})", r.labelFormat)
                line.addFragment(frag)
                ets.append(line)
            ets += do_ets(trycast(list[list[Pair]], work["et"]))

        for k in work:
            if k not in handled:
                raise NotImplementedError(f"Unknown key {k} in work")

    if len(phrases) > 0:
        lines += phrases
    if len(ets) > 0:
        line = Line()
        line.addFragment(Fragment("Etymology", r.labelFont, color=r.baseColor))
        lines.append(line)
        lines += ets
    return lines
|
|
|
|
def replaceCode(code:str) -> tuple[str, QTextCharFormat]:
    """Translate a self-contained ``{...}`` token into text plus a format.

    *code* is the token without its braces.  "bc", "ldquo" and "rdquo" become
    fixed text; "a_link", "d_link", "et_link", "mat", "sx", "i_link" and "dxt"
    become an underlined anchor whose text is the token's first field.
    Missing fields are treated as empty strings.

    Raises:
        NotImplementedError: for any other token.
    """
    r = Resources()
    fmt = QTextCharFormat()
    if code == 'bc':
        fmt.setFontWeight(QFont.Weight.Bold)
        return (': ', fmt)
    if code == 'ldquo':
        return ('\u201c', fmt)
    if code == 'rdquo':
        return ('\u201d', fmt)

    fmt.setAnchor(True)
    fmt.setForeground(r.linkColor)
    fmt.setFontUnderline(True)
    fmt.setUnderlineColor(r.linkColor)

    # Pad so that tokens with fewer fields cannot raise IndexError.
    fields = code.split('|')
    fields += [''] * (4 - len(fields))
    token, text, target, extra = fields[:4]

    if token == 'a_link':
        fmt.setAnchorHref('auto://' + text)
    elif token in ('d_link', 'et_link', 'mat', 'sx', 'i_link'):
        # NOTE(review): "et_link" has always been handled here as a plain
        # word link; a later, unreachable branch used 'etymology:///'.
        fmt.setAnchorHref('word:///' + (target if target != '' else text))
        if token == 'i_link':
            fmt.setFontItalic(True)
        elif token == 'sx':
            fmt.setFontCapitalization(QFont.Capitalization.SmallCaps)
    elif token == 'dxt':
        if extra == 'illustration':
            fmt.setAnchorHref('article:///' + target)
        elif extra == 'table':
            fmt.setAnchorHref('table:///' + target)
        elif extra != '':
            fmt.setAnchorHref('sense:///' + extra)
        else:
            fmt.setAnchorHref('word:///' + text)
    else:
        raise NotImplementedError(f"Token {code} not implimented")
    return (text, fmt)
|
|
|
|
def markup(offset: int, text:str) -> tuple[str, list[QTextLayout.FormatRange]]:
    """Strip a paired tag from *text* and describe its formatting.

    *text* must have the form ``{code}content{/code}``.  Returns the content
    and a single format range that starts at *offset* and spans the content.
    "b", "inf", "it", "sc" and "sup" set a character format; any other code
    yields an empty format, so the tags are simply removed.
    """
    close = text.find('}')
    code = text[1:close]
    # The opening tag is close+1 characters long and the closing tag one more
    # (for the '/'), hence the asymmetric slice.
    text = text[close+1:-(close+2)]
    fmt = QTextCharFormat()
    if code == 'b':
        fmt.setFontWeight(QFont.Weight.Bold)
    elif code == 'inf':
        fmt.setVerticalAlignment(QTextCharFormat.VerticalAlignment.AlignSubScript)
    elif code == 'sup':
        fmt.setVerticalAlignment(QTextCharFormat.VerticalAlignment.AlignSuperScript)
    elif code == 'it':
        fmt.setFontItalic(True)
    elif code == 'sc':
        fmt.setFontCapitalization(QFont.Capitalization.SmallCaps)
    fr = QTextLayout.FormatRange()
    fr.start = offset
    fr.length = len(text)
    fr.format = fmt
    return (text, [fr])
|
|
|
|
def _shiftFormatRanges(formats, start: int, oldEnd: int, newLen: int) -> None:
    """Keep format ranges aligned after text[start:oldEnd] became newLen long."""
    delta = newLen - (oldEnd - start)
    if delta == 0:
        return
    newEnd = start + newLen
    for fr in formats:
        frStart = fr.start
        frEnd = frStart + fr.length
        if frStart >= oldEnd:
            frStart += delta
        elif frStart > start:
            frStart = start
        if frEnd >= oldEnd:
            frEnd += delta
        elif frEnd > start:
            frEnd = newEnd
        fr.start = frStart
        fr.length = max(0, frEnd - frStart)


def parseText(frag: Fragment) -> QTextLayout:
    """Resolve the ``{...}`` tokens in a fragment's text.

    Self-contained tokens are replaced through replaceCode(); paired tags
    (``{it}...{/it}``) are stripped through markup() and their content is
    scanned again so nested tokens are resolved too.  A tag without a closing
    partner is removed.  Existing format ranges are shifted to follow the
    edited text.  Returns the fragment's layout with text and formats updated.
    """
    layout = frag.layout()
    text = layout.text()
    formats = layout.formats()
    REPLACE_TEXT = {
        'bc', 'a_link', 'd_link', 'dxt', 'et_link', 'i_link', 'mat',
        'sx', 'ldquo', 'rdquo',
    }
    pos = 0
    while True:
        start = text.find('{', pos)
        if start < 0:
            break
        end = text.find('}', start + 1)
        if end < 0:
            # Unterminated token: nothing further can be parsed.
            break
        code = text[start+1:end]
        tagEnd = end + 1
        if code == '':
            # "{}" is left in place.
            pos = tagEnd
            continue

        if code.split('|')[0] in REPLACE_TEXT:
            (repl, tfmt) = replaceCode(code)
            _shiftFormatRanges(formats, start, tagEnd, len(repl))
            text = text[:start] + repl + text[tagEnd:]
            fmt = QTextLayout.FormatRange()
            fmt.format = tfmt
            fmt.start = start
            fmt.length = len(repl)
            formats.append(fmt)
            pos = start + len(repl)
            continue

        needle = f'{{/{code}}}'
        closeAt = text.find(needle, tagEnd)
        if closeAt < 0:
            # No closing tag: drop the stray tag itself.
            _shiftFormatRanges(formats, start, tagEnd, 0)
            text = text[:start] + text[tagEnd:]
            pos = start
            continue

        codeEnd = closeAt + len(needle)
        (repl, frs) = markup(start, text[start:codeEnd])
        # Two separate removals (closing tag first, so the opening tag's
        # offsets stay valid) keep ranges inside the content exact.
        _shiftFormatRanges(formats, closeAt, codeEnd, 0)
        _shiftFormatRanges(formats, start, tagEnd, 0)
        formats += frs
        text = text[:start] + repl + text[codeEnd:]
        # Rescan from the start of the content to pick up nested tokens.
        pos = start
    layout.setFormats(formats)
    layout.setText(text)
    return layout
|