More progress on all entities in MW data load

This commit is contained in:
Christopher T. Johnson
2024-05-14 11:08:02 -04:00
parent 7c65b466f1
commit 0acba3ed9b
4 changed files with 96 additions and 30 deletions

View File

@@ -4,7 +4,7 @@ from typing import Any, Literal, NotRequired, TypedDict, cast
from PyQt6.QtCore import QEventLoop, QUrl
from PyQt6.QtGui import QFont, QFontDatabase, QTextCharFormat, QTextLayout
from PyQt6.QtNetwork import QNetworkRequest
from PyQt6.QtNetwork import QNetworkReply, QNetworkRequest
from trycast import trycast
from lib.definition import Fragment, Line
@@ -91,7 +91,7 @@ Inflection = TypedDict(
class CrossReferenceTarget(TypedDict):
cxl: str
cxl: NotRequired[str]
cxr: NotRequired[str]
cxt: str
cxn: NotRequired[str]
@@ -277,7 +277,7 @@ class WordType(TypedDict):
definition: Any
def fetch(word: str) -> WordType:
def fetch(word: str) -> WordType|None:
request = QNetworkRequest()
url = QUrl(API.format(word=word, key=key))
request.setUrl(url)
@@ -287,6 +287,9 @@ def fetch(word: str) -> WordType:
loop = QEventLoop()
reply.finished.connect(loop.quit)
loop.exec()
if reply.error() != QNetworkReply.NetworkError.NoError:
print(f"Error fetching {word}: {reply.errorString()}")
return None
content = reply.readAll()
data = json.loads(content.data().decode("utf-8"))
return {
@@ -295,7 +298,6 @@ def fetch(word: str) -> WordType:
"definition": data,
}
def soundUrl(sound: Sound, fmt="ogg") -> QUrl:
"""Create a URL from a PRS structure."""
base = f"audio://media.merriam-webster.com/audio/prons/en/us/{fmt}"
@@ -343,16 +345,12 @@ def do_prs(frag: Fragment, prs: list[Pronunciation] | None) -> None:
fmt.setAnchorHref(soundUrl(pr["sound"]).toString())
fmt.setForeground(r.linkColor)
#text = pr["mw"] +' \N{SPEAKER} '
text = pr["mw"] +' '
text = ' '+pr["mw"] +' '
else:
text = pr['mw'] + ' '
print(f"text: {text}, length: {len(text)}")
frag.addText(text, fmt)
if "l2" in pr:
frag.addText(pun + pr["l2"], r.subduedLabelFormat)
text = frag.layout().text()
for fmt in frag.layout().formats():
print(f"start: {fmt.start}, length: {fmt.length}, text: \"{text[fmt.start:fmt.start+fmt.length]}\"")
return
@@ -741,6 +739,42 @@ def do_uros(uros: list[UndefinedRunOn]|None) -> list[Line]:
lines.append(line)
lines += newLines
return lines
def do_cxs(cxs: list[CognateCrossRef]|None) -> list[Line]:
    """Render cognate cross-reference ("cxs") entries as display lines.

    Each entry in *cxs* yields one Line holding a single Fragment: the
    entry's ``cxl`` label in italics, followed by one small-caps hyperlink
    per target listed under ``cxtis``.

    Args:
        cxs: The cross-reference entries to render.  Must not be None.

    Returns:
        One Line per entry, in input order.

    Raises:
        AssertionError: If *cxs* is None (while assertions are enabled).
    """
    assert cxs is not None
    r = Resources()

    # Everything about the link format except its href is identical for
    # every target, so build it once and copy it per target below.
    linkFormat = QTextCharFormat(r.smallCapsFormat)
    linkFormat.setAnchor(True)
    linkFormat.setForeground(r.linkColor)
    linkFormat.setFontUnderline(True)
    linkFormat.setUnderlineColor(r.linkColor)
    #
    # XXX - Capitalization does not work
    #
    # Workaround: the link text is upper-cased below and drawn at 90% of
    # the format's point size.
    linkFormat.setFontPointSize(linkFormat.fontPointSize() * 0.90)

    lines: list[Line] = []
    for cx in cxs:
        frag = Fragment()
        frag.addText(cx['cxl'] + ' ', r.italicFormat)
        for cxt in cx['cxtis']:
            # A target may carry its own italic label ahead of the link.
            if 'cxl' in cxt:
                frag.addText(cxt['cxl'], r.italicFormat)
            text = cxt['cxt']
            # The link points at 'cxr' when present, otherwise at the
            # displayed text itself; 'cxn', when present, is appended
            # after a slash.
            anchor = text
            if 'cxr' in cxt:
                anchor = cxt['cxr']
            if 'cxn' in cxt:
                anchor += f"/{cxt['cxn']}"
            fmt = QTextCharFormat(linkFormat)
            fmt.setAnchorHref('sense:///' + anchor)
            frag.addText(text.upper(), fmt)
        line = Line()
        line.addFragment(frag)
        lines.append(line)
    return lines
def getDef(defines: Any) -> list[Line]:
Line.setParseText(parseText)
workList = restructure(defines)
@@ -831,15 +865,17 @@ def getDef(defines: Any) -> list[Line]:
lines.append(line)
line = Line()
frag = Fragment()
defines = trycast(list[DefinitionSection], work["def"])
assert defines is not None
for define in defines:
try:
lines += do_def(define)
except NotImplementedError:
raise
if 'def' in work:
defines = trycast(list[DefinitionSection], work["def"])
assert defines is not None
for define in defines:
try:
lines += do_def(define)
except NotImplementedError:
pass
if 'cxs' in work:
lines += do_cxs(trycast(list[CognateCrossRef], work['cxs']))
if "uros" in work:
print(json.dumps(work['uros'],indent=2))
uros = trycast(list[UndefinedRunOn], work['uros'])
lines += do_uros(uros)
if "dros" in work:
@@ -852,10 +888,6 @@ def getDef(defines: Any) -> list[Line]:
phrases.append(line)
phrases += do_dros(dros)
if "et" in work:
line = Line()
frag = Fragment('', r.textFont)
frag.addText(f"{work['fl']} ({used[work['fl']]})",r.labelFormat)
line.addFragment(frag)
ets += do_ets(trycast(list[list[Pair]], work["et"]))
for k in work.keys():
if k not in [
@@ -872,8 +904,9 @@ def getDef(defines: Any) -> list[Line]:
"vrs",
"dros",
'uros',
'cxs',
]:
raise NotImplementedError(f"Unknown key {k} in work")
print( NotImplementedError(f"Unknown key {k} in work"))
if len(phrases) > 0:
lines += phrases
if len(ets) > 0:
@@ -914,6 +947,11 @@ def replaceCode(code:str) -> tuple[str, QTextCharFormat]:
fmt.setFontItalic(True)
elif token == 'sx':
fmt.setFontCapitalization(QFont.Capitalization.SmallCaps)
#
# XXX - Capitalization does not work
#
text = text.upper()
fmt.setFontPointSize(fmt.fontPointSize() * 0.90)
elif token == 'dxt':
if fields[3] == 'illustration':
fmt.setAnchorHref('article:///'+fields[2])
@@ -928,10 +966,14 @@ def replaceCode(code:str) -> tuple[str, QTextCharFormat]:
fmt.setAnchorHref('etymology:///'+fields[2])
else:
fmt.setAnchorHref('etymology:///' + fields[1])
elif token == 'd_link':
if fields[2] != '':
fmt.setAnchorHref('direct:///' + fields[2])
else:
fmt.setAnchorHref('direct:///' + fields[1])
else:
raise NotImplementedError(f"Token {code} not implimented")
fmt.setForeground(r.linkColor)
print(f"Format.capitalization(): {fmt.fontCapitalization()}")
return (text,fmt)
def markup(offset: int, text:str) -> tuple[str, list[QTextLayout.FormatRange]]: