# File: esl-reader/lib/words.py
# Last commit: d9fefb99dd "Checkpoint of Word refactor"
#   by Christopher T. Johnson, 2024-03-21 09:42:09 -04:00
# Size: 534 lines, 18 KiB (Python)

import json
import re
from typing import Any, Dict, List, Optional, Self, Type, cast
import requests
from PyQt6.QtCore import QPoint, QRect, Qt, pyqtSignal
from PyQt6.QtGui import (
QBrush,
QColor,
QFont,
QFontDatabase,
QFontMetrics,
QMouseEvent,
QPainter,
QPaintEvent,
QTextFormat,
QTextOption,
)
from PyQt6.QtSql import QSqlQuery
from PyQt6.QtWidgets import QWidget
from lib import query_error
# URL template for the dictionaryapi.dev endpoint; ``{word}`` is a str.format
# placeholder.
# NOTE(review): nothing in the visible part of this file reads API — confirm
# it is still needed.
API = "https://api.dictionaryapi.dev/api/v2/entries/en/{word}"
# URL template for the Merriam-Webster Collegiate endpoint; Word.__init__
# formats it with the word being looked up.
# NOTE(review): the API key is committed in source — consider loading it from
# configuration or the environment instead.
MWAPI = "https://www.dictionaryapi.com/api/v3/references/collegiate/json/{word}?key=51d9df34-ee13-489e-8656-478c215e846c"
class Word:
    """Look up a dictionary entry for a word and lay it out for display.

    The class is a singleton: ``Word(x)`` always returns the same object,
    re-initialised for ``x``.  An entry is resolved, in order, from the
    in-memory cache, the ``words`` SQL table and finally the Merriam-Webster
    API (``MWAPI``); a successful API answer is written back to both caches.
    """

    _instance = None
    # Raw JSON text of every entry loaded so far, keyed by word.
    _words: Dict[str, str] = {}
    _current: Optional[Dict[str, Any]] = None
    _currentWord: Optional[str] = None

    class Fragment:
        """A run of text drawn with one font, plus optional audio and layout.

        ``t`` is ``'text'``, ``'container'`` or ``'btn'`` (a clickable
        pronunciation button).  ``rect`` and ``position`` are filled in by
        ``Word.Line.getLine``.
        """

        TYPES = ("text", "container", "btn")

        _type: str
        _text: str
        _font: QFont
        _audio: str
        _align: QTextOption
        _rect: QRect
        _color: Optional[str]
        _position: QPoint

        def __init__(
            self,
            text: str,
            font: QFont,
            t: str = "text",
            audio: str = "",
            align: Optional[QTextOption] = None,
            rect: Optional[QRect] = None,
            color: Optional[str] = None,
        ) -> None:
            # Build the Qt defaults per instance: objects created in the
            # signature would be shared by every fragment.
            if align is None:
                align = QTextOption(
                    Qt.AlignmentFlag.AlignLeft | Qt.AlignmentFlag.AlignBaseline
                )
            if rect is None:
                rect = QRect(0, 0, 0, 0)
            self._type = t
            self._text = text
            self._font = font
            self._audio = audio
            self._align = align
            self._rect = rect
            self._color = color
            self._position = QPoint(0, 0)

        def setType(self, t: str) -> None:
            """Set the fragment type; raise ValueError for an unknown type."""
            if t not in self.TYPES:
                raise ValueError("Bad Value")
            self._type = t

        def setText(self, text: str) -> None:
            self._text = text

        def setFont(self, font: QFont) -> None:
            self._font = font

        def setAudio(self, audio: str) -> None:
            self._audio = audio

        def setAlign(self, align: QTextOption) -> None:
            self._align = align

        def setRect(self, rect: QRect) -> None:
            self._rect = rect

        def setColor(self, color: Optional[str]) -> None:
            self._color = color

        def setPosition(self, position: QPoint) -> None:
            self._position = position

        def type(self) -> str:
            return self._type

        def text(self) -> str:
            return self._text

        def font(self) -> QFont:
            return self._font

        def audio(self) -> str:
            return self._audio

        def align(self) -> QTextOption:
            return self._align

        def rect(self) -> QRect:
            return self._rect

        def color(self) -> Optional[str]:
            return self._color

        def position(self) -> QPoint:
            return self._position

    class Line:
        """One horizontal row of fragments sharing a common baseline."""

        # Pixels added on every side of a 'btn' fragment.
        BTN_PADDING = 3

        _maxHeight: int
        _leading: int
        _baseLine: int
        _fragments: List["Word.Fragment"]

        def __init__(self) -> None:
            self._maxHeight = -1
            self._baseLine = -1
            self._leading = -1
            self._fragments = []

        def fixText(self, frag: "Word.Fragment") -> List["Word.Fragment"]:
            """Split ``frag`` on brace markup into individually styled fragments.

            ``*`` becomes a bullet and ``{ldquo}``/``{rdquo}`` become curly
            quotes.  ``{bc}`` yields a bold ``': '``.  A recognised opening
            tag styles the text that follows it and its closing tag is
            dropped.  Unrecognised tags are kept as literal text.  Every
            result inherits the type, audio, alignment and colour of ``frag``.
            """
            text = frag.text().replace("*", "\u2022")
            text = text.replace("{ldquo}", "\u201c").replace("{rdquo}", "\u201d")

            base = frag.font()
            bold = QFont(base)
            bold.setBold(True)
            italic = QFont(base)
            italic.setItalic(True)
            boldItalic = QFont(bold)
            boldItalic.setItalic(True)
            smallCaps = QFont(base)
            smallCaps.setCapitalization(QFont.Capitalization.SmallCaps)
            script = QFont(base)
            pixels = script.pixelSize()
            if pixels > 0:
                script.setPixelSize(max(1, pixels // 4))
            else:
                # pixelSize() is -1 for a font sized in points.
                script.setPointSizeF(max(1.0, script.pointSizeF() / 4))

            # opening tag -> (font, prefix, suffix) for the text that follows
            styles = {
                "{inf}": (script, "", ""),
                "{sup}": (script, "", ""),
                "{it}": (italic, "", ""),
                "{wi}": (italic, "", ""),
                "{sc}": (smallCaps, "", ""),
                "{parahw}": (smallCaps, "", ""),
                "{phrase}": (boldItalic, "", ""),
                "{gloss}": (base, "[", "]"),
            }
            closers = {"{/" + tag[1:] for tag in styles}

            def derive(content: str, font: QFont) -> "Word.Fragment":
                return Word.Fragment(
                    content,
                    font,
                    frag.type(),
                    frag.audio(),
                    frag.align(),
                    color=frag.color(),
                )

            results: List[Word.Fragment] = []
            pending = None
            # re.split with one capture group alternates text, tag, text, ...
            # so odd indices are always "{...}" tags.
            pieces = re.split(r"(\{[^{}]*\})", text)
            for index, piece in enumerate(pieces):
                if not piece:
                    continue
                isTag = index % 2 == 1
                if isTag and piece == "{bc}":
                    results.append(derive(": ", bold))
                elif isTag and piece in styles:
                    pending = styles[piece]
                elif isTag and piece in closers:
                    pending = None
                elif pending is not None and not isTag:
                    font, prefix, suffix = pending
                    results.append(derive(prefix + piece + suffix, font))
                    pending = None
                else:
                    results.append(derive(piece, base))
            return results

        def addFragment(
            self,
            frag: "Word.Fragment",
        ) -> None:
            """Append ``frag`` to the line after expanding its markup.

            Every fragment after the first gets a leading space.  A fragment
            that carries audio gets a speaker glyph appended to its text.
            """
            SPEAKER = "\U0001F508"
            if self._fragments:
                frag.setText(" " + frag.text())
            if frag.audio():
                frag.setText(frag.text() + " " + SPEAKER)
            self._fragments.extend(self.fixText(frag))

        def getLine(self) -> List["Word.Fragment"]:
            """Lay the fragments out left to right and return them.

            Each fragment's ``position()`` is set to (x, line baseline) and
            its ``rect()`` to the area it occupies relative to the top-left
            of the line.  For a 'btn' fragment that area includes the padding.
            """
            measured = []
            for fragment in self._fragments:
                fm = QFontMetrics(fragment.font())
                self._leading = max(self._leading, fm.leading())
                bounds = fm.boundingRect(fragment.text(), fragment.align())
                pad = self.BTN_PADDING if fragment.type() == "btn" else 0
                width = bounds.width() + 2 * pad
                height = bounds.height() + 2 * pad
                baseLine = bounds.height() - fm.descent() + pad
                self._baseLine = max(self._baseLine, baseLine)
                self._maxHeight = max(self._maxHeight, height)
                measured.append((fragment, width, height, baseLine))
            x = 0
            for fragment, width, height, baseLine in measured:
                fragment.setPosition(QPoint(x, self._baseLine))
                # Shift the box so this fragment's own baseline sits on the
                # line's baseline.
                fragment.setRect(
                    QRect(x, self._baseLine - baseLine, width, height)
                )
                x += width
            return self._fragments

        def getLeading(self) -> int:
            """Return the vertical distance from this line to the next."""
            return self._leading + self._maxHeight

        def getBtnRect(
            self,
            frag: "Word.Fragment | Dict[str, str | QTextOption | QFont | int]",
        ) -> QRect:
            """Return the text bounding rectangle grown by the button padding.

            Accepts a ``Fragment`` or a dict with ``font``, ``text`` and
            ``align`` keys.
            """
            if isinstance(frag, Word.Fragment):
                font, text, align = frag.font(), frag.text(), frag.align()
            else:
                font = cast(QFont, frag["font"])
                text = cast(str, frag["text"])
                align = cast(QTextOption, frag["align"])
            rect = QFontMetrics(font).boundingRect(text, align)
            rect.setHeight(rect.height() + 2 * self.BTN_PADDING)
            rect.setWidth(rect.width() + 2 * self.BTN_PADDING)
            return rect

    _lines: List[Line] = []

    def __new__(cls: Type[Self], word: str) -> Self:  # flycheck: ignore
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self, word: str) -> None:
        """Load the entry for ``word``; ``_current`` is None when not found."""
        from urllib.parse import quote

        if word != self._currentWord:
            # The laid-out lines belong to the previous word.
            self._lines = []
        self._currentWord = word
        self._current = None
        #
        # Have we already retrieved this word?
        #
        cached = self._words.get(word)
        if cached is not None:
            self._current = json.loads(cached)
            return
        query = QSqlQuery()
        query.prepare("SELECT * FROM words WHERE word = :word")
        query.bindValue(":word", word)
        if not query.exec():
            query_error(query)
        if query.next():
            self._words[word] = query.value("definition")
            self._current = json.loads(self._words[word])
            return
        try:
            response = requests.get(
                MWAPI.format(word=quote(word, safe="")), timeout=10
            )
        except requests.RequestException:
            return
        if response.status_code != 200:
            return
        try:
            data = json.loads(response.content.decode("utf-8"))
        except ValueError:
            # Covers both invalid UTF-8 and invalid JSON.
            return
        #
        # XXX - The first entry should be the correct entry.  There could be more
        #       if there is a "hom" entry, then that will be appended to meta.id
        #       word = "lady", hom=1, meta.id = "lady:1";
        #
        # Only a dict is usable as an entry; an empty list or a list of
        # plain strings means no entry was returned for this word.
        if not isinstance(data, list) or not data or not isinstance(data[0], dict):
            return
        self._words[word] = json.dumps(data[0])
        self._current = data[0]
        query.prepare(
            "INSERT INTO words "
            "(word, definition) "
            "VALUES (:word, :definition)"
        )
        query.bindValue(":word", word)
        query.bindValue(":definition", self._words[word])
        if not query.exec():
            query_error(query)

    def getCurrent(self) -> str:
        """Return the word most recently passed to the constructor."""
        assert self._currentWord is not None
        return self._currentWord

    def get_html(self) -> str | None:
        """Return the entry rendered as HTML, or None when nothing is loaded."""
        if not self._current:
            return None
        if "meta" in self._current:
            return self.mw_html()
        return self.apidictionary_html()

    def get_def(self) -> List[Line] | None:
        """Return the laid-out lines for an entry with a ``meta`` key, else None."""
        if not self._current:
            return None
        if "meta" in self._current:
            return self.mw_def()
        return None

    def mw_def(self) -> List[Line]:
        """Build (once per word) the header lines for the current entry."""
        if self._lines:
            return self._lines
        assert self._current is not None
        entry = self._current
        accent = "#4a7d95"

        def sized(family: str, pixels: int) -> QFont:
            font = QFontDatabase.font(family, None, 10)
            font.setPixelSize(pixels)
            return font

        headerFont = sized("OpenDyslexic", 48)
        headerFont.setWeight(QFont.Weight.Bold)
        labelFont = sized("OpenDyslexic", 32)
        phonicFont = sized("Gentium", 32)
        boldFont = sized("OpenDyslexic", 24)
        boldFont.setBold(True)

        line = self.Line()
        line.addFragment(self.Fragment(entry["hwi"]["hw"], headerFont))
        if "fl" in entry:
            line.addFragment(self.Fragment(entry["fl"], labelFont, color=accent))
        self._lines.append(line)

        if "vrs" in entry:
            line = self.Line()
            for vrs in entry["vrs"]:
                line.addFragment(self.Fragment(vrs["va"], labelFont))
            self._lines.append(line)

        if "prs" in entry["hwi"]:
            line = self.Line()
            for prs in entry["hwi"]["prs"]:
                if "mw" not in prs:
                    continue
                line.addFragment(
                    self.Fragment(
                        prs["mw"],
                        phonicFont,
                        t="btn",
                        audio=self.sound_url(prs) or "",
                        color=accent,
                    )
                )
            self._lines.append(line)

        if "ins" in entry:
            line = self.Line()
            inflections = "; ".join(x["if"] for x in entry["ins"] if "if" in x)
            line.addFragment(self.Fragment(inflections, boldFont))
            self._lines.append(line)
        return self._lines

    def sound_url(self, prs: Dict[str, Any], fmt: str = "ogg") -> str | None:
        """Create a URL from a PRS structure, or None when it has no sound."""
        base = f"https://media.merriam-webster.com/audio/prons/en/us/{fmt}"
        if "sound" not in prs:
            return None
        audio = prs["sound"]["audio"]
        m = re.match(r"(bix|gg|[a-zA-Z])", audio)
        # Names that start with anything else live under "number".
        subdir = m.group(1) if m else "number"
        return f"{base}/{subdir}/{audio}.{fmt}"

    def mw_html(self) -> str:
        """Render the current entry as an HTML fragment."""
        from html import escape

        def parse_sn(sn: str, old: str) -> str:
            return sn

        assert self._current is not None
        entry = self._current
        out: List[str] = []
        #
        # Create the header, base word and its label
        #
        word = escape(entry["hwi"]["hw"])
        label = escape(entry.get("fl", ""))
        out.append(
            f'<h1 class="def-word">{word} <span class="def-label">{label}</span></h1>\n'
        )
        #
        # If there are variants, then add them in an unordered list.
        # CSS will make it pretty
        #
        if "vrs" in entry:
            out.append('<ul class="def-vrs">\n')
            out.append("<li>")
            out.append(
                "</li>\n<li>".join(escape(vrs["va"]) for vrs in entry["vrs"])
            )
            out.append("</li>\n</ul>\n")
        #
        # If there is a pronunciation section, create it
        #
        if "prs" in entry["hwi"]:
            tmp = []
            for prs in entry["hwi"]["prs"]:
                url = self.sound_url(prs)
                how = escape(prs.get("mw", ""))
                if url:
                    tmp.append(f'<a href="{escape(url)}">\\{how}\\</a>')
                else:
                    tmp.append(f"\\{how}\\")
            out.append('<span class="def-phonetic">')
            out.append('</span><span class="def-phonetic">'.join(tmp))
            out.append("</span>\n")
        #
        # If there are inflections, create a header for that.
        #
        if "ins" in entry:
            out.append('<h2 class="def-word">')
            out.append(
                ", ".join(escape(ins["if"]) for ins in entry["ins"] if "if" in ins)
            )
            out.append("</h2>\n")
        #
        # Start creating the definition section
        #
        out.append("<ul class='def-outer'>\n")
        for meaning in entry.get("def", []):
            divider = escape(meaning.get("vd", ""))
            out.append(f"<li>{divider}\n")
            out.append('<ul class="def-inner">\n')
            number = ""
            for sseq in meaning.get("sseq", []):
                for sense in sseq:
                    payload = sense[1]
                    if not isinstance(payload, dict):
                        # Only dict payloads have the keys read below.
                        continue
                    number = parse_sn(payload.get("sn", ""), number)
                    sls = ""
                    if "sls" in payload:
                        joined = escape(", ".join(payload["sls"]))
                        sls = f'<span class="def-sls">{joined}</span> '
                    for dt in payload.get("dt", []):
                        if dt[0] == "text":
                            out.append(
                                f'<li class="def-text"><span class="def-sn">'
                                f"{escape(number)}</span>{sls}{escape(dt[1])}</li>\n"
                            )
                        elif dt[0] == "vis":
                            for vis in dt[1]:
                                quote_text = escape(vis["t"])
                                out.append(
                                    f'<li class="def-vis">{quote_text}</li>\n'
                                )
                        else:
                            print(f"Do something with {dt[0]}")
            out.append("</ul>\n</li>\n")
        out.append("</ul>\n")
        return "".join(out)

    def apidictionary_html(self) -> str:
        """Placeholder renderer for entries without a ``meta`` key."""
        html = ""
        return html


class Definition(QWidget):
    """Widget that paints a Word's laid-out lines.

    Fragments of type ``'btn'`` are clickable: pressing and releasing the
    mouse inside one emits ``pronounce`` with that fragment's audio URL.
    """

    pronounce = pyqtSignal(str)
    _word: str
    _lines: List[Word.Line]
    _buttons: List[QRect]
    # Audio URL for each entry of _buttons (same index).
    _buttonAudio: List[str]
    _downRect: QRect | None = None
    _downAudio: str = ""

    def __init__(self, w: Word, *args: Any, **kwargs: Any) -> None:
        super().__init__(*args, **kwargs)
        self._word = w.getCurrent()
        lines = w.get_def()
        # No definition available: show an empty widget rather than crash.
        self._lines = lines if lines is not None else []
        self._buttons = []
        self._buttonAudio = []
        base = 0
        for line in self._lines:
            for frag in line.getLine():
                if frag.type() == "btn":
                    # Fragment rects are line-relative; shift to widget space.
                    self._buttons.append(frag.rect().translated(0, base))
                    self._buttonAudio.append(frag.audio())
            base += line.getLeading()

    def mousePressEvent(self, event: Optional[QMouseEvent]) -> None:
        """Remember which button, if any, the press landed on."""
        if event is not None:
            for rect, audio in zip(self._buttons, self._buttonAudio):
                if rect.contains(event.pos()):
                    self._downRect = rect
                    self._downAudio = audio
                    return
        return super().mousePressEvent(event)

    def mouseReleaseEvent(self, event: Optional[QMouseEvent]) -> None:
        """Emit ``pronounce`` when released over the button that was pressed."""
        pressed, audio = self._downRect, self._downAudio
        self._downRect = None
        self._downAudio = ""
        if (
            event is not None
            and pressed is not None
            and pressed.contains(event.pos())
        ):
            if audio:
                self.pronounce.emit(audio)
            return
        return super().mouseReleaseEvent(event)

    def paintEvent(self, event: Optional[QPaintEvent]) -> None:  # noqa
        """Draw every fragment of every line on its line's baseline."""
        painter = QPainter(self)
        painter.save()
        painter.setBrush(QBrush())
        defaultPen = QColor("white")
        painter.setPen(defaultPen)
        #
        # Each line needs a base calculated.  Line.getLine() stores every
        # fragment's rectangle relative to the top of its line, placed so
        # that all text on the line shares one baseline.  The text baseline
        # inside that rectangle is its bottom edge, minus the button padding,
        # minus the font's descent.
        #
        base = 0
        for line in self._lines:
            for frag in line.getLine():
                font = frag.font()
                painter.setFont(font)
                color = frag.color()
                painter.setPen(QColor(color) if color else defaultPen)
                rect = frag.rect().translated(0, base)
                pad = 0
                if frag.type() == "btn":
                    pad = line.BTN_PADDING
                    painter.drawRoundedRect(rect, 10.0, 10.0)
                baseline = (
                    rect.y() + rect.height() - pad - QFontMetrics(font).descent()
                )
                painter.drawText(rect.x() + pad, baseline, frag.text())
            base += line.getLeading()
        painter.restore()
        painter.end()