Mostly working Reader
This commit is contained in:
136
lib/books.py
136
lib/books.py
@@ -1,15 +1,49 @@
|
||||
import json
|
||||
import os
|
||||
import xml.dom.minidom
|
||||
from typing import Dict, List, cast
|
||||
|
||||
from PyQt6.QtSql import QSqlQuery
|
||||
|
||||
from main import query_error
|
||||
|
||||
|
||||
class Book:
|
||||
sections = []
|
||||
metadata = {}
|
||||
sections: List[str] = []
|
||||
metadata: Dict[str, str] = {}
|
||||
words = {}
|
||||
|
||||
def __init__(self, src: str) -> None:
    """Parse the book found at *src*, store it, and reload it from the database.

    Args:
        src: Location of the unpacked book; passed unchanged to
            ``parse_book``.
    """
    super().__init__()
    # The class-level ``sections``/``metadata``/``words`` objects are shared
    # by every instance.  ``parse_section`` appends to ``self.sections``, so
    # without per-instance containers the sections of a previously parsed
    # book would leak into this one and be stored under the wrong book.
    self.sections = []
    self.metadata = {}
    self.words = {}
    self.parse_book(src)
    book_id = self.store()  # Does nothing if already in database
    self.load(book_id)
|
||||
|
||||
def load(self, book_id: int) -> None:
    """Populate ``metadata`` and ``sections`` from the database.

    Args:
        book_id: Value of ``books.book_id`` identifying the book to load.

    Raises:
        Exception: If no row in ``books`` has the given ``book_id``.
    """
    query = QSqlQuery()
    query.prepare("SELECT * FROM books where book_id = :book_id")
    query.bindValue(":book_id", book_id)
    if not query.exec():
        query_error(query)
    if not query.next():
        raise Exception(f"Missing book? book_id={book_id}")
    self.metadata = {
        "title": query.value("title"),
        "creator": query.value("author"),
        "identifier": query.value("uuid"),
        "level": query.value("level"),
    }

    self.sections = []
    # Fixed: the statement read "FORM" instead of "FROM", and it was prepared
    # but never bound or executed, so the loop below had nothing to iterate.
    query.prepare(
        "SELECT * FROM sections WHERE book_id = :book_id " "ORDER BY sequence"
    )
    query.bindValue(":book_id", book_id)
    if not query.exec():
        query_error(query)
    while query.next():
        # Column name matches the INSERT in store(), which writes "content".
        # NOTE(review): confirm against the table schema.
        self.sections.append(query.value("content"))
    #
    # Load words!
    #
    return
|
||||
|
||||
def parse_book(self, src: str) -> None:
|
||||
@@ -53,15 +87,105 @@ class Book:
|
||||
href = item.getAttribute("href")
|
||||
print(f"{idref}: {href}")
|
||||
self.parse_section(src, href)
|
||||
#
|
||||
# "sections" is now loaded
|
||||
#
|
||||
return
|
||||
|
||||
def store(self) -> int:
    """Insert the parsed book and its sections unless it is already stored.

    The book is identified by ``metadata["identifier"]``, which is written
    to and looked up in the ``uuid`` column of ``books``.

    Returns:
        The ``book_id`` of the existing row, or of the newly inserted one.
    """
    uuid = self.metadata["identifier"]
    query = QSqlQuery()
    # A plain row lookup: the previous "COUNT(*) ..., book_id" mixed an
    # aggregate with a bare column, which only SQLite tolerates.
    query.prepare("SELECT book_id FROM books b WHERE b.uuid = :uuid")
    query.bindValue(":uuid", uuid)
    if not query.exec():
        query_error(query)
    if query.next():
        # Already in the database; nothing to store.
        existing_id: int = query.value("book_id")
        return existing_id

    query.prepare(
        "INSERT INTO books (title, author, uuid, level) VALUES ("
        ":title, :author, :uuid, 0)"
    )
    query.bindValue(":title", self.metadata["title"])
    query.bindValue(":author", self.metadata["creator"])
    query.bindValue(":uuid", uuid)
    if not query.exec():
        query_error(query)
    book_id = query.lastInsertId()

    query.prepare(
        "INSERT INTO sections (sequence, book_id, content) "
        "VALUES (:sequence, :book_id, :content)"
    )
    # Bound once; only the per-section placeholders are rebound in the loop.
    query.bindValue(":book_id", book_id)
    for seq, section in enumerate(self.sections):
        query.bindValue(":sequence", seq)
        query.bindValue(":content", section)
        if not query.exec():
            query_error(query)
    return book_id
|
||||
|
||||
def parse_section(self, src: str, href: str) -> None:
    """Parse one content document and append its simplified body to ``sections``.

    The file ``{src}/{href}`` is parsed as XML.  Headings (``h1``-``h6``) and
    paragraphs found anywhere under ``<body>`` are copied, without
    attributes, into a fresh ``<body>`` element; ``img`` elements are
    dropped and ``a`` elements are replaced by their children.  The
    serialised result is appended to ``self.sections``.

    Args:
        src: Directory containing the unpacked book.
        href: Path of the content document relative to *src*.
    """
    newdom = xml.dom.getDOMImplementation().createDocument("", "html", None)
    element_type = xml.dom.Node.ELEMENT_NODE
    text_types = (xml.dom.Node.TEXT_NODE, xml.dom.Node.CDATA_SECTION_NODE)
    # An explicit set: a bare startswith("h") also matched hr/header/hgroup.
    heading_tags = {"h1", "h2", "h3", "h4", "h5", "h6"}

    def strip_node(elm: xml.dom.minidom.Node) -> xml.dom.minidom.Node:
        # Return an attribute-free copy of elm owned by newdom.
        if elm.nodeType in text_types:
            return newdom.createTextNode(elm.data)

        newelm = newdom.createElement(elm.localName)
        node = elm.firstChild
        while node:
            if node.nodeType in text_types:
                # NOTE(review): stripping also removes the space between
                # text and a following inline element -- confirm intended.
                text = node.data.strip()
                if text:
                    newelm.appendChild(newdom.createTextNode(text))
            elif node.nodeType != element_type:
                # Comments and processing instructions have no localName;
                # copying them as elements would break serialisation.
                pass
            elif node.localName == "img":
                pass
            elif node.localName == "a":
                # Keep the link's content but not the link itself.
                a_node = node.firstChild
                while a_node:
                    if a_node.nodeType in text_types:
                        newelm.appendChild(newdom.createTextNode(a_node.data))
                    elif a_node.nodeType == element_type:
                        newelm.appendChild(strip_node(a_node))
                    a_node = a_node.nextSibling
            else:
                newelm.appendChild(strip_node(node))
            node = node.nextSibling
        return newelm

    def parse_node(parent: xml.dom.Node, elm: xml.dom.Node) -> None:
        # Copy headings/paragraphs under elm into parent, flattening the rest.
        if elm.nodeType != element_type:
            return
        if elm.localName in heading_tags:
            parent.appendChild(strip_node(elm))
        elif elm.localName == "p":
            clone = strip_node(elm)
            clone.normalize()  # merge adjacent text nodes left by <a> removal
            parent.appendChild(clone)
        else:
            node = elm.firstChild
            while node:
                parse_node(parent, node)
                node = node.nextSibling

    # Binary mode lets the XML parser honour the document's own encoding
    # declaration instead of decoding with the platform default.
    with open(f"{src}/{href}", "rb") as f:
        dom = xml.dom.minidom.parse(f)
    body = dom.getElementsByTagName("body")[0]
    section = newdom.createElement("body")
    node = body.firstChild
    while node:
        parse_node(section, node)
        node = node.nextSibling
    self.sections.append(section.toxml())
    return
|
||||
|
||||
Reference in New Issue
Block a user