diff --git a/workers.py b/workers.py index d1af6c1..6d28a17 100644 --- a/workers.py +++ b/workers.py @@ -1,5 +1,4 @@ import datetime -from re import template import dateparser import requests @@ -10,21 +9,30 @@ from PySide6.QtSql import QSqlDatabase, QSqlQuery from lib.utils import query_error -def update_proceedings(case_id: int, bs: BeautifulSoup) -> bool: +def update_proceedings( + case_id: int, bs: BeautifulSoup, db: QSqlDatabase +) -> bool: table = bs.find("table", id="proceedings") assert isinstance(table, Tag) trs = table.find_all("tr") tr = trs.pop(0) - query = QSqlQuery(QSqlDatabase.database("update")) + query = QSqlQuery(db) while len(trs) > 0: tr = trs.pop(0) assert isinstance(tr, Tag) + print(tr.contents) td = tr.contents[0] assert isinstance(td, Tag) and isinstance(td.string, str) date = dateparser.parse(td.string) td = tr.contents[1] - assert isinstance(td, Tag) and isinstance(td.string, str) - text = td.string.strip() + # + # When a case is GVRed, the will contain which means that + # td is a Tag but not a simple string. + # We need to convert to HTML and store the HTML in the entry, not just plaintext. + assert isinstance(td, Tag) + text = td.string + if not text: + text = "".join([str(x) for x in td.contents]) query.prepare( "SELECT * FROM entries WHERE case_id = :cid AND date = :date AND text=:text" ) @@ -70,21 +78,24 @@ def update_proceedings(case_id: int, bs: BeautifulSoup) -> bool: query.bindValue(":url", url) if not query.exec(): query_error(query) + assert isinstance(text, str) print(f"text: {text.lower()}") result = not text.lower() in [ "petition denied.", + "rehearing denied.", + # "judgement issued.", ] return result -def update_db(case_id) -> int: +def update_db(case_id: str, db: QSqlDatabase) -> int: # # See if this case already exists. # # We assume that case_id == docket_id at this point. If it does not, # then we will build out from the request we get - query = QSqlQuery(QSqlDatabase.database("update")) + query = QSqlQuery(db) query.prepare("SELECT * FROM cases WHERE docket_id = :did") query.bindValue(":did", case_id) if not query.exec(): @@ -105,6 +116,13 @@ def update_db(case_id) -> int: exit(1) bs = BeautifulSoup(r.text, "lxml") # + # SCOTUS does not return 404 for page not found. + # + title = bs.find("title") + assert isinstance(title, Tag) and isinstance(title.string, str) + if title.string.strip() == "": + return -1 + # # docket_id, previous_docket, petitioners, respondents, date # all come from the docketinfo table # @@ -136,8 +154,11 @@ def update_db(case_id) -> int: # petitioners = petitioners.replace(", Petitioners", "") petitioners = petitioners.replace(", Applicants", "") - assert isinstance(span.contents[4], str) - respondent = span.contents[4].strip() + if len(span.contents) >= 4: + assert isinstance(span.contents[4], str) + respondent = span.contents[4].strip() + else: + respondent = "" # # Date on which the case was docketed @@ -184,7 +205,7 @@ def update_db(case_id) -> int: if not query.exec(): query_error(query) if not query.next(): - new_id = update_db(linked) + new_id = update_db(linked, db) else: new_id = query.value(0) if new_id != linked_id: @@ -196,7 +217,7 @@ def update_db(case_id) -> int: # # XXX - Process lower courts # - active = update_proceedings(case_id, bs) + active = update_proceedings(case_id, bs, db) if not active: query.prepare("UPDATE cases SET active=0 WHERE case_id = :cid") query.bindValue(":cid", case_id) @@ -208,25 +229,115 @@ def update_db(case_id) -> int: class updateThread(QThread): docket_id = None - def __init__(self): + def __init__(self) -> None: super(updateThread, self).__init__() print("updateThread: __init__(docket_id)") return - def setDocketId(self, docket_id): + def setDocketId(self, docket_id: str) -> None: self.docket_id = docket_id return - def run(self): - print("updateThread: run()") - if not "update" in QSqlDatabase.connectionNames(): - db = QSqlDatabase.cloneDatabase( - "qt_sql_default_connection", "update" - ) - if not db.open(): - print(db.lastError()) - raise Exception("db.open()") + def run(self) -> None: + print(f"updateThread: running on {self.currentThread()}") + db = QSqlDatabase.cloneDatabase("qt_sql_default_connection", "update") + if not db.open(): + print(db.lastError()) + raise Exception("db.open()") - case_id = update_db(self.docket_id) + case_id = update_db(str(self.docket_id), db) + db.close() + del db + QSqlDatabase.removeDatabase("update") print(f"updateThread: run() returns {case_id}") - self.exit(1) + return + + +class loadCases(QThread): + def run(self) -> None: + db = QSqlDatabase.cloneDatabase("qt_sql_default_connection", "load") + if not db.open(): + raise Exception("db.open()") + dt = datetime.datetime.now() + year = dt.strftime("%y") + query = QSqlQuery(db) + query.prepare("SELECT * FROM history WHERE year = :year") + print(f"year = {year}") + query.bindValue(":year", year) + if not query.exec(): + query_error(query) + + if not query.next(): + query.prepare( + "INSERT INTO history (year, edocket, number) " + "VALUES (:year, 0, 1)" + ) + query.bindValue(":year", year) + if not query.exec(): + query_error(query) + edocket = 0 + number = 1 + history_id = query.lastInsertId() + else: + history_id = query.value("history_id") + edocket = query.value("edocket") + number = query.value("number") + count = 0 + + while year > "00" and count < 100: + query.prepare("SELECT * FROM cases WHERE docket_id = :did") + if edocket == 1: + docket_id = f"{year}A{number}" + else: + docket_id = f"{year}-{number}" + query.bindValue(":did", docket_id) + print(f"Loading {docket_id}") + if not query.exec(): + query_error(query) + if query.next(): + if query.value("active") == 0: + print("Already exists and is inactive") + number += 1 + continue + result = update_db(docket_id, db) + print(f"result: {result}") + if result < 0: + year = f"{int(year) - 1:02d}" + query.prepare( + "UPDATE history set number = :number WHERE history_id=:hid" + ) + query.bindValue(":number", number - 1) + query.bindValue(":hid", history_id) + if not query.exec(): + query_error(query) + + query.prepare("SELECT * FROM history WHERE year = :year") + print(f"year = {year}") + query.bindValue(":year", year) + if not query.exec(): + query_error(query) + + if not query.next(): + query.prepare( + "INSERT INTO history (year, edocket, number) " + "VALUES (:year, 0, 1)" + ) + query.bindValue(":year", year) + if not query.exec(): + query_error(query) + edocket = 0 + number = 1 + history_id = query.lastInsertId() + else: + history_id = query.value("history_id") + edocket = query.value("edocket") + number = query.value("number") + continue + + number += 1 + count += 1 + + db.close() + del db + QSqlDatabase.removeDatabase("load") + return