"""Fetch Supreme Court docket pages and mirror them into the local SQL database.

``update_db`` loads one docket page and stores the case, its proceedings
entries and the documents attached to each entry.  ``updateThread`` and
``loadCases`` run that work on background ``QThread`` objects, each with its
own cloned database connection.
"""

import datetime

import dateparser
import requests
from bs4 import BeautifulSoup, Tag
from PySide6.QtCore import QThread
from PySide6.QtSql import QSqlDatabase, QSqlQuery

from lib.utils import query_error

# Seconds to wait for the docket server before giving up.  Without a timeout
# ``requests.get`` can block the worker thread indefinitely.
_REQUEST_TIMEOUT = 30

# Lower-cased text of a final proceedings entry that marks a case as closed.
# "judgement issued." is currently not treated as closing the case.
_FINAL_DISPOSITIONS = (
    "petition denied.",
    "rehearing denied.",
)


def _find_or_insert_entry(query: QSqlQuery, case_id: int, timestamp, text):
    """Return the id of the matching ``entries`` row, inserting it if absent."""
    query.prepare(
        "SELECT * FROM entries WHERE case_id = :cid AND date = :date AND text=:text"
    )
    query.bindValue(":cid", case_id)
    query.bindValue(":text", text)
    query.bindValue(":date", timestamp)
    if not query.exec():
        query_error(query)
    if query.next():
        return query.value(0)

    query.prepare(
        "INSERT INTO entries (case_id, date, text) VALUES (:cid,:date,:text)"
    )
    query.bindValue(":cid", case_id)
    query.bindValue(":date", timestamp)
    query.bindValue(":text", text)
    if not query.exec():
        query_error(query)
    return query.lastInsertId()


def _store_document(query: QSqlQuery, entry_id, name, url) -> None:
    """Insert a ``documents`` row for *url* unless the entry already has it."""
    query.prepare("SELECT * FROM documents WHERE url=:url AND entry_id = :eid")
    query.bindValue(":url", url)
    query.bindValue(":eid", entry_id)
    if not query.exec():
        query_error(query)
    if query.next():
        return

    query.prepare(
        "INSERT INTO documents (entry_id, name, url) "
        "VALUES (:eid, :name, :url)"
    )
    query.bindValue(":eid", entry_id)
    query.bindValue(":name", name)
    query.bindValue(":url", url)
    if not query.exec():
        query_error(query)


def update_proceedings(
    case_id: int, bs: BeautifulSoup, db: QSqlDatabase
) -> bool:
    """Store the proceedings table of a docket page and report if the case is active.

    The ``proceedings`` table is read as pairs of rows after the first row
    (which is skipped): an entry row (date cell, text cell) followed by a row
    whose second cell holds the links to the entry's documents.

    Args:
        case_id: Row id of the case in the ``cases`` table.
        bs: Parsed docket page.
        db: Open database connection to write to.

    Returns:
        ``False`` when the text of the last entry is one of the final
        dispositions (the case is closed), ``True`` otherwise, including when
        the table has no entries at all.
    """
    table = bs.find("table", id="proceedings")
    assert isinstance(table, Tag)
    # The first row is skipped, as in the original code.
    rows = iter(table.find_all("tr")[1:])

    query = QSqlQuery(db)
    # Pre-set so that a table without entries is reported as "still active"
    # instead of failing on an unbound name after the loop.
    text = ""
    for entry_row in rows:
        assert isinstance(entry_row, Tag)
        print(entry_row.contents)

        date_cell = entry_row.contents[0]
        assert isinstance(date_cell, Tag) and isinstance(date_cell.string, str)
        date = dateparser.parse(date_cell.string)
        # ``timestamp()`` exists on datetime.datetime only, not on datetime.date.
        assert isinstance(date, datetime.datetime)

        text_cell = entry_row.contents[1]
        assert isinstance(text_cell, Tag)
        #
        # When a case is GVRed the cell holds nested markup rather than a
        # plain string, so ``.string`` is empty.  In that case the children
        # are serialised back to HTML and the HTML is stored as the entry
        # text, not just the plaintext.
        text = text_cell.string
        if not text:
            text = "".join(str(child) for child in text_cell.contents)

        entry_id = _find_or_insert_entry(query, case_id, date.timestamp(), text)

        # The row after an entry carries its document links; a trailing entry
        # without such a row simply has no documents.
        links_row = next(rows, None)
        if links_row is None:
            break
        assert isinstance(links_row, Tag)
        links_cell = links_row.contents[1]
        assert isinstance(links_cell, Tag)
        for link in links_cell.contents:
            assert isinstance(link, Tag)
            _store_document(query, entry_id, link.string, link.attrs["href"])

    assert isinstance(text, str)
    print(f"text: {text.lower()}")
    return text.lower() not in _FINAL_DISPOSITIONS


def update_db(case_id: str, db: QSqlDatabase) -> int:
    """Fetch one docket page and store the case, its link and its proceedings.

    Args:
        case_id: Docket number used both for the database lookup and to build
            the page URL (it is assumed to equal ``docket_id`` at this point).
        db: Open database connection.

    Returns:
        The ``cases`` row id, or ``-1`` when the server answered with a page
        whose title is empty (the site does not return 404 for unknown
        dockets).  A case already stored as inactive is returned without
        contacting the server.

    Raises:
        SystemExit: If the server answers with a status other than 200.
        requests.RequestException: On network failure or timeout.
    """
    #
    # See if this case already exists.
    #
    # We assume that case_id == docket_id at this point.  If it does not,
    # then we will build out from the request we get.
    query = QSqlQuery(db)
    query.prepare("SELECT * FROM cases WHERE docket_id = :did")
    query.bindValue(":did", case_id)
    if not query.exec():
        query_error(query)
    exists = query.next()
    if exists and query.value("active") != 1:
        return int(query.value("case_id"))

    r = requests.get(
        f"https://www.supremecourt.gov/docket/docketfiles/html/public/{case_id}.html",
        timeout=_REQUEST_TIMEOUT,
    )
    if r.status_code != 200:
        print(r.status_code)
        # Same effect as exit(1), without relying on the builtin that the
        # ``site`` module installs for interactive use.
        raise SystemExit(1)

    bs = BeautifulSoup(r.text, "lxml")
    #
    # SCOTUS does not return 404 for page not found.
    #
    title = bs.find("title")
    assert isinstance(title, Tag) and isinstance(title.string, str)
    if not title.string.strip():
        return -1

    #
    # docket_id, previous_docket, petitioners, respondents, date
    # all come from the docketinfo table
    #
    di = bs.find("table", id="docketinfo")
    assert di is not None and isinstance(di, Tag)

    #
    # docket_id is first row, first column
    span = di.find("span")
    assert span is not None and isinstance(span, Tag)
    tmp = span.contents[0]
    assert isinstance(tmp, str)
    docket_id = tmp.strip().replace("No. ", "")

    #
    # Title is second row, first column
    tr = di.contents[1]
    assert isinstance(tr, Tag) and isinstance(tr.contents[0], Tag)
    assert tr.contents[0].string == "Title:"
    td = tr.contents[1]
    assert isinstance(td, Tag)
    span = td.contents[0]
    assert isinstance(span, Tag) and isinstance(span.contents[0], str)
    petitioners = span.contents[0].strip()
    #
    # XXX - We need to deal with other titles.  Change this to an RE.
    # UPDATED: we are just handling the two we know about.
    #
    petitioners = petitioners.replace(", Petitioners", "")
    petitioners = petitioners.replace(", Applicants", "")

    # The respondent is the fifth child of the title span, so the span needs
    # at least five children (index 4) for it to be present.
    if len(span.contents) > 4:
        assert isinstance(span.contents[4], str)
        respondent = span.contents[4].strip()
    else:
        respondent = ""

    #
    # Date on which the case was docketed
    tr = di.contents[2]
    assert isinstance(tr, Tag) and isinstance(tr.contents[1], Tag)
    td = tr.contents[1]
    assert isinstance(td, Tag) and td.string is not None
    date = dateparser.parse(td.string.strip())

    #
    # linked case is row 3, column 0
    tr = di.contents[3]
    assert isinstance(tr, Tag) and isinstance(tr.contents[0], Tag)
    linked = tr.contents[0].string

    #
    # If it does not exist, create it.  This stops a recursion loop.
    #
    if not exists:
        query.prepare(
            "INSERT INTO cases (docket_id, petitioners, respondents, date, active, linked) "
            "VALUES (:did, :pet, :resp, :date, 1, NULL)"
        )
        query.bindValue(":did", docket_id)
        query.bindValue(":pet", petitioners)
        query.bindValue(":resp", respondent)
        # ``timestamp()`` exists on datetime.datetime only, not on datetime.date.
        assert isinstance(date, datetime.datetime)
        query.bindValue(":date", date.timestamp())
        if not query.exec():
            query_error(query)
        row_id = query.lastInsertId()
        linked_id = None
    else:
        # ``query`` is still positioned on the row found by the first SELECT.
        row_id = query.value(0)
        linked_id = query.value("linked")
    assert isinstance(row_id, int)

    #
    # If there is a linked case, we need to get the ID for that case.
    if linked is not None:
        linked = linked.replace("Linked with ", "")
        query.prepare("SELECT * FROM cases WHERE docket_id = :did")
        query.bindValue(":did", linked)
        if not query.exec():
            query_error(query)
        if query.next():
            new_id = query.value(0)
        else:
            new_id = update_db(linked, db)
        # -1 is the "page not found" sentinel, not a case id, so it is never
        # stored as a link.
        if new_id != -1 and new_id != linked_id:
            query.prepare("UPDATE cases SET linked=:lid WHERE case_id = :cid")
            query.bindValue(":lid", new_id)
            query.bindValue(":cid", row_id)
            if not query.exec():
                query_error(query)

    #
    # XXX - Process lower courts
    #
    if not update_proceedings(row_id, bs, db):
        query.prepare("UPDATE cases SET active=0 WHERE case_id = :cid")
        query.bindValue(":cid", row_id)
        if not query.exec():
            query_error(query)
    return row_id


def _history_for_year(query: QSqlQuery, year: str) -> tuple:
    """Return ``(history_id, edocket, number)`` for *year*, creating the row if needed.

    A newly created row starts with ``edocket = 0`` and ``number = 1``.
    """
    query.prepare("SELECT * FROM history WHERE year = :year")
    print(f"year = {year}")
    query.bindValue(":year", year)
    if not query.exec():
        query_error(query)
    if query.next():
        return (
            query.value("history_id"),
            query.value("edocket"),
            query.value("number"),
        )

    query.prepare(
        "INSERT INTO history (year, edocket, number) "
        "VALUES (:year, 0, 1)"
    )
    query.bindValue(":year", year)
    if not query.exec():
        query_error(query)
    return query.lastInsertId(), 0, 1


class updateThread(QThread):
    """Worker thread that refreshes a single docket via ``update_db``."""

    # Docket number to refresh; set with setDocketId() before starting.
    docket_id = None

    def __init__(self) -> None:
        super().__init__()
        print("updateThread: __init__(docket_id)")

    def setDocketId(self, docket_id: str) -> None:
        """Remember the docket number that ``run`` will refresh."""
        self.docket_id = docket_id

    def run(self) -> None:
        """Refresh ``self.docket_id`` on a connection cloned for this thread.

        Raises:
            Exception: If the cloned connection cannot be opened.
        """
        print(f"updateThread: running on {self.currentThread()}")
        db = QSqlDatabase.cloneDatabase("qt_sql_default_connection", "update")
        if not db.open():
            print(db.lastError())
            raise Exception("db.open()")
        try:
            case_id = update_db(str(self.docket_id), db)
        finally:
            # Release the connection even when the update fails, so the
            # "update" connection name is free for the next run.
            db.close()
            del db
            QSqlDatabase.removeDatabase("update")
        print(f"updateThread: run() returns {case_id}")


class loadCases(QThread):
    """Worker thread that walks docket numbers and loads each one."""

    def run(self) -> None:
        """Load dockets on a connection cloned for this thread.

        Raises:
            Exception: If the cloned connection cannot be opened.
        """
        db = QSqlDatabase.cloneDatabase("qt_sql_default_connection", "load")
        if not db.open():
            raise Exception("db.open()")
        try:
            # The query object lives inside _load() so that it is gone before
            # the connection is removed below.
            self._load(db)
        finally:
            db.close()
            del db
            QSqlDatabase.removeDatabase("load")

    def _load(self, db: QSqlDatabase) -> None:
        """Load up to 100 dockets, starting at the current two-digit year.

        The starting number for each year comes from the ``history`` table.
        When ``update_db`` reports a missing docket, the last good number is
        saved for that year and the walk continues with the previous year.
        """
        year = datetime.datetime.now().strftime("%y")
        query = QSqlQuery(db)
        history_id, edocket, number = _history_for_year(query, year)

        count = 0
        while year > "00" and count < 100:
            if edocket == 1:
                docket_id = f"{year}A{number}"
            else:
                docket_id = f"{year}-{number}"

            query.prepare("SELECT * FROM cases WHERE docket_id = :did")
            query.bindValue(":did", docket_id)
            print(f"Loading {docket_id}")
            if not query.exec():
                query_error(query)
            if query.next() and query.value("active") == 0:
                # Skipped dockets do not count towards the limit of 100.
                print("Already exists and is inactive")
                number += 1
                continue

            result = update_db(docket_id, db)
            print(f"result: {result}")
            if result < 0:
                # No such docket: record where this year ended and move on
                # to the previous year.
                year = f"{int(year) - 1:02d}"
                query.prepare(
                    "UPDATE history set number = :number WHERE history_id=:hid"
                )
                query.bindValue(":number", number - 1)
                query.bindValue(":hid", history_id)
                if not query.exec():
                    query_error(query)
                history_id, edocket, number = _history_for_year(query, year)
                continue

            number += 1
            count += 1