"""Fetch Supreme Court docket pages and mirror them into the local SQL database."""

import datetime
import re

import dateparser
import requests
from bs4 import BeautifulSoup, Tag
from PySide6.QtCore import QDateTime, QThread
from PySide6.QtSql import QSqlDatabase, QSqlQuery

from lib.utils import query_error


def _exec(query: QSqlQuery) -> None:
    """Execute the prepared *query*, handing any failure to ``query_error``."""
    if not query.exec():
        query_error(query)


def _load_history(query: QSqlQuery, year: str) -> tuple:
    """Fetch the ``history`` row for *year*, creating it when it is missing.

    Returns ``(history_id, edocket, number)``.  A freshly created row starts
    with ``edocket = 0`` and ``number = 1``.
    """
    query.prepare("SELECT * FROM history WHERE year = :year")
    print(f"year = {year}")
    query.bindValue(":year", year)
    _exec(query)
    if not query.next():
        query.prepare(
            "INSERT INTO history (year, edocket, number) "
            "VALUES (:year, 0, 1)"
        )
        query.bindValue(":year", year)
        _exec(query)
        return query.lastInsertId(), 0, 1
    return (
        query.value("history_id"),
        query.value("edocket"),
        query.value("number"),
    )


def update_proceedings(
    case_id: int, bs: BeautifulSoup, db: QSqlDatabase
) -> bool:
    """Store the proceedings table of a docket page and report case activity.

    The ``proceedings`` table is read as a header row followed by pairs of
    rows: an entry row (date, text) and a row whose second cell holds the
    document links for that entry.  Entries and documents that are not yet
    in the database are inserted.

    Args:
        case_id: Row id of the case the entries belong to.
        bs: Parsed docket page.
        db: Database connection used for all queries.

    Returns:
        ``False`` when the last entry starts with "rehearing denied." or
        "judgment issued.", or starts with "petition denied." and is more
        than 40 days old; ``True`` otherwise (including when the table has
        no entries at all).
    """
    table = bs.find("table", id="proceedings")
    assert isinstance(table, Tag)
    rows = table.find_all("tr")
    query = QSqlQuery(db)

    # Remember the last entry seen; it decides whether the case is final.
    # Initialised up front so a header-only table does not hit an unbound name.
    last_text = None
    last_date = None

    # rows[0] is the header row; the rest alternate entry row / documents row.
    row_iter = iter(rows[1:])
    for entry_row in row_iter:
        assert isinstance(entry_row, Tag)
        td = entry_row.contents[0]
        assert isinstance(td, Tag) and isinstance(td.string, str)
        parsed = dateparser.parse(td.string)
        assert isinstance(parsed, datetime.datetime)
        date = QDateTime.fromSecsSinceEpoch(int(parsed.timestamp()))

        td = entry_row.contents[1]
        #
        # When a case is GVRed, the cell contains nested markup rather than
        # a single plain string, so td.string is empty.
        # We need to convert to HTML and store the HTML in the entry, not
        # just plaintext.
        assert isinstance(td, Tag)
        text = td.string
        if not text:
            text = "".join([str(x) for x in td.contents])
        last_text, last_date = text, date

        query.prepare(
            "SELECT * FROM entries WHERE case_id = :cid AND date = :date AND text=:text"
        )
        query.bindValue(":cid", case_id)
        query.bindValue(":text", text)
        query.bindValue(":date", date)
        _exec(query)
        if not query.next():
            query.prepare(
                "INSERT INTO entries (case_id, date, text) VALUES (:cid,:date,:text)"
            )
            query.bindValue(":cid", case_id)
            query.bindValue(":date", date)
            query.bindValue(":text", text)
            _exec(query)
            entry_id = query.lastInsertId()
        else:
            entry_id = query.value(0)

        # The row after an entry carries that entry's document links.
        docs_row = next(row_iter, None)
        if docs_row is None:
            # Trailing entry without a documents row: nothing more to store.
            break
        assert isinstance(docs_row, Tag)
        assert isinstance(docs_row.contents[1], Tag)
        for a in docs_row.contents[1]:
            assert isinstance(a, Tag)
            url = a.attrs["href"]
            name = a.string
            query.prepare(
                "SELECT * FROM documents WHERE url=:url AND entry_id = :eid"
            )
            query.bindValue(":url", url)
            query.bindValue(":eid", entry_id)
            _exec(query)
            if not query.next():
                query.prepare(
                    "INSERT INTO documents (entry_id, name, url) "
                    "VALUES (:eid, :name, :url)"
                )
                query.bindValue(":eid", entry_id)
                query.bindValue(":name", name)
                query.bindValue(":url", url)
                _exec(query)

    if last_text is None:
        # No entries at all: nothing says the case is final.
        return True

    assert isinstance(last_text, str)
    print(f"text: {last_text.lower()}")
    #
    # If cert is denied, a petition for rehearing can be requested.
    # The petitioner has 40 days to file for a rehearing.
    #
    # Translation, if the last entry is "petition denied\..*$" and 40 days
    # have passed, the case is final
    active = True
    lowered = last_text.lower()
    if lowered.startswith(("rehearing denied.", "judgment issued.")):
        active = False
    if lowered.startswith("petition denied."):
        assert isinstance(last_date, QDateTime)
        delta = last_date.daysTo(QDateTime.currentDateTime())
        if delta > 40:
            active = False
    return active


def update_db(case_id: str, db: QSqlDatabase) -> int:
    """Fetch one docket page and insert/update the case in the database.

    Args:
        case_id: Docket identifier such as ``"23-123"`` or ``"23A45"``.
        db: Database connection used for all queries.

    Returns:
        The ``cases`` row id of the docket, or ``-1`` when the fetched page
        has an empty ``<title>`` (the site's "not found" response).

    Raises:
        Exception: If *case_id* does not look like a docket id, or a linked
            case could not be created.
    """
    #
    # See if this case already exists.
    #
    # We assume that case_id == docket_id at this point.  If it does not,
    # then we will build out from the request we get
    print(f"Updating {case_id}")
    matches = re.match(r"(\d\d)[-A](\d+)(.*)$", case_id)
    if matches is None:
        raise Exception(f"Not a match {case_id}")
    case_id = matches.group()

    query = QSqlQuery(db)
    query.prepare("SELECT * FROM cases WHERE docket_id = :did")
    query.bindValue(":did", case_id)
    _exec(query)

    exists = query.next()
    existing_id = None
    if exists:
        # Read the row now, while the query is still positioned on it.
        existing_id = query.value(0)
        if query.value("active") != 1:
            # Inactive cases are final; no need to fetch the page again.
            return int(query.value("case_id"))

    r = requests.get(
        f"https://www.supremecourt.gov/docket/docketfiles/html/public/{case_id}.html"
    )
    if r.status_code != 200:
        print(r.status_code)
        # NOTE(review): exit() raises SystemExit from library code; consider
        # raising a regular exception instead.
        exit(1)

    bs = BeautifulSoup(r.text, "lxml")
    #
    # SCOTUS does not return 404 for page not found.
    #
    title = bs.find("title")
    assert isinstance(title, Tag) and isinstance(title.string, str)
    if title.string.strip() == "":
        return -1

    #
    # docket_id, previous_docket, petitioners, respondents, date
    # all come from the docketinfo table
    #
    di = bs.find("table", id="docketinfo")
    assert di is not None and isinstance(di, Tag)

    #
    # docket_id is first row, first column
    span = di.find("span")
    assert span is not None and isinstance(span, Tag)
    tmp = span.contents[0]
    assert isinstance(tmp, str)
    matches = re.match(r"(No.)?\s*(\d+[-A]\d+).*$", tmp)
    assert matches is not None
    print(matches, matches.groups())
    docket_id = matches.group(2)
    print(f"Found {docket_id}")

    #
    # Title is second row, first column
    tr = di.contents[1]
    assert isinstance(tr, Tag) and isinstance(tr.contents[0], Tag)
    assert tr.contents[0].string == "Title:"
    td = tr.contents[1]
    assert isinstance(td, Tag)
    span = td.contents[0]
    assert isinstance(span, Tag) and isinstance(span.contents[0], str)
    petitioners = span.contents[0].strip()
    #
    # XXX - We need to deal with other titles.  Change this to an RE
    # UPDATED: we are just handling the two we know about.
    #
    petitioners = petitioners.replace(", Petitioners", "")
    petitioners = petitioners.replace(", Applicants", "")

    # The respondent is read from child index 4, so at least five children
    # are needed (the previous ">= 4" check allowed an IndexError).
    if len(span.contents) > 4:
        assert isinstance(span.contents[4], str)
        respondent = span.contents[4].strip()
    else:
        respondent = ""

    #
    # Date on which the case was docketed
    tr = di.contents[2]
    assert isinstance(tr, Tag) and isinstance(tr.contents[1], Tag)
    td = tr.contents[1]
    assert isinstance(td, Tag)
    if td.string is None:
        # Placeholder date used when the page gives no docketed date.
        tmp = datetime.datetime(year=1776, month=7, day=4)
    else:
        docket_date = td.string.strip()
        tmp = dateparser.parse(docket_date)
    assert isinstance(tmp, datetime.datetime)
    date = QDateTime.fromSecsSinceEpoch(int(tmp.timestamp()))

    #
    # linked case is row 3, column 0
    tr = di.contents[3]
    assert isinstance(tr, Tag) and isinstance(tr.contents[0], Tag)
    linked = tr.contents[0].string

    #
    # if this case does not exist, create it.  This stops a recursion loop.
    #
    if not exists:
        query.prepare(
            "INSERT INTO cases (docket_id, petitioners, respondents, date, active) "
            "VALUES (:did, :pet, :resp, :date, 1)"
        )
        query.bindValue(":did", docket_id)
        query.bindValue(":pet", petitioners)
        query.bindValue(":resp", respondent)
        query.bindValue(":date", date)
        _exec(query)
        row_id = query.lastInsertId()
    else:
        row_id = existing_id
    assert isinstance(row_id, int)

    #
    # If there is a linked case, we need to get the ID for that case.
    if linked is not None:
        linked = linked.replace("Linked with ", "")
        for did in linked.split(","):
            did = did.strip()
            if not did:
                continue
            query.prepare("SELECT * FROM cases WHERE docket_id = :did")
            # Look up this one docket id, not the whole comma-separated
            # list; binding the list never matched when several cases were
            # linked and caused the linked cases to be re-fetched endlessly.
            query.bindValue(":did", did)
            _exec(query)
            if not query.next():
                linked_id = update_db(did, db)
                if linked_id <= 0:
                    raise Exception(f"Unable to create linked case: {did}")
            else:
                linked_id = query.value("case_id")

            # Record the link once.
            query.prepare(
                "SELECT * FROM cases_cases WHERE lhs = :lhs "
                "AND rhs = :rhs"
            )
            query.bindValue(":lhs", row_id)
            query.bindValue(":rhs", linked_id)
            _exec(query)
            if not query.next():
                query.prepare(
                    "INSERT INTO cases_cases (lhs, rhs) "
                    "VALUES ( :lhs, :rhs)"
                )
                query.bindValue(":lhs", row_id)
                query.bindValue(":rhs", linked_id)
                _exec(query)

    #
    # XXX - Process lower courts
    #
    active = update_proceedings(row_id, bs, db)
    if not active:
        query.prepare("UPDATE cases SET active=0 WHERE case_id = :cid")
        query.bindValue(":cid", row_id)
        _exec(query)
    return row_id


class updateThread(QThread):
    """Worker thread that refreshes a single docket via ``update_db``."""

    # Docket to refresh; set with setDocketId() before the thread is started.
    docket_id = None

    def __init__(self) -> None:
        super().__init__()
        print("updateThread: __init__(docket_id)")

    def setDocketId(self, docket_id: str) -> None:
        """Remember the docket id that ``run`` will refresh."""
        self.docket_id = docket_id

    def run(self) -> None:
        """Clone the default connection as "update" and refresh the docket.

        Raises:
            Exception: If the cloned connection cannot be opened.
        """
        print(f"updateThread: running on {self.currentThread()}")
        db = QSqlDatabase.cloneDatabase("qt_sql_default_connection", "update")
        if not db.open():
            print(db.lastError())
            raise Exception("db.open()")
        try:
            case_id = update_db(str(self.docket_id), db)
        finally:
            # Always release the cloned connection, even when the update fails.
            db.close()
            del db
            QSqlDatabase.removeDatabase("update")
        print(f"updateThread: run() returns {case_id}")


class loadCases(QThread):
    """Worker thread that walks docket numbers and loads them via ``update_db``."""

    def run(self) -> None:
        """Load up to 100 dockets, starting from the position saved in ``history``.

        Starts at the current two-digit year.  Whenever ``update_db`` returns a
        negative result for a docket number, the position for that year is
        saved and the walk moves to the previous year.

        Raises:
            Exception: If the cloned connection cannot be opened.
        """
        db = QSqlDatabase.cloneDatabase("qt_sql_default_connection", "load")
        if not db.open():
            raise Exception("db.open()")
        try:
            year = QDateTime.currentDateTime().toString("yy")
            query = QSqlQuery(db)
            history_id, edocket, number = _load_history(query, year)

            count = 0
            while year > "00" and count < 100:
                query.prepare("SELECT * FROM cases WHERE docket_id = :did")
                if edocket == 1:
                    docket_id = f"{year}A{number}"
                else:
                    docket_id = f"{year}-{number}"
                query.bindValue(":did", docket_id)
                _exec(query)
                if query.next():
                    if query.value("active") == 0:
                        print("Already exists and is inactive")
                        number += 1
                        continue

                result = update_db(docket_id, db)
                print(f"result: {result}")
                if result < 0:
                    # No such docket: save where this year ended and move
                    # on to the previous year.
                    year = f"{int(year) - 1:02d}"
                    if number > 1:
                        query.prepare(
                            "UPDATE history set number = :number WHERE history_id=:hid"
                        )
                        query.bindValue(":number", number - 1)
                        query.bindValue(":hid", history_id)
                        _exec(query)
                    history_id, edocket, number = _load_history(query, year)
                    continue

                number += 1
                count += 1

            if number > 1:
                query.prepare(
                    "UPDATE history SET number= :number WHERE year = :year"
                )
                query.bindValue(":number", number)
                query.bindValue(":year", year)
                _exec(query)
            # Drop the query before the connection is removed below.
            del query
        finally:
            # Always release the cloned connection, even when loading fails.
            db.close()
            del db
            QSqlDatabase.removeDatabase("load")