"""Mirror Supreme Court docket pages into the local SQL database.

``update_db`` downloads one docket page, creates or refreshes its row in
``cases`` and hands the page to ``update_proceedings``, which stores the
docket entries and their documents.  ``updateThread`` runs ``update_db``
on a ``QThread`` using a database connection named ``update``.
"""

import datetime

# ``template`` is not referenced anywhere in this file, and ``re.template``
# was removed in Python 3.13.  The guard keeps the module importable there
# while leaving the name in place on older interpreters.
try:
    from re import template  # noqa: F401
except ImportError:
    pass

import dateparser
import requests
from bs4 import BeautifulSoup, Tag
from PySide6.QtCore import QThread
from PySide6.QtSql import QSqlDatabase, QSqlQuery

from lib.utils import query_error

# Seconds to wait on the docket server before giving up on a request.
_HTTP_TIMEOUT = 30

# Lower-cased text of a final docket entry that marks the case as closed.
_CLOSING_ENTRIES = ("petition denied.",)


def _run(query: QSqlQuery) -> None:
    """Execute *query* and pass it to ``query_error`` if execution fails."""
    if not query.exec():
        query_error(query)


def update_proceedings(case_id: int, bs: BeautifulSoup) -> bool:
    """Store the docket's proceedings and report whether the case is active.

    The ``proceedings`` table is read as a header row followed by pairs of
    rows: an entry row (date in cell 0, text in cell 1) and a row whose
    second cell holds the links to that entry's documents.  Entries and
    documents that are not yet in the database are inserted through the
    ``update`` connection.

    Args:
        case_id: Value stored in ``entries.case_id`` for every entry.
        bs: Parsed docket page.

    Returns:
        ``False`` when the text of the last entry (lower-cased) is listed
        in ``_CLOSING_ENTRIES``; ``True`` otherwise, including when the
        table contains no entries at all.

    Raises:
        AssertionError: If the page does not have the expected structure
            or an entry date cannot be parsed.
    """
    table = bs.find("table", id="proceedings")
    assert isinstance(table, Tag)

    query = QSqlQuery(QSqlDatabase.database("update"))

    rows = iter(table.find_all("tr"))
    next(rows, None)  # The first row is the header.

    # Stays empty when there are no entries, so the final check below is
    # well defined instead of hitting an unbound name.
    text = ""
    for entry_row in rows:
        assert isinstance(entry_row, Tag)

        date_cell = entry_row.contents[0]
        assert isinstance(date_cell, Tag) and isinstance(date_cell.string, str)
        when = dateparser.parse(date_cell.string)
        # timestamp() exists on datetime.datetime only, not on datetime.date.
        assert isinstance(when, datetime.datetime)
        stamp = when.timestamp()

        text_cell = entry_row.contents[1]
        assert isinstance(text_cell, Tag) and isinstance(text_cell.string, str)
        text = text_cell.string.strip()

        # Reuse the entry if it is already stored, otherwise insert it.
        query.prepare(
            "SELECT * FROM entries WHERE case_id = :cid AND date = :date AND text=:text"
        )
        query.bindValue(":cid", case_id)
        query.bindValue(":text", text)
        query.bindValue(":date", stamp)
        _run(query)
        if query.next():
            entry_id = query.value(0)
        else:
            query.prepare(
                "INSERT INTO entries (case_id, date, text) VALUES (:cid,:date,:text)"
            )
            query.bindValue(":cid", case_id)
            query.bindValue(":date", stamp)
            query.bindValue(":text", text)
            _run(query)
            entry_id = query.lastInsertId()

        # The row after an entry carries that entry's document links.
        docs_row = next(rows, None)
        if docs_row is None:
            # Last entry has no documents row; nothing more to store.
            break
        assert isinstance(docs_row, Tag)
        links_cell = docs_row.contents[1]
        assert isinstance(links_cell, Tag)

        for link in links_cell:
            # Skip text nodes between links and tags that carry no URL.
            if not isinstance(link, Tag) or "href" not in link.attrs:
                continue
            url = link.attrs["href"]
            name = link.string

            query.prepare(
                "SELECT * FROM documents WHERE url=:url AND entry_id = :eid"
            )
            query.bindValue(":url", url)
            query.bindValue(":eid", entry_id)
            _run(query)
            if not query.next():
                query.prepare(
                    "INSERT INTO documents (entry_id, name, url) "
                    "VALUES (:eid, :name, :url)"
                )
                query.bindValue(":eid", entry_id)
                query.bindValue(":name", name)
                query.bindValue(":url", url)
                _run(query)

    print(f"text: {text.lower()}")
    return text.lower() not in _CLOSING_ENTRIES


def update_db(case_id) -> int:
    """Download a docket page and bring its database records up to date.

    On entry ``case_id`` is used as a docket id: it is looked up in
    ``cases.docket_id`` and inserted into the download URL.  A case that is
    already stored with ``active`` different from 1 is returned without
    any download.  Otherwise the page is fetched, the case row is created
    if missing, a linked case (if the page names one) is resolved --
    recursing into ``update_db`` when it is not stored yet -- and the
    proceedings are stored via ``update_proceedings``.

    Args:
        case_id: Docket id of the case to update.

    Returns:
        The ``case_id`` of the row in ``cases``.

    Raises:
        SystemExit: If the server answers with a status other than 200.
        AssertionError: If the page does not have the expected structure.
    """
    #
    # See if this case already exists.
    #
    # We assume that case_id == docket_id at this point.  If it does not,
    # then we will build out from the request we get.
    query = QSqlQuery(QSqlDatabase.database("update"))
    query.prepare("SELECT * FROM cases WHERE docket_id = :did")
    query.bindValue(":did", case_id)
    _run(query)
    exists = query.next()

    row_id = None
    linked_id = None
    if exists:
        if query.value("active") != 1:
            # Inactive cases are not refreshed.
            return int(query.value("case_id"))
        # Read what is needed from the row now, before the query object
        # is prepared again further down.
        row_id = query.value(0)
        linked_id = query.value("linked")

    # A timeout keeps a stalled server from blocking the caller forever.
    r = requests.get(
        f"https://www.supremecourt.gov/docket/docketfiles/html/public/{case_id}.html",
        timeout=_HTTP_TIMEOUT,
    )
    if r.status_code != 200:
        print(r.status_code)
        # Same exception the ``exit`` helper raises, without depending on
        # the ``site`` module having installed that helper.
        raise SystemExit(1)

    bs = BeautifulSoup(r.text, "lxml")

    #
    # docket_id, previous_docket, petitioners, respondents, date
    # all come from the docketinfo table
    #
    di = bs.find("table", id="docketinfo")
    assert di is not None and isinstance(di, Tag)

    #
    # docket_id is first row, first column
    span = di.find("span")
    assert span is not None and isinstance(span, Tag)
    raw_docket = span.contents[0]
    assert isinstance(raw_docket, str)
    docket_id = raw_docket.strip().replace("No. ", "")

    #
    # Title is second row, first column
    tr = di.contents[1]
    assert isinstance(tr, Tag) and isinstance(tr.contents[0], Tag)
    assert tr.contents[0].string == "Title:"
    td = tr.contents[1]
    assert isinstance(td, Tag)
    span = td.contents[0]
    assert isinstance(span, Tag) and isinstance(span.contents[0], str)
    petitioners = span.contents[0].strip()

    #
    # XXX - We need to deal with other titles.  Change this to an RE.
    # UPDATED: we are just handling the two we know about.
    #
    petitioners = petitioners.replace(", Petitioners", "")
    petitioners = petitioners.replace(", Applicants", "")

    assert isinstance(span.contents[4], str)
    respondent = span.contents[4].strip()

    #
    # Date on which the case was docketed
    tr = di.contents[2]
    assert isinstance(tr, Tag) and isinstance(tr.contents[1], Tag)
    td = tr.contents[1]
    assert isinstance(td, Tag) and td.string is not None
    docket_date = td.string.strip()
    date = dateparser.parse(docket_date)

    #
    # linked case is row 3, column 0
    tr = di.contents[3]
    assert isinstance(tr, Tag) and isinstance(tr.contents[0], Tag)
    linked = tr.contents[0].string

    #
    # If it does not exist, create it.  This stops a recursion loop.
    #
    if not exists:
        query.prepare(
            "INSERT INTO cases (docket_id, petitioners, respondents, date, active, linked) "
            "VALUES (:did, :pet, :resp, :date, 1, NULL)"
        )
        query.bindValue(":did", docket_id)
        query.bindValue(":pet", petitioners)
        query.bindValue(":resp", respondent)
        # timestamp() exists on datetime.datetime only, not on datetime.date.
        assert isinstance(date, datetime.datetime)
        query.bindValue(":date", date.timestamp())
        _run(query)
        row_id = query.lastInsertId()
        linked_id = None

    case_id = row_id
    assert isinstance(case_id, int)

    #
    # If there is a linked case, we need to get the ID for that case.
    if linked is not None:
        linked = linked.replace("Linked with ", "")
        query.prepare("SELECT * FROM cases WHERE docket_id = :did")
        query.bindValue(":did", linked)
        _run(query)
        if query.next():
            new_id = query.value(0)
        else:
            new_id = update_db(linked)

        if new_id != linked_id:
            query.prepare("UPDATE cases SET linked=:lid WHERE case_id = :cid")
            query.bindValue(":lid", new_id)
            query.bindValue(":cid", case_id)
            _run(query)

    #
    # XXX - Process lower courts
    #

    active = update_proceedings(case_id, bs)
    if not active:
        query.prepare("UPDATE cases SET active=0 WHERE case_id = :cid")
        query.bindValue(":cid", case_id)
        _run(query)

    return case_id


class updateThread(QThread):
    """Thread that runs ``update_db`` for a single docket id.

    Set the docket with ``setDocketId`` before the thread is started.
    """

    # Docket id handed to update_db() by run(); None until setDocketId().
    docket_id = None

    def __init__(self):
        super().__init__()
        print("updateThread: __init__(docket_id)")

    def setDocketId(self, docket_id):
        """Remember the docket id that ``run`` will process."""
        self.docket_id = docket_id

    def run(self):
        """Open the ``update`` connection if needed and update the docket.

        Raises:
            Exception: If the cloned database connection cannot be opened.
        """
        print("updateThread: run()")
        # Clone the default connection under the name "update" the first
        # time through; update_db() looks the connection up by that name.
        if "update" not in QSqlDatabase.connectionNames():
            db = QSqlDatabase.cloneDatabase(
                "qt_sql_default_connection", "update"
            )
            if not db.open():
                print(db.lastError())
                raise Exception("db.open()")

        case_id = update_db(self.docket_id)
        print(f"updateThread: run() returns {case_id}")
        self.exit(1)