#!venv/bin/python3
"""Desktop viewer for docket data scraped from www.supremecourt.gov.

Cases, docket entries and linked documents are kept in the SQLite file
``scotus.db``; the Qt window lists the cases and shows the docket entries of
the selected one.
"""
import datetime
import re
import sys

import dateparser
import requests
from typing import NoReturn
from PySide6.QtCore import QCoreApplication, QModelIndex, Signal, Qt
from PySide6.QtSql import QSqlDatabase, QSqlQuery, QSqlQueryModel
from PySide6.QtWidgets import (
    QAbstractItemView,
    QApplication,
    QHeaderView,
    QMainWindow,
    QStyledItemDelegate,
    QTableWidgetItem,
)
from bs4 import BeautifulSoup, Tag

from ui.MainWindow import Ui_MainWindow

translate = QCoreApplication.translate


def query_error(query: QSqlQuery) -> NoReturn:
    """Standardized query error reporter.

    Prints the executed statement, its bound values and the driver error,
    then raises ``Exception`` so the caller never continues after a failed
    query.
    """
    print(
        translate("MainWindow", "SQL Error:\n")
        + "{}\n{}\n{}:{}".format(
            query.executedQuery(),
            query.boundValues(),
            query.lastError().type(),
            query.lastError().text(),
        )
    )
    raise Exception(translate("MainWindow", "SQL Error"))


class dateDelegate(QStyledItemDelegate):
    """Item delegate that renders a Unix timestamp as e.g. ``August 5, 2024``."""

    def displayText(self, value, locale) -> str:
        """Return the display string for ``value``.

        ``value`` is interpreted as seconds since the epoch.  Anything that
        cannot be converted (for example a NULL column) is rendered by the
        base class instead of raising inside the paint path.
        """
        try:
            date = datetime.date.fromtimestamp(value)
        except (TypeError, ValueError, OverflowError, OSError):
            return super().displayText(value, locale)
        # "%-d" is a glibc extension and fails on other platforms; build the
        # unpadded day number from the date fields instead.
        return f"{date.strftime('%B')} {date.day}, {date.year}"


class MainWindow(QMainWindow, Ui_MainWindow):
    """Main window: a table of cases plus the docket of the selected case."""

    # Emitted with the case_id of the row the user clicked.
    show_entries = Signal(int)

    def __init__(self) -> None:
        """Build the UI, load every case into the view and size the columns."""
        super().__init__()
        self.setupUi(self)

        # QSqlQuery(sql) would already execute the statement in its
        # constructor, so create an empty query and execute exactly once.
        query = QSqlQuery()
        if not query.exec("SELECT * FROM cases ORDER BY docket_id"):
            query_error(query)
        # Views do not take ownership of models or delegates; parent them to
        # the window so they live as long as the view does.
        model = QSqlQueryModel(self)
        model.setQuery(query)
        self.casesView.setModel(model)
        self.casesView.setSelectionMode(QAbstractItemView.SelectionMode.SingleSelection)
        self.casesView.setSelectionBehavior(QAbstractItemView.SelectionBehavior.SelectRows)
        # Columns follow the cases table: 0 case_id, 1 docket_id, 2 linked,
        # 3 petitioners, 4 respondents, 5 date.
        self.casesView.hideColumn(0)
        self.casesView.hideColumn(2)
        self._date_delegate = dateDelegate(self)
        self.casesView.setItemDelegateForColumn(5, self._date_delegate)
        self.casesView.resizeColumnToContents(1)
        self.casesView.resizeColumnToContents(5)
        header = self.casesView.horizontalHeader()
        header.setSectionResizeMode(3, QHeaderView.ResizeMode.Fixed)
        header.setSectionResizeMode(4, QHeaderView.ResizeMode.Fixed)
        # The window is shown before measuring so the view width used below
        # is the laid-out width.
        self.show()
        # Split what is left after the two content-sized columns evenly
        # between petitioners and respondents (5 px kept as slack).
        remaining = (
            self.casesView.width() - header.sectionSize(1) - header.sectionSize(5) - 5
        )
        self.casesView.setColumnWidth(3, int(remaining * 0.5))
        self.casesView.setColumnWidth(4, int(remaining * 0.5))
        self.casesView.verticalHeader().hide()
        self.casesView.resizeRowsToContents()
        self.casesView.doubleClicked.connect(self.rowClicked)
        self.casesView.clicked.connect(self.rowClicked)

        self.docketWidget.setColumnCount(2)
        self.docketWidget.setHorizontalHeaderLabels([
            'Date', 'Proceedings and Orders',
        ])
        self.docketWidget.resizeColumnToContents(0)
        self.docketWidget.horizontalHeader().setSectionResizeMode(
            1, QHeaderView.ResizeMode.Stretch
        )
        return

    def populateDocket(self, case_id: int) -> None:
        """Fill the docket table with the entries recorded for ``case_id``."""
        query = QSqlQuery()
        query.prepare("SELECT * FROM entries WHERE case_id=:cid ORDER BY entry_id")
        query.bindValue(":cid", case_id)
        if not query.exec():
            query_error(query)
        # Drop all previous rows; header labels and column count are kept.
        self.docketWidget.setRowCount(0)
        row = 0
        while query.next():
            print(query.value(0), query.value(1), query.value(2), query.value(3))
            # The row must exist before setItem(); items set outside the
            # current row range are ignored by QTableWidget.
            self.docketWidget.insertRow(row)
            date_item = QTableWidgetItem()
            date_item.setData(Qt.ItemDataRole.DisplayRole, query.value(2))
            self.docketWidget.setItem(row, 0, date_item)
            text_item = QTableWidgetItem()
            text_item.setData(Qt.ItemDataRole.DisplayRole, query.value(3))
            self.docketWidget.setItem(row, 1, text_item)
            row += 1
        return

    def rowClicked(self, index: QModelIndex) -> None:
        """Announce the clicked case via ``show_entries`` and show its docket."""
        docket = index.siblingAtColumn(1).data()
        print(docket)
        case_id = index.siblingAtColumn(0).data()
        self.show_entries.emit(case_id)
        self.populateDocket(case_id)
        return


# Desired schema.  schema_update() compares each CREATE TABLE statement with
# what the database currently holds and rebuilds the table when they differ.
SQL_CMDS = [
    #"PRAGMA foreign_keys=ON",
    "CREATE TABLE IF NOT EXISTS cases "
    "(case_id INTEGER PRIMARY KEY AUTOINCREMENT, "
    "docket_id TEXT, "
    "linked INTEGER, "
    "petitioners TEXT, respondents TEXT, date INTEGER, "
    "FOREIGN KEY(linked) REFERENCES cases(case_id))",
    #
    "CREATE TABLE IF NOT EXISTS entries ("
    "entry_id INTEGER PRIMARY KEY AUTOINCREMENT, "
    "case_id INTEGER, "
    "date INTEGER, "
    "text TEXT, "
    "FOREIGN KEY(case_id) REFERENCES cases(case_id))",
    #
    "CREATE TABLE IF NOT EXISTS documents ("
    "document_id INTEGER PRIMARY KEY AUTOINCREMENT, "
    "entry_id INTEGER, "
    "name TEXT, "
    "url TEXT, "
    "FOREIGN KEY(entry_id) REFERENCES entries(entry_id))",
]


def schema_update(db: QSqlDatabase) -> None:
    """Create missing tables and rebuild tables whose definition changed.

    Statements in ``SQL_CMDS`` that are not ``CREATE TABLE`` are executed
    as-is.  For each ``CREATE TABLE`` the stored definition is read from
    ``sqlite_schema``; if the table is missing it is created, if the stored
    text differs (case-insensitively) the table is rebuilt by creating
    ``<name>_new``, copying the columns of the old definition across,
    dropping the old table and renaming the new one.

    Raises:
        AttributeError: a statement or stored definition cannot be parsed.
        Exception: any SQL statement fails (raised by ``query_error``).
    """
    query = QSqlQuery()
    for sql in SQL_CMDS:
        inlower = sql.lower().strip()
        if not inlower.startswith("create table "):
            if not query.exec(sql):
                query_error(query)
            continue

        # Normalise to the form "CREATE TABLE name (...)" so it can be
        # compared with the text stored in sqlite_schema.
        create_cmd = re.sub(r"IF NOT EXISTS ", "", sql.strip())
        create_cmd = re.sub(r"\s\s*", " ", create_cmd)
        matches = re.search(r"^(CREATE TABLE )([^ ]+)( \(.+)$", create_cmd)
        if matches is None:
            raise AttributeError(f"No match found: {create_cmd}")
        table_name = matches.group(2)
        create_cmd = matches.group(1) + matches.group(2) + matches.group(3)
        print("Table name = {}".format(table_name))

        # Restrict to type='table': index rows share the same tbl_name and
        # must not be mistaken for the table definition.
        query.prepare(
            "SELECT sql FROM sqlite_schema WHERE type = 'table' AND tbl_name = :tbl"
        )
        query.bindValue(":tbl", table_name)
        if not query.exec():
            query_error(query)
        if not query.next():
            # Table does not exist yet: create it from the original statement.
            print(sql)
            if not query.exec(sql):
                query_error(query)
            continue

        old = query.value(0)
        if old.lower() == create_cmd.lower():
            continue
        print(old.lower())
        print(create_cmd.lower())
        print(translate("MainWindow", "Updating: ") + f"{table_name}")

        # Step 1 turn off foreign key constraints
        if not query.exec("PRAGMA foreign_keys=OFF"):
            query_error(query)
        # Step 2 start a transaction
        db.transaction()
        # Step 3 remember old indexes, triggers, and views
        # Step 4 create new table
        new_table_name = table_name + "_new"
        sql = matches.group(1) + new_table_name + matches.group(3)
        print(sql)
        if not query.exec(sql):
            query_error(query)
        # Step 5 transfer content: copy every column named in the OLD
        # definition, skipping the FOREIGN KEY clauses.
        body = re.search(r"\((.+)\)", old)
        if body is None:
            raise AttributeError(f"No column list found: {old}")
        coldefs = body.group(1).split(", ")
        cols = [
            coldef.split(" ")[0]
            for coldef in coldefs
            if not coldef.startswith('FOREIGN ')
        ]
        cols_str = ", ".join(cols)
        sql = f"INSERT INTO {new_table_name} ({cols_str}) SELECT {cols_str} FROM {table_name}"
        query.prepare(sql)
        if not query.exec():
            query_error(query)
        # Step 6 drop old table
        query.prepare("DROP TABLE " + table_name)
        if not query.exec():
            query_error(query)
        # Step 7 rename new table to old table
        query.prepare("ALTER TABLE " + new_table_name + " RENAME TO " + table_name)
        if not query.exec():
            query_error(query)
        # Step 8 create indexes, triggers, and views
        # Step 9 rebuild affected views
        # Step 10 turn foreign key constraints back on
        # if not query.exec("PRAGMA foreign_keys=ON"):
        #     query_error(query)
        # Step 11 commit the changes
        db.commit()
    return


def update_proceedings(case_id: int, bs: BeautifulSoup) -> None:
    """Store the rows of the page's ``proceedings`` table for ``case_id``.

    After the header row the table is read in pairs: a row holding the date
    and the entry text, followed by a row whose second cell holds the links
    to the filed documents.  Entries and documents already present in the
    database are left untouched; new ones are inserted.

    Raises:
        AssertionError: the page does not have the expected structure.
        Exception: an SQL statement fails (raised by ``query_error``).
    """
    table = bs.find('table', id="proceedings")
    assert isinstance(table, Tag)
    # Skip the header row; the iterator is shared so each pass can also
    # consume the documents row that follows the entry row.
    rows = iter(table.find_all('tr')[1:])
    query = QSqlQuery()
    for tr in rows:
        assert isinstance(tr, Tag)
        td = tr.contents[0]
        assert isinstance(td, Tag) and isinstance(td.string, str)
        date = dateparser.parse(td.string)
        # Only datetime.datetime provides timestamp().
        assert isinstance(date, datetime.datetime)
        stamp = date.timestamp()
        td = tr.contents[1]
        assert isinstance(td, Tag) and isinstance(td.string, str)
        text = td.string.strip()

        query.prepare(
            "SELECT * FROM entries WHERE case_id = :cid AND date = :date AND text=:text"
        )
        query.bindValue(':cid', case_id)
        query.bindValue(':text', text)
        query.bindValue(':date', stamp)
        if not query.exec():
            query_error(query)
        if not query.next():
            query.prepare(
                "INSERT INTO entries (case_id, date, text) VALUES (:cid,:date,:text)"
            )
            query.bindValue(':cid', case_id)
            query.bindValue(':date', stamp)
            query.bindValue(':text', text)
            if not query.exec():
                query_error(query)
            entry_id = query.lastInsertId()
        else:
            entry_id = query.value(0)

        # The row after an entry carries its document links.
        doc_row = next(rows, None)
        if doc_row is None:
            break
        assert isinstance(doc_row, Tag)
        doc_cell = doc_row.contents[1]
        assert isinstance(doc_cell, Tag)
        print(doc_cell)
        # Every link is recorded; stray text nodes between links are skipped
        # by searching for anchors only.
        for anchor in doc_cell.find_all('a'):
            url = anchor.get('href')
            if url is None:
                continue
            name = anchor.string
            query.prepare("SELECT * FROM documents WHERE url=:url AND entry_id = :eid")
            query.bindValue(':url', url)
            query.bindValue(":eid", entry_id)
            if not query.exec():
                query_error(query)
            if not query.next():
                query.prepare(
                    "INSERT INTO documents (entry_id, name, url) "
                    "VALUES (:eid, :name, :url)"
                )
                query.bindValue(":eid", entry_id)
                query.bindValue(":name", name)
                query.bindValue(":url", url)
                if not query.exec():
                    query_error(query)
    return


def update_db(case_id) -> int:
    """Download one docket page and store it; return the row id of the case.

    ``case_id`` is the docket number used in the page URL (for example
    ``'24-203'``).  The case row is created when missing, a linked case is
    fetched recursively when it is not in the database yet, and the
    proceedings are stored through ``update_proceedings``.

    The process exits with status 1 when the page cannot be fetched.
    """
    r = requests.get(
        'https://www.supremecourt.gov/docket/docketfiles/html/public/{}.html'.format(case_id),
        timeout=30,  # never hang forever on an unresponsive server
    )
    if r.status_code != 200:
        print(r.status_code)
        sys.exit(1)
    bs = BeautifulSoup(r.text, 'lxml')
    #
    # docket_id, previous_docket, petitioners, respondents, date
    # all come from the docketinfo table
    #
    di = bs.find('table', id='docketinfo')
    assert di is not None and isinstance(di, Tag)
    #
    # docket_id is first row, first column
    docket_span = di.find('span')
    assert docket_span is not None and isinstance(docket_span, Tag)
    docket_text = docket_span.contents[0]
    assert isinstance(docket_text, str)
    docket_id = docket_text.strip().replace('No. ', '')
    #
    # Title is second row, first column
    tr = di.contents[1]
    assert isinstance(tr, Tag) and isinstance(tr.contents[0], Tag)
    assert tr.contents[0].string == 'Title:'
    td = tr.contents[1]
    assert isinstance(td, Tag)
    span = td.contents[0]
    assert isinstance(span, Tag) and isinstance(span.contents[0], str)
    petitioners = span.contents[0].strip()
    #
    # XXX - We need to deal with other titles. Change this to an RE
    # UPDATED: we are just handling the two we know about.
    #
    petitioners = petitioners.replace(', Petitioners', '')
    petitioners = petitioners.replace(', Applicants', '')
    assert isinstance(span.contents[4], str)
    respondent = span.contents[4].strip()
    #
    # Date on which the case was docketed
    tr = di.contents[2]
    assert isinstance(tr, Tag) and isinstance(tr.contents[1], Tag)
    td = tr.contents[1]
    assert isinstance(td, Tag) and td.string is not None
    docket_date = td.string.strip()
    date = dateparser.parse(docket_date)
    #
    # linked case is row 3, column 0
    tr = di.contents[3]
    assert isinstance(tr, Tag) and isinstance(tr.contents[0], Tag)
    linked = tr.contents[0].string
    print(docket_id, petitioners, respondent, date, linked)
    #
    # See if this case already exists.
    #
    query = QSqlQuery()
    query.prepare("SELECT * FROM cases WHERE docket_id = :did")
    query.bindValue(':did', docket_id)
    if not query.exec():
        query_error(query)
    #
    # if it does not exist, create it. This stops a recursion loop.
    #
    if not query.next():
        query.prepare(
            "INSERT INTO cases (docket_id, petitioners, respondents, date, linked) "
            "VALUES (:did, :pet, :resp, :date, NULL)"
        )
        query.bindValue(':did', docket_id)
        query.bindValue(':pet', petitioners)
        query.bindValue(':resp', respondent)
        # Only datetime.datetime provides timestamp().
        assert isinstance(date, datetime.datetime)
        query.bindValue(':date', date.timestamp())
        if not query.exec():
            query_error(query)
        row_id = query.lastInsertId()
        linked_id = None
    else:
        row_id = query.value(0)
        linked_id = query.value('linked')
    assert isinstance(row_id, int)
    #
    # If there is a linked case, we need to get the ID for that case.
    if linked is not None:
        linked = linked.replace('Linked with ', '')
        query.prepare("SELECT * FROM cases WHERE docket_id = :did")
        query.bindValue(':did', linked)
        if not query.exec():
            query_error(query)
        if not query.next():
            new_id = update_db(linked)
        else:
            new_id = query.value(0)
        if new_id != linked_id:
            query.prepare("UPDATE cases SET linked=:lid WHERE case_id = :cid")
            query.bindValue(':lid', new_id)
            query.bindValue(':cid', row_id)
            if not query.exec():
                query_error(query)
    #
    # XXX - Process lower courts
    #
    update_proceedings(row_id, bs)
    return row_id


def main() -> int:
    """Open ``scotus.db``, bring the schema up to date and run the GUI."""
    app = QApplication(sys.argv)
    db = QSqlDatabase.addDatabase("QSQLITE")
    #db.setConnectOptions("PRAGMA foreign_keys = ON")
    db.setDatabaseName("scotus.db")
    if not db.open():
        print(translate("MainWindow", "Cannot open database: ") + db.lastError().text())
        return 1
    schema_update(db)
    #update_db('24-203')
    #update_db('23A1058')
    # Keep a reference so the window is not garbage-collected while the
    # event loop runs.
    window = MainWindow()
    return app.exec()


if __name__ == "__main__":
    sys.exit(main())