#!venv/bin/python3
"""Browse and update a local SQLite cache of U.S. Supreme Court dockets.

Docket pages are downloaded from supremecourt.gov, parsed with
BeautifulSoup and stored in ``scotus.db`` (tables ``cases``, ``entries``
and ``documents``).  A Qt main window lists the cases and shows the
docket entries of the selected case.

Docket numbers given on the command line are fetched (or refreshed)
before the window is shown.
"""
import datetime
import re
import sys
from typing import NoReturn

import dateparser
import requests
from bs4 import BeautifulSoup, Tag
from PySide6.QtCore import QCoreApplication, QModelIndex, Qt, Signal
from PySide6.QtSql import QSqlDatabase, QSqlQuery, QSqlQueryModel
from PySide6.QtWidgets import (
    QAbstractItemView,
    QApplication,
    QHeaderView,
    QMainWindow,
    QStyledItemDelegate,
    QTableWidgetItem,
)

from ui.MainWindow import Ui_MainWindow

translate = QCoreApplication.translate

# Public docket page for one docket number (e.g. "24-203").
DOCKET_URL = "https://www.supremecourt.gov/docket/docketfiles/html/public/{}.html"
# Seconds to wait for the server; requests has no timeout by default.
HTTP_TIMEOUT = 30


def query_error(query: QSqlQuery) -> NoReturn:
    """Standardized query error reporter.

    Prints the executed statement, its bound values and the driver error,
    then raises.

    Raises:
        Exception: always.
    """
    print(
        translate("MainWindow", "SQL Error:\n")
        + "{}\n{}\n{}:{}".format(
            query.executedQuery(),
            query.boundValues(),
            query.lastError().type(),
            query.lastError().text(),
        )
    )
    raise Exception(translate("MainWindow", "SQL Error"))


class dateDelegate(QStyledItemDelegate):
    """Show a POSIX-timestamp cell as a date such as ``February 7, 2025``."""

    def displayText(self, value, locale) -> str:
        """Return the display text for *value*.

        Values that cannot be read as a timestamp (for example a NULL
        date) fall back to the default delegate rendering.
        """
        try:
            date = datetime.date.fromtimestamp(value)
        except (TypeError, ValueError, OverflowError, OSError):
            return super().displayText(value, locale)
        # "%-d" is a glibc-only strftime extension; build the unpadded day
        # number directly so the format also works on other platforms.
        return f"{date:%B} {date.day}, {date.year}"


class MainWindow(QMainWindow, Ui_MainWindow):
    """Main window: a table of cases above the docket of the selected case."""

    # Emitted with the case_id of the row the user activated.
    show_entries = Signal(int)

    def __init__(self) -> None:
        """Build the UI, load the case list and show the window."""
        super().__init__()
        self.setupUi(self)

        # Constructing QSqlQuery with a statement already executes it, so
        # start from an empty query and execute exactly once.
        query = QSqlQuery()
        if not query.exec("SELECT * FROM cases ORDER BY docket_id"):
            query_error(query)
        model = QSqlQueryModel(self)
        model.setQuery(query)
        self.casesView.setModel(model)

        # Column order follows the cases table:
        # 0 case_id, 1 docket_id, 2 linked, 3 petitioners, 4 respondents, 5 date
        self.casesView.setSelectionMode(QAbstractItemView.SelectionMode.SingleSelection)
        self.casesView.setSelectionBehavior(QAbstractItemView.SelectionBehavior.SelectRows)
        self.casesView.hideColumn(0)
        self.casesView.hideColumn(2)
        # Views do not take ownership of delegates: give it a parent and
        # keep a reference so it lives as long as the window.
        self._date_delegate = dateDelegate(self)
        self.casesView.setItemDelegateForColumn(5, self._date_delegate)
        self.casesView.resizeColumnToContents(1)
        self.casesView.resizeColumnToContents(5)
        header = self.casesView.horizontalHeader()
        header.setSectionResizeMode(3, QHeaderView.ResizeMode.Fixed)
        header.setSectionResizeMode(4, QHeaderView.ResizeMode.Fixed)
        # Show first so that the view width used below is the real one.
        self.show()
        remaining = (
            self.casesView.width() - header.sectionSize(1) - header.sectionSize(5) - 5
        )
        # Petitioners and respondents share the remaining width equally.
        self.casesView.setColumnWidth(3, int(remaining * 0.5))
        self.casesView.setColumnWidth(4, int(remaining * 0.5))
        self.casesView.verticalHeader().hide()
        self.casesView.resizeRowsToContents()
        self.casesView.doubleClicked.connect(self.rowClicked)
        self.casesView.clicked.connect(self.rowClicked)

        self.docketWidget.setColumnCount(2)
        self.docketWidget.setHorizontalHeaderLabels([
            'Date', 'Proceedings and Orders',
        ])
        self.docketWidget.resizeColumnToContents(0)
        self.docketWidget.horizontalHeader().setSectionResizeMode(
            1, QHeaderView.ResizeMode.Stretch
        )

    def populateDocket(self, case_id: int) -> None:
        """Fill the docket table with the entries stored for *case_id*."""
        query = QSqlQuery()
        query.prepare("SELECT * FROM entries WHERE case_id=:cid ORDER BY entry_id")
        query.bindValue(":cid", case_id)
        if not query.exec():
            query_error(query)
        # Dropping every row also discards the previous case's items.
        self.docketWidget.setRowCount(0)
        row = 0
        while query.next():
            print(query.value(0), query.value(1), query.value(2), query.value(3))
            # The row must exist before setItem(); items set on a row
            # beyond rowCount() are ignored by QTableWidget.
            self.docketWidget.insertRow(row)
            # entries columns: 2 = date, 3 = text
            for column, field in enumerate((2, 3)):
                item = QTableWidgetItem()
                item.setData(Qt.ItemDataRole.DisplayRole, query.value(field))
                self.docketWidget.setItem(row, column, item)
            row += 1

    def rowClicked(self, index: QModelIndex) -> None:
        """Announce and display the case whose row was clicked."""
        docket = index.siblingAtColumn(1).data()
        print(docket)
        case_id = index.siblingAtColumn(0).data()
        self.show_entries.emit(case_id)
        self.populateDocket(case_id)


# Statements applied by schema_update().  Each CREATE TABLE must stay
# single-spaced with ", " between column definitions: schema_update()
# compares it textually with the stored schema and splits on ", ".
SQL_CMDS = [
    # "PRAGMA foreign_keys=ON",  # NOTE(review): disabled in the original file
    "CREATE TABLE IF NOT EXISTS cases "
    "(case_id INTEGER PRIMARY KEY AUTOINCREMENT, "
    "docket_id TEXT, "
    "linked INTEGER, "
    "petitioners TEXT, respondents TEXT, date INTEGER, "
    "FOREIGN KEY(linked) REFERENCES cases(case_id))",
    #
    "CREATE TABLE IF NOT EXISTS entries ("
    "entry_id INTEGER PRIMARY KEY AUTOINCREMENT, "
    "case_id INTEGER, "
    "date INTEGER, "
    "text TEXT, "
    "FOREIGN KEY(case_id) REFERENCES cases(case_id))",
    #
    "CREATE TABLE IF NOT EXISTS documents ("
    "document_id INTEGER PRIMARY KEY AUTOINCREMENT, "
    "entry_id INTEGER, "
    "name TEXT, "
    "url TEXT, "
    "FOREIGN KEY(entry_id) REFERENCES entries(entry_id))",
]


def _column_names(create_sql: str) -> list[str]:
    """Return the column names of a single-spaced CREATE TABLE statement."""
    match = re.search(r"\((.+)\)", create_sql)
    if match is None:
        raise AttributeError(f"No column list found: {create_sql}")
    return [
        coldef.split(" ")[0]
        for coldef in match.group(1).split(", ")
        if not coldef.startswith("FOREIGN ")
    ]


def _rebuild_table(
    db: QSqlDatabase,
    query: QSqlQuery,
    table_name: str,
    create_prefix: str,
    create_body: str,
    old_sql: str,
) -> None:
    """Recreate *table_name* with a new definition, keeping its rows.

    Follows the table-rebuild procedure from the SQLite ALTER TABLE
    documentation: create ``<table>_new``, copy the rows, drop the old
    table and rename the new one.  Indexes, triggers and views are not
    handled.  Foreign-key enforcement is switched off and, as in the
    original code, not switched back on.

    Raises:
        Exception: if any statement, the transaction start or the commit
            fails; the transaction is rolled back first.
    """
    new_table_name = table_name + "_new"
    create_new = create_prefix + new_table_name + create_body
    # Copy only columns present in both definitions so that a column
    # removed from the schema does not make the INSERT fail.
    new_columns = set(_column_names(create_new))
    cols_str = ", ".join(c for c in _column_names(old_sql) if c in new_columns)

    if not query.exec("PRAGMA foreign_keys=OFF"):
        query_error(query)
    if not db.transaction():
        raise Exception(db.lastError().text())
    try:
        print(create_new)
        for statement in (
            create_new,
            f"INSERT INTO {new_table_name} ({cols_str}) "
            f"SELECT {cols_str} FROM {table_name}",
            "DROP TABLE " + table_name,
            "ALTER TABLE " + new_table_name + " RENAME TO " + table_name,
        ):
            if not query.exec(statement):
                query_error(query)
    except Exception:
        # Leave the old table intact when any step fails.
        db.rollback()
        raise
    if not db.commit():
        raise Exception(db.lastError().text())


def schema_update(db: QSqlDatabase) -> None:
    """Create missing tables and migrate tables whose definition changed.

    Each CREATE TABLE in SQL_CMDS is compared (case-insensitively, with
    whitespace collapsed) to the text stored in ``sqlite_schema``; a
    differing table is rebuilt with its rows preserved.  Any other
    statement in SQL_CMDS is simply executed.

    Raises:
        AttributeError: if a CREATE TABLE statement cannot be parsed.
        Exception: if a statement fails (see query_error).
    """
    query = QSqlQuery()

    for sql in SQL_CMDS:
        if not sql.lower().strip().startswith("create table "):
            if not query.exec(sql):
                query_error(query)
            continue

        # SQLite stores CREATE statements without "IF NOT EXISTS".
        create_cmd = re.sub(r"IF NOT EXISTS ", "", sql.strip())
        create_cmd = re.sub(r"\s+", " ", create_cmd)
        matches = re.search(r"^(CREATE TABLE )([^ ]+)( \(.+)$", create_cmd)
        if matches is None:
            raise AttributeError(f"No match found: {create_cmd}")
        create_prefix, table_name, create_body = matches.groups()

        print("Table name = {}".format(table_name))
        # Restrict to the table row itself; index rows share tbl_name.
        query.prepare(
            "SELECT sql FROM sqlite_schema WHERE type = 'table' AND name = :tbl"
        )
        query.bindValue(":tbl", table_name)
        if not query.exec():
            query_error(query)
        if not query.next():
            # Table does not exist yet.
            print(sql)
            if not query.exec(sql):
                query_error(query)
            continue

        old = re.sub(r"\s+", " ", query.value(0).strip())
        if old.lower() == create_cmd.lower():
            continue
        print(old.lower())
        print(create_cmd.lower())
        print(translate("MainWindow", "Updating: ") + f"{table_name}")
        _rebuild_table(db, query, table_name, create_prefix, create_body, old)


def update_proceedings(case_id: int, bs: BeautifulSoup) -> None:
    """Store the proceedings table of a docket page for *case_id*.

    After the header row, the table is read as pairs of rows: an entry
    row (date, text) followed by a row whose second cell holds the links
    to that entry's documents.  Entries and documents already present
    are left untouched.

    NOTE(review): the original ended a loop with an unconditional
    ``break`` whose nesting level could not be recovered; it looked like
    a debugging leftover and was removed so the whole table is stored.

    Raises:
        ValueError: if an entry date cannot be parsed.
        AssertionError: if the page does not have the expected structure.
        Exception: if a statement fails (see query_error).
    """
    table = bs.find('table', id="proceedings")
    assert isinstance(table, Tag)
    rows = iter(table.find_all('tr')[1:])  # skip the header row
    query = QSqlQuery()
    for entry_row in rows:
        assert isinstance(entry_row, Tag)
        date_cell = entry_row.contents[0]
        assert isinstance(date_cell, Tag) and isinstance(date_cell.string, str)
        text_cell = entry_row.contents[1]
        assert isinstance(text_cell, Tag) and isinstance(text_cell.string, str)
        text = text_cell.string.strip()
        when = dateparser.parse(date_cell.string)
        if when is None:
            raise ValueError(f"Cannot parse entry date: {date_cell.string!r}")
        stamp = when.timestamp()

        query.prepare(
            "SELECT * FROM entries WHERE case_id = :cid AND date = :date AND text=:text"
        )
        query.bindValue(':cid', case_id)
        query.bindValue(':text', text)
        query.bindValue(':date', stamp)
        if not query.exec():
            query_error(query)
        if not query.next():
            query.prepare(
                "INSERT INTO entries (case_id, date, text) VALUES (:cid,:date,:text)"
            )
            query.bindValue(':cid', case_id)
            # The original bound the method object `date.timestamp` here.
            query.bindValue(':date', stamp)
            query.bindValue(':text', text)
            if not query.exec():
                query_error(query)
            entry_id = query.lastInsertId()
        else:
            entry_id = query.value(0)

        links_row = next(rows, None)
        if links_row is None:
            # Last entry has no document row after it.
            break
        assert isinstance(links_row, Tag)
        links_cell = links_row.contents[1]
        assert isinstance(links_cell, Tag)
        for a in links_cell.find_all('a', href=True):
            url = a.attrs['href']
            name = None if a.string is None else str(a.string)
            query.prepare("SELECT * FROM documents WHERE url=:url AND entry_id = :eid")
            query.bindValue(':url', url)
            query.bindValue(":eid", entry_id)
            if not query.exec():
                query_error(query)
            if not query.next():
                query.prepare("INSERT INTO documents (entry_id, name, url) "
                              "VALUES (:eid, :name, :url)")
                query.bindValue(":eid", entry_id)
                query.bindValue(":name", name)
                query.bindValue(":url", url)
                if not query.exec():
                    query_error(query)


def update_db(case_id: str) -> int:
    """Download one docket page and store it; return its ``cases.case_id``.

    Args:
        case_id: the docket number used in the page URL (for example
            ``"24-203"``), not the database key.

    A case linked from the page is fetched recursively when it is not in
    the database yet.  This case is inserted before the recursion, so
    two cases linking to each other do not recurse forever.

    Raises:
        SystemExit: if the page cannot be fetched with HTTP status 200.
        ValueError: if the docketed date cannot be parsed.
        AssertionError: if the page does not have the expected structure.
        Exception: if a statement fails (see query_error).
    """
    r = requests.get(DOCKET_URL.format(case_id), timeout=HTTP_TIMEOUT)
    if r.status_code != 200:
        print(r.status_code)
        sys.exit(1)
    bs = BeautifulSoup(r.text, 'lxml')
    #
    # docket_id, previous_docket, petitioners, respondents, date
    # all come from the docketinfo table
    #
    di = bs.find('table', id='docketinfo')
    assert isinstance(di, Tag)

    #
    # docket_id is first row, first column
    docket_span = di.find('span')
    assert isinstance(docket_span, Tag)
    docket_id = docket_span.contents[0]
    assert isinstance(docket_id, str)
    docket_id = docket_id.strip().replace('No. ', '')

    #
    # Title is second row, first column
    tr = di.contents[1]
    assert isinstance(tr, Tag) and isinstance(tr.contents[0], Tag)
    assert tr.contents[0].string == 'Title:'
    td = tr.contents[1]
    assert isinstance(td, Tag)
    span = td.contents[0]
    assert isinstance(span, Tag) and isinstance(span.contents[0], str)
    # Drop the party-role suffix; singular forms are accepted as well as
    # the two plural forms the original handled.
    petitioners = re.sub(
        r", (?:Petitioner|Applicant)s?\b", "", span.contents[0].strip()
    )
    assert isinstance(span.contents[4], str)
    respondent = span.contents[4].strip()

    #
    # Date on which the case was docketed
    tr = di.contents[2]
    assert isinstance(tr, Tag) and isinstance(tr.contents[1], Tag)
    td = tr.contents[1]
    assert isinstance(td, Tag) and td.string is not None
    docket_date = td.string.strip()
    date = dateparser.parse(docket_date)

    #
    # linked case is row 3, column 0
    tr = di.contents[3]
    assert isinstance(tr, Tag) and isinstance(tr.contents[0], Tag)
    linked = tr.contents[0].string

    print(docket_id, petitioners, respondent, date, linked)

    #
    # See if this case already exists.
    #
    query = QSqlQuery()
    query.prepare("SELECT * FROM cases WHERE docket_id = :did")
    query.bindValue(':did', docket_id)
    if not query.exec():
        query_error(query)

    #
    # If it does not exist, create it.  This stops a recursion loop.
    #
    if not query.next():
        if date is None:
            raise ValueError(f"Cannot parse docket date: {docket_date!r}")
        query.prepare(
            "INSERT INTO cases (docket_id, petitioners, respondents, date, linked) "
            "VALUES (:did, :pet, :resp, :date, NULL)"
        )
        query.bindValue(':did', docket_id)
        query.bindValue(':pet', petitioners)
        query.bindValue(':resp', respondent)
        query.bindValue(':date', date.timestamp())
        if not query.exec():
            query_error(query)
        row_id = query.lastInsertId()
        linked_id = None
    else:
        row_id = query.value(0)
        linked_id = query.value('linked')
    assert isinstance(row_id, int)

    #
    # If there is a linked case, we need to get the ID for that case.
    if linked is not None:
        linked = linked.replace('Linked with ', '')
        query.prepare("SELECT * FROM cases WHERE docket_id = :did")
        query.bindValue(':did', linked)
        if not query.exec():
            query_error(query)
        if not query.next():
            new_id = update_db(linked)
        else:
            new_id = query.value(0)
        if new_id != linked_id:
            query.prepare("UPDATE cases SET linked=:lid WHERE case_id = :cid")
            query.bindValue(':lid', new_id)
            query.bindValue(':cid', row_id)
            if not query.exec():
                query_error(query)
    #
    # XXX - Process lower courts
    #
    update_proceedings(row_id, bs)
    return row_id


def main() -> int:
    """Open the database, refresh requested dockets and run the GUI.

    Every command-line argument left after Qt has consumed its own
    options is treated as a docket number and passed to update_db().

    Returns:
        The Qt event-loop exit code, or 1 if the database cannot be opened.
    """
    app = QApplication(sys.argv)
    db = QSqlDatabase.addDatabase("QSQLITE")
    db.setDatabaseName("scotus.db")
    if not db.open():
        print(translate("MainWindow", "Cannot open database: ") + db.lastError().text())
        return 1
    schema_update(db)
    for docket in app.arguments()[1:]:
        update_db(docket)
    window = MainWindow()  # noqa: F841 - the reference keeps the window alive
    return app.exec()


if __name__ == "__main__":
    sys.exit(main())
## All changes made in this file will be lost when recompiling UI file!
################################################################################

from PySide6.QtCore import (QCoreApplication, QDate, QDateTime, QLocale,
    QMetaObject, QObject, QPoint, QRect,
    QSize, QTime, QUrl, Qt)
from PySide6.QtGui import (QBrush, QColor, QConicalGradient, QCursor,
    QFont, QFontDatabase, QGradient, QIcon,
    QImage, QKeySequence, QLinearGradient, QPainter,
    QPalette, QPixmap, QRadialGradient, QTransform)
from PySide6.QtWidgets import (QApplication, QHeaderView, QMainWindow, QMenuBar,
    QSizePolicy, QStatusBar, QTableView, QTableWidget,
    QTableWidgetItem, QVBoxLayout, QWidget)


class Ui_MainWindow:
    """Widget tree of the main window.

    setupUi() creates, as attributes of this object: ``centralwidget``,
    ``verticalLayout``, ``casesView`` (QTableView), ``docketWidget``
    (QTableWidget), ``menubar`` and ``statusbar``.
    """

    def setupUi(self, MainWindow):
        """Create the child widgets and install them on *MainWindow*."""
        if not MainWindow.objectName():
            MainWindow.setObjectName("MainWindow")
        MainWindow.resize(800, 600)

        # Central area: the two tables stacked vertically.
        central = QWidget(MainWindow)
        central.setObjectName("centralwidget")
        self.centralwidget = central

        layout = QVBoxLayout(central)
        layout.setObjectName("verticalLayout")
        self.verticalLayout = layout

        cases = QTableView(central)
        cases.setObjectName("casesView")
        self.casesView = cases
        layout.addWidget(cases)

        docket = QTableWidget(central)
        docket.setObjectName("docketWidget")
        self.docketWidget = docket
        layout.addWidget(docket)

        MainWindow.setCentralWidget(central)

        # Window chrome: menu bar and status bar.
        menubar = QMenuBar(MainWindow)
        menubar.setObjectName("menubar")
        menubar.setGeometry(QRect(0, 0, 800, 24))
        self.menubar = menubar
        MainWindow.setMenuBar(menubar)

        statusbar = QStatusBar(MainWindow)
        statusbar.setObjectName("statusbar")
        self.statusbar = statusbar
        MainWindow.setStatusBar(statusbar)

        self.retranslateUi(MainWindow)

        QMetaObject.connectSlotsByName(MainWindow)

    def retranslateUi(self, MainWindow):
        """Set the translatable texts; only the window title here."""
        title = QCoreApplication.translate("MainWindow", "MainWindow", None)
        MainWindow.setWindowTitle(title)
b/ui/MainWindow.ui new file mode 100644 index 0000000..de469dd --- /dev/null +++ b/ui/MainWindow.ui @@ -0,0 +1,40 @@ + + + MainWindow + + + + 0 + 0 + 800 + 600 + + + + MainWindow + + + + + + + + + + + + + + + 0 + 0 + 800 + 24 + + + + + + + + diff --git a/ui/__init.py__ b/ui/__init.py__ new file mode 100644 index 0000000..e69de29