Initial Commit
This commit is contained in:
6
.gitignore
vendored
Normal file
6
.gitignore
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
venv
|
||||
*~
|
||||
\#*
|
||||
*.html
|
||||
*.db
|
||||
__pycache__
|
||||
385
scotus-pull.py
Executable file
385
scotus-pull.py
Executable file
@@ -0,0 +1,385 @@
|
||||
#!venv/bin/python3
|
||||
import datetime
|
||||
import re
|
||||
import sys
|
||||
import dateparser
|
||||
import requests
|
||||
from typing import NoReturn
|
||||
from PySide6.QtCore import QCoreApplication, QModelIndex, Signal, Qt
|
||||
from PySide6.QtSql import QSqlDatabase, QSqlQuery, QSqlQueryModel
|
||||
from PySide6.QtWidgets import QAbstractItemView, QApplication, QHeaderView, QMainWindow, QStyledItemDelegate, QTableWidgetItem
|
||||
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
|
||||
from ui.MainWindow import Ui_MainWindow
|
||||
|
||||
# Module-level shorthand for Qt's translation lookup; called below as
# translate("MainWindow", "<source text>").
translate = QCoreApplication.translate
|
||||
def query_error(query: QSqlQuery) -> NoReturn:
    """Standardized query error reporter.

    Prints the executed SQL, its bound values and the driver error
    (type and text) to stdout, then aborts the current operation.

    Args:
        query: The query whose last execution failed.

    Raises:
        Exception: Always, with the translated message "SQL Error".
    """
    header = translate("MainWindow", "SQL Error:\n")
    executed = query.executedQuery()
    bound = query.boundValues()
    error_type = query.lastError().type()
    error_text = query.lastError().text()
    print(header + f"{executed}\n{bound}\n{error_type}:{error_text}")
    raise Exception(translate("MainWindow", "SQL Error"))
|
||||
|
||||
class dateDelegate(QStyledItemDelegate):
    """Item delegate that renders a POSIX timestamp as e.g. "June 3, 2024"."""

    def displayText(self, value, locale) -> str:
        """Return the display string for a timestamp cell.

        Args:
            value: Seconds since the epoch, as stored in the model.
            locale: Locale supplied by the view. It is not used for the date
                itself; month names come from ``strftime("%B")``.

        Returns:
            "<Month> <day>, <year>" with an unpadded day number. If ``value``
            cannot be interpreted as a timestamp (for example a NULL cell),
            the base class rendering is returned instead of raising inside
            the view's paint path.
        """
        try:
            date = datetime.date.fromtimestamp(value)
        except (TypeError, ValueError, OverflowError, OSError):
            return super().displayText(value, locale)
        # "%-d" (day without zero padding) is a platform-specific strftime
        # extension and is rejected on Windows; date.day gives the same
        # unpadded number everywhere.
        return f"{date:%B} {date.day}, {date:%Y}"
|
||||
|
||||
class MainWindow(QMainWindow, Ui_MainWindow):
    """Main window: a table of cases above a table of docket entries.

    ``casesView`` shows every row of the ``cases`` table; clicking a row
    loads that case's rows from ``entries`` into ``docketWidget``.
    """

    # Emitted with the case_id of the row that was clicked.
    show_entries = Signal(int)

    def __init__(self) -> None:
        """Build the UI, load all cases into the view and show the window."""
        super().__init__()
        self.setupUi(self)

        # Constructing QSqlQuery with a statement string executes it
        # immediately; build an empty query and execute exactly once so the
        # result of that single execution is the one that gets checked.
        query = QSqlQuery()
        if not query.exec("SELECT * FROM cases ORDER BY docket_id"):
            query_error(query)
        # Keep Python references to the model and the delegate on the
        # instance. NOTE(review): Qt views do not take ownership of either;
        # whether the binding keeps them alive was not verified, so hold them.
        self._cases_model = QSqlQueryModel()
        self._cases_model.setQuery(query)
        self.casesView.setModel(self._cases_model)

        self.casesView.setSelectionMode(QAbstractItemView.SelectionMode.SingleSelection)
        self.casesView.setSelectionBehavior(QAbstractItemView.SelectionBehavior.SelectRows)
        # Column order follows the cases table definition:
        # 0 case_id, 1 docket_id, 2 linked, 3 petitioners, 4 respondents, 5 date.
        self.casesView.hideColumn(0)
        self.casesView.hideColumn(2)
        self._date_delegate = dateDelegate(self)
        self.casesView.setItemDelegateForColumn(5, self._date_delegate)
        self.casesView.resizeColumnToContents(1)
        self.casesView.resizeColumnToContents(5)
        header = self.casesView.horizontalHeader()
        header.setSectionResizeMode(3, QHeaderView.ResizeMode.Fixed)
        header.setSectionResizeMode(4, QHeaderView.ResizeMode.Fixed)
        # Show first, then read the view width to size the two party columns.
        self.show()
        # Split what is left after the docket and date columns evenly between
        # petitioners and respondents (5 px of slack, as before).
        remaining = self.casesView.width() - header.sectionSize(1) - header.sectionSize(5) - 5
        self.casesView.setColumnWidth(3, int(remaining * 0.5))
        self.casesView.setColumnWidth(4, int(remaining * 0.5))
        self.casesView.verticalHeader().hide()
        self.casesView.resizeRowsToContents()
        self.casesView.doubleClicked.connect(self.rowClicked)
        self.casesView.clicked.connect(self.rowClicked)

        self.docketWidget.setColumnCount(2)
        self.docketWidget.setHorizontalHeaderLabels([
            'Date', 'Proceedings and Orders',
        ])
        self.docketWidget.resizeColumnToContents(0)
        self.docketWidget.horizontalHeader().setSectionResizeMode(1, QHeaderView.ResizeMode.Stretch)
        return

    def populateDocket(self, case_id: int) -> None:
        """Fill ``docketWidget`` with the entries of one case.

        Args:
            case_id: Primary key of the case in the ``cases`` table.

        Raises:
            Exception: Via ``query_error`` if the SELECT fails.
        """
        query = QSqlQuery()
        query.prepare("SELECT * FROM entries WHERE case_id=:cid ORDER BY entry_id")
        query.bindValue(":cid", case_id)
        if not query.exec():
            query_error(query)

        # Start from an empty table so rows of a previously shown case
        # cannot linger below the new ones.
        self.docketWidget.clearContents()
        self.docketWidget.setRowCount(0)
        row = 0
        while query.next():
            print(query.value(0), query.value(1), query.value(2), query.value(3))
            # The row must exist before setItem() is called: items placed
            # outside the current row count are not stored by the widget.
            self.docketWidget.insertRow(row)
            # entries columns: 2 = date, 3 = text -> widget columns 0 and 1.
            for column, field in enumerate((2, 3)):
                item = QTableWidgetItem()
                item.setData(Qt.ItemDataRole.DisplayRole, query.value(field))
                self.docketWidget.setItem(row, column, item)
            row += 1

        return

    def rowClicked(self, index: QModelIndex) -> None:
        """Handle a (double-)click on a case row.

        Prints the docket id, emits ``show_entries`` with the case id and
        loads the case's docket entries.

        Args:
            index: Model index of the clicked cell; sibling columns of the
                same row are read from it.
        """
        docket = index.siblingAtColumn(1).data()
        print(docket)
        case_id = index.siblingAtColumn(0).data()
        self.show_entries.emit(case_id)
        self.populateDocket(case_id)
        return
|
||||
|
||||
# Schema statements applied by schema_update(), in order.
#
# Each CREATE TABLE is also compared (after whitespace normalisation and
# removal of "IF NOT EXISTS ") against the text stored in sqlite_schema to
# decide whether a table must be rebuilt. schema_update() splits the column
# list on ", ", so keep one definition per ", "-separated item and start
# table constraints with "FOREIGN ".
SQL_CMDS = [
    # Foreign-key enforcement is currently disabled (statement kept for reference).
    #"PRAGMA foreign_keys=ON",
    # cases: one row per docket; "linked" optionally points at another case.
    "CREATE TABLE IF NOT EXISTS cases "
    "(case_id INTEGER PRIMARY KEY AUTOINCREMENT, "
    "docket_id TEXT, "
    "linked INTEGER, "
    "petitioners TEXT, respondents TEXT, date INTEGER, "
    "FOREIGN KEY(linked) REFERENCES cases(case_id))",
    #
    # entries: docket entries belonging to a case; date is stored as a timestamp.
    "CREATE TABLE IF NOT EXISTS entries ("
    "entry_id INTEGER PRIMARY KEY AUTOINCREMENT, "
    "case_id INTEGER, "
    "date INTEGER, "
    "text TEXT, "
    "FOREIGN KEY(case_id) REFERENCES cases(case_id))",
    #
    # documents: named URLs attached to a docket entry.
    "CREATE TABLE IF NOT EXISTS documents ("
    "document_id INTEGER PRIMARY KEY AUTOINCREMENT, "
    "entry_id INTEGER, "
    "name TEXT, "
    "url TEXT, "
    "FOREIGN KEY(entry_id) REFERENCES entries(entry_id))",
]
|
||||
|
||||
|
||||
def _column_names(create_sql: str) -> list:
    """Return the column names declared in a single-line CREATE TABLE text.

    The column list is taken from the outermost parentheses and split on
    ", "; items starting with "FOREIGN " are table constraints, not columns.

    Raises:
        AttributeError: If the statement contains no parenthesised list.
    """
    body = re.search(r"\((.+)\)", create_sql)
    if body is None:
        raise AttributeError(f"No column list found: {create_sql}")
    return [
        coldef.split(" ")[0]
        for coldef in body.group(1).split(", ")
        if not coldef.startswith("FOREIGN ")
    ]


def schema_update(db: QSqlDatabase) -> None:
    """Create missing tables and rebuild tables whose definition changed.

    Every statement in ``SQL_CMDS`` is processed in order. Statements that
    are not CREATE TABLE are executed as-is. For CREATE TABLE, the stored
    definition in ``sqlite_schema`` is compared case-insensitively with the
    normalised statement. A missing table is created; a differing table is
    rebuilt by creating ``<name>_new``, copying the data, dropping the old
    table and renaming the new one.

    Args:
        db: The open database connection; used for transaction control.

    Raises:
        AttributeError: If a CREATE TABLE statement cannot be parsed.
        Exception: Via ``query_error`` if any SQL statement fails. A rebuild
            in progress is rolled back first.
    """
    query = QSqlQuery()

    for sql in SQL_CMDS:
        if not sql.lower().strip().startswith("create table "):
            if not query.exec(sql):
                query_error(query)
            continue

        # Normalise to the form compared against the stored definition:
        # no "IF NOT EXISTS", runs of whitespace collapsed to one space.
        create_cmd = re.sub(r"IF NOT EXISTS ", "", sql.strip())
        create_cmd = re.sub(r"\s\s*", " ", create_cmd)
        matches = re.search(r"^(CREATE TABLE )([^ ]+)( \(.+)$", create_cmd)
        if matches is None:
            raise AttributeError(f"No match found: {create_cmd}")
        create_prefix, table_name, create_body = matches.groups()

        print("Table name = {}".format(table_name))
        query.prepare("SELECT sql FROM sqlite_schema WHERE tbl_name = :tbl")
        query.bindValue(":tbl", table_name)
        if not query.exec():
            query_error(query)
        if not query.next():
            # Table does not exist yet: create it from the original statement.
            print(sql)
            if not query.exec(sql):
                query_error(query)
            continue
        old = query.value(0)
        if old.lower() == create_cmd.lower():
            continue
        print(old.lower())
        print(create_cmd.lower())
        print(translate("MainWindow", "Updating: ") + f"{table_name}")

        # Step 1: turn off foreign key constraints (before the transaction).
        if not query.exec("PRAGMA foreign_keys=OFF"):
            query_error(query)
        # Step 2: start a transaction.
        in_transaction = db.transaction()
        try:
            # Step 3: remember old indexes, triggers, and views (none yet).
            # Step 4: create the new table under a temporary name.
            new_table_name = table_name + "_new"
            new_sql = create_prefix + new_table_name + create_body
            print(new_sql)
            if not query.exec(new_sql):
                query_error(query)

            # Step 5: transfer content. Only columns present in BOTH
            # definitions can be copied; selecting a dropped or renamed
            # column would make the INSERT fail.
            new_cols = set(_column_names(create_cmd))
            cols = [col for col in _column_names(old) if col in new_cols]
            if cols:
                cols_str = ", ".join(cols)
                query.prepare(
                    f"INSERT INTO {new_table_name} ({cols_str}) SELECT {cols_str} FROM {table_name}"
                )
                if not query.exec():
                    query_error(query)

            # Step 6: drop the old table.
            query.prepare("DROP TABLE " + table_name)
            if not query.exec():
                query_error(query)
            # Step 7: rename the new table to the old name.
            query.prepare("ALTER TABLE " + new_table_name + " RENAME TO " + table_name)
            if not query.exec():
                query_error(query)

            # Step 8: create indexes, triggers, and views (none yet).
            # Step 9: rebuild affected views (none yet).
            # Step 10: foreign key constraints are intentionally not turned
            # back on; enforcement is disabled throughout the program.
        except Exception:
            # Do not leave a half-finished rebuild pending on the connection.
            if in_transaction:
                db.rollback()
            raise
        # Step 11: commit the changes.
        db.commit()
    return
|
||||
|
||||
def update_proceedings(case_id: int, bs: BeautifulSoup) -> None:
    """Store the proceedings of a docket page as entries and documents.

    The ``proceedings`` table is read as a header row followed by pairs of
    rows: an entry row (date cell, text cell) and a row holding the links to
    that entry's documents. Entries and documents already present in the
    database are left alone; missing ones are inserted.

    Args:
        case_id: Primary key of the owning row in ``cases``.
        bs: Parsed docket page.

    Raises:
        AssertionError: If the page does not have the expected structure.
        Exception: Via ``query_error`` if a SQL statement fails.
    """
    table = bs.find('table', id="proceedings")
    assert isinstance(table, Tag)
    # Skip the header row; the remaining rows come in (entry, links) pairs.
    rows = iter(table.find_all('tr')[1:])
    query = QSqlQuery()
    for entry_row in rows:
        # A trailing entry without a links row yields None here, not IndexError.
        links_row = next(rows, None)

        assert isinstance(entry_row, Tag)
        date_cell = entry_row.contents[0]
        assert isinstance(date_cell, Tag) and isinstance(date_cell.string, str)
        date = dateparser.parse(date_cell.string)
        assert isinstance(date, datetime.datetime)
        # Compute the timestamp once so the lookup and the insert are
        # guaranteed to use the same value.
        timestamp = date.timestamp()
        text_cell = entry_row.contents[1]
        assert isinstance(text_cell, Tag) and isinstance(text_cell.string, str)
        text = text_cell.string.strip()

        query.prepare("SELECT * FROM entries WHERE case_id = :cid AND date = :date AND text=:text")
        query.bindValue(':cid', case_id)
        query.bindValue(':text', text)
        query.bindValue(':date', timestamp)
        if not query.exec():
            query_error(query)
        if not query.next():
            query.prepare("INSERT INTO entries (case_id, date, text) VALUES (:cid,:date,:text)")
            query.bindValue(':cid', case_id)
            # Previously the bound method itself (date.timestamp, no call)
            # was passed here instead of the number.
            query.bindValue(':date', timestamp)
            query.bindValue(':text', text)
            if not query.exec():
                query_error(query)
            entry_id = query.lastInsertId()
        else:
            entry_id = query.value(0)

        if not isinstance(links_row, Tag):
            continue
        # Look only at anchors that carry an href, so stray text nodes or
        # other markup inside the links row cannot abort the import.
        # NOTE(review): the original ended the loop body with an
        # unconditional `break`, handling just the first entry/link; it is
        # treated as a debugging leftover here - confirm.
        for anchor in links_row.find_all('a', href=True):
            url = anchor['href']
            name = anchor.get_text()
            query.prepare("SELECT * FROM documents WHERE url=:url AND entry_id = :eid")
            query.bindValue(':url', url)
            query.bindValue(":eid", entry_id)
            if not query.exec():
                query_error(query)
            if not query.next():
                query.prepare("INSERT INTO documents (entry_id, name, url) "
                              "VALUES (:eid, :name, :url)")
                query.bindValue(":eid", entry_id)
                query.bindValue(":name", name)
                query.bindValue(":url", url)
                if not query.exec():
                    query_error(query)
    return
|
||||
|
||||
def update_db(case_id) -> int:
    """Fetch one docket page and store the case, its link and proceedings.

    Args:
        case_id: Docket number as used in the page URL (e.g. '24-203').
            Despite the name this is not the database key.

    Returns:
        The ``cases.case_id`` primary key of the (new or existing) case row.

    Raises:
        SystemExit: With status 1 if the page request does not return 200.
        AssertionError: If the page does not have the expected structure.
        Exception: Via ``query_error`` if a SQL statement fails.
    """
    # A timeout keeps a stalled connection from hanging the program forever.
    r = requests.get(
        'https://www.supremecourt.gov/docket/docketfiles/html/public/{}.html'.format(case_id),
        timeout=30,
    )
    if r.status_code != 200:
        print(r.status_code)
        # sys.exit rather than the `exit` builtin, which is only installed
        # by the site module for interactive use.
        sys.exit(1)
    bs = BeautifulSoup(r.text, 'lxml')
    #
    # docket_id, previous_docket, petitioners, respondents, date
    # all come from the docketinfo table
    #
    di = bs.find('table', id='docketinfo')
    assert isinstance(di, Tag)

    #
    # docket_id is first row, first column
    docket_span = di.find('span')
    assert isinstance(docket_span, Tag)
    docket_id = docket_span.contents[0]
    assert isinstance(docket_id, str)
    docket_id = docket_id.strip().replace('No. ', '')

    #
    # Title is second row, first column
    tr = di.contents[1]
    assert isinstance(tr, Tag) and isinstance(tr.contents[0], Tag)
    assert tr.contents[0].string == 'Title:'
    td = tr.contents[1]
    assert isinstance(td, Tag)
    span = td.contents[0]
    assert isinstance(span, Tag) and isinstance(span.contents[0], str)
    petitioners = span.contents[0].strip()
    #
    # XXX - We need to deal with other titles. Change this to an RE
    # UPDATED: we are just handling the two we know about.
    #
    petitioners = petitioners.replace(', Petitioners', '')
    petitioners = petitioners.replace(', Applicants', '')
    # The respondent is the fifth child of the title span.
    assert isinstance(span.contents[4], str)
    respondent = span.contents[4].strip()

    #
    # Date on which the case was docketed
    tr = di.contents[2]
    assert isinstance(tr, Tag) and isinstance(tr.contents[1], Tag)
    td = tr.contents[1]
    assert isinstance(td, Tag) and td.string is not None
    docket_date = td.string.strip()
    date = dateparser.parse(docket_date)

    #
    # linked case is row 3, column 0
    tr = di.contents[3]
    assert isinstance(tr, Tag) and isinstance(tr.contents[0], Tag)
    linked = tr.contents[0].string

    print(docket_id, petitioners, respondent, date, linked)

    #
    # See if this case already exists.
    #
    query = QSqlQuery()
    query.prepare("SELECT * FROM cases WHERE docket_id = :did")
    query.bindValue(':did', docket_id)
    if not query.exec():
        query_error(query)

    #
    # if it does not exist, create it. This stops a recursion loop:
    # a linked case that points back here will find this row.
    #
    if not query.next():
        query.prepare("INSERT INTO cases (docket_id, petitioners, respondents, date, linked) "
                      "VALUES (:did, :pet, :resp, :date, NULL)")
        query.bindValue(':did', docket_id)
        query.bindValue(':pet', petitioners)
        query.bindValue(':resp', respondent)
        assert isinstance(date, datetime.datetime)
        query.bindValue(':date', date.timestamp())
        if not query.exec():
            query_error(query)
        row_id = query.lastInsertId()
        linked_id = None
    else:
        row_id = query.value(0)
        linked_id = query.value('linked')
    assert isinstance(row_id, int)

    #
    # If there is a linked case, we need to get the ID for that case.
    # Only text that actually carries the "Linked with " prefix names a
    # docket. An empty cell or any other label in that row must not be
    # fetched as if it were a docket number.
    linked_text = (linked or '').strip()
    if linked_text.startswith('Linked with '):
        linked_docket = linked_text.replace('Linked with ', '')
        query.prepare("SELECT * FROM cases WHERE docket_id = :did")
        query.bindValue(':did', linked_docket)
        if not query.exec():
            query_error(query)
        if not query.next():
            # Unknown so far: import the linked case first.
            new_id = update_db(linked_docket)
        else:
            new_id = query.value(0)
        if new_id != linked_id:
            query.prepare("UPDATE cases SET linked=:lid WHERE case_id = :cid")
            query.bindValue(':lid', new_id)
            query.bindValue(':cid', row_id)
            if not query.exec():
                query_error(query)
    #
    # XXX - Process lower courts
    #
    update_proceedings(row_id, bs)
    return row_id
|
||||
|
||||
def main() -> int:
    """Open the database, bring the schema up to date and run the GUI.

    Returns:
        The Qt event loop's exit code, or 1 if the database cannot be opened.
    """
    app = QApplication(sys.argv)
    db = QSqlDatabase.addDatabase("QSQLITE")
    #db.setConnectOptions("PRAGMA foreign_keys = ON")
    db.setDatabaseName("scotus.db")
    # Stop with a clear message if the database cannot be opened, rather
    # than failing later on the first query.
    if not db.open():
        print(db.lastError().text(), file=sys.stderr)
        return 1
    schema_update(db)
    # Example imports, run manually while developing:
    #update_db('24-203')
    #update_db('23A1058')
    # Keep a reference so the window object lives as long as the event loop.
    window = MainWindow()
    return app.exec()
|
||||
|
||||
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
58
ui/MainWindow.py
Normal file
58
ui/MainWindow.py
Normal file
@@ -0,0 +1,58 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
################################################################################
|
||||
## Form generated from reading UI file 'MainWindow.ui'
|
||||
##
|
||||
## Created by: Qt User Interface Compiler version 6.8.2
|
||||
##
|
||||
## WARNING! All changes made in this file will be lost when recompiling UI file!
|
||||
################################################################################
|
||||
|
||||
from PySide6.QtCore import (QCoreApplication, QDate, QDateTime, QLocale,
|
||||
QMetaObject, QObject, QPoint, QRect,
|
||||
QSize, QTime, QUrl, Qt)
|
||||
from PySide6.QtGui import (QBrush, QColor, QConicalGradient, QCursor,
|
||||
QFont, QFontDatabase, QGradient, QIcon,
|
||||
QImage, QKeySequence, QLinearGradient, QPainter,
|
||||
QPalette, QPixmap, QRadialGradient, QTransform)
|
||||
from PySide6.QtWidgets import (QApplication, QHeaderView, QMainWindow, QMenuBar,
|
||||
QSizePolicy, QStatusBar, QTableView, QTableWidget,
|
||||
QTableWidgetItem, QVBoxLayout, QWidget)
|
||||
|
||||
# Generated by the Qt User Interface Compiler from MainWindow.ui; manual
# changes are lost when the UI file is recompiled, so edit the .ui instead.
class Ui_MainWindow(object):
    def setupUi(self, MainWindow):
        """Create the widgets of the main window and attach them to it.

        Builds a central widget with a vertical layout holding ``casesView``
        (QTableView) above ``docketWidget`` (QTableWidget), plus a menu bar
        and a status bar.
        """
        if not MainWindow.objectName():
            MainWindow.setObjectName(u"MainWindow")
        MainWindow.resize(800, 600)
        self.centralwidget = QWidget(MainWindow)
        self.centralwidget.setObjectName(u"centralwidget")
        self.verticalLayout = QVBoxLayout(self.centralwidget)
        self.verticalLayout.setObjectName(u"verticalLayout")
        # Upper table.
        self.casesView = QTableView(self.centralwidget)
        self.casesView.setObjectName(u"casesView")

        self.verticalLayout.addWidget(self.casesView)

        # Lower table.
        self.docketWidget = QTableWidget(self.centralwidget)
        self.docketWidget.setObjectName(u"docketWidget")

        self.verticalLayout.addWidget(self.docketWidget)

        MainWindow.setCentralWidget(self.centralwidget)
        self.menubar = QMenuBar(MainWindow)
        self.menubar.setObjectName(u"menubar")
        self.menubar.setGeometry(QRect(0, 0, 800, 24))
        MainWindow.setMenuBar(self.menubar)
        self.statusbar = QStatusBar(MainWindow)
        self.statusbar.setObjectName(u"statusbar")
        MainWindow.setStatusBar(self.statusbar)

        self.retranslateUi(MainWindow)

        QMetaObject.connectSlotsByName(MainWindow)
    # setupUi

    def retranslateUi(self, MainWindow):
        """Set the translatable texts (currently only the window title)."""
        MainWindow.setWindowTitle(QCoreApplication.translate("MainWindow", u"MainWindow", None))
    # retranslateUi
|
||||
|
||||
40
ui/MainWindow.ui
Normal file
40
ui/MainWindow.ui
Normal file
@@ -0,0 +1,40 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<ui version="4.0">
|
||||
<class>MainWindow</class>
|
||||
<widget class="QMainWindow" name="MainWindow">
|
||||
<property name="geometry">
|
||||
<rect>
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>800</width>
|
||||
<height>600</height>
|
||||
</rect>
|
||||
</property>
|
||||
<property name="windowTitle">
|
||||
<string>MainWindow</string>
|
||||
</property>
|
||||
<widget class="QWidget" name="centralwidget">
|
||||
<layout class="QVBoxLayout" name="verticalLayout">
|
||||
<item>
|
||||
<widget class="QTableView" name="casesView"/>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QTableWidget" name="docketWidget"/>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
<widget class="QMenuBar" name="menubar">
|
||||
<property name="geometry">
|
||||
<rect>
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>800</width>
|
||||
<height>24</height>
|
||||
</rect>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QStatusBar" name="statusbar"/>
|
||||
</widget>
|
||||
<resources/>
|
||||
<connections/>
|
||||
</ui>
|
||||
0
ui/__init.py__
Normal file
0
ui/__init.py__
Normal file
Reference in New Issue
Block a user