Files
scotus-watch/workers.py
Christopher T. Johnson dfa604e846 Lint
2025-02-25 15:33:20 -05:00

413 lines
14 KiB
Python

import datetime
import re
import dateparser
import requests
from bs4 import BeautifulSoup, Tag
from PySide6.QtCore import QDateTime, QThread, Signal
from PySide6.QtSql import QSqlDatabase, QSqlQuery
from lib.utils import query_error
def update_proceedings(
    case_id: int, bs: BeautifulSoup, db: QSqlDatabase
) -> bool:
    """Record a docket's proceedings and decide whether the case is still active.

    After skipping the first row of the ``proceedings`` table, rows are
    consumed in pairs: an entry row (date cell, text cell) followed by a row
    whose second cell holds the document links for that entry.  Entries and
    documents that are not already stored are inserted.

    Args:
        case_id: Value stored in ``entries.case_id`` for every entry.
        bs: Parsed docket page containing the ``proceedings`` table.
        db: Database connection the queries run on.

    Returns:
        ``False`` when the last entry starts with "rehearing denied." or
        "judgment issued.", or starts with "petition denied." and is more
        than 40 days old.  ``True`` otherwise, including when the table
        holds no entries at all.
    """
    table = bs.find("table", id="proceedings")
    assert isinstance(table, Tag)
    # Walk the rows with an iterator instead of repeated list.pop(0); this
    # also tolerates an empty table or a trailing entry without a
    # documents row.
    rows = iter(table.find_all("tr"))
    next(rows, None)  # the first row is skipped, as in the original layout
    query = QSqlQuery(db)
    text = None
    date = None
    for tr in rows:
        assert isinstance(tr, Tag)
        td = tr.contents[0]
        assert isinstance(td, Tag) and isinstance(td.string, str)
        tmp = dateparser.parse(td.string)
        assert isinstance(tmp, datetime.datetime)
        date = QDateTime.fromSecsSinceEpoch(int(tmp.timestamp()))
        td = tr.contents[1]
        #
        # When a case is GVRed, the <td> will contain <i> which means that
        # td is a Tag but not a simple string.
        # We need to convert to HTML and store the HTML in the entry, not
        # just plaintext.
        assert isinstance(td, Tag)
        text = td.string
        if not text:
            text = "".join(str(x) for x in td.contents)
        query.prepare(
            "SELECT * FROM entries WHERE case_id = :cid AND date = :date AND text=:text"
        )
        query.bindValue(":cid", case_id)
        query.bindValue(":text", text)
        query.bindValue(":date", date)
        if not query.exec():
            query_error(query)
        if not query.next():
            query.prepare(
                "INSERT INTO entries (case_id, date, text) VALUES (:cid,:date,:text)"
            )
            query.bindValue(":cid", case_id)
            query.bindValue(":date", date)
            query.bindValue(":text", text)
            if not query.exec():
                query_error(query)
            entry_id = query.lastInsertId()
        else:
            entry_id = query.value(0)
        # The row after an entry carries that entry's document links.
        docs_row = next(rows, None)
        if docs_row is None:
            # Entry without a documents row: nothing more to read.
            break
        assert isinstance(docs_row, Tag)
        assert isinstance(docs_row.contents[1], Tag)
        for a in docs_row.contents[1]:
            assert isinstance(a, Tag)
            url = a.attrs["href"]
            name = a.string
            query.prepare(
                "SELECT * FROM documents WHERE url=:url AND entry_id = :eid"
            )
            query.bindValue(":url", url)
            query.bindValue(":eid", entry_id)
            if not query.exec():
                query_error(query)
            if not query.next():
                query.prepare(
                    "INSERT INTO documents (entry_id, name, url) "
                    "VALUES (:eid, :name, :url)"
                )
                query.bindValue(":eid", entry_id)
                query.bindValue(":name", name)
                query.bindValue(":url", url)
                if not query.exec():
                    query_error(query)
    if text is None or date is None:
        # No entries were found, so there is nothing that could close the case.
        return True
    assert isinstance(text, str)
    print(f"text: {text.lower()}")
    #
    # If cert is denied, a petition for rehearing can be requested.
    # The petitioner has 40 days to file for a rehearing.
    #
    # Translation, if the last entry is "petition denied\..*$" and 40 days
    # have passed, the case is final
    active = True
    text = text.lower()
    if text.startswith(("rehearing denied.", "judgment issued.")):
        active = False
    if text.startswith("petition denied."):
        assert isinstance(date, QDateTime)
        delta = date.daysTo(QDateTime.currentDateTime())
        if delta > 40:
            active = False
    return active
def update_db(case_id: str, db: QSqlDatabase) -> int:
    """Fetch one docket page from supremecourt.gov and store it in the database.

    The case row is created if it does not exist yet, linked cases are
    resolved (recursing into this function for cases not yet stored), the
    proceedings are recorded via ``update_proceedings`` and the case is
    marked inactive when appropriate.

    Args:
        case_id: Docket number such as ``"24-123"`` or ``"24A45"``.
        db: Database connection the queries run on.

    Returns:
        The ``case_id`` key of the case row, or ``-1`` when the fetched page
        has an empty title (the site does not answer 404 for unknown
        dockets).  A case already stored as inactive is returned without
        any network access.

    Raises:
        Exception: If ``case_id`` does not look like a docket number, the
            HTTP status is not 200, or a linked case cannot be created.
    """
    #
    # See if this case already exists.
    #
    # We assume that case_id == docket_id at this point.  If it does not,
    # then we will build out from the request we get
    print(f"Updating {case_id}")
    matches = re.match(r"(\d\d)[-A](\d+)(.*)$", case_id)
    if matches is None:
        raise Exception(f"Not a match {case_id}")
    case_id = matches.group()
    query = QSqlQuery(db)
    query.prepare("SELECT * FROM cases WHERE docket_id = :did")
    query.bindValue(":did", case_id)
    if not query.exec():
        query_error(query)
    exists = query.next()
    # Integer key of the case row; kept apart from the docket string in
    # ``case_id`` so the two are never confused.
    case_pk = None
    if exists:
        case_pk = query.value("case_id")
        if query.value("active") != 1:
            # Inactive cases are final; nothing to refresh.
            return int(case_pk)
    # A timeout keeps the worker thread from blocking forever on a stalled
    # connection.
    r = requests.get(
        f"https://www.supremecourt.gov/docket/docketfiles/html/public/{case_id}.html",
        timeout=30,
    )
    if r.status_code != 200:
        # Raise instead of calling exit(): this runs inside worker threads.
        raise Exception(
            f"Unexpected HTTP status {r.status_code} for docket {case_id}"
        )
    bs = BeautifulSoup(r.text, "lxml")
    #
    # SCOTUS does not return 404 for page not found.
    #
    title = bs.find("title")
    assert isinstance(title, Tag) and isinstance(title.string, str)
    if title.string.strip() == "":
        return -1
    #
    # docket_id, previous_docket, petitioners, respondents, date
    # all come from the docketinfo table
    #
    di = bs.find("table", id="docketinfo")
    assert di is not None and isinstance(di, Tag)
    #
    # docket_id is first row, first column
    span = di.find("span")
    assert span is not None and isinstance(span, Tag)
    header = span.contents[0]
    assert isinstance(header, str)
    matches = re.match(r"(No.)?\s*(\d+[-A]\d+).*$", header)
    assert matches is not None
    print(matches, matches.groups())
    docket_id = matches.group(2)
    print(f"Found {docket_id}")
    #
    # Title is second row, first column
    tr = di.contents[1]
    assert isinstance(tr, Tag) and isinstance(tr.contents[0], Tag)
    assert tr.contents[0].string == "Title:"
    td = tr.contents[1]
    assert isinstance(td, Tag)
    span = td.contents[0]
    assert isinstance(span, Tag) and isinstance(span.contents[0], str)
    petitioners = span.contents[0].strip()
    #
    # XXX - We need to deal with other titles.  Change this to an RE
    # UPDATED: we are just handling the two we know about.
    #
    petitioners = petitioners.replace(", Petitioners", "")
    petitioners = petitioners.replace(", Applicants", "")
    # The respondent is read from index 4, so at least five children are
    # required.
    if len(span.contents) > 4:
        assert isinstance(span.contents[4], str)
        respondent = span.contents[4].strip()
    else:
        respondent = ""
    #
    # Date on which the case was docketed
    tr = di.contents[2]
    assert isinstance(tr, Tag) and isinstance(tr.contents[1], Tag)
    td = tr.contents[1]
    assert isinstance(td, Tag)
    docket_date = td.string
    if docket_date is None:
        # Placeholder used when the page carries no docketed date.
        docket_date = "July 7, 1776"
    parsed = dateparser.parse(docket_date)
    assert parsed is not None
    date = QDateTime.fromSecsSinceEpoch(int(parsed.timestamp()))
    #
    # linked case is row 3, column 0
    tr = di.contents[3]
    assert isinstance(tr, Tag) and isinstance(tr.contents[0], Tag)
    linked = tr.contents[0].string
    #
    # if this case does not exist, create it.  This stops a recursion loop.
    #
    if not exists:
        query.prepare(
            "INSERT INTO cases (docket_id, petitioners, respondents, date, active) "
            "VALUES (:did, :pet, :resp, :date, 1)"
        )
        query.bindValue(":did", docket_id)
        query.bindValue(":pet", petitioners)
        query.bindValue(":resp", respondent)
        query.bindValue(":date", date)
        if not query.exec():
            query_error(query)
        case_pk = query.lastInsertId()
    assert isinstance(case_pk, int)
    #
    # If there is a linked case, we need to get the ID for that case.
    if linked is not None:
        #
        # If this case is on the Emergency Docket and it is linked to
        # a case on the regular docket, then this case is no longer active
        #
        deactivate = False
        linked = linked.replace("Linked with ", "")
        for did in linked.split(","):
            did = did.strip()
            if not did:
                continue
            if re.match(r"\d+-\d+$", did):
                deactivate = True
            query.prepare("SELECT * FROM cases WHERE docket_id = :did")
            # Look up this one linked docket, not the whole comma-separated
            # list; otherwise multi-linked cases are never found and two
            # mutually linked cases keep re-fetching each other.
            query.bindValue(":did", did)
            if not query.exec():
                query_error(query)
            if not query.next():
                linked_id = update_db(did, db)
                if linked_id <= 0:
                    raise Exception(f"Unable to create linked case: {did}")
            else:
                linked_id = query.value("case_id")
            query.prepare(
                "SELECT * FROM cases_cases WHERE lhs = :lhs " "AND rhs = :rhs"
            )
            query.bindValue(":lhs", case_pk)
            query.bindValue(":rhs", linked_id)
            if not query.exec():
                query_error(query)
            if not query.next():
                query.prepare(
                    "INSERT INTO cases_cases (lhs, rhs) "
                    "VALUES ( :lhs, :rhs)"
                )
                query.bindValue(":lhs", case_pk)
                query.bindValue(":rhs", linked_id)
                if not query.exec():
                    query_error(query)
        # A case that is itself on the regular docket is never deactivated
        # because of a link.
        if re.match(r"\d+-\d+$", docket_id):
            deactivate = False
        if deactivate:
            query.prepare("UPDATE cases SET active=0 WHERE case_id = :cid")
            query.bindValue(":cid", case_pk)
            if not query.exec():
                query_error(query)
    #
    # XXX - Process lower courts
    #
    active = update_proceedings(case_pk, bs, db)
    if not active:
        query.prepare("UPDATE cases SET active=0 WHERE case_id = :cid")
        query.bindValue(":cid", case_pk)
        if not query.exec():
            query_error(query)
    return case_pk
class updateThread(QThread):
    """Worker thread that refreshes a single docket through ``update_db``.

    The docket to refresh is supplied with ``setDocketId`` before the thread
    runs.  ``run`` works on its own clone of the default database
    connection, registered under the name ``"update"``.
    """

    # Docket number handed to update_db(); None until setDocketId() is called.
    docket_id = None

    def __init__(self) -> None:
        super().__init__()
        print("updateThread: __init__(docket_id)")

    def setDocketId(self, docket_id: str) -> None:
        """Remember the docket number that ``run`` will refresh."""
        self.docket_id = docket_id

    def run(self) -> None:
        """Refresh the configured docket on a thread-private connection.

        Raises:
            Exception: If the cloned connection cannot be opened, or
                whatever ``update_db`` raises.
        """
        print(f"updateThread: running on {self.currentThread()}")
        db = QSqlDatabase.cloneDatabase("qt_sql_default_connection", "update")
        try:
            if not db.open():
                print(db.lastError())
                raise Exception("db.open()")
            case_id = update_db(str(self.docket_id), db)
        finally:
            # Always release the named connection, even when the update
            # fails, so it is not left registered after an error.
            db.close()
            del db
            QSqlDatabase.removeDatabase("update")
        print(f"updateThread: run() returns {case_id}")
class loadCases(QThread):
    """Worker thread that walks docket numbers and loads them via ``update_db``.

    Starting from the position stored in the ``history`` table for
    ``self.year``, dockets are fetched one after another.  When ``update_db``
    reports a missing docket (negative result) the scan moves to the previous
    year.  A run stops after 100 fetched dockets or once the year reaches
    ``"00"``.
    """

    # Emitted with the number of dockets fetched so far at the top of every
    # loop iteration, and with 0 once the run has finished.
    caseLoaded = Signal(int)
    # Two-digit year being scanned; the default is evaluated once, when the
    # class is defined, and run() overwrites it on the instance.
    year = QDateTime.currentDateTime().toString("yy")
    # Next docket number to try within ``year``.
    number = 0

    def _load_history(self, query: QSqlQuery) -> tuple:
        """Load, or create, the history row for ``self.year``.

        Sets ``self.number`` from the row (1 for a newly created row) and
        returns ``(history_id, edocket)``.
        """
        query.prepare("SELECT * FROM history WHERE year = :year")
        print(f"year = {self.year}")
        query.bindValue(":year", self.year)
        if not query.exec():
            query_error(query)
        if query.next():
            self.number = query.value("number")
            return query.value("history_id"), query.value("edocket")
        query.prepare(
            "INSERT INTO history (year, edocket, number) "
            "VALUES (:year, 0, 1)"
        )
        query.bindValue(":year", self.year)
        if not query.exec():
            query_error(query)
        self.number = 1
        return query.lastInsertId(), 0

    def _scan(self, db: QSqlDatabase) -> None:
        """Fetch dockets on ``db`` until the per-run limit or year "00"."""
        query = QSqlQuery(db)
        history_id, edocket = self._load_history(query)
        count = 0
        while self.year > "00" and count < 100:
            self.caseLoaded.emit(count)
            query.prepare("SELECT * FROM cases WHERE docket_id = :did")
            if edocket == 1:
                docket_id = f"{self.year}A{self.number}"
            else:
                docket_id = f"{self.year}-{self.number}"
            query.bindValue(":did", docket_id)
            if not query.exec():
                query_error(query)
            if query.next() and query.value("active") == 0:
                # Skipped cases do not count towards the per-run limit.
                print("Already exists and is inactive")
                self.number += 1
                continue
            result = update_db(docket_id, db)
            print(f"result: {result}")
            if result < 0:
                # No such docket: remember the last good number for the
                # year just scanned, then continue with the previous year.
                self.year = f"{int(self.year) - 1:02d}"
                if self.number > 1:
                    query.prepare(
                        "UPDATE history set number = :number WHERE history_id=:hid"
                    )
                    query.bindValue(":number", self.number - 1)
                    query.bindValue(":hid", history_id)
                    if not query.exec():
                        query_error(query)
                history_id, edocket = self._load_history(query)
                continue
            self.number += 1
            count += 1
        if self.number > 1:
            query.prepare(
                "UPDATE history SET number= :number WHERE year = :year"
            )
            query.bindValue(":number", self.number)
            query.bindValue(":year", self.year)
            if not query.exec():
                query_error(query)

    def run(self) -> None:
        """Scan dockets on a thread-private clone of the default connection.

        Raises:
            Exception: If the cloned connection cannot be opened, or
                whatever ``update_db`` raises.
        """
        db = QSqlDatabase.cloneDatabase("qt_sql_default_connection", "load")
        try:
            if not db.open():
                raise Exception("db.open()")
            # The query object lives inside _scan(), so it is gone before
            # the connection is removed below.
            self._scan(db)
        finally:
            # Always release the named connection, even after a failure.
            db.close()
            del db
            QSqlDatabase.removeDatabase("load")
        self.caseLoaded.emit(0)