Updated workers.py to hold "current" values. The status bar consists of three widgets: a status label, a progress bar, and a load button. Fixes: #8, #9
413 lines
14 KiB
Python
413 lines
14 KiB
Python
import datetime
|
|
import re
|
|
|
|
import dateparser
|
|
import requests
|
|
from bs4 import BeautifulSoup, Tag
|
|
from PySide6.QtCore import QDateTime, QThread, Signal
|
|
from PySide6.QtSql import QSqlDatabase, QSqlQuery
|
|
|
|
from lib.utils import query_error
|
|
|
|
|
|
def update_proceedings(
    case_id: int, bs: BeautifulSoup, db: QSqlDatabase
) -> bool:
    """Store the docket's proceedings and report whether the case is active.

    Reads the ``<table id="proceedings">`` element of *bs*.  The first row
    is skipped (presumably the header row); the remaining rows are consumed
    in pairs: an entry row (date in the first cell, text in the second)
    followed by a row whose second cell holds the links to that entry's
    documents.  Entries and documents that are not yet in the ``entries``
    and ``documents`` tables are inserted; existing rows are left alone.

    Args:
        case_id: ``case_id`` value the entries are stored under.
        bs: Parsed docket page.
        db: Database connection used for every query.

    Returns:
        ``False`` when the last entry starts with "rehearing denied." or
        "judgment issued.", or starts with "petition denied." and is more
        than 40 days old.  ``True`` otherwise, including when the table
        holds no entries at all.
    """
    table = bs.find("table", id="proceedings")
    assert isinstance(table, Tag)
    # Slicing (rather than pop(0)) tolerates a table without any rows, and
    # an iterator lets each entry pull its companion document row on demand.
    rows = iter(table.find_all("tr")[1:])
    query = QSqlQuery(db)

    # Remember the last entry seen; both stay None when there are no entries.
    text = None
    date = None
    for tr in rows:
        assert isinstance(tr, Tag)
        td = tr.contents[0]
        assert isinstance(td, Tag) and isinstance(td.string, str)
        tmp = dateparser.parse(td.string)
        assert isinstance(tmp, datetime.datetime)
        date = QDateTime.fromSecsSinceEpoch(int(tmp.timestamp()))
        td = tr.contents[1]
        #
        # When a case is GVRed, the <td> will contain <i> which means that
        # td is a Tag but not a simple string.
        # We need to convert to HTML and store the HTML in the entry, not
        # just plaintext.
        assert isinstance(td, Tag)
        text = td.string
        if not text:
            text = "".join(str(x) for x in td.contents)

        # Insert the entry only when an identical one is not stored yet.
        query.prepare(
            "SELECT * FROM entries WHERE case_id = :cid AND date = :date AND text=:text"
        )
        query.bindValue(":cid", case_id)
        query.bindValue(":text", text)
        query.bindValue(":date", date)
        if not query.exec():
            query_error(query)
        if not query.next():
            query.prepare(
                "INSERT INTO entries (case_id, date, text) VALUES (:cid,:date,:text)"
            )
            query.bindValue(":cid", case_id)
            query.bindValue(":date", date)
            query.bindValue(":text", text)
            if not query.exec():
                query_error(query)
            entry_id = query.lastInsertId()
        else:
            entry_id = query.value(0)

        # The row after an entry carries that entry's document links.
        docs_row = next(rows, None)
        if docs_row is None:
            # Trailing entry without a document row: nothing left to store.
            break
        assert isinstance(docs_row, Tag)
        assert isinstance(docs_row.contents[1], Tag)
        for a in docs_row.contents[1]:
            assert isinstance(a, Tag)
            url = a.attrs["href"]
            name = a.string
            query.prepare(
                "SELECT * FROM documents WHERE url=:url AND entry_id = :eid"
            )
            query.bindValue(":url", url)
            query.bindValue(":eid", entry_id)
            if not query.exec():
                query_error(query)
            if not query.next():
                query.prepare(
                    "INSERT INTO documents (entry_id, name, url) "
                    "VALUES (:eid, :name, :url)"
                )
                query.bindValue(":eid", entry_id)
                query.bindValue(":name", name)
                query.bindValue(":url", url)
                if not query.exec():
                    query_error(query)

    if text is None:
        # No entries were found, so nothing indicates the case is closed.
        return True

    assert isinstance(text, str)
    text = text.lower()
    print(f"text: {text}")
    #
    # If cert is denied, a petition for rehearing can be requested.
    # The petitioner has 40 days to file for a rehearing.
    #
    # Translation, if the last entry is "petition denied\..*$" and 40 days
    # have passed, the case is final
    if text.startswith(("rehearing denied.", "judgment issued.")):
        return False
    if text.startswith("petition denied."):
        assert isinstance(date, QDateTime)
        return date.daysTo(QDateTime.currentDateTime()) <= 40
    return True
|
|
|
|
|
|
def update_db(case_id: str, db: QSqlDatabase) -> int:
    """Fetch a docket page and bring the database up to date with it.

    The docket page for *case_id* is downloaded from supremecourt.gov and
    parsed.  A row is inserted into ``cases`` when the docket is not known
    yet, linked dockets are resolved (recursively calling this function for
    dockets that are not stored yet) and recorded in ``cases_cases``, and
    the proceedings are stored through ``update_proceedings``.  The case is
    flagged inactive when ``update_proceedings`` reports it as finished, or
    when an emergency-docket case is linked to a regular-docket case.

    Args:
        case_id: Docket number such as ``"22-123"`` or ``"22A123"``.
        db: Database connection used for every query.

    Returns:
        The ``case_id`` key of the case, or ``-1`` when the fetched page has
        a blank title (the site's way of saying "not found").  A case that
        is already stored and inactive is returned without any download.

    Raises:
        Exception: *case_id* does not look like a docket number, or a
            linked case could not be created.
    """
    #
    # See if this case already exists.
    #
    # We assume that case_id == docket_id at this point. If it does not,
    # then we will build out from the request we get
    print(f"Updating {case_id}")
    matches = re.match(r"(\d\d)[-A](\d+)(.*)$", case_id)
    if matches is None:
        raise Exception(f"Not a match {case_id}")
    else:
        case_id = matches.group()

    query = QSqlQuery(db)
    query.prepare("SELECT * FROM cases WHERE docket_id = :did")
    query.bindValue(":did", case_id)
    if not query.exec():
        query_error(query)
    exists = query.next()
    if exists:
        active = query.value("active") == 1
    else:
        active = True
    if not active:
        # Inactive cases are final; do not hit the network for them.
        return int(query.value("case_id"))

    r = requests.get(
        f"https://www.supremecourt.gov/docket/docketfiles/html/public/{case_id}.html"
    )
    if r.status_code != 200:
        # NOTE(review): exit() raises SystemExit inside the calling worker
        # thread; confirm this is the intended reaction to an HTTP error.
        print(r.status_code)
        exit(1)
    bs = BeautifulSoup(r.text, "lxml")
    #
    # SCOTUS does not return 404 for page not found.
    #
    title = bs.find("title")
    assert isinstance(title, Tag) and isinstance(title.string, str)
    if title.string.strip() == "":
        return -1
    #
    # docket_id, previous_docket, petitioners, respondents, date
    # all come from the docketinfo table
    #
    di = bs.find("table", id="docketinfo")
    assert di is not None and isinstance(di, Tag)

    #
    # docket_id is first row, first column
    span = di.find("span")
    assert span is not None and isinstance(span, Tag)
    tmp = span.contents[0]
    assert isinstance(tmp, str)
    matches = re.match(r"(No.)?\s*(\d+[-A]\d+).*$", tmp)
    assert matches is not None
    print(matches, matches.groups())
    docket_id = matches.group(2)
    print(f"Found {docket_id}")

    #
    # Title is second row, first column
    tr = di.contents[1]
    assert isinstance(tr, Tag) and isinstance(tr.contents[0], Tag)
    assert tr.contents[0].string == "Title:"
    td = tr.contents[1]
    assert isinstance(td, Tag)
    span = td.contents[0]
    assert isinstance(span, Tag) and isinstance(span.contents[0], str)
    petitioners = span.contents[0].strip()
    #
    # XXX - We need to deal with other titles. Change this to an RE
    # UPDATED: we are just handling the two we know about.
    #
    petitioners = petitioners.replace(", Petitioners", "")
    petitioners = petitioners.replace(", Applicants", "")
    # The respondent is read from index 4, so at least five children are
    # required (the previous ">= 4" test allowed an IndexError).
    if len(span.contents) > 4:
        assert isinstance(span.contents[4], str)
        respondent = span.contents[4].strip()
    else:
        respondent = ""

    #
    # Date on which the case was docketed
    tr = di.contents[2]
    assert isinstance(tr, Tag) and isinstance(tr.contents[1], Tag)
    td = tr.contents[1]
    assert isinstance(td, Tag)
    docket_date = td.string
    if docket_date is None:
        # Placeholder date used when the page carries no docketed date.
        docket_date = "July 7, 1776"

    tmp = dateparser.parse(docket_date)  # type: ignore[assignment]
    assert tmp is not None
    date = QDateTime.fromSecsSinceEpoch(int(tmp.timestamp()))  # type: ignore[assignment, attr-defined]

    #
    # linked case is row 3, column 0
    tr = di.contents[3]
    assert isinstance(tr, Tag) and isinstance(tr.contents[0], Tag)
    linked = tr.contents[0].string

    #
    # if this case does not exists, create it. This stops a recursion loop.
    #
    if not exists:
        query.prepare(
            "INSERT INTO cases (docket_id, petitioners, respondents, date, active) "
            "VALUES (:did, :pet, :resp, :date, 1)"
        )
        query.bindValue(":did", docket_id)
        query.bindValue(":pet", petitioners)
        query.bindValue(":resp", respondent)
        query.bindValue(":date", date)
        if not query.exec():
            query_error(query)
        case_id = query.lastInsertId()
    else:
        # The query is still positioned on the row selected at the top.
        case_id = query.value(0)
    assert isinstance(case_id, int)
    #
    # If there is a linked case, we need to get the ID for that case.
    if linked is not None:
        #
        # If this case is on the Emergency Docket and it is linked to
        # a case on the regular docket, then this case is no longer active
        #
        deactivate = False

        linked = linked.replace("Linked with ", "")
        for did in linked.split(","):
            did = did.strip()
            if re.match(r"\d+-\d+$", did):
                deactivate = True
            query.prepare("SELECT * FROM cases WHERE docket_id = :did")
            # Look up the individual docket, not the whole comma-separated
            # list; otherwise the lookup never matches and mutually linked
            # cases keep recursing into each other.
            query.bindValue(":did", did)
            if not query.exec():
                query_error(query)
            if not query.next():
                linked_id = update_db(did, db)
                if linked_id <= 0:
                    raise Exception(f"Unable to create linked case: {did}")
            else:
                linked_id = query.value("case_id")

            # Record the link once.
            query.prepare(
                "SELECT * FROM cases_cases WHERE lhs = :lhs AND rhs = :rhs"
            )
            query.bindValue(":lhs", case_id)
            query.bindValue(":rhs", linked_id)
            if not query.exec():
                query_error(query)
            if not query.next():
                query.prepare(
                    "INSERT INTO cases_cases (lhs, rhs) "
                    "VALUES ( :lhs, :rhs)"
                )
                query.bindValue(":lhs", case_id)
                query.bindValue(":rhs", linked_id)
                if not query.exec():
                    query_error(query)
        # A regular-docket case is never deactivated because of its links.
        if re.match(r"\d+-\d+$", docket_id):
            deactivate = False
        if deactivate:
            query.prepare("UPDATE cases SET active=0 WHERE case_id = :cid")
            query.bindValue(":cid", case_id)
            if not query.exec():
                query_error(query)

    #
    # XXX - Process lower courts
    #
    active = update_proceedings(case_id, bs, db)
    if not active:
        query.prepare("UPDATE cases SET active=0 WHERE case_id = :cid")
        query.bindValue(":cid", case_id)
        if not query.exec():
            query_error(query)
    return case_id
|
|
|
|
|
|
class updateThread(QThread):
    """Worker thread that runs ``update_db`` for a single docket."""

    # Docket number handed to update_db() by run(); set via setDocketId().
    docket_id = None

    def __init__(self) -> None:
        super(updateThread, self).__init__()
        print("updateThread: __init__(docket_id)")

    def setDocketId(self, docket_id: str) -> None:
        """Remember the docket number that run() will update."""
        self.docket_id = docket_id

    def run(self) -> None:
        """Update the docket on a private clone of the default connection.

        Raises:
            Exception: the cloned connection could not be opened.
        """
        print(f"updateThread: running on {self.currentThread()}")
        db = QSqlDatabase.cloneDatabase("qt_sql_default_connection", "update")
        try:
            if not db.open():
                print(db.lastError())
                raise Exception("db.open()")
            case_id = update_db(str(self.docket_id), db)
        finally:
            # Always release the "update" connection, even when the update
            # fails, so it does not linger under that name.  The local
            # handle must be dropped before the connection is removed.
            db.close()
            del db
            QSqlDatabase.removeDatabase("update")
        print(f"updateThread: run() returns {case_id}")
|
|
|
|
|
|
class loadCases(QThread):
    """Worker thread that walks docket numbers and loads them in bulk.

    Progress is kept in the ``history`` table (one row per two-digit year
    holding the next docket number).  ``run`` processes up to 100 dockets,
    stepping back one year whenever ``update_db`` reports a missing docket.
    """

    # Emitted with the running count before each docket, and with 0 at the end.
    caseLoaded = Signal(int)
    # Two-digit year currently being loaded; evaluated once at class creation.
    year = QDateTime.currentDateTime().toString("yy")
    # Docket number within `year` that is processed next.
    number = 0

    def run(self) -> None:
        """Load cases on a private clone of the default connection.

        Raises:
            Exception: the cloned connection could not be opened.
        """
        db = QSqlDatabase.cloneDatabase("qt_sql_default_connection", "load")
        try:
            if not db.open():
                raise Exception("db.open()")
            # The query object lives inside the helper, so it is gone before
            # the connection is removed below.
            self._load_cases(db)
        finally:
            # Always release the "load" connection, even when loading fails.
            db.close()
            del db
            QSqlDatabase.removeDatabase("load")
        self.caseLoaded.emit(0)

    def _load_history(self, query: QSqlQuery) -> tuple:
        """Read (or create) the history row for ``self.year``.

        Sets ``self.number`` from the row and returns
        ``(history_id, edocket)``.
        """
        query.prepare("SELECT * FROM history WHERE year = :year")
        print(f"year = {self.year}")
        query.bindValue(":year", self.year)
        if not query.exec():
            query_error(query)

        if not query.next():
            query.prepare(
                "INSERT INTO history (year, edocket, number) "
                "VALUES (:year, 0, 1)"
            )
            query.bindValue(":year", self.year)
            if not query.exec():
                query_error(query)
            self.number = 1
            return query.lastInsertId(), 0

        self.number = query.value("number")
        return query.value("history_id"), query.value("edocket")

    def _load_cases(self, db: QSqlDatabase) -> None:
        """Process up to 100 dockets, persisting progress in ``history``."""
        query = QSqlQuery(db)
        history_id, edocket = self._load_history(query)
        count = 0

        while self.year > "00" and count < 100:
            self.caseLoaded.emit(count)
            if edocket == 1:
                docket_id = f"{self.year}A{self.number}"
            else:
                docket_id = f"{self.year}-{self.number}"
            query.prepare("SELECT * FROM cases WHERE docket_id = :did")
            query.bindValue(":did", docket_id)
            if not query.exec():
                query_error(query)
            if query.next() and query.value("active") == 0:
                # Skipped dockets do not count towards the limit of 100.
                print("Already exists and is inactive")
                self.number += 1
                continue

            result = update_db(docket_id, db)
            print(f"result: {result}")
            if result < 0:
                # The docket does not exist: store the last number reached
                # for this year, then continue with the previous year.
                if self.number > 1:
                    query.prepare(
                        "UPDATE history set number = :number WHERE history_id=:hid"
                    )
                    query.bindValue(":number", self.number - 1)
                    query.bindValue(":hid", history_id)
                    if not query.exec():
                        query_error(query)
                self.year = f"{int(self.year) - 1:02d}"
                history_id, edocket = self._load_history(query)
                continue

            self.number += 1
            count += 1

        # Persist where this run stopped so the next run resumes from there.
        if self.number > 1:
            query.prepare(
                "UPDATE history SET number= :number WHERE year = :year"
            )
            query.bindValue(":number", self.number)
            query.bindValue(":year", self.year)
            if not query.exec():
                query_error(query)
|