This commit is contained in:
Christopher T. Johnson
2025-02-12 10:21:32 -05:00
parent bc4f778aba
commit a1eb44f6cb

View File

@@ -1,5 +1,4 @@
import datetime import datetime
from re import template
import dateparser import dateparser
import requests import requests
@@ -10,21 +9,30 @@ from PySide6.QtSql import QSqlDatabase, QSqlQuery
from lib.utils import query_error from lib.utils import query_error
def update_proceedings(case_id: int, bs: BeautifulSoup) -> bool: def update_proceedings(
case_id: int, bs: BeautifulSoup, db: QSqlDatabase
) -> bool:
table = bs.find("table", id="proceedings") table = bs.find("table", id="proceedings")
assert isinstance(table, Tag) assert isinstance(table, Tag)
trs = table.find_all("tr") trs = table.find_all("tr")
tr = trs.pop(0) tr = trs.pop(0)
query = QSqlQuery(QSqlDatabase.database("update")) query = QSqlQuery(db)
while len(trs) > 0: while len(trs) > 0:
tr = trs.pop(0) tr = trs.pop(0)
assert isinstance(tr, Tag) assert isinstance(tr, Tag)
print(tr.contents)
td = tr.contents[0] td = tr.contents[0]
assert isinstance(td, Tag) and isinstance(td.string, str) assert isinstance(td, Tag) and isinstance(td.string, str)
date = dateparser.parse(td.string) date = dateparser.parse(td.string)
td = tr.contents[1] td = tr.contents[1]
assert isinstance(td, Tag) and isinstance(td.string, str) #
text = td.string.strip() # When a case is GVRed, the <td> will contain <i> which means that
# td is a Tag but not a simple string.
# We need to convert to HTML and store the HTML in the entry, not just plaintext.
assert isinstance(td, Tag)
text = td.string
if not text:
text = "".join([str(x) for x in td.contents])
query.prepare( query.prepare(
"SELECT * FROM entries WHERE case_id = :cid AND date = :date AND text=:text" "SELECT * FROM entries WHERE case_id = :cid AND date = :date AND text=:text"
) )
@@ -70,21 +78,24 @@ def update_proceedings(case_id: int, bs: BeautifulSoup) -> bool:
query.bindValue(":url", url) query.bindValue(":url", url)
if not query.exec(): if not query.exec():
query_error(query) query_error(query)
assert isinstance(text, str)
print(f"text: {text.lower()}") print(f"text: {text.lower()}")
result = not text.lower() in [ result = not text.lower() in [
"petition denied.", "petition denied.",
"rehearing denied.",
# "judgement issued.",
] ]
return result return result
def update_db(case_id) -> int: def update_db(case_id: str, db: QSqlDatabase) -> int:
# #
# See if this case already exists. # See if this case already exists.
# #
# We assume that case_id == docket_id at this point. If it does not, # We assume that case_id == docket_id at this point. If it does not,
# then we will build out from the request we get # then we will build out from the request we get
query = QSqlQuery(QSqlDatabase.database("update")) query = QSqlQuery(db)
query.prepare("SELECT * FROM cases WHERE docket_id = :did") query.prepare("SELECT * FROM cases WHERE docket_id = :did")
query.bindValue(":did", case_id) query.bindValue(":did", case_id)
if not query.exec(): if not query.exec():
@@ -105,6 +116,13 @@ def update_db(case_id) -> int:
exit(1) exit(1)
bs = BeautifulSoup(r.text, "lxml") bs = BeautifulSoup(r.text, "lxml")
# #
# SCOTUS does not return 404 for page not found.
#
title = bs.find("title")
assert isinstance(title, Tag) and isinstance(title.string, str)
if title.string.strip() == "":
return -1
#
# docket_id, previous_docket, petitioners, respondents, date # docket_id, previous_docket, petitioners, respondents, date
# all come from the docketinfo table # all come from the docketinfo table
# #
@@ -136,8 +154,11 @@ def update_db(case_id) -> int:
# #
petitioners = petitioners.replace(", Petitioners", "") petitioners = petitioners.replace(", Petitioners", "")
petitioners = petitioners.replace(", Applicants", "") petitioners = petitioners.replace(", Applicants", "")
assert isinstance(span.contents[4], str) if len(span.contents) >= 4:
respondent = span.contents[4].strip() assert isinstance(span.contents[4], str)
respondent = span.contents[4].strip()
else:
respondent = ""
# #
# Date on which the case was docketed # Date on which the case was docketed
@@ -184,7 +205,7 @@ def update_db(case_id) -> int:
if not query.exec(): if not query.exec():
query_error(query) query_error(query)
if not query.next(): if not query.next():
new_id = update_db(linked) new_id = update_db(linked, db)
else: else:
new_id = query.value(0) new_id = query.value(0)
if new_id != linked_id: if new_id != linked_id:
@@ -196,7 +217,7 @@ def update_db(case_id) -> int:
# #
# XXX - Process lower courts # XXX - Process lower courts
# #
active = update_proceedings(case_id, bs) active = update_proceedings(case_id, bs, db)
if not active: if not active:
query.prepare("UPDATE cases SET active=0 WHERE case_id = :cid") query.prepare("UPDATE cases SET active=0 WHERE case_id = :cid")
query.bindValue(":cid", case_id) query.bindValue(":cid", case_id)
@@ -208,25 +229,115 @@ def update_db(case_id) -> int:
class updateThread(QThread):
    """Worker thread that refreshes a single docket through ``update_db``.

    Set the docket with :meth:`setDocketId` before starting the thread.
    The thread works on its own clone of the default connection, registered
    under the name ``"update"``.
    """

    # Docket identifier to refresh; stays None until setDocketId() is called.
    docket_id = None

    def __init__(self) -> None:
        super(updateThread, self).__init__()
        print("updateThread: __init__(docket_id)")

    def setDocketId(self, docket_id: str) -> None:
        """Record the docket identifier that the next ``run()`` will update."""
        self.docket_id = docket_id

    def run(self) -> None:
        """Clone the default connection, update the docket, then drop the clone.

        Raises:
            Exception: if the cloned connection cannot be opened.
        """
        print(f"updateThread: running on {self.currentThread()}")
        db = QSqlDatabase.cloneDatabase("qt_sql_default_connection", "update")
        try:
            if not db.open():
                print(db.lastError())
                raise Exception("db.open()")
            case_id = update_db(str(self.docket_id), db)
        finally:
            # Always release the named connection, including when open() or
            # update_db() fails; otherwise a stale "update" connection is left
            # registered for the next run.  The local handle is dropped
            # before removeDatabase() so the connection is no longer
            # referenced from this frame.
            db.close()
            del db
            QSqlDatabase.removeDatabase("update")
        print(f"updateThread: run() returns {case_id}")
class loadCases(QThread):
    """Worker thread that walks docket numbers and loads them via ``update_db``.

    Progress is tracked per two-digit year in the ``history`` table.  The
    thread works on its own clone of the default connection, registered
    under the name ``"load"``.
    """

    def run(self) -> None:
        """Open the "load" connection, load cases, and always release it.

        Raises:
            Exception: if the cloned connection cannot be opened.
        """
        db = QSqlDatabase.cloneDatabase("qt_sql_default_connection", "load")
        try:
            if not db.open():
                raise Exception("db.open()")
            # The query object lives only inside _load_cases(), so on the
            # normal path it is gone before removeDatabase() is called.
            self._load_cases(db)
        finally:
            db.close()
            del db
            QSqlDatabase.removeDatabase("load")

    def _history_for_year(self, query: QSqlQuery, year: str) -> tuple:
        """Return ``(history_id, edocket, number)`` for *year*.

        When no ``history`` row exists for the year, one is inserted with
        ``edocket = 0`` and ``number = 1`` and those values are returned.
        """
        query.prepare("SELECT * FROM history WHERE year = :year")
        print(f"year = {year}")
        query.bindValue(":year", year)
        if not query.exec():
            query_error(query)
        if query.next():
            return (
                query.value("history_id"),
                query.value("edocket"),
                query.value("number"),
            )
        query.prepare(
            "INSERT INTO history (year, edocket, number) "
            "VALUES (:year, 0, 1)"
        )
        query.bindValue(":year", year)
        if not query.exec():
            query_error(query)
        return query.lastInsertId(), 0, 1

    def _load_cases(self, db: QSqlDatabase) -> None:
        """Load up to 100 dockets, stepping back one year on each miss."""
        year = datetime.datetime.now().strftime("%y")
        query = QSqlQuery(db)
        history_id, edocket, number = self._history_for_year(query, year)
        count = 0
        # Years are zero-padded two-digit strings, so string comparison
        # against "00" orders them correctly.
        while year > "00" and count < 100:
            query.prepare("SELECT * FROM cases WHERE docket_id = :did")
            if edocket == 1:
                docket_id = f"{year}A{number}"
            else:
                docket_id = f"{year}-{number}"
            query.bindValue(":did", docket_id)
            print(f"Loading {docket_id}")
            if not query.exec():
                query_error(query)
            # NOTE(review): the source view lost its indentation; this
            # assumes only cases already marked inactive are skipped, and
            # that existing active cases are refreshed -- confirm.
            if query.next() and query.value("active") == 0:
                print("Already exists and is inactive")
                number += 1
                continue
            result = update_db(docket_id, db)
            print(f"result: {result}")
            if result < 0:
                # A negative result is treated as "no such docket": record
                # the last number tried for this year's history row, then
                # resume from the previous year's saved position.
                year = f"{int(year) - 1:02d}"
                query.prepare(
                    "UPDATE history set number = :number WHERE history_id=:hid"
                )
                query.bindValue(":number", number - 1)
                query.bindValue(":hid", history_id)
                if not query.exec():
                    query_error(query)
                history_id, edocket, number = self._history_for_year(
                    query, year
                )
                continue
            number += 1
            count += 1