Split database work to separate Thread

This commit is contained in:
Christopher T. Johnson
2025-02-08 16:42:05 -05:00
parent 58f7e1b59c
commit ffc840dc66
2 changed files with 260 additions and 171 deletions

View File

@@ -2,20 +2,40 @@
import datetime
import re
import sys
import dateparser
import requests
from typing import NoReturn
from PySide6.QtCore import QCoreApplication, QModelIndex, Signal, Qt, Slot
from PySide6.QtSql import QSqlDatabase, QSqlQuery, QSqlQueryModel
from PySide6.QtWidgets import QAbstractItemView, QApplication, QHeaderView, QMainWindow, QStyledItemDelegate, QTableWidgetItem
from bs4 import BeautifulSoup, Tag
from PySide6.QtCore import (
QCoreApplication,
QModelIndex,
QPersistentModelIndex,
QRect,
QSize,
Signal,
Slot,
)
from PySide6.QtGui import QPainter, QTextDocument
from PySide6.QtSql import (
QSqlDatabase,
QSqlQuery,
QSqlTableModel,
)
from PySide6.QtWidgets import (
QAbstractItemView,
QApplication,
QHeaderView,
QMainWindow,
QStyle,
QStyledItemDelegate,
QStyleOptionViewItem,
)
from docketModel import docketModel
from ui.MainWindow import Ui_MainWindow
from lib.utils import query_error
from ui.MainWindow import Ui_MainWindow
from workers import updateThread
# Module-level shorthand for QCoreApplication.translate.
translate = QCoreApplication.translate
class dateDelegate(QStyledItemDelegate):
def displayText(self, value, locale) -> str:
date = datetime.date.fromtimestamp(value)
@@ -183,168 +203,7 @@ def schema_update(db: QSqlDatabase) -> None:
db.commit()
return
def update_proceedings(case_id: int, bs: BeautifulSoup) -> None:
    """Store the proceedings entries of a docket page and their documents.

    Reads ``<table id="proceedings">`` from the parsed page.  The first
    row is discarded as a header; the remaining rows are consumed in
    pairs: an entry row (date in the first cell, text in the second)
    followed by a row whose second cell holds the document links for
    that entry.  Entries and documents that are not already present in
    the database are inserted; existing rows are left untouched.

    Args:
        case_id: Value stored in ``entries.case_id`` for every entry.
        bs: Parsed docket page.

    Raises:
        AssertionError: If the page does not have the expected structure
            or an entry date does not parse to a ``datetime``.
    """
    table = bs.find('table', id="proceedings")
    assert isinstance(table, Tag)
    # Walk the rows with an iterator rather than repeatedly popping the
    # head of a list (O(n) per pop).  This also copes with an empty
    # table and with a final entry row that has no documents row.
    rows = iter(table.find_all('tr'))
    next(rows, None)  # discard the header row
    query = QSqlQuery()
    for entry_row in rows:
        assert isinstance(entry_row, Tag)
        date_cell = entry_row.contents[0]
        assert isinstance(date_cell, Tag) and isinstance(date_cell.string, str)
        date = dateparser.parse(date_cell.string)
        # .timestamp() only exists on datetime.datetime, not on plain
        # datetime.date, so narrow to the type that is actually needed.
        assert isinstance(date, datetime.datetime)
        timestamp = date.timestamp()
        text_cell = entry_row.contents[1]
        assert isinstance(text_cell, Tag) and isinstance(text_cell.string, str)
        text = text_cell.string.strip()

        # Look the entry up first so re-running an update does not
        # create duplicates.
        query.prepare("SELECT * FROM entries WHERE case_id = :cid AND date = :date AND text=:text")
        query.bindValue(':cid', case_id)
        query.bindValue(':text', text)
        query.bindValue(':date', timestamp)
        if not query.exec():
            query_error(query)
        if not query.next():
            query.prepare("INSERT INTO entries (case_id, date, text) VALUES (:cid,:date,:text)")
            query.bindValue(':cid', case_id)
            query.bindValue(':date', timestamp)
            query.bindValue(':text', text)
            if not query.exec():
                query_error(query)
            entry_id = query.lastInsertId()
        else:
            # NOTE(review): relies on the entry id being the first
            # column returned by SELECT * -- confirm against the schema.
            entry_id = query.value(0)

        # The row after an entry carries that entry's document links.
        links_row = next(rows, None)
        if links_row is None:
            break
        assert isinstance(links_row, Tag)
        links_cell = links_row.contents[1]
        assert isinstance(links_cell, Tag)
        for a in links_cell:
            assert isinstance(a, Tag)
            url = a.attrs['href']
            name = a.string
            query.prepare("SELECT * FROM documents WHERE url=:url AND entry_id = :eid")
            query.bindValue(':url', url)
            query.bindValue(":eid", entry_id)
            if not query.exec():
                query_error(query)
            if not query.next():
                query.prepare("INSERT INTO documents (entry_id, name, url) "
                              "VALUES (:eid, :name, :url)")
                query.bindValue(":eid", entry_id)
                query.bindValue(":name", name)
                query.bindValue(":url", url)
                if not query.exec():
                    query_error(query)
def update_db(case_id) -> int:
    """Fetch one docket page and store its case data in the database.

    Downloads the docket page for ``case_id`` from supremecourt.gov,
    reads the docket number, parties, docketing date and linked case
    from the ``docketinfo`` table, inserts a ``cases`` row if none
    exists for that docket number, resolves the linked case (fetching
    it recursively when it is not in the database yet) and finally
    stores the proceedings via ``update_proceedings``.

    Args:
        case_id: Docket number used to build the page URL.

    Returns:
        The database id of the ``cases`` row for this docket.

    Raises:
        SystemExit: If the HTTP status is not 200 (the status code is
            printed first).
        AssertionError: If the page does not have the expected structure.
    """
    # A timeout keeps a stalled connection from blocking forever.
    r = requests.get(
        f'https://www.supremecourt.gov/docket/docketfiles/html/public/{case_id}.html',
        timeout=30,
    )
    if r.status_code != 200:
        print(r.status_code)
        # sys.exit is always available; the bare exit() helper is only
        # injected by the site module.
        sys.exit(1)
    bs = BeautifulSoup(r.text, 'lxml')
    #
    # docket_id, previous_docket, petitioners, respondents, date
    # all come from the docketinfo table
    #
    di = bs.find('table', id='docketinfo')
    assert di is not None and isinstance(di, Tag)
    #
    # docket_id is first row, first column
    docket_id = di.find('span')
    assert docket_id is not None and isinstance(docket_id, Tag)
    docket_id = docket_id.contents[0]
    assert isinstance(docket_id, str)
    docket_id = docket_id.strip()
    docket_id = docket_id.replace('No. ', '')
    #
    # Title is second row, first column
    tr = di.contents[1]
    assert isinstance(tr, Tag) and isinstance(tr.contents[0], Tag)
    assert tr.contents[0].string == 'Title:'
    td = tr.contents[1]
    assert isinstance(td, Tag)
    span = td.contents[0]
    assert isinstance(span, Tag) and isinstance(span.contents[0], str)
    petitioners = span.contents[0].strip()
    #
    # XXX - We need to deal with other titles. Change this to an RE
    # UPDATED: we are just handling the two we know about.
    #
    petitioners = petitioners.replace(', Petitioners', '')
    petitioners = petitioners.replace(', Applicants', '')
    assert isinstance(span.contents[4], str)
    respondent = span.contents[4].strip()
    #
    # Date on which the case was docketed
    tr = di.contents[2]
    assert isinstance(tr, Tag) and isinstance(tr.contents[1], Tag)
    td = tr.contents[1]
    assert isinstance(td, Tag) and td.string is not None
    docket_date = td.string.strip()
    date = dateparser.parse(docket_date)
    #
    # linked case is row 3, column 0
    tr = di.contents[3]
    assert isinstance(tr, Tag) and isinstance(tr.contents[0], Tag)
    linked = tr.contents[0].string
    #
    # See if this case already exists.
    #
    query = QSqlQuery()
    query.prepare("SELECT * FROM cases WHERE docket_id = :did")
    query.bindValue(':did', docket_id)
    if not query.exec():
        query_error(query)
    #
    # if it does not exist, create it.  This stops a recursion loop:
    # a linked case that points back here will find this row.
    #
    if not query.next():
        query.prepare("INSERT INTO cases (docket_id, petitioners, respondents, date, linked) "
                      "VALUES (:did, :pet, :resp, :date, NULL)")
        query.bindValue(':did', docket_id)
        query.bindValue(':pet', petitioners)
        query.bindValue(':resp', respondent)
        # .timestamp() only exists on datetime.datetime, not on plain
        # datetime.date, so narrow to the type that is actually needed.
        assert isinstance(date, datetime.datetime)
        query.bindValue(':date', date.timestamp())
        if not query.exec():
            query_error(query)
        row_id = query.lastInsertId()
        linked_id = None
    else:
        # NOTE(review): relies on the case id being the first column
        # returned by SELECT * -- confirm against the schema.
        row_id = query.value(0)
        linked_id = query.value('linked')
    assert isinstance(row_id, int)
    #
    # If there is a linked case, we need to get the ID for that case.
    if linked is not None:
        linked = linked.replace('Linked with ', '')
        query.prepare("SELECT * FROM cases WHERE docket_id = :did")
        query.bindValue(':did', linked)
        if not query.exec():
            query_error(query)
        if not query.next():
            # Not stored yet: fetch the linked docket as well.
            new_id = update_db(linked)
        else:
            new_id = query.value(0)
        if new_id != linked_id:
            query.prepare("UPDATE cases SET linked=:lid WHERE case_id = :cid")
            query.bindValue(':lid', new_id)
            query.bindValue(':cid', row_id)
            if not query.exec():
                query_error(query)
    #
    # XXX - Process lower courts
    #
    update_proceedings(row_id, bs)
    return row_id
def main() -> int:
app = QApplication(sys.argv)
db = QSqlDatabase.addDatabase("QSQLITE")
@@ -352,8 +211,6 @@ def main() -> int:
db.setDatabaseName("scotus.db")
db.open()
schema_update(db)
update_db('24-203')
update_db('23A1058')
window = MainWindow()
return app.exec()