388 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			388 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import datetime
 | |
| import re
 | |
| 
 | |
| import dateparser
 | |
| import requests
 | |
| from bs4 import BeautifulSoup, Tag
 | |
| from PySide6.QtCore import QDateTime, QThread
 | |
| from PySide6.QtSql import QSqlDatabase, QSqlQuery
 | |
| 
 | |
| from lib.utils import query_error
 | |
| 
 | |
| 
 | |
def update_proceedings(
    case_id: int, bs: BeautifulSoup, db: QSqlDatabase
) -> bool:
    """Store the docket's proceedings and report whether the case is active.

    Reads the ``<table id="proceedings">`` element of ``bs``.  After the
    header row, rows are consumed in pairs: an entry row (date in the first
    cell, text in the second) followed by a row whose second cell holds the
    document links belonging to that entry.  Entries and documents that are
    not already present in the ``entries`` / ``documents`` tables are
    inserted.

    Args:
        case_id: Value written to ``entries.case_id`` for every entry.
        bs: Parsed docket page.
        db: Database connection used for every query.

    Returns:
        ``False`` when the text of the last entry starts with
        "rehearing denied." or "judgment issued.", or starts with
        "petition denied." and that entry is more than 40 days old.
        ``True`` otherwise, including when the table lists no entries.
    """
    table = bs.find("table", id="proceedings")
    assert isinstance(table, Tag)
    # The first row is the table header; everything after it is data.
    rows = table.find_all("tr")[1:]
    query = QSqlQuery(db)

    # Text and date of the most recent entry processed.  They stay None when
    # the table has no data rows, so the final check can tell the difference.
    text = None
    date = None

    for idx in range(0, len(rows), 2):
        tr = rows[idx]
        assert isinstance(tr, Tag)

        # First cell: the entry date.
        td = tr.contents[0]
        assert isinstance(td, Tag) and isinstance(td.string, str)
        tmp = dateparser.parse(td.string)
        assert isinstance(tmp, datetime.datetime)
        date = QDateTime.fromSecsSinceEpoch(int(tmp.timestamp()))

        # Second cell: the entry text.
        #
        # When a case is GVRed, the <td> will contain <i>, which means that
        # td.string is empty.  In that case the cell's children are
        # serialised so the markup is stored, not just plain text.
        td = tr.contents[1]
        assert isinstance(td, Tag)
        text = td.string
        if not text:
            text = "".join([str(x) for x in td.contents])

        query.prepare(
            "SELECT * FROM entries WHERE case_id = :cid AND date = :date AND text=:text"
        )
        query.bindValue(":cid", case_id)
        query.bindValue(":text", text)
        query.bindValue(":date", date)
        if not query.exec():
            query_error(query)
        if not query.next():
            query.prepare(
                "INSERT INTO entries (case_id, date, text) VALUES (:cid,:date,:text)"
            )
            query.bindValue(":cid", case_id)
            query.bindValue(":date", date)
            query.bindValue(":text", text)
            if not query.exec():
                query_error(query)
            entry_id = query.lastInsertId()
        else:
            # First column of the existing entries row.
            entry_id = query.value(0)

        # The row after an entry carries its document links.  A trailing
        # entry without such a row simply has no documents to record.
        if idx + 1 >= len(rows):
            break
        doc_row = rows[idx + 1]
        assert isinstance(doc_row, Tag)
        links = doc_row.contents[1]
        assert isinstance(links, Tag)
        for a in links:
            assert isinstance(a, Tag)
            url = a.attrs["href"]
            name = a.string
            query.prepare(
                "SELECT * FROM documents WHERE url=:url AND entry_id = :eid"
            )
            query.bindValue(":url", url)
            query.bindValue(":eid", entry_id)
            if not query.exec():
                query_error(query)
            if not query.next():
                query.prepare(
                    "INSERT INTO documents (entry_id, name, url) "
                    "VALUES (:eid, :name, :url)"
                )
                query.bindValue(":eid", entry_id)
                query.bindValue(":name", name)
                query.bindValue(":url", url)
                if not query.exec():
                    query_error(query)

    if text is None or date is None:
        # No entries were listed, so nothing can mark the case as final.
        return True

    assert isinstance(text, str)
    print(f"text: {text.lower()}")
    #
    # If cert is denied, a petition for rehearing can be requested.
    # The petitioner has 40 days to file for a rehearing.
    #
    # Translation: if the last entry is "petition denied\..*$" and 40 days
    # have passed, the case is final.
    active = True
    text = text.lower()
    if text.startswith(("rehearing denied.", "judgment issued.")):
        active = False
    if text.startswith("petition denied."):
        assert isinstance(date, QDateTime)
        delta = date.daysTo(QDateTime.currentDateTime())
        if delta > 40:
            active = False
    return active
 | |
| 
 | |
| 
 | |
def update_db(case_id: str, db: QSqlDatabase) -> int:
    """Download one docket page and store its contents in the database.

    The docket is looked up in ``cases`` by ``docket_id``.  A case already
    stored and flagged inactive is returned without any network access.
    Otherwise the docket page is fetched from supremecourt.gov, the case row
    is created if missing, linked dockets are loaded (recursively, through
    this same function) and recorded in ``cases_cases``, and the proceedings
    are synchronised through ``update_proceedings``.

    Args:
        case_id: Docket number such as ``"22-123"`` or ``"22A123"``.
        db: Database connection used for every query.

    Returns:
        The ``cases`` row id for the docket, or ``-1`` when the downloaded
        page has an empty ``<title>`` (treated as "no such docket").

    Raises:
        Exception: ``case_id`` does not look like a docket number, the
            server answers with a status other than 200, or a linked case
            could not be created.
    """
    #
    # See if this case already exists.
    #
    # We assume that case_id == docket_id at this point.  If it does not,
    # then we will build out from the request we get.
    print(f"Updating {case_id}")
    matches = re.match(r"(\d\d)[-A](\d+)(.*)$", case_id)
    if matches is None:
        raise Exception(f"Not a match {case_id}")
    case_id = matches.group()

    query = QSqlQuery(db)
    query.prepare("SELECT * FROM cases WHERE docket_id = :did")
    query.bindValue(":did", case_id)
    if not query.exec():
        query_error(query)
    exists = query.next()
    # A known case that is no longer active never needs refreshing.
    if exists and query.value("active") != 1:
        return int(query.value("case_id"))

    # NOTE(review): no timeout is passed, so a stalled connection blocks the
    # calling thread indefinitely -- consider adding one.
    r = requests.get(
        f"https://www.supremecourt.gov/docket/docketfiles/html/public/{case_id}.html"
    )
    if r.status_code != 200:
        # Raise instead of exiting the interpreter: this function runs inside
        # worker threads and already reports its other failures by raising.
        raise Exception(
            f"Unexpected HTTP status {r.status_code} fetching docket {case_id}"
        )
    bs = BeautifulSoup(r.text, "lxml")
    #
    # SCOTUS does not return 404 for page not found.
    #
    title = bs.find("title")
    assert isinstance(title, Tag) and isinstance(title.string, str)
    if title.string.strip() == "":
        return -1
    #
    # docket_id, previous_docket, petitioners, respondents, date
    # all come from the docketinfo table
    #
    di = bs.find("table", id="docketinfo")
    assert di is not None and isinstance(di, Tag)

    #
    # docket_id is first row, first column
    span = di.find("span")
    assert span is not None and isinstance(span, Tag)
    tmp = span.contents[0]
    assert isinstance(tmp, str)
    matches = re.match(r"(No.)?\s*(\d+[-A]\d+).*$", tmp)
    assert matches is not None
    print(matches,matches.groups())
    docket_id = matches.group(2)
    print(f"Found {docket_id}")

    #
    # Title is second row, first column
    tr = di.contents[1]
    assert isinstance(tr, Tag) and isinstance(tr.contents[0], Tag)
    assert tr.contents[0].string == "Title:"
    td = tr.contents[1]
    assert isinstance(td, Tag)
    span = td.contents[0]
    assert isinstance(span, Tag) and isinstance(span.contents[0], str)
    petitioners = span.contents[0].strip()
    #
    # XXX - We need to deal with other titles.  Change this to an RE
    # UPDATED: we are just handling the two we know about.
    #
    petitioners = petitioners.replace(", Petitioners", "")
    petitioners = petitioners.replace(", Applicants", "")
    # The respondent is read from the fifth child of the span, so the span
    # needs at least five children before index 4 is valid.
    if len(span.contents) >= 5:
        assert isinstance(span.contents[4], str)
        respondent = span.contents[4].strip()
    else:
        respondent = ""

    #
    # Date on which the case was docketed
    tr = di.contents[2]
    assert isinstance(tr, Tag) and isinstance(tr.contents[1], Tag)
    td = tr.contents[1]
    assert isinstance(td, Tag)
    if td.string is None:
        # Placeholder date used when the page gives no docketed date.
        tmp = datetime.datetime(year=1776, month=7, day=4)
    else:
        docket_date = td.string.strip()
        tmp = dateparser.parse(docket_date)
    assert isinstance(tmp, datetime.datetime)
    date = QDateTime.fromSecsSinceEpoch(int(tmp.timestamp()))

    #
    # linked case is row 3, column 0
    tr = di.contents[3]
    assert isinstance(tr, Tag) and isinstance(tr.contents[0], Tag)
    linked = tr.contents[0].string

    #
    # If this case does not exist, create it before following any links.
    # This stops a recursion loop between cases that link to each other.
    #
    if not exists:
        query.prepare(
            "INSERT INTO cases (docket_id, petitioners, respondents, date, active) "
            "VALUES (:did, :pet, :resp, :date, 1)"
        )
        query.bindValue(":did", docket_id)
        query.bindValue(":pet", petitioners)
        query.bindValue(":resp", respondent)
        query.bindValue(":date", date)
        if not query.exec():
            query_error(query)
        row_id = query.lastInsertId()
    else:
        # The query is still positioned on the row found by the initial
        # SELECT; its first column is used as the row id.
        row_id = query.value(0)
    assert isinstance(row_id, int)
    #
    # If there is a linked case, we need to get the ID for that case.
    if linked is not None:
        linked = linked.replace("Linked with ", "")
        for did in linked.split(','):
            did = did.strip()
            query.prepare("SELECT * FROM cases WHERE docket_id = :did")
            # Look up this single docket, not the whole comma-separated list.
            query.bindValue(":did", did)
            if not query.exec():
                query_error(query)
            if not query.next():
                linked_id = update_db(did, db)
                if linked_id <= 0:
                    raise Exception(f"Unable to create linked case: {did}")
            else:
                linked_id = query.value('case_id')

            # Record the link once.
            query.prepare("SELECT * FROM cases_cases WHERE lhs = :lhs "
                          "AND rhs = :rhs")
            query.bindValue(':lhs', row_id)
            query.bindValue(':rhs', linked_id)
            if not query.exec():
                query_error(query)
            if not query.next():
                query.prepare("INSERT INTO  cases_cases (lhs, rhs) "
                              "VALUES ( :lhs, :rhs)")
                query.bindValue(":lhs", row_id)
                query.bindValue(":rhs", linked_id)
                if not query.exec():
                    query_error(query)
    #
    # XXX - Process lower courts
    #
    active = update_proceedings(row_id, bs, db)
    if not active:
        query.prepare("UPDATE cases SET active=0 WHERE case_id = :cid")
        query.bindValue(":cid", row_id)
        if not query.exec():
            query_error(query)
    return row_id
 | |
| 
 | |
| 
 | |
class updateThread(QThread):
    """Worker thread that refreshes a single docket through ``update_db``."""

    # Docket number handed to update_db() by run(); assigned by setDocketId().
    docket_id = None

    def __init__(self) -> None:
        """Initialise the underlying QThread and log the construction."""
        super().__init__()
        print("updateThread: __init__(docket_id)")

    def setDocketId(self, docket_id: str) -> None:
        """Remember the docket number that run() should refresh."""
        self.docket_id = docket_id

    def run(self) -> None:
        """Clone the default connection, refresh the docket, then clean up."""
        print(f"updateThread: running on {self.currentThread()}")
        # Work on a clone of the default connection registered as "update".
        connection = QSqlDatabase.cloneDatabase(
            "qt_sql_default_connection", "update"
        )
        if not connection.open():
            print(connection.lastError())
            raise Exception("db.open()")

        result = update_db(str(self.docket_id), connection)

        # Close and drop the local handle before unregistering the connection.
        connection.close()
        del connection
        QSqlDatabase.removeDatabase("update")
        print(f"updateThread: run() returns {result}")
 | |
| 
 | |
| 
 | |
class loadCases(QThread):
    """Worker thread that walks docket numbers and loads them via ``update_db``.

    Progress is kept in the ``history`` table, one row per two-digit year,
    holding the docket number to try next and an ``edocket`` flag that
    selects the ``"{year}A{number}"`` form instead of ``"{year}-{number}"``.
    """

    def _history_for_year(self, query: QSqlQuery, year: str) -> tuple:
        """Return ``(history_id, edocket, number)`` for ``year``.

        A missing row is created with ``edocket = 0`` and ``number = 1``.
        """
        query.prepare("SELECT * FROM history WHERE year = :year")
        print(f"year = {year}")
        query.bindValue(":year", year)
        if not query.exec():
            query_error(query)

        if query.next():
            history_id = query.value("history_id")
            edocket = query.value("edocket")
            number = query.value("number")
            return history_id, edocket, number

        query.prepare(
            "INSERT INTO history (year, edocket, number) "
            "VALUES (:year, 0, 1)"
        )
        query.bindValue(":year", year)
        if not query.exec():
            query_error(query)
        return query.lastInsertId(), 0, 1

    def run(self) -> None:
        """Load dockets starting from the current year's saved position.

        Each loop pass handles one docket number.  Dockets already stored
        and inactive are skipped.  A negative result from ``update_db``
        moves the scan to the previous year.  The loop ends after 100 loaded
        dockets or once the year reaches ``"00"``.
        """
        db = QSqlDatabase.cloneDatabase("qt_sql_default_connection", "load")
        if not db.open():
            raise Exception("db.open()")
        year = QDateTime.currentDateTime().toString("yy")
        query = QSqlQuery(db)
        history_id, edocket, number = self._history_for_year(query, year)
        count = 0

        # Years are zero-padded two-character strings, so comparing them as
        # strings orders them the same way as comparing the numbers.
        while year > "00" and count < 100:
            if edocket == 1:
                docket_id = f"{year}A{number}"
            else:
                docket_id = f"{year}-{number}"
            query.prepare("SELECT * FROM cases WHERE docket_id = :did")
            query.bindValue(":did", docket_id)
            if not query.exec():
                query_error(query)
            if query.next() and query.value("active") == 0:
                print("Already exists and is inactive")
                number += 1
                continue

            result = update_db(docket_id, db)
            print(f"result: {result}")
            if result < 0:
                # update_db found no page for this docket: save the position
                # for the year just scanned and step back one year.
                year = f"{int(year) - 1:02d}"
                if number > 1:
                    # NOTE(review): this stores number - 1 while the update
                    # after the loop stores number -- confirm which is meant.
                    query.prepare(
                        "UPDATE history set number = :number WHERE history_id=:hid"
                    )
                    query.bindValue(":number", number - 1)
                    query.bindValue(":hid", history_id)
                    if not query.exec():
                        query_error(query)

                history_id, edocket, number = self._history_for_year(
                    query, year
                )
                continue

            number += 1
            count += 1

        if number > 1:
            query.prepare(
                "UPDATE history SET number= :number WHERE year = :year"
            )
            query.bindValue(":number", number)
            query.bindValue(":year", year)
            if not query.exec():
                query_error(query)

        # Release the query and the local handle before unregistering the
        # connection; Qt requires that no query is still open on it.
        del query
        db.close()
        del db
        QSqlDatabase.removeDatabase("load")
 |