General cleanup as we move to MariaDB
This commit is contained in:
		
							
								
								
									
										138
									
								
								workers.py
									
									
									
									
									
								
							
							
						
						
									
										138
									
								
								workers.py
									
									
									
									
									
								
							| @@ -1,9 +1,10 @@ | |||||||
| import datetime | import datetime | ||||||
|  | import re | ||||||
|  |  | ||||||
| import dateparser | import dateparser | ||||||
| import requests | import requests | ||||||
| from bs4 import BeautifulSoup, Tag | from bs4 import BeautifulSoup, Tag | ||||||
| from PySide6.QtCore import QThread | from PySide6.QtCore import QDateTime, QThread | ||||||
| from PySide6.QtSql import QSqlDatabase, QSqlQuery | from PySide6.QtSql import QSqlDatabase, QSqlQuery | ||||||
|  |  | ||||||
| from lib.utils import query_error | from lib.utils import query_error | ||||||
| @@ -20,10 +21,11 @@ def update_proceedings( | |||||||
|     while len(trs) > 0: |     while len(trs) > 0: | ||||||
|         tr = trs.pop(0) |         tr = trs.pop(0) | ||||||
|         assert isinstance(tr, Tag) |         assert isinstance(tr, Tag) | ||||||
|         print(tr.contents) |  | ||||||
|         td = tr.contents[0] |         td = tr.contents[0] | ||||||
|         assert isinstance(td, Tag) and isinstance(td.string, str) |         assert isinstance(td, Tag) and isinstance(td.string, str) | ||||||
|         date = dateparser.parse(td.string) |         tmp = dateparser.parse(td.string) | ||||||
|  |         assert isinstance(tmp, datetime.datetime) | ||||||
|  |         date = QDateTime.fromSecsSinceEpoch(int(tmp.timestamp())) | ||||||
|         td = tr.contents[1] |         td = tr.contents[1] | ||||||
|         # |         # | ||||||
|         # When a case is GVRed, the <td> will contain <i> which means that |         # When a case is GVRed, the <td> will contain <i> which means that | ||||||
| @@ -38,8 +40,7 @@ def update_proceedings( | |||||||
|         ) |         ) | ||||||
|         query.bindValue(":cid", case_id) |         query.bindValue(":cid", case_id) | ||||||
|         query.bindValue(":text", text) |         query.bindValue(":text", text) | ||||||
|         assert isinstance(date, datetime.date) |         query.bindValue(":date", date) | ||||||
|         query.bindValue(":date", date.timestamp()) |  | ||||||
|         if not query.exec(): |         if not query.exec(): | ||||||
|             query_error(query) |             query_error(query) | ||||||
|         if not query.next(): |         if not query.next(): | ||||||
| @@ -47,7 +48,7 @@ def update_proceedings( | |||||||
|                 "INSERT INTO entries (case_id, date, text) VALUES (:cid,:date,:text)" |                 "INSERT INTO entries (case_id, date, text) VALUES (:cid,:date,:text)" | ||||||
|             ) |             ) | ||||||
|             query.bindValue(":cid", case_id) |             query.bindValue(":cid", case_id) | ||||||
|             query.bindValue(":date", date.timestamp()) |             query.bindValue(":date", date) | ||||||
|             query.bindValue(":text", text) |             query.bindValue(":text", text) | ||||||
|             if not query.exec(): |             if not query.exec(): | ||||||
|                 query_error(query) |                 query_error(query) | ||||||
| @@ -80,12 +81,24 @@ def update_proceedings( | |||||||
|                     query_error(query) |                     query_error(query) | ||||||
|     assert isinstance(text, str) |     assert isinstance(text, str) | ||||||
|     print(f"text: {text.lower()}") |     print(f"text: {text.lower()}") | ||||||
|     result = not text.lower() in [ |     # | ||||||
|         "petition denied.", |     # If cert is denied, a petition for rehearing can be requested. | ||||||
|         "rehearing denied.", |     # The petitioner has 40 days to file for a rehearing. | ||||||
|         # "judgement issued.", |     # | ||||||
|     ] |     # Translation, if the last entry is "petition denied\..*$" and 40 days | ||||||
|     return result |     # have passed, the case is final | ||||||
|  |     active = True | ||||||
|  |     text = text.lower() | ||||||
|  |     if text.startswith("rehearing denied.") or text.startswith( | ||||||
|  |         "judgment issued." | ||||||
|  |     ): | ||||||
|  |         active = False | ||||||
|  |     if text.startswith("petition denied."): | ||||||
|  |         assert isinstance(date, QDateTime) | ||||||
|  |         delta = date.daysTo(QDateTime.currentDateTime()) | ||||||
|  |         if delta > 40: | ||||||
|  |             active = False | ||||||
|  |     return active | ||||||
|  |  | ||||||
|  |  | ||||||
| def update_db(case_id: str, db: QSqlDatabase) -> int: | def update_db(case_id: str, db: QSqlDatabase) -> int: | ||||||
| @@ -94,6 +107,12 @@ def update_db(case_id: str, db: QSqlDatabase) -> int: | |||||||
|     # |     # | ||||||
|     # We assume that case_id == docket_id at this point.  If it does not, |     # We assume that case_id == docket_id at this point.  If it does not, | ||||||
|     # then we will build out from the request we get |     # then we will build out from the request we get | ||||||
|  |     print(f"Updating {case_id}") | ||||||
|  |     matches = re.match(r"(\d\d)[-A](\d+)(.*)$", case_id) | ||||||
|  |     if matches is None: | ||||||
|  |         raise Exception(f"Not a match {case_id}") | ||||||
|  |     else: | ||||||
|  |         case_id = matches.group() | ||||||
|  |  | ||||||
|     query = QSqlQuery(db) |     query = QSqlQuery(db) | ||||||
|     query.prepare("SELECT * FROM cases WHERE docket_id = :did") |     query.prepare("SELECT * FROM cases WHERE docket_id = :did") | ||||||
| @@ -135,8 +154,11 @@ def update_db(case_id: str, db: QSqlDatabase) -> int: | |||||||
|     assert span is not None and isinstance(span, Tag) |     assert span is not None and isinstance(span, Tag) | ||||||
|     tmp = span.contents[0] |     tmp = span.contents[0] | ||||||
|     assert isinstance(tmp, str) |     assert isinstance(tmp, str) | ||||||
|     docket_id = tmp.strip() |     matches = re.match(r"(No.)?\s*(\d+[-A]\d+).*$", tmp) | ||||||
|     docket_id = docket_id.replace("No. ", "") |     assert matches is not None | ||||||
|  |     print(matches,matches.groups()) | ||||||
|  |     docket_id = matches.group(2) | ||||||
|  |     print(f"Found {docket_id}") | ||||||
|  |  | ||||||
|     # |     # | ||||||
|     # Title is second row, first column |     # Title is second row, first column | ||||||
| @@ -165,9 +187,14 @@ def update_db(case_id: str, db: QSqlDatabase) -> int: | |||||||
|     tr = di.contents[2] |     tr = di.contents[2] | ||||||
|     assert isinstance(tr, Tag) and isinstance(tr.contents[1], Tag) |     assert isinstance(tr, Tag) and isinstance(tr.contents[1], Tag) | ||||||
|     td = tr.contents[1] |     td = tr.contents[1] | ||||||
|     assert isinstance(td, Tag) and td.string is not None |     assert isinstance(td, Tag) | ||||||
|     docket_date = td.string.strip() |     if td.string is None: | ||||||
|     date = dateparser.parse(docket_date) |         tmp = datetime.datetime(year=1776, month=7, day=4) | ||||||
|  |     else: | ||||||
|  |         docket_date = td.string.strip() | ||||||
|  |         tmp = dateparser.parse(docket_date) | ||||||
|  |     assert isinstance(tmp, datetime.datetime) | ||||||
|  |     date = QDateTime.fromSecsSinceEpoch(int(tmp.timestamp())) | ||||||
|  |  | ||||||
|     # |     # | ||||||
|     # linked case is row 3, column 0 |     # linked case is row 3, column 0 | ||||||
| @@ -176,44 +203,55 @@ def update_db(case_id: str, db: QSqlDatabase) -> int: | |||||||
|     linked = tr.contents[0].string |     linked = tr.contents[0].string | ||||||
|  |  | ||||||
|     # |     # | ||||||
|     # if it does not exists, create it.  This stops a recursion loop. |     # if this case does not exist, create it.  This stops a recursion loop. | ||||||
|     # |     # | ||||||
|     if not exists: |     if not exists: | ||||||
|         query.prepare( |         query.prepare( | ||||||
|             "INSERT INTO cases (docket_id, petitioners, respondents, date, active, linked) " |             "INSERT INTO cases (docket_id, petitioners, respondents, date, active) " | ||||||
|             "VALUES (:did, :pet, :resp, :date, 1, NULL)" |             "VALUES (:did, :pet, :resp, :date, 1)" | ||||||
|         ) |         ) | ||||||
|         query.bindValue(":did", docket_id) |         query.bindValue(":did", docket_id) | ||||||
|         query.bindValue(":pet", petitioners) |         query.bindValue(":pet", petitioners) | ||||||
|         query.bindValue(":resp", respondent) |         query.bindValue(":resp", respondent) | ||||||
|         assert isinstance(date, datetime.date) |         query.bindValue(":date", date) | ||||||
|         query.bindValue(":date", date.timestamp()) |  | ||||||
|         if not query.exec(): |         if not query.exec(): | ||||||
|             query_error(query) |             query_error(query) | ||||||
|         case_id = query.lastInsertId() |         case_id = query.lastInsertId() | ||||||
|         linked_id = None |  | ||||||
|     else: |     else: | ||||||
|         case_id = query.value(0) |         case_id = query.value(0) | ||||||
|         linked_id = query.value("linked") |  | ||||||
|     assert isinstance(case_id, int) |     assert isinstance(case_id, int) | ||||||
|     # |     # | ||||||
|     # If there is a linked case, we need to get the ID for that case. |     # If there is a linked case, we need to get the ID for that case. | ||||||
|     if linked is not None: |     if linked is not None: | ||||||
|         linked = linked.replace("Linked with ", "") |         linked = linked.replace("Linked with ", "") | ||||||
|         query.prepare("SELECT * FROM cases WHERE docket_id = :did") |         for did in linked.split(','): | ||||||
|         query.bindValue(":did", linked) |             did = did.strip() | ||||||
|         if not query.exec(): |             query.prepare("SELECT * FROM cases WHERE docket_id = :did") | ||||||
|             query_error(query) |             query.bindValue(":did", linked) | ||||||
|         if not query.next(): |  | ||||||
|             new_id = update_db(linked, db) |  | ||||||
|         else: |  | ||||||
|             new_id = query.value(0) |  | ||||||
|         if new_id != linked_id: |  | ||||||
|             query.prepare("UPDATE cases SET linked=:lid WHERE case_id = :cid") |  | ||||||
|             query.bindValue(":lid", new_id) |  | ||||||
|             query.bindValue(":cid", case_id) |  | ||||||
|             if not query.exec(): |             if not query.exec(): | ||||||
|                 query_error(query) |                 query_error(query) | ||||||
|  |             if not query.next(): | ||||||
|  |                 linked_id = update_db(did, db) | ||||||
|  |                 if linked_id <= 0: | ||||||
|  |                     raise Exception(f"Unable to create linked case: {did}") | ||||||
|  |             else: | ||||||
|  |                 linked_id = query.value('case_id') | ||||||
|  |             # | ||||||
|  |             #  | ||||||
|  |  | ||||||
|  |             query.prepare("SELECT * FROM cases_cases WHERE lhs = :lhs " | ||||||
|  |                           "AND rhs = :rhs") | ||||||
|  |             query.bindValue(':lhs', case_id) | ||||||
|  |             query.bindValue(':rhs', linked_id) | ||||||
|  |             if not query.exec(): | ||||||
|  |                 query_error(query) | ||||||
|  |             if not query.next(): | ||||||
|  |                 query.prepare("INSERT INTO  cases_cases (lhs, rhs) " | ||||||
|  |                               "VALUES ( :lhs, :rhs)") | ||||||
|  |                 query.bindValue(":lhs", case_id) | ||||||
|  |                 query.bindValue(":rhs", linked_id) | ||||||
|  |                 if not query.exec(): | ||||||
|  |                     query_error(query) | ||||||
|     # |     # | ||||||
|     # XXX - Process lower courts |     # XXX - Process lower courts | ||||||
|     # |     # | ||||||
| @@ -258,8 +296,7 @@ class loadCases(QThread): | |||||||
|         db = QSqlDatabase.cloneDatabase("qt_sql_default_connection", "load") |         db = QSqlDatabase.cloneDatabase("qt_sql_default_connection", "load") | ||||||
|         if not db.open(): |         if not db.open(): | ||||||
|             raise Exception("db.open()") |             raise Exception("db.open()") | ||||||
|         dt = datetime.datetime.now() |         year = QDateTime.currentDateTime().toString("yy") | ||||||
|         year = dt.strftime("%y") |  | ||||||
|         query = QSqlQuery(db) |         query = QSqlQuery(db) | ||||||
|         query.prepare("SELECT * FROM history WHERE year = :year") |         query.prepare("SELECT * FROM history WHERE year = :year") | ||||||
|         print(f"year = {year}") |         print(f"year = {year}") | ||||||
| @@ -291,7 +328,6 @@ class loadCases(QThread): | |||||||
|             else: |             else: | ||||||
|                 docket_id = f"{year}-{number}" |                 docket_id = f"{year}-{number}" | ||||||
|             query.bindValue(":did", docket_id) |             query.bindValue(":did", docket_id) | ||||||
|             print(f"Loading {docket_id}") |  | ||||||
|             if not query.exec(): |             if not query.exec(): | ||||||
|                 query_error(query) |                 query_error(query) | ||||||
|             if query.next(): |             if query.next(): | ||||||
| @@ -303,13 +339,14 @@ class loadCases(QThread): | |||||||
|             print(f"result: {result}") |             print(f"result: {result}") | ||||||
|             if result < 0: |             if result < 0: | ||||||
|                 year = f"{int(year) - 1:02d}" |                 year = f"{int(year) - 1:02d}" | ||||||
|                 query.prepare( |                 if number > 1: | ||||||
|                     "UPDATE history set number = :number WHERE history_id=:hid" |                     query.prepare( | ||||||
|                 ) |                         "UPDATE history set number = :number WHERE history_id=:hid" | ||||||
|                 query.bindValue(":number", number - 1) |                     ) | ||||||
|                 query.bindValue(":hid", history_id) |                     query.bindValue(":number", number - 1) | ||||||
|                 if not query.exec(): |                     query.bindValue(":hid", history_id) | ||||||
|                     query_error(query) |                     if not query.exec(): | ||||||
|  |                         query_error(query) | ||||||
|  |  | ||||||
|                 query.prepare("SELECT * FROM history WHERE year = :year") |                 query.prepare("SELECT * FROM history WHERE year = :year") | ||||||
|                 print(f"year = {year}") |                 print(f"year = {year}") | ||||||
| @@ -336,7 +373,14 @@ class loadCases(QThread): | |||||||
|  |  | ||||||
|             number += 1 |             number += 1 | ||||||
|             count += 1 |             count += 1 | ||||||
|  |         if number > 1: | ||||||
|  |             query.prepare( | ||||||
|  |                 "UPDATE history SET number= :number WHERE year = :year" | ||||||
|  |             ) | ||||||
|  |             query.bindValue(":number", number) | ||||||
|  |             query.bindValue(":year", year) | ||||||
|  |             if not query.exec(): | ||||||
|  |                 query_error(query) | ||||||
|         db.close() |         db.close() | ||||||
|         del db |         del db | ||||||
|         QSqlDatabase.removeDatabase("load") |         QSqlDatabase.removeDatabase("load") | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user