Process PDR G Report

Code
from bs4 import BeautifulSoup
import re
Code
# Read the locally saved USDA-APHIS page from disk as UTF-8 text.
with open(
    '../data/source/html/USDA-APHIS.html',
    mode='r',
    encoding='utf-8',
) as file:
    html_content = file.read()

# Parse the markup into a tree that the following cells query for <option> tags.
soup = BeautifulSoup(markup=html_content, features='html')

Use web scraping to extract the URL code for each state, so we can use those codes to build all the URLs.

Code
# Print one `'<abbr>' = '<code>',` line for every <option> whose value
# mentions STATE, so the pairs can be copied into a lookup table.

# Two word characters between 'STATE:' and the next ':' are the abbreviation.
STATE_ABBR_PATTERN = re.compile(r"STATE:(\w\w):")
# Fixed text in the option value; the state code is whatever follows it.
STATE_CODE_MARKER = "k6Slc6nBda61qZ"

options = soup.find_all("option")

for option in options:
    # An <option> without a value attribute would raise KeyError if indexed
    # directly, so fall back to an empty string, which is skipped just below.
    input_string = option.get("value", "")
    if "STATE" not in input_string:
        continue

    # State
    # ==================
    # When the abbreviation is not exactly two word characters the pattern
    # does not match and the key is printed as 'None'; check such rows by hand.
    match = STATE_ABBR_PATTERN.search(input_string)
    state_abbr = match.group(1) if match else None

    # State Code
    # ===========
    # Without the marker there is no code to extract; skip the option instead
    # of failing with IndexError on the missing second piece.
    pieces = input_string.split(STATE_CODE_MARKER)
    if len(pieces) < 2:
        continue
    state_code = pieces[1]

    print(f"'{state_abbr}' = '{state_code}',")
'AK' = '-1b2o=',
'AL' = '-1cGo=',
'AR' = '-1dmo=',
'AZ' = '-1fmo=',
'CA' = '-3ZWo=',
'CO' = '-3c2o=',
'CT' = '-3eGo=',
'DC' = '-4Z2o=',
'DD' = '-4aGo=',
'DE' = '-4aWo=',
'FL' = '-6cGo=',
'GA' = '-7ZWo=',
'GU' = '-7eWo=',
'HI' = '-8bWo=',
'IA' = '-9ZWo=',
'ID' = '-9aGo=',
'IL' = '-9cGo=',
'IN' = '-9cmo=',
'KS' = '-_d2o=',
'KY' = '-_fWo=',
'LA' = '_AZWo=',
'MA' = '_BZWo=',
'MD' = '_BaGo=',
'ME' = '_BaWo=',
'MI' = '_BbWo=',
'MN' = '_Bcmo=',
'MO' = '_Bc2o=',
'MS' = '_Bd2o=',
'MT' = '_BeGo=',
'NC' = '_CZ2o=',
'ND' = '_CaGo=',
'NE' = '_CaWo=',
'NH' = '_CbGo=',
'NJ' = '_Cbmo=',
'NM' = '_CcWo=',
'NV' = '_Cemo=',
'NY' = '_CfWo=',
'OH' = '_DbGo=',
'OK' = '_Db2o=',
'OR' = '_Ddmo=',
'PA' = '_EZWo=',
'PR' = '_Edmo=',
'RI' = '_GbWo=',
'SC' = '_HZ2o=',
'SD' = '_HaGo=',
'TN' = '_Icmo=',
'TX' = '_IfGo=',
'UT' = '_JeGo=',
'VA' = '_KZWo=',
'None' = '_Jd4a-nA==',
'VT' = '_KeGo=',
'WA' = '_LZWo=',
'WI' = '_LbWo=',
'WV' = '_Lemo=',
'WY' = '_LfWo=',