Code
from bs4 import BeautifulSoup
import re
from bs4 import BeautifulSoup
import re
# Read the saved USDA-APHIS HTML page from disk. The encoding is passed
# explicitly so decoding does not depend on the platform default.
with open('../data/source/html/USDA-APHIS.html', 'r', encoding='utf-8') as file:
    html_content = file.read()

# Parse the page so that its elements can be searched.
# NOTE(review): 'html' appears to be a parser *feature* rather than a parser
# name, so Beautiful Soup chooses among whichever HTML parsers are installed.
# Consider naming one explicitly (e.g. 'html.parser' or 'lxml') so every
# environment parses the file the same way -- confirm which one is intended.
soup = BeautifulSoup(html_content, 'html')
Use web scraping to get the URL code for each state so that we can use these codes to build all of the URLs.
# Every <option> element in the parsed page. The state entries are selected
# below by the "STATE" marker inside their value attribute.
options = soup.find_all("option")

# Compiled once, outside the loop: captures the two-character abbreviation
# that follows 'STATE:' and is terminated by ':'.
STATE_ABBR_PATTERN = re.compile(r"STATE:(\w\w):")
# Text that immediately precedes the state code inside an option value.
STATE_CODE_DELIMITER = "k6Slc6nBda61qZ"

for option in options:
    # .get() keeps an <option> that has no value attribute from raising
    # KeyError; such options are simply not state entries.
    input_string = option.get("value", "")
    if "STATE" not in input_string:
        continue

    # State
    # ==================
    # Extract the state abbreviation if the pattern is found. When it is not,
    # the abbreviation stays None and is printed as 'None' below.
    match = STATE_ABBR_PATTERN.search(input_string)
    state_abbr = match.group(1) if match else None

    # State Code
    # ===========
    # The code is the text that follows the delimiter. Skip the option when
    # the delimiter is absent instead of failing with IndexError.
    pieces = input_string.split(STATE_CODE_DELIMITER)
    if len(pieces) < 2:
        continue
    state_code = pieces[1]

    # Emit one "'<abbr>' = '<code>'," line per state entry.
    print(f"'{state_abbr}' = '{state_code}',")
'AK' = '-1b2o=',
'AL' = '-1cGo=',
'AR' = '-1dmo=',
'AZ' = '-1fmo=',
'CA' = '-3ZWo=',
'CO' = '-3c2o=',
'CT' = '-3eGo=',
'DC' = '-4Z2o=',
'DD' = '-4aGo=',
'DE' = '-4aWo=',
'FL' = '-6cGo=',
'GA' = '-7ZWo=',
'GU' = '-7eWo=',
'HI' = '-8bWo=',
'IA' = '-9ZWo=',
'ID' = '-9aGo=',
'IL' = '-9cGo=',
'IN' = '-9cmo=',
'KS' = '-_d2o=',
'KY' = '-_fWo=',
'LA' = '_AZWo=',
'MA' = '_BZWo=',
'MD' = '_BaGo=',
'ME' = '_BaWo=',
'MI' = '_BbWo=',
'MN' = '_Bcmo=',
'MO' = '_Bc2o=',
'MS' = '_Bd2o=',
'MT' = '_BeGo=',
'NC' = '_CZ2o=',
'ND' = '_CaGo=',
'NE' = '_CaWo=',
'NH' = '_CbGo=',
'NJ' = '_Cbmo=',
'NM' = '_CcWo=',
'NV' = '_Cemo=',
'NY' = '_CfWo=',
'OH' = '_DbGo=',
'OK' = '_Db2o=',
'OR' = '_Ddmo=',
'PA' = '_EZWo=',
'PR' = '_Edmo=',
'RI' = '_GbWo=',
'SC' = '_HZ2o=',
'SD' = '_HaGo=',
'TN' = '_Icmo=',
'TX' = '_IfGo=',
'UT' = '_JeGo=',
'VA' = '_KZWo=',
'None' = '_Jd4a-nA==',
'VT' = '_KeGo=',
'WA' = '_LZWo=',
'WI' = '_LbWo=',
'WV' = '_Lemo=',
'WY' = '_LfWo=',