# Code
import requests
import pandas as pd
import json
from bs4 import BeautifulSoup
# Web scraping: extract the lists of municipalities, ghost towns, and
# unincorporated places in Texas from Wikipedia.
import requests
import pandas as pd
import json
from bs4 import BeautifulSoup
def table_extraction(a, b, table_class="wikitable sortable"):
    """Scrape the first HTML table with classes *table_class* from the page
    at URL *a* and write its body rows to a pipe-delimited text file.

    a: URL of the Wikipedia page to scrape.
    b: base name (no extension) of the output file, created under the
       "list of texas places" directory.
    table_class: CSS class string used to locate the target table.

    Each <tr> becomes one output line; every <td> value is followed by "|".
    Rows containing only <th> header cells produce empty lines.
    """
    import os

    url = a
    file = f"list of texas places/{b}.txt"
    # Ensure the output directory exists so open() below does not fail.
    os.makedirs("list of texas places", exist_ok=True)
    response = requests.get(url)
    response.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error page
    # Parse the response body as HTML.
    soup = BeautifulSoup(response.text, "html.parser")
    # NOTE(review): find() returns None when no matching table exists, in
    # which case the loop below raises AttributeError.
    table = soup.find("table", class_=table_class)
    with open(file, "w", encoding="utf-8") as f:
        lines = []
        for row in table.find_all("tr"):
            # One "|"-terminated entry per data cell; <th> cells are skipped.
            lines.append("".join(td.get_text(strip=True) + "|" for td in row.find_all("td")))
        f.write("".join(line + "\n" for line in lines))


def do_all(a, b):
    """Run the scraping pipeline for one page: URL *a* into file *b*."""
    table_extraction(a, b)


do_all(
    "https://en.wikipedia.org/wiki/List_of_municipalities_in_Texas",
    "List of municipalities in Texas",
)
def table_extraction(a, b, table_class="wikitable sortable mw-collapsible"):
    """Scrape the first HTML table with classes *table_class* from the page
    at URL *a* and write its body rows to a pipe-delimited text file.

    a: URL of the Wikipedia page to scrape.
    b: base name (no extension) of the output file, created under the
       "list of texas places" directory.
    table_class: CSS class string used to locate the target table; this
       page's table is collapsible, hence the "mw-collapsible" class.

    Each <tr> becomes one output line; every <td> value is followed by "|".
    Rows containing only <th> header cells produce empty lines.
    """
    import os

    url = a
    file = f"list of texas places/{b}.txt"
    # Ensure the output directory exists so open() below does not fail.
    os.makedirs("list of texas places", exist_ok=True)
    response = requests.get(url)
    response.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error page
    # Parse the response body as HTML.
    soup = BeautifulSoup(response.text, "html.parser")
    # NOTE(review): find() returns None when no matching table exists, in
    # which case the loop below raises AttributeError.
    table = soup.find("table", class_=table_class)
    with open(file, "w", encoding="utf-8") as f:
        lines = []
        for row in table.find_all("tr"):
            # One "|"-terminated entry per data cell; <th> cells are skipped.
            lines.append("".join(td.get_text(strip=True) + "|" for td in row.find_all("td")))
        f.write("".join(line + "\n" for line in lines))


def do_all(a, b):
    """Run the scraping pipeline for one page: URL *a* into file *b*."""
    table_extraction(a, b)


do_all(
    "https://en.wikipedia.org/wiki/List_of_unincorporated_communities_in_Texas",
    "List of unincorporated communities in Texas",
)
def table_extraction(a, b, table_class="wikitable sortable"):
    """Scrape the first HTML table with classes *table_class* from the page
    at URL *a* and write its body rows to a pipe-delimited text file.

    a: URL of the Wikipedia page to scrape.
    b: base name (no extension) of the output file, created under the
       "list of texas places" directory.
    table_class: CSS class string used to locate the target table.

    Each <tr> becomes one output line; every <td> value is followed by "|".
    Rows containing only <th> header cells produce empty lines.
    """
    import os

    url = a
    file = f"list of texas places/{b}.txt"
    # Ensure the output directory exists so open() below does not fail.
    os.makedirs("list of texas places", exist_ok=True)
    response = requests.get(url)
    response.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error page
    # Parse the response body as HTML.
    soup = BeautifulSoup(response.text, "html.parser")
    # NOTE(review): find() returns None when no matching table exists, in
    # which case the loop below raises AttributeError.
    table = soup.find("table", class_=table_class)
    with open(file, "w", encoding="utf-8") as f:
        lines = []
        for row in table.find_all("tr"):
            # One "|"-terminated entry per data cell; <th> cells are skipped.
            lines.append("".join(td.get_text(strip=True) + "|" for td in row.find_all("td")))
        f.write("".join(line + "\n" for line in lines))


def do_all(a, b):
    """Run the scraping pipeline for one page: URL *a* into file *b*."""
    table_extraction(a, b)


do_all(
    "https://en.wikipedia.org/wiki/List_of_ghost_towns_in_Texas",
    "List of ghost towns in Texas",
)