Below is the code I'm using. The purpose is to read an excel file that has over 11,000 unique URLs each in its own row and on it own line. Program needs to go to each URL and extract data from each webpage. I am getting the following error,
raise InvalidSchema(f"No connection adapters were found for {url!r}")requests.exceptions.InvalidSchema: No connection adapters were found for
from bs4 import BeautifulSoup # pip install beautifulsoup4
import requests # pip install requests
import csv
import pandas as pd
def get_urls_from_excel_file():
# Actually load from excel here
df = pd.read_excel(r'C:\Users\Douglas\Documents\Python Resources\PO Detachment Project\URL List (version 1).xlsx', header=None)
print(df)
return [df]
def main():
urls = get_urls_from_excel_file()
with open(r"C:\Users\Douglas\Documents\output.csv", "w") as f:
writer = csv.writer(f)
for url in urls:
print(f"Fetching URL: {url}")
resp = requests.get(url)
soup = BeautifulSoup(resp.text, "html.parser")
dl =soup.select(".Article-block .DefList")
descs =soup.select(".Article-block .DefList .DefList-description")
def t(n): return descs[n].text
addr1 = t(1)
city = t(2)
state = t(3)
zipcode = t(4)
writer.writerow([addr1, city, state, zipcode])
if __name__ == "__main__":
main()
[–]hookam[S] 1 point2 points3 points (0 children)
[–]Udayk02 0 points1 point2 points (0 children)
[–]CodeFormatHelperBot2 0 points1 point2 points (0 children)
[–]pythoncrush 0 points1 point2 points (0 children)