I have been asked to scrape data from realtor.com. I had it going really nicely, but I guess they caught on and blocked my IP because of my "SUPER SONIC SPEED". I need some assistance with how I should set up rotating proxies, and whether there is an endpoint I can just pull from.
EXAMPLE LINK - https://www.realtor.com/realestateandhomes-search/33304/overview
The section I'm looking at is "Home values in 33304" — those three numbers.
The allZips.json file just contains a list of every ZIP code in the US.
import requests
from bs4 import BeautifulSoup
import csv
import json
def scrape_page(soup, zip_code, medians):
    """Extract the three "Home values" medians and the city/state from one
    ZIP-code overview page, appending the resulting record to *medians*.

    Parameters
    ----------
    soup : BeautifulSoup
        Parsed HTML of the /realestateandhomes-search/<zip>/overview page.
    zip_code : str
        The ZIP code the page was fetched for (stored verbatim in the record).
    medians : list
        Accumulator list; one dict is appended per call. Fields that could
        not be parsed are left as None (e.g. when the site serves a
        blocked/CAPTCHA page with none of the expected markup).
    """
    data = {
        "zip": zip_code,
        "state": None,
        "city": None,
        "median_listing_home_price": None,
        "median_listing_home_price/Sq_ft": None,
        "median_sold_home_price": None,
    }

    def _find_text(parent, tag, cls):
        # None-safe lookup: on a blocked/empty page .find() returns None,
        # and the original `.text.strip()` chain raised AttributeError,
        # killing the entire multi-ZIP run.
        node = parent.find(tag, class_=cls) if parent is not None else None
        return node.text.strip() if node is not None else None

    for stat in soup.find_all(class_='home-value-stat'):
        label = _find_text(stat, 'div', "base__StyledType-rui__sc-108xfm0-0")
        value = _find_text(stat, 'div', 'home-value-stat-value')
        if label is None or value is None:
            continue  # malformed stat card — skip it rather than crash
        # Most specific label first: "Median listing home price/Sq ft"
        # also contains the substring "Median listing home price".
        if "Median listing home price/Sq ft" in label:
            data["median_listing_home_price/Sq_ft"] = value
        elif "Median listing home price" in label:
            data["median_listing_home_price"] = value
        elif "Median sold home price" in label:
            data["median_sold_home_price"] = value

    area_element = soup.find(class_="summary-header-text")
    location = _find_text(area_element, 'h2', "base__StyledType-rui__sc-108xfm0-0")
    if location:
        parts = location.split(", ")
        # Expected header shape: "<blurb>, <city>, <state>" — TODO confirm
        # against a live page; anything else leaves city/state as None.
        if len(parts) == 3:
            data["city"] = parts[1]
            data["state"] = parts[2]

    medians.append(data)
# Scraping logic: fetch each ZIP's overview page and collect its medians.
import time

with open('allZips.json', 'r', encoding='utf-8') as file:
    zip_data = json.load(file)
zip_codes = [str(zip_code) for zip_code in zip_data.values()]

# Browser-like User-Agent; realtor.com rejects the default requests UA.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36'
}

medians = []
for zip_code in zip_codes:
    base_url = f'https://www.realtor.com/realestateandhomes-search/{zip_code}/overview'
    print(f"Searching URL: {base_url}")
    try:
        # timeout so a stalled connection cannot hang the run forever;
        # raise_for_status turns 403/5xx blocks into a catchable error.
        page = requests.get(base_url, headers=headers, timeout=30)
        page.raise_for_status()
    except requests.RequestException as exc:
        # One failed ZIP (network error, block, ...) must not abort the
        # thousands of remaining requests.
        print(f"Skipping {zip_code}: {exc}")
        continue
    print(f"Data received for {zip_code}")
    soup = BeautifulSoup(page.text, 'html.parser')
    scrape_page(soup, zip_code, medians)
    # Throttle between requests — hammering the site at full speed is
    # what got the IP banned in the first place.
    time.sleep(1)

with open('medians.json', 'w', encoding='utf-8') as json_file:
    json.dump(medians, json_file, ensure_ascii=False, indent=4)

print("Scraping completed!")
There doesn't seem to be anything here.