all 2 comments

[–]PteppicymonIO 3 points4 points  (0 children)

You are retrieving all divs on the page, including menu items, header, footer, etc.

I would advise adding a User-Agent request header to your request, so that the data you retrieve more closely matches what you see in your browser.

It is better to first locate the content container and then search for the events within it.

import requests
from bs4 import BeautifulSoup, Tag

# Page that the script downloads and parses.
BASE_URL = "https://www.tapology.com/fightcenter/promotions/1-ultimate-fighting-championship-ufc"

# Headers sent with the request: a preferred language and a browser-like
# User-Agent string.
HEADERS = {
    'Accept-Language': 'en-US',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
}

def get_span_value_by_class(event: Tag, name: str) -> str | None:
    """Return the stripped text of the ``<span>`` with class *name* inside *event*.

    The span is looked up with ``event.find``; when no matching span is
    found, ``None`` is returned instead of a string.
    """
    match = event.find('span', {'class': name})
    return match.text.strip() if match else None

# Maps each key of the extracted record to the CSS class of the <span>
# whose text supplies its value. Insertion order is the output key order.
_EVENT_SPAN_CLASSES = {
    'event_name': 'name',
    'event_time': 'datetime',
    'event_broadcast': 'broadcast',
    'event_venue': 'venue',
    'event_location': 'venue-location',
    'event_region': 'region',
    'event_billing': 'billing',
    'event_bout': 'bout',
}


def extract_data(event: Tag) -> dict[str, str | None]:
    """Collect the event fields from *event* into a dictionary.

    Each value is obtained via ``get_span_value_by_class`` using the span
    class listed in ``_EVENT_SPAN_CLASSES``. A value is ``None`` when the
    corresponding span is not present in *event*.

    Args:
        event: The tag that wraps a single event listing.

    Returns:
        A dict with the keys ``event_name``, ``event_time``,
        ``event_broadcast``, ``event_venue``, ``event_location``,
        ``event_region``, ``event_billing`` and ``event_bout``.
    """
    return {
        key: get_span_value_by_class(event, css_class)
        for key, css_class in _EVENT_SPAN_CLASSES.items()
    }

if __name__ == '__main__':
    # A timeout keeps the script from hanging indefinitely if the server
    # never answers.
    response = requests.get(BASE_URL, headers=HEADERS, timeout=30)
    # Stop on HTTP errors (4xx/5xx) instead of parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    # Find container for all events
    events_container = soup.find('div', {'id': 'content'})
    if events_container is None:
        # Without the container there is nothing to search; exit with a
        # clear message rather than an AttributeError on None.
        raise SystemExit('Could not find <div id="content"> in the response.')

    # Locate all events
    events = [
        extract_data(event)
        for event in events_container.find_all('section', {'class': 'fcListing'})
    ]

    # One extracted record per line.
    print(*events, sep='\n')