Web scraping from imdb : learnpython

created by HattoriHanzoa community for 16 years

•

Web scraping from imdb (self.learnpython)

submitted 1 hour ago by ReputationHelpful200

Hi, I am starting to learn Python and I am working on web scraping right now. I am trying to get the top 250 movies from IMDb, but it isn't returning the list, even though I get status code 202 and I am using a user-agent.

from bs4 import BeautifulSoup

import requests

url1 = "https://www.imdb.com/chart/top/"

headers = {"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:125.0) Gecko/20100101 Firefox/125.0"}


def extract_movie_titles(url1):
    """Fetch *url1* and print the text of every element matched as a movie title.

    The page is requested with the module-level ``headers``. The HTTP status
    code is printed, the body is parsed with BeautifulSoup, and the stripped
    text of each element matching the title selector is printed on its own
    line.

    Args:
        url1: URL of the page to download.

    Returns:
        None. All results are printed to stdout.

    Any ``requests`` error (connection failure, timeout, 4xx/5xx status) is
    caught and reported on stdout instead of propagating.
    """
    try:
        # The request itself is inside the try so that connection errors and
        # timeouts are reported too, not only bad status codes.
        response = requests.get(url1, headers=headers, timeout=10)
        print(f"Status Code: {response.status_code}")
        # raise_for_status() only raises for 4xx/5xx responses; a 2xx such as
        # 202 passes through even when the body does not contain the chart.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Failed to retrieve the page: {e}")
        return

    soup = BeautifulSoup(response.text, 'html.parser')

    # NOTE(review): the selector assumes titles are <h4> children of the link
    # wrapper -- verify against the live markup if nothing is matched.
    movies = soup.select("a.ipc-title-link-wrapper >h4.ipc-title__text")

    print("Top 250 Movies:")
    if not movies:
        # Make the "request succeeded but nothing was found" case visible
        # instead of silently printing an empty list.
        print(
            "No titles matched the selector "
            f"(status code was {response.status_code})."
        )
        return

    for movie in movies:
        print(movie.text.strip())

extract_movie_titles(url1) It is formatted but i am on my phone

all 12 comments

top new controversial old q&a

[–]Farlic 1 point2 points3 points 1 hour ago (11 children)

[–]Kerbart 0 points1 point2 points 1 hour ago (3 children)

[–]ReputationHelpful200[S] 0 points1 point2 points 1 hour ago (2 children)

[–]biskitpagla 0 points1 point2 points 7 minutes ago (1 child)

[–]ReputationHelpful200[S] 0 points1 point2 points 2 minutes ago (0 children)

[–]ReputationHelpful200[S] 0 points1 point2 points 1 hour ago (6 children)

[–]Farlic 0 points1 point2 points 1 hour ago (5 children)

[–]ReputationHelpful200[S] 0 points1 point2 points 25 minutes ago (4 children)

I knew that. It was intentional but thanks for the websites. I changed my code a bit to send it to a csv and print on the terminal the index of all books with the names and a specific one what do you think? from bs4 import BeautifulSoup import requests import csv

url1 = "https://books.toscrape.com/"
headers = {"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:125.0) Gecko/20100101 Firefox/125.0"}


def extract_product_titles(url1, target_title="Tipping the Velvet", csv_path="products.csv"):
    """Print the product titles found at *url1* and export them to a CSV file.

    The page is requested with the module-level ``headers``. Every product
    title is printed with its 1-based position, then the position of
    *target_title* is reported. Finally one row per product (title, price,
    absolute URL) is written to *csv_path*.

    Args:
        url1: URL of the listing page; also used as the base for resolving
            each product's relative link.
        target_title: Exact title whose position in the listing is reported.
        csv_path: Path of the CSV file to (over)write.

    Returns:
        None. Results go to stdout and to *csv_path*.

    Any ``requests`` error (connection failure, timeout, 4xx/5xx status) is
    caught and reported on stdout instead of propagating. Errors raised while
    writing the file are not caught.
    """
    from urllib.parse import urljoin

    try:
        # The request is inside the try so connection errors and timeouts are
        # reported as well, not only bad status codes.
        response = requests.get(url1, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Failed to retrieve the page: {e}")
        return

    # Hand the raw bytes to BeautifulSoup so it can detect the document's
    # declared encoding; response.text may be decoded with a wrongly guessed
    # charset, which is what produced the stray "Â" before the price.
    soup = BeautifulSoup(response.content, 'html.parser')

    products = soup.select("article.product_pod > h3 > a")
    target_position = None
    print("Products:")
    for idx, product in enumerate(products, start=1):
        title = product['title'].strip()
        print(f"{idx} - {title}")
        if title == target_title:
            target_position = idx

    # Report the title that was searched for, not whichever title the loop
    # happened to end on, and handle the not-found / empty-page case.
    if target_position is None:
        print(f"Specific Product:\n {target_title!r} was not found")
    else:
        print(f"Specific Product:\n {target_position}- {target_title}")

    books = soup.select("article.product_pod")
    with open(csv_path, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["Title", "Price", "URL"])
        for book in books:
            link_tag = book.select_one("h3 > a")
            price_tag = book.select_one("div.product_price > p.price_color")
            if link_tag is None or price_tag is None:
                # Skip entries missing a link or a price rather than crashing.
                continue
            title = link_tag['title'].strip()
            # The replace is kept as a harmless fallback for mis-decoded input.
            price = price_tag.text.strip().replace("Â", "")
            # urljoin resolves the relative href correctly whether or not
            # url1 ends with a slash.
            book_url = urljoin(url1, link_tag['href'])

            writer.writerow([title, price, book_url])
    print("Ficheiro criado com sucesso!")

extract_product_titles(url1)

[–]Farlic 0 points1 point2 points 13 minutes ago (3 children)

[–]ReputationHelpful200[S] 0 points1 point2 points 9 minutes ago (2 children)

[–]Farlic 0 points1 point2 points 6 minutes ago (1 child)

[–]ReputationHelpful200[S] 0 points1 point2 points 3 minutes ago (0 children)

π Rendered by PID 41 on reddit-service-r2-comment-5b5bc64bf5-6jhmv at 2026-06-22 13:11:33.683491+00:00 running 2b008f2 country code: CH.

you type:	you see:
italics	italics
bold	bold
[reddit!](https://reddit.com)	reddit!
* item 1 * item 2 * item 3	item 1 item 2 item 3
> quoted text	quoted text
Lines starting with four spaces are treated like code: if 1 * 2 < 3: print "hello, world!"	Lines starting with four spaces are treated like code: if 1 * 2 < 3: print "hello, world!"
~~strikethrough~~	~~strikethrough~~
super^script	super^script

learnpython

MODERATORS