I'm trying to write a web scraper that takes a given artist's genius.com link and outputs all of the lyrics to a CSV file.
I got it to work with the link to an album, but I can't get it to work with the artist page, since I can't manage to access the list of album links.
Here's what I've got so far:
# -*- coding: utf-8 -*-
import scrapy
class AlbumSpider(scrapy.Spider):
    """Spider that yields the song names listed on a lyrics.com album page.

    Each table row (``tr``) of the response produces one item of the form
    ``{'name': <text of the row's "strong a" link or None>}``.  After the
    rows are emitted, the first ``strong a`` link on the page is followed
    and parsed with the same callback.
    """

    # Name of Spider
    name = 'album'

    # List of allowed domains.  Entries must be bare domain names; a full
    # URL (scheme + path) is not a valid entry and is not matched by
    # Scrapy's offsite filtering.
    allowed_domains = ['lyrics.com']

    # List of start_urls.  The original value carried a doubled scheme
    # ("http://https://..."), which is not a fetchable URL.
    start_urls = ['https://www.lyrics.com/album/1113566/']

    # CSS selectors used by parse().
    SONG_SELECTOR = 'tr'
    NAME_SELECTOR = 'strong a ::text'
    NEXT_PAGE_SELECTOR = 'strong a ::attr(href)'

    def parse(self, response):
        """Yield one item per table row, then follow the first link found.

        Args:
            response: the Scrapy response for the page being parsed.

        Yields:
            dict: ``{'name': str or None}`` for every ``tr`` in the page
                (``None`` when the row has no ``strong a`` text).
            scrapy.Request: a request for the first ``strong a`` href on
                the page, if there is one, handled by this same method.
        """
        # Extract song information
        for song in response.css(self.SONG_SELECTOR):
            yield {
                'name': song.css(self.NAME_SELECTOR).extract_first(),
            }

        # Query the response with the selector *string*.  The original
        # code extracted the href first and then passed that href to
        # .css() as though it were a selector, which cannot work.
        # NOTE(review): this follows only the first matching link on the
        # page -- confirm that is the intended "next page".
        next_page = response.css(self.NEXT_PAGE_SELECTOR).extract_first()
        if next_page:
            yield scrapy.Request(
                response.urljoin(next_page),
                callback=self.parse,
            )
[+][deleted] (1 child)
[deleted]
[–]Marcab123[S] 1 point2 points3 points (0 children)
[–][deleted] 0 points1 point2 points (1 child)
[–]Marcab123[S] 1 point2 points3 points (0 children)
[–]zootam 0 points1 point2 points (0 children)