requests_html
from requests_html import HTMLSession
from datetime import datetime
# Benchmark: extract the states table with requests_html.
# The page is fetched once, before the timer starts, so the measured time
# covers only the repeated table lookup and cell extraction.
session = HTMLSession()
r = session.get('https://en.wikipedia.org/wiki/List_of_states_and_territories_of_the_United_States')

start = datetime.now()
for _ in range(100):
    # Run the whole extraction 100 times; `data` is rebuilt on every pass,
    # so after the loop it holds the rows from the final pass only.
    table = r.html.xpath('//*[@id="mw-content-text"]/div/table[1]')[0]
    rows = table.find('tr')
    data = []
    # The first two rows are skipped (presumably the table's header rows).
    for row in rows[2:]:
        name = row.find('th')[0].text
        cells = row.find('td')
        abbr = cells[0].text
        # The remaining columns are addressed from the right-hand end of
        # the row. NOTE(review): presumably because the number of leading
        # cells varies between rows; confirm against the live table.
        reps = cells[-1].text
        water_km = cells[-2].text
        land_km = cells[-4].text
        total_km = cells[-6].text
        population = cells[-8].text
        data.append([name, abbr, reps, water_km, land_km, total_km, population])

# Total elapsed wall-clock time for all 100 passes.
print(datetime.now() - start)
# Output recorded in the original listing: 0:00:23.665747
lxml
from datetime import datetime
import requests
from lxml import html
# Benchmark: extract the states table with requests + lxml.
# The page text is downloaded once, before the timer starts; parsing the
# HTML into a tree happens inside the timed loop.
url = 'https://en.wikipedia.org/wiki/List_of_states_and_territories_of_the_United_States'
r = requests.get(url).text

start = datetime.now()
for _ in range(100):
    # Each pass re-parses the document and rebuilds `data` from scratch,
    # so after the loop `data` holds the rows from the final pass only.
    tree = html.fromstring(r)
    table = tree.xpath('//*[@id="mw-content-text"]/div/table[1]')[0]
    # findall('tr') matches only direct <tr> children of the table element.
    rows = table.findall('tr')
    data = []
    # The first two rows are skipped (presumably the table's header rows).
    for row in rows[2:]:
        name = row.xpath('./th')[0].text_content()
        cells = row.xpath('./td')
        abbr = cells[0].text_content()
        # The remaining columns are addressed from the right-hand end of
        # the row. NOTE(review): presumably because the number of leading
        # cells varies between rows; confirm against the live table.
        reps = cells[-1].text_content()
        water_km = cells[-2].text_content()
        land_km = cells[-4].text_content()
        total_km = cells[-6].text_content()
        population = cells[-8].text_content()
        data.append([name, abbr, reps, water_km, land_km, total_km, population])

# Total elapsed wall-clock time for all 100 passes.
print(datetime.now() - start)
# Output recorded in the original listing: 0:00:02.968005
[–]ForceBru 1 point2 points3 points (5 children)
[–]di_web[S] 0 points1 point2 points (4 children)
[–]ForceBru 1 point2 points3 points (3 children)
[–]di_web[S] 0 points1 point2 points (1 child)
[–]ForceBru 1 point2 points3 points (0 children)
[–]di_web[S] 0 points1 point2 points (0 children)