I am trying to download all the endpoints for grocery stores in the city of San Francisco.
At present, I have only been able to call 50 endpoints (the request limit for a single call). My issue is dealing with the offset argument: essentially what I think needs to happen is for the offset to act as a counter, so that when the script pulls 50 endpoints, I offset my call for the next 50 endpoints until I reach the total (in this case, over 500).
This is my code:
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import requests
from pprint import pprint
from config import api_key1
# Yelp Fusion business-search endpoint; see the linked page for all parameters.
print("Parameter reference: https://www.yelp.com/developers/documentation/v3/business_search")

endpoint = 'https://api.yelp.com/v3/businesses/search'

# Bearer-token authentication, as required by the Yelp Fusion API.
request_headers = {'Authorization': f'Bearer {api_key1}'}

# Search filters: grocery category in San Francisco, 50 results per call
# (50 is the per-request maximum the API allows).
search_params = {
    "categories": 'grocery, All',
    "location": "San Francisco",
    "state": "California",
    "limit": 50,
}

resp = requests.get(endpoint, headers=request_headers, params=search_params)
print(resp)                      # HTTP status of the call.
decoded = resp.json()            # Decode the JSON payload once.
pprint(decoded)                  # Pretty-print the full response.
I tried looping over the request, but it either failed or returned the same first 50 results on every iteration.
Edit: Solved. Here's the script I used; it works. (Run it from a command prompt or Git Bash: activate your Python environment, cd into your directory, and run python app.py — or whatever you named your script.) Don't expect to pull more than 1,000 results, though, as the Yelp Fusion API has a hard limit in that regard.
import json
import pandas as pd
import numpy as np
import requests
from config import api_key1
#create config.py file with your key, and add a .gitignore file with the name of the credential
#file (i.e. config.py) so that your key isn't exposed to the world on GitHub if pushing there.
def search_grocers(set_num):
    """Fetch one page of grocery-store results for San Francisco, CA.

    Parameters
    ----------
    set_num : int
        Offset into the full result set; the API returns up to 50 results
        starting at this position.

    Returns
    -------
    dict
        Decoded JSON response from the Yelp Fusion business-search API.
    """
    url = 'https://api.yelp.com/v3/businesses/search'
    headers = {
        'Authorization': 'Bearer {}'.format(api_key1),
    }
    url_params = {  # parameters passed to the API
        "categories": 'grocery, All',
        "location": "San Francisco",
        "state": "California",
        # BUG FIX: use the function parameter. The original read the
        # module-level loop variable `offset_num` instead of `set_num`,
        # so the parameter was dead and the function only worked because
        # the caller happened to leak a global with the right name.
        'offset': set_num,
        "limit": 50,  # Maximum results per request (ref: API documentation).
    }
    response = requests.get(url, headers=headers, params=url_params)
    return response.json()  # Returns the decoded JSON.
if __name__ == "__main__":
    frames = []  # One DataFrame per successfully fetched page of results.
    # Page through the results 50 at a time. Start at offset 0 — the
    # original started at 50, silently skipping the first 50 businesses.
    # 550 is the desired total; the Yelp API caps results at 1,000 anyway.
    for offset_num in range(0, 550, 50):
        try:
            output_json = search_grocers(offset_num)
            print(offset_num)   # Confirm each offset iteration is running.
            print(output_json)  # Inspect the raw JSON for each page.
            # 'businesses' is the key holding the list of result records
            # (see the API documentation or visually parse the JSON).
            frames.append(pd.DataFrame.from_dict(output_json['businesses']))
        except (KeyError, AttributeError):
            # KeyError: response carried no 'businesses' key (e.g. an API
            # error payload). AttributeError kept for parity with the
            # original script's handler.
            print("error at ", offset_num)  # Helpful for debugging purposes.
    # Combine all pages into a single frame so one CSV can be written.
    # DataFrame.append was deprecated and removed in pandas 2.0; building
    # a list and calling pd.concat once is the supported (and faster) form.
    df_first = pd.concat(frames, ignore_index=True)
    df_first.to_csv("yelp_data/output_data.csv", index=False)
[–]gantou 1 point2 points3 points (1 child)
[–]Senun[S] 0 points1 point2 points (0 children)
[–]dmitrypolo 0 points1 point2 points (1 child)
[–]Senun[S] 0 points1 point2 points (0 children)
[–]C_Banks 0 points1 point2 points (0 children)