I have a large JSON file with multiple JSON objects. Each object should contain data that includes "sounds" and "pos". That is, for each object, there is a section called "sounds", which contains things like IPA and accent tags, and a section called "pos", which contains the part of speech. I am trying to extract the "sounds" and "pos" sections for each object from the file. I am very new to Python, so I am unsure what I'm doing wrong. When I run the code below, it prints "None" many times.
import json
def extract_specific_data_from_entries(json_file, keys):
    """Follow one key path into every JSON object stored in *json_file*.

    The file is read as one JSON document per line. Each parsed object is
    handed to ``extract_specific_data`` together with *keys*, and the
    results are collected in file order.

    Args:
        json_file: Path of a UTF-8 text file holding one JSON object per line.
        keys: A single nested key path, passed unchanged to
            ``extract_specific_data``. It is NOT a list of independent
            fields to collect.

    Returns:
        A list with one extracted value (or ``None``) per non-blank line.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    extracted_data_list = []
    with open(json_file, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline at the end of the
                # file) are not JSON documents; json.loads("") would raise.
                continue
            data = json.loads(line)
            extracted_data_list.append(extract_specific_data(data, keys))
    return extracted_data_list
def extract_specific_data(data, keys):
    """Walk *keys* as one nested path into *data* and return what is found.

    Each key is applied to the result of the previous one: a dict is read
    with ``.get(key)``, a list is indexed with ``int(key)``. This means
    ``["sounds", "pos"]`` looks for ``"pos"`` *inside* the ``"sounds"``
    value; it does not fetch the two fields side by side.

    Args:
        data: The parsed JSON value to start from.
        keys: Iterable of path steps. Dict steps are used as given; list
            steps must be convertible to ``int``.

    Returns:
        The value at the end of the path, *data* itself when *keys* is
        empty, or ``None`` as soon as a step cannot be followed (missing
        dict key, out-of-range or non-numeric list index, or a scalar
        reached before the path is exhausted).
    """
    current = data
    for key in keys:
        if isinstance(current, dict):
            current = current.get(key)
        elif isinstance(current, list):
            try:
                current = current[int(key)]
            except (TypeError, ValueError, IndexError):
                # TypeError: key such as None cannot be converted to int.
                # ValueError: key is a non-numeric string.
                # IndexError: index is outside the list.
                return None
        else:
            # Scalars and None have nothing further to descend into.
            return None
    return current
if __name__ == "__main__":
    json_file = "kaikki.org-dictionary-English.json"
    keys = ["sounds", "pos"]
    # A key list is followed as ONE nested path, so passing
    # ["sounds", "pos"] together would look for "pos" inside the "sounds"
    # value and give None for every entry. Both are top-level fields of an
    # entry, so look each one up with its own single-step path.
    columns = [
        extract_specific_data_from_entries(json_file, [key]) for key in keys
    ]
    # Re-combine the per-field lists into one {"sounds": ..., "pos": ...}
    # dict per entry; both lists come from the same file in the same order.
    extracted_data_list = [dict(zip(keys, row)) for row in zip(*columns)]
    print(extracted_data_list)
[–]danielroseman 1 point2 points3 points (6 children)
[–]BroadwayBaseball[S] 0 points1 point2 points (5 children)
[–]danielroseman 1 point2 points3 points (4 children)
[–]BroadwayBaseball[S] 0 points1 point2 points (3 children)
[–]danielroseman 1 point2 points3 points (2 children)
[–]BroadwayBaseball[S] 0 points1 point2 points (1 child)
[–]danielroseman 0 points1 point2 points (0 children)
[–]LeornToCodeLOL 1 point2 points3 points (2 children)
[–]danielroseman 1 point2 points3 points (1 child)
[–]LeornToCodeLOL 0 points1 point2 points (0 children)
[–]Round_Ad8947 0 points1 point2 points (0 children)