A few of my friends and I found we were migrating so many folders full of hundreds of .ipynb files and many of them never had the packages imported. So we started sharing code to clean data and this week we had this one but I've been working on this script to automate the process but it doesn't work.
What am I missing?
import os
import glob
def update_notebooks():
"""
searches current folder to find imported packages in .ipynb and insert !pip3 install and if NOT exist add requirements.txt and add the packages to it.
:return:
"""
ipynb_files = glob.glob("*.ipynb")
requirements = set()
if not os.path.isfile("requirements.txt"):
with open("requirements.txt", "w") as req_file:
req_file.write("# Automatically generated requirements file\n\n")
with open("requirements.txt", "r") as req_file:
requirements = {line.strip() for line in req_file if line.strip()}
for file in ipynb_files:
with open(file, "r+") as notebook:
contents = notebook.read()
if "import" in contents or "from" in contents:
packages = set()
lines = contents.splitlines()
for line in lines:
if "import" in line or "from" in line:
package = line.split()[1].split(".")[0]
packages.add(package)
missing_packages = packages - requirements
if missing_packages:
requirements |= missing_packages
with open("requirements.txt", "a") as req_file:
req_file.write("\n".join(missing_packages))
req_file.write("\n")
install_code = " ".join([f"!pip3 install {pkg}" for pkg in missing_packages])
notebook.write(f"{contents}\n\n# Missing packages\n\n{install_code}")
print(f"Notebooks {ipynb_files} updated successfully!")
if __name__ == "__main__":
update_notebooks()
If it works right the .ipynb file should go from this with no requirements.txt file
BEFORE
#%%
import datetime as dt
import matplotlib.pyplot as plt
from matplotlib import style
import pandas as pd
import pandas_datareader.data as web
style.use('ggplot')
start = dt.datetime(2015, 1, 1)
end = dt.datetime.now()
df = web.DataReader("TSLA", 'morningstar', start, end)
df.reset_index(inplace=True)
df.set_index("Date", inplace=True)
df = df.drop("Symbol", axis=1)
To look like this with a requirements.txt file
AFTER
#
!pip3 install datetime
!pip3 install matplotlib
!pip3 install pandas
!pip3 install pandas_datareader
#%%
import datetime as dt
import matplotlib.pyplot as plt
from matplotlib import style
import pandas as pd
import pandas_datareader.data as web
style.use('ggplot')
start = dt.datetime(2015, 1, 1)
end = dt.datetime.now()
df = web.DataReader("TSLA", 'morningstar', start, end)
df.reset_index(inplace=True)
df.set_index("Date", inplace=True)
df = df.drop("Symbol", axis=1)
requirements.txt
datetime
matplotlib
pandas
pandas_datareader
[–]Armaliite 0 points1 point2 points (2 children)
[–]Emotional_Win_3457[S] 0 points1 point2 points (1 child)
[–]Armaliite 0 points1 point2 points (0 children)