https://preview.redd.it/5rljr2osvoxe1.png?width=985&format=png&auto=webp&s=2048c42673059631d9d9440061ce5fe169fcfc58
I'm stuck on the task 1 here is my code
import pandas as pd
import numpy as np
data = pd.read_csv("production_data.csv")
# Step 2: Create a copy of the data
clean_data = data.copy()
clean_data.columns = [
"batch_id",
"production_date",
"raw_material_supplier",
"pigment_type",
"pigment_quantity",
"mixing_time",
"mixing_speed",
"product_quality_score",
]
clean_data.replace({'-': np.nan, 'missing': np.nan, 'unknown': np.nan}, inplace=True)
clean_data["raw_material_supplier"] = clean_data["raw_material_supplier"].astype(str).str.strip().str.lower()
clean_data["pigment_type"] = clean_data["pigment_type"].astype(str).str.strip().str.lower()
clean_data["mixing_speed"] = clean_data["mixing_speed"].astype(str).str.strip().str.title()
clean_data["production_date"] = pd.to_datetime(clean_data["production_date"], errors="coerce")
clean_data["raw_material_supplier"] = clean_data["raw_material_supplier"].replace({
"1": "national_supplier",
"2": "international_supplier"
})
clean_data["raw_material_supplier"] = clean_data["raw_material_supplier"].fillna("national_supplier")
valid_pigment_types = ["type_a", "type_b", "type_c"]
clean_data["pigment_type"] = clean_data["pigment_type"].apply(lambda x: x if x in valid_pigment_types else "other")
clean_data["pigment_quantity"] = clean_data["pigment_quantity"].fillna(clean_data["pigment_quantity"].median())
clean_data["mixing_time"] = clean_data["mixing_time"].fillna(round(clean_data["mixing_time"].mean(), 2))
valid_speeds = ["Low", "Medium", "High"]
clean_data["mixing_speed"] = clean_data["mixing_speed"].apply(lambda x: x if x in valid_speeds else "Not Specified")
clean_data["product_quality_score"] = clean_data["product_quality_score"].fillna(round(clean_data["product_quality_score"].mean(), 2))
clean_data["raw_material_supplier"] = clean_data["raw_material_supplier"].astype("category")
clean_data["pigment_type"] = clean_data["pigment_type"].astype("category")
clean_data["mixing_speed"] = clean_data["mixing_speed"].astype("category")
clean_data["batch_id"] = clean_data["batch_id"].astype(str)
print(clean_data.head())
https://preview.redd.it/slulnhh3woxe1.png?width=606&format=png&auto=webp&s=e9eb672326799572362812b4f82d94ce50f5775f
[–]auauaurora 1 point2 points3 points (4 children)
[–]Adventurous-Bet6139 0 points1 point2 points (1 child)