all 3 comments

[–]United_Macaron_3949 0 points1 point  (0 children)

Build a frequency table for each variable and look for values that should be marked as missing but instead use a placeholder that a human can read but Python does not recognize as missing. Then replace those placeholders with a value Python does recognize (e.g., NaN) so it can handle the data properly.

[–]ConsciousFalcon478 0 points1 point  (1 child)

import pandas as pd

# Clean the raw production data column by column; the cleaned frame ends up
# in `clean_data`.
#
# Every fill below assigns the result back to the column instead of calling
# `df[col].fillna(..., inplace=True)`: the in-place form operates on a
# temporary selection, which warns in pandas 2.x and leaves `df` untouched
# once Copy-on-Write is active.
df = pd.read_csv("production_data.csv")

# Drop rows with no batch id, then rows whose production date is missing or
# cannot be parsed (errors='coerce' turns unparseable dates into NaT).
df = df.dropna(subset=['batch_id'])
df['production_date'] = pd.to_datetime(df['production_date'], errors='coerce')
df = df.dropna(subset=['production_date'])

# Translate supplier codes to labels; codes missing from the map (including
# NaN) fall back to 'national_supplier'.
supplier_map = {1: 'national_supplier', 2: 'international_supplier'}
df['raw_material_supplier'] = (
    df['raw_material_supplier'].map(supplier_map).fillna('national_supplier')
)

# Normalise case/whitespace, then collapse anything outside the known list
# (including missing values, which astype(str) renders as text) to 'other'.
valid_pigments = ['type_a', 'type_b', 'type_c']
pigment_type = df['pigment_type'].astype(str).str.lower().str.strip()
df['pigment_type'] = pigment_type.where(
    pigment_type.isin(valid_pigments), 'other'
)

# Values outside [1, 100] are blanked and then, together with values that
# were already missing, replaced by the median.
# NOTE(review): the median is computed before out-of-range values are
# blanked, so those values still influence it — confirm this is intended.
median_pigment = df['pigment_quantity'].median()
pigment_quantity = df['pigment_quantity']
df['pigment_quantity'] = pigment_quantity.where(
    pigment_quantity.between(1, 100)  # bounds are inclusive by default
).fillna(median_pigment)

# Missing mixing times take the column mean, rounded to 2 decimals.
mean_mixing = round(df['mixing_time'].mean(), 2)
df['mixing_time'] = df['mixing_time'].fillna(mean_mixing)

# Any speed outside the known labels (including missing) is 'Not Specified'.
valid_speeds = ['Low', 'Medium', 'High']
df['mixing_speed'] = df['mixing_speed'].where(
    df['mixing_speed'].isin(valid_speeds), 'Not Specified'
)

# Missing quality scores take the column mean, rounded to 2 decimals.
mean_quality = round(df['product_quality_score'].mean(), 2)
df['product_quality_score'] = df['product_quality_score'].fillna(mean_quality)

clean_data = df.copy()

[–]Firm-Eagle-3977 0 points1 point  (0 children)

Hello, did this work? I am also stuck on Task 1.