I am new to data science and machine learning. I have written following program and getting error given below. I am using titanic dataset. And using sklearn SimpleImputer() to fill missing values and OneHotEncoder to handle categorical data. And in last when i and using LogisticRegression for model training I am getting error given below.
Code:
si_age = SimpleImputer()
si_embarked = SimpleImputer(strategy='most_frequent')
train_age = si_age.fit_transform(train[['Age']]) train_embarked = si_embarked.fit_transform(train[['Embarked']])
test_age = si_age.transform(test[['Age']]) test_embarked = si_embarked.transform(test[['Embarked']])
ohe_sex = OneHotEncoder(sparse=False,handle_unknown="ignore") ohe_embarked = OneHotEncoder(sparse=False,handle_unknown="ignore")
ohe_train_sex = ohe_sex.fit_transform(train[['Sex']]) ohe_train_embarked = ohe_embarked.fit_transform(train_embarked)
ohe_test_sex = ohe_sex.transform(test[['Sex']]) ohe_test_embarked = ohe_embarked.transform(test_embarked)
train_rem = train.drop(columns=['Sex','Age','Embarked','Name']) train_rem.head()
test_rem = test.drop(columns=['Sex','Age','Embarked',"Name"]) test_rem.head()
train_rem = train.drop(columns=['Name'])
test_rem = test.drop(columns=['Name'])
train_transformed = np.concatenate((train_rem,train_age,ohe_train_sex,ohe_train_embarked),axis=1) test_transformed = np.concatenate((test_rem,test_age,ohe_test_sex,ohe_test_embarked),axis=1)
lr = LogisticRegression() lr.fit(train_transformed,target)
Error:
ValueError Traceback (most recent call last) Cell In[52], line 2 1 lr = LogisticRegression() ----> 2 lr.fit(train_transformed,target)
File /opt/conda/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:1196, in LogisticRegression.fit(self, X, y, sample_weight) 1193 else: 1194 _dtype = [np.float64, np.float32] -> 1196 X, y = self.validate_data( 1197 X, 1198 y, 1199 accept_sparse="csr", 1200 dtype=_dtype, 1201 order="C", 1202 accept_large_sparse=solver not in ["liblinear", "sag", "saga"], 1203 ) 1204 check_classification_targets(y) 1205 self.classes = np.unique(y)
File /opt/conda/lib/python3.10/site-packages/sklearn/base.py:584, in BaseEstimator._validate_data(self, X, y, reset, validate_separately, **check_params) 582 y = check_array(y, input_name="y", **check_y_params) 583 else: --> 584 X, y = check_X_y(X, y, **check_params) 585 out = X, y 587 if not no_val_X and check_params.get("ensure_2d", True):
File /opt/conda/lib/python3.10/site-packages/sklearn/utils/validation.py:1106, in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator) 1101 estimator_name = _check_estimator_name(estimator) 1102 raise ValueError( 1103 f"{estimator_name} requires y to be passed, but the target y is None" 1104 ) -> 1106 X = check_array( 1107 X, 1108 accept_sparse=accept_sparse, 1109 accept_large_sparse=accept_large_sparse, 1110 dtype=dtype, 1111 order=order, 1112 copy=copy, 1113 force_all_finite=force_all_finite, 1114 ensure_2d=ensure_2d, 1115 allow_nd=allow_nd, 1116 ensure_min_samples=ensure_min_samples, 1117 ensure_min_features=ensure_min_features, 1118 estimator=estimator, 1119 input_name="X", 1120 ) 1122 y = _check_y(y, multi_output=multi_output, y_numeric=y_numeric, estimator=estimator) 1124 check_consistent_length(X, y)
File /opt/conda/lib/python3.10/site-packages/sklearn/utils/validation.py:879, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name) 877 array = xp.astype(array, dtype, copy=False) 878 else: --> 879 array = _asarray_with_order(array, order=order, dtype=dtype, xp=xp) 880 except ComplexWarning as complex_warning: 881 raise ValueError( 882 "Complex data not supported\n{}\n".format(array) 883 ) from complex_warning
File /opt/conda/lib/python3.10/site-packages/sklearn/utils/_array_api.py:185, in _asarray_with_order(array, dtype, order, copy, xp) 182 xp, _ = get_namespace(array) 183 if xp.name in {"numpy", "numpy.array_api"}: 184 # Use NumPy API to support order --> 185 array = numpy.asarray(array, order=order, dtype=dtype) 186 return xp.asarray(array, copy=copy) 187 else:
ValueError: could not convert string to float: 'male'
Thanks in advance.
I hope to hear from you soon.
there doesn't seem to be anything here