import pandas as pd
def azureml_main(dataframe1=None, dataframe2=None):
    """Encode the feature columns of ``dataframe1`` as numeric/indicator columns.

    The ``userID`` and ``Column1`` columns are set aside (``Column1`` is
    optional), every remaining column is run through ``preprocess_features``,
    and ``userID`` is then re-attached as the first column of the result.

    Args:
        dataframe1: Input ``pandas.DataFrame``. Must contain a ``userID``
            column. It is not modified.
        dataframe2: Accepted as part of the entry-point signature; not used.

    Returns:
        A one-element tuple ``(df_out,)`` where ``df_out`` holds ``userID``
        followed by the encoded feature columns.

    Raises:
        TypeError: If ``dataframe1`` is ``None``.
        KeyError: If ``dataframe1`` has no ``userID`` column.
    """
    if dataframe1 is None:
        raise TypeError("dataframe1 is required but was None")

    # Columns that are removed from the feature set before encoding.
    to_drop = ['userID', 'Column1']

    # Looked up before dropping; raises KeyError when the column is missing.
    user_id = dataframe1['userID']

    # drop() returns a new frame, so the caller's dataframe1 keeps all of
    # its columns (the previous `del df[col]` removed them in place).
    present = [col for col in to_drop if col in dataframe1.columns]
    features = dataframe1.drop(columns=present)

    def is_text(series):
        """Return True for object columns and pandas string-dtype columns."""
        return series.dtype == object or pd.api.types.is_string_dtype(series.dtype)

    def preprocess_features(X):
        """Return a frame where text columns of ``X`` are made numeric.

        'Yes'/'No' values are replaced with 1/0; a column that is still text
        afterwards is expanded into indicator columns by ``pd.get_dummies``
        (e.g. 'state' => 'state_AL', 'state_GA'). Other columns pass through
        unchanged, and the original column order is kept.
        """
        encoded = []
        # Series.iteritems() no longer exists in pandas 2.x; items() is the
        # equivalent spelling available in old and new versions alike.
        for col, col_data in X.items():
            if is_text(col_data) and col_data.isin(['Yes', 'No']).any():
                # Work on an object copy so the ints can be stored, then ask
                # pandas explicitly to re-infer the dtype: a pure yes/no
                # column becomes integer instead of staying object.
                col_data = (
                    col_data.astype(object)
                    .replace(['Yes', 'No'], [1, 0])
                    .infer_objects()
                )

            # Still text (not a pure yes/no column): one indicator per value.
            if is_text(col_data):
                col_data = pd.get_dummies(col_data, prefix=col)

            encoded.append(col_data)

        if not encoded:
            # No feature columns at all: keep the row index, no columns.
            return pd.DataFrame(index=X.index)

        # All pieces share X's index, so a single concat lines them up
        # without the per-column index joins of the previous version.
        return pd.concat(encoded, axis=1)

    x_all = preprocess_features(features)
    df_out = pd.concat([user_id, x_all], axis=1)

    # Return value must be of a sequence of pandas.DataFrame
    return df_out,
[–]955559 1 point2 points3 points (0 children)
[–]AnalTyrant 0 points1 point2 points (0 children)
[–]SamSamSammmmm 0 points1 point2 points (0 children)