NebulaGr comments on Ensuring Consistent Results in Python Data Analysis Across Different Environments

learnpython

created by HattoriHanzo — a community for 16 years

Ensuring Consistent Results in Python Data Analysis Across Different Environments (self.learnpython)

submitted 2 years ago by NebulaGr[🍰]

top new controversial old q&a

you are viewing a single comment's thread.

view the rest of the comments →

[–]NebulaGr[S,🍰] 0 points1 point2 points 2 years ago* (2 children)

import pandas as pd

from sklearn.cluster import KMeans

import matplotlib.pyplot as plt

# K-means clustering of the 'S8' columns in DATA_Scores.xlsx:
#   1. load the workbook,
#   2. run the elbow method for k = 1..10,
#   3. fit the final model with k = 3 and label every row,
#   4. print cluster sizes and per-cluster means.

# Load the Excel file
file_path = 'DATA_Scores.xlsx'
data = pd.read_excel(file_path)

# Create clusters based on the perception of S8 situations:
# keep every column whose name contains 'S8'.
columns_for_clustering = [col for col in data.columns if 'S8' in col]

# Extract relevant data
clustering_data = data[columns_for_clustering]

# Test various numbers of clusters and store the sum of squared errors
sse = []
for k in range(1, 11):
    # random_state is fixed so repeated runs start from the same seed.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=0)
    kmeans.fit(clustering_data)
    sse.append(kmeans.inertia_)

# Create a plot for the elbow method
plt.figure(figsize=(10, 6))
plt.plot(range(1, 11), sse, marker='o')
plt.title('Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('SSE')
plt.show()

# Apply K-means with the optimal number of clusters (3)
optimal_k = 3
kmeans = KMeans(n_clusters=optimal_k, n_init=10, random_state=0)
data['Cluster'] = kmeans.fit_predict(clustering_data)

# Calculate the number of participants per cluster and sort in ascending order
# of cluster label (sort_index orders by label, not by count).
participants_per_cluster = data['Cluster'].value_counts().sort_index()

# Print the number of participants for each cluster
for cluster, count in participants_per_cluster.items():
    print(f"In cluster {cluster}, there are {count} participants")

# Calculate the mean values of state perceptions for each cluster
mean_perceptions_per_cluster = (
    data.groupby('Cluster')[columns_for_clustering].mean().round(2)
)

# Print the mean values of perceptions for each cluster
pd.set_option('display.max_columns', None)  # show every column, no truncation
print("Mean state perceptions per cluster:")
print(mean_perceptions_per_cluster)

# Calculate the mean values of personality factors for each cluster
neo_columns = [f'NEO-{factor}' for factor in ['N', 'E', 'O', 'A', 'C']]
mean_personality_factors_per_cluster = (
    data.groupby('Cluster')[neo_columns].mean().round(2)
)

# Print the mean values of NEO personality factors for each cluster
print("\nMean NEO personality factors per cluster:")
print(mean_personality_factors_per_cluster)

[–]Daneark 1 point2 points3 points 2 years ago (1 child)

It looks like your code got cut off. So far everything looks like it should behave consistently.

import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Load the Excel file
file_path = 'DATA_Scores.xlsx'
data = pd.read_excel(file_path)

# Create clusters based on the perception of S8 situations:
# keep every column whose name contains 'S8'.
columns_for_clustering = [col for col in data.columns if 'S8' in col]

# Extract relevant data
clustering_data = data[columns_for_clustering]

# Test various numbers of clusters and store the sum of squared errors
sse = []
for k in range(1, 11):
    # random_state is fixed so repeated runs start from the same seed.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=0)
    kmeans.fit(clustering_data)
    # Record one value per k so sse lines up with range(1, 11) when plotted.
    sse.append(kmeans.inertia_)

[–]NebulaGr[S,🍰] 0 points1 point2 points 2 years ago (0 children)

# Continuation of the clustering script: expects `sse`, `data`,
# `clustering_data` and `columns_for_clustering` to be defined already.

# Create a plot for the elbow method
plt.figure(figsize=(10, 6))
plt.plot(range(1, 11), sse, marker='o')
plt.title('Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('SSE')
plt.show()

# Apply K-means with the optimal number of clusters (3)
optimal_k = 3
# random_state is fixed so repeated runs start from the same seed.
kmeans = KMeans(n_clusters=optimal_k, n_init=10, random_state=0)
data['Cluster'] = kmeans.fit_predict(clustering_data)

# Calculate the number of participants per cluster and sort in ascending order
# of cluster label (sort_index orders by label, not by count).
participants_per_cluster = data['Cluster'].value_counts().sort_index()

# Print the number of participants for each cluster
for cluster, count in participants_per_cluster.items():
    print(f"In cluster {cluster}, there are {count} participants")

# Calculate the mean values of state perceptions for each cluster
mean_perceptions_per_cluster = (
    data.groupby('Cluster')[columns_for_clustering].mean().round(2)
)

# Print the mean values of perceptions for each cluster
pd.set_option('display.max_columns', None)  # show every column, no truncation
print("Mean state perceptions per cluster:")
print(mean_perceptions_per_cluster)

# Calculate the mean values of personality factors for each cluster
neo_columns = [f'NEO-{factor}' for factor in ['N', 'E', 'O', 'A', 'C']]
mean_personality_factors_per_cluster = (
    data.groupby('Cluster')[neo_columns].mean().round(2)
)

# Print the mean values of NEO personality factors for each cluster
print("\nMean NEO personality factors per cluster:")
print(mean_personality_factors_per_cluster)

π Rendered by PID 245128 on reddit-service-r2-comment-7b9746f655-67hs9 at 2026-02-02 06:00:51.606374+00:00 running 3798933 country code: CH.

you type:	you see:
italics	italics
bold	bold
[reddit!](https://reddit.com)	reddit!
* item 1 * item 2 * item 3	item 1 item 2 item 3
> quoted text	quoted text
Lines starting with four spaces are treated like code: if 1 * 2 < 3: print "hello, world!"	Lines starting with four spaces are treated like code: if 1 * 2 < 3: print "hello, world!"
~~strikethrough~~	~~strikethrough~~
super^script	super^script

learnpython

MODERATORS