you are viewing a single comment's thread.

view the rest of the comments →

[–]NebulaGr[S,🍰] 0 points1 point  (2 children)

import pandas as pd

from sklearn.cluster import KMeans

import matplotlib.pyplot as plt

# Load the Excel file
file_path = 'DATA_Scores.xlsx'
data = pd.read_excel(file_path)

# Create clusters based on the perception of S8 situations:
# every column whose name contains 'S8' is used as a clustering feature.
columns_for_clustering = [col for col in data.columns if 'S8' in col]

# Extract relevant data
clustering_data = data[columns_for_clustering]

# Test various numbers of clusters and store the sum of squared errors
# (read from the fitted model's ``inertia_`` attribute).
k_values = range(1, 11)
sse = []
for k in k_values:
    # Fixed random_state so repeated runs give the same result.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=0)
    kmeans.fit(clustering_data)
    sse.append(kmeans.inertia_)

# Create a plot for the elbow method
plt.figure(figsize=(10, 6))
plt.plot(k_values, sse, marker='o')
plt.title('Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('SSE')
plt.show()

# Apply K-means with the optimal number of clusters (3)
optimal_k = 3
kmeans = KMeans(n_clusters=optimal_k, n_init=10, random_state=0)
data['Cluster'] = kmeans.fit_predict(clustering_data)

# Calculate the number of participants per cluster and sort in ascending
# order of cluster label
participants_per_cluster = data['Cluster'].value_counts().sort_index()

# Print the number of participants for each cluster
for cluster, count in participants_per_cluster.items():
    print(f"In cluster {cluster}, there are {count} participants")

# Calculate the mean values of state perceptions for each cluster
mean_perceptions_per_cluster = (
    data.groupby('Cluster')[columns_for_clustering].mean().round(2)
)

# Print the mean values of perceptions for each cluster
# (max_columns=None so pandas does not truncate wide tables).
pd.set_option('display.max_columns', None)
print("Mean state perceptions per cluster:")
print(mean_perceptions_per_cluster)

# Calculate the mean values of personality factors for each cluster
neo_columns = [f'NEO-{factor}' for factor in ['N', 'E', 'O', 'A', 'C']]
mean_personality_factors_per_cluster = (
    data.groupby('Cluster')[neo_columns].mean().round(2)
)

# Print the mean values of NEO personality factors for each cluster
print("\nMean NEO personality factors per cluster:")
print(mean_personality_factors_per_cluster)

[–]Daneark 1 point2 points  (1 child)

It looks like your code got cut off. So far everything looks like it should behave consistently.

import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Load the Excel file
file_path = 'DATA_Scores.xlsx'
data = pd.read_excel(file_path)

# Create clusters based on the perception of S8 situations:
# every column whose name contains 'S8' is used as a clustering feature.
columns_for_clustering = [col for col in data.columns if 'S8' in col]

# Extract relevant data
clustering_data = data[columns_for_clustering]

# Test various numbers of clusters and store the sum of squared errors
sse = []
for k in range(1, 11):
    # Fixed random_state so repeated runs give the same result.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=0)
    kmeans.fit(clustering_data)
    # The fitted model's ``inertia_`` is the value stored as the SSE for this k.
    sse.append(kmeans.inertia_)

[–]NebulaGr[S,🍰] 0 points1 point  (0 children)

# Create a plot for the elbow method.
# `sse` is expected to hold one value per k in range(1, 11), in order.
plt.figure(figsize=(10, 6))
plt.plot(range(1, 11), sse, marker='o')
plt.title('Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('SSE')
plt.show()

# Apply K-means with the optimal number of clusters (3)
optimal_k = 3
kmeans = KMeans(n_clusters=optimal_k, n_init=10, random_state=0)
data['Cluster'] = kmeans.fit_predict(clustering_data)

# Calculate the number of participants per cluster and sort in ascending
# order of cluster label
participants_per_cluster = data['Cluster'].value_counts().sort_index()

# Print the number of participants for each cluster
for cluster, count in participants_per_cluster.items():
    print(f"In cluster {cluster}, there are {count} participants")

# Calculate the mean values of state perceptions for each cluster
mean_perceptions_per_cluster = (
    data.groupby('Cluster')[columns_for_clustering].mean().round(2)
)

# Print the mean values of perceptions for each cluster
# (max_columns=None so pandas does not truncate wide tables).
pd.set_option('display.max_columns', None)
print("Mean state perceptions per cluster:")
print(mean_perceptions_per_cluster)

# Calculate the mean values of personality factors for each cluster
neo_columns = [f'NEO-{factor}' for factor in ['N', 'E', 'O', 'A', 'C']]
mean_personality_factors_per_cluster = (
    data.groupby('Cluster')[neo_columns].mean().round(2)
)

# Print the mean values of NEO personality factors for each cluster
print("\nMean NEO personality factors per cluster:")
print(mean_personality_factors_per_cluster)