import numpy as np
import pylab as pl
import pandas as pd
import importlib
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn import decomposition
from sklearn import preprocessing
from sklearn import metrics
from sklearn.metrics import completeness_score, homogeneity_score
from sklearn.metrics import silhouette_samples


%pwd

'/Users/cl'


# Read the obesity CSV into a pandas DataFrame; row 0 of the file supplies
# the column names.
dataset_path = '/Users/cl/ObesityDataset.csv'
df = pd.read_csv(dataset_path, header=0)


# Display the loaded DataFrame (notebook cell output):
df


# Remove the class label column by keeping only the first 16 columns
# (positional slice).
# NOTE(review): assumes the label 'NObeyesdad' is the 17th and last column of
# the CSV — confirm against the file.
df2 = df.iloc[:,:16]
df2


# Create an independent copy to clean the data. A plain assignment would only
# alias df2, so the column edits made on cleaned_data below would also modify
# df2 and can trigger pandas' SettingWithCopyWarning (df2 is a slice of df).
cleaned_data = df2.copy()
cleaned_data


# Cast FCVC, NCP, CH2O, FAF and TUE (later relabelled as categories) and Age
# from float to integer. Note that astype('int') drops the fractional part
# (truncation toward zero); it does not round.
for feature in ['FCVC', 'NCP', 'CH2O', 'FAF', 'TUE', 'Age']:
    cleaned_data[feature] = cleaned_data[feature].astype('int')

# Display the resulting column dtypes:
cleaned_data.dtypes

Gender                             object
Age                                 int64
Height                            float64
Weight                            float64
family_history_with_overweight     object
FAVC                               object
FCVC                                int64
NCP                                 int64
CAEC                               object
SMOKE                              object
CH2O                                int64
SCC                                object
FAF                                 int64
TUE                                 int64
CALC                               object
MTRANS                             object
dtype: object


# Integer code -> category name for each recoded feature. Every code maps to
# a string, so a single replace() per column yields the same result as
# replacing the codes one at a time.
category_names = {
    'FCVC': {1: 'Never', 2: 'Sometimes', 3: 'Always'},
    'NCP': {1: '1', 2: '2', 3: '3', 4: '3+'},
    'CH2O': {
        1: 'Less than a liter',
        2: 'Between 1 and 2 L',
        3: 'More than 2 L',
    },
    'FAF': {
        0: 'I do not have',
        1: '1 or 2 days',
        2: '2 or 4 days',
        3: '4 or 5 days',
    },
    'TUE': {0: '0-2 Hours', 1: '3-5 Hours', 2: 'More than 5 Hours'},
}

# Rename the integer codes of each feature into its categorical names:
for feature, names in category_names.items():
    cleaned_data[feature] = cleaned_data[feature].replace(names)

cleaned_data


# Create dummy (one-hot) variables for the cleaned dataset. The dtype is set
# explicitly to integer 0/1: pandas >= 2.0 produces boolean dummies by
# default, and the min-max arithmetic applied to this frame afterwards
# (subtraction) is not defined for boolean columns.
data_numeric = pd.get_dummies(cleaned_data, dtype=int)
# Allow up to 999 columns on display so the wide one-hot frame is not elided.
pd.set_option("display.max_columns", 999)
data_numeric


# Persist the numeric (one-hot encoded) dataframe, without the row index,
# for future use:
numeric_csv_path = '/Users/cl/Obesity_numeric.csv'
data_numeric.to_csv(numeric_csv_path, index=False)


# Normalize the numeric dataset with Min-Max Scaling:
#     scaled = (x - column_min) / (column_max - column_min)
# Cast to float first so that boolean dummy columns (the get_dummies default
# in pandas >= 2.0) support subtraction; astype() also returns a new frame,
# leaving data_numeric untouched.
df_min_max_scaled = data_numeric.astype(float)
column_min = df_min_max_scaled.min()
column_range = df_min_max_scaled.max() - column_min
# A constant column has zero range; divide by 1 instead so it scales to 0.0
# rather than NaN (KMeans rejects input containing NaN).
column_range = column_range.replace(0, 1)
df_min_max_scaled = (df_min_max_scaled - column_min) / column_range


# View normalized data:
print(df_min_max_scaled)

           Age    Height    Weight  Gender_Female  Gender_Male  \
0     0.148936  0.320755  0.186567            1.0          0.0   
1     0.148936  0.132075  0.126866            1.0          0.0   
2     0.191489  0.660377  0.283582            0.0          1.0   
3     0.276596  0.660377  0.358209            0.0          1.0   
4     0.170213  0.622642  0.379104            0.0          1.0   
...        ...       ...       ...            ...          ...   
2106  0.127660  0.491943  0.689616            1.0          0.0   
2107  0.148936  0.563366  0.707037            1.0          0.0   
2108  0.170213  0.570200  0.706637            1.0          0.0   
2109  0.212766  0.546132  0.704079            1.0          0.0   
2110  0.191489  0.544974  0.705020            1.0          0.0   

      family_history_with_overweight_no  family_history_with_overweight_yes  \
0                                   0.0                                 1.0   
1                                   0.0                                 1.0   
2                                   0.0                                 1.0   
3                                   1.0                                 0.0   
4                                   1.0                                 0.0   
...                                 ...                                 ...   
2106                                0.0                                 1.0   
2107                                0.0                                 1.0   
2108                                0.0                                 1.0   
2109                                0.0                                 1.0   
2110                                0.0                                 1.0   

      FAVC_no  FAVC_yes  FCVC_Always  FCVC_Never  FCVC_Sometimes  NCP_1  \
0         1.0       0.0          0.0         0.0             1.0    0.0   
1         1.0       0.0          1.0         0.0             0.0    0.0   
2         1.0       0.0          0.0         0.0             1.0    0.0   
3         1.0       0.0          1.0         0.0             0.0    0.0   
4         1.0       0.0          0.0         0.0             1.0    1.0   
...       ...       ...          ...         ...             ...    ...   
2106      0.0       1.0          1.0         0.0             0.0    0.0   
2107      0.0       1.0          1.0         0.0             0.0    0.0   
2108      0.0       1.0          1.0         0.0             0.0    0.0   
2109      0.0       1.0          1.0         0.0             0.0    0.0   
2110      0.0       1.0          1.0         0.0             0.0    0.0   

      NCP_2  NCP_3  NCP_3+  CAEC_Always  CAEC_Frequently  CAEC_Sometimes  \
0       0.0    1.0     0.0          0.0              0.0             1.0   
1       0.0    1.0     0.0          0.0              0.0             1.0   
2       0.0    1.0     0.0          0.0              0.0             1.0   
3       0.0    1.0     0.0          0.0              0.0             1.0   
4       0.0    0.0     0.0          0.0              0.0             1.0   
...     ...    ...     ...          ...              ...             ...   
2106    0.0    1.0     0.0          0.0              0.0             1.0   
2107    0.0    1.0     0.0          0.0              0.0             1.0   
2108    0.0    1.0     0.0          0.0              0.0             1.0   
2109    0.0    1.0     0.0          0.0              0.0             1.0   
2110    0.0    1.0     0.0          0.0              0.0             1.0   

      CAEC_no  SMOKE_no  SMOKE_yes  CH2O_Between 1 and 2 L  \
0         0.0       1.0        0.0                     1.0   
1         0.0       0.0        1.0                     0.0   
2         0.0       1.0        0.0                     1.0   
3         0.0       1.0        0.0                     1.0   
4         0.0       1.0        0.0                     1.0   
...       ...       ...        ...                     ...   
2106      0.0       1.0        0.0                     0.0   
2107      0.0       1.0        0.0                     1.0   
2108      0.0       1.0        0.0                     1.0   
2109      0.0       1.0        0.0                     1.0   
2110      0.0       1.0        0.0                     1.0   

      CH2O_Less than a liter  CH2O_More than 2 L  SCC_no  SCC_yes  \
0                        0.0                 0.0     1.0      0.0   
1                        0.0                 1.0     0.0      1.0   
2                        0.0                 0.0     1.0      0.0   
3                        0.0                 0.0     1.0      0.0   
4                        0.0                 0.0     1.0      0.0   
...                      ...                 ...     ...      ...   
2106                     1.0                 0.0     1.0      0.0   
2107                     0.0                 0.0     1.0      0.0   
2108                     0.0                 0.0     1.0      0.0   
2109                     0.0                 0.0     1.0      0.0   
2110                     0.0                 0.0     1.0      0.0   

      FAF_1 or 2 days  FAF_2 or 4 days  FAF_4 or 5 days  FAF_I do not have  \
0                 0.0              0.0              0.0                1.0   
1                 0.0              0.0              1.0                0.0   
2                 0.0              1.0              0.0                0.0   
3                 0.0              1.0              0.0                0.0   
4                 0.0              0.0              0.0                1.0   
...               ...              ...              ...                ...   
2106              1.0              0.0              0.0                0.0   
2107              1.0              0.0              0.0                0.0   
2108              1.0              0.0              0.0                0.0   
2109              1.0              0.0              0.0                0.0   
2110              1.0              0.0              0.0                0.0   

      TUE_0-2 Hours  TUE_3-5 Hours  TUE_More than 5 Hours  CALC_Always  \
0               0.0            1.0                    0.0          0.0   
1               1.0            0.0                    0.0          0.0   
2               0.0            1.0                    0.0          0.0   
3               1.0            0.0                    0.0          0.0   
4               1.0            0.0                    0.0          0.0   
...             ...            ...                    ...          ...   
2106            1.0            0.0                    0.0          0.0   
2107            1.0            0.0                    0.0          0.0   
2108            1.0            0.0                    0.0          0.0   
2109            1.0            0.0                    0.0          0.0   
2110            1.0            0.0                    0.0          0.0   

      CALC_Frequently  CALC_Sometimes  CALC_no  MTRANS_Automobile  \
0                 0.0             0.0      1.0                0.0   
1                 0.0             1.0      0.0                0.0   
2                 1.0             0.0      0.0                0.0   
3                 1.0             0.0      0.0                0.0   
4                 0.0             1.0      0.0                0.0   
...               ...             ...      ...                ...   
2106              0.0             1.0      0.0                0.0   
2107              0.0             1.0      0.0                0.0   
2108              0.0             1.0      0.0                0.0   
2109              0.0             1.0      0.0                0.0   
2110              0.0             1.0      0.0                0.0   

      MTRANS_Bike  MTRANS_Motorbike  MTRANS_Public_Transportation  \
0             0.0               0.0                           1.0   
1             0.0               0.0                           1.0   
2             0.0               0.0                           1.0   
3             0.0               0.0                           0.0   
4             0.0               0.0                           1.0   
...           ...               ...                           ...   
2106          0.0               0.0                           1.0   
2107          0.0               0.0                           1.0   
2108          0.0               0.0                           1.0   
2109          0.0               0.0                           1.0   
2110          0.0               0.0                           1.0   

      MTRANS_Walking  
0                0.0  
1                0.0  
2                0.0  
3                1.0  
4                0.0  
...              ...  
2106             0.0  
2107             0.0  
2108             0.0  
2109             0.0  
2110             0.0  

[2111 rows x 43 columns]


# View class labels: the 'NObeyesdad' column of the originally loaded
# dataframe (df), which was excluded from the feature frame.
labels_df =  df['NObeyesdad']
labels_df

0             Normal_Weight
1             Normal_Weight
2             Normal_Weight
3        Overweight_Level_I
4       Overweight_Level_II
               ...         
2106       Obesity_Type_III
2107       Obesity_Type_III
2108       Obesity_Type_III
2109       Obesity_Type_III
2110       Obesity_Type_III
Name: NObeyesdad, Length: 2111, dtype: object


# Encode the class label strings as integer codes. The fitted encoder is kept
# in `le` so the code -> name association can be looked up afterwards.
le = preprocessing.LabelEncoder().fit(labels_df)
labels_num = le.transform(labels_df)
labels_num

array([1, 1, 1, ..., 4, 4, 4])


# View class label names and numeric association. LabelEncoder assigns code i
# to le.classes_[i], so enumerating the classes gives the mapping directly
# (no extra transform call) with plain int keys and str values, which print
# cleanly regardless of the installed NumPy version.
label_names = {code: str(name) for code, name in enumerate(le.classes_)}
print(label_names)

{0: 'Insufficient_Weight', 1: 'Normal_Weight', 2: 'Obesity_Type_I', 3: 'Obesity_Type_II', 4: 'Obesity_Type_III', 5: 'Overweight_Level_I', 6: 'Overweight_Level_II'}


# Initialize k-means with k = 5 clusters.
# - n_init=10 pins the number of random restarts (newer scikit-learn releases
#   default to a single k-means++ run instead of 10).
# - random_state fixes the seed so the cluster assignments, sizes and
#   silhouette values are reproducible from run to run.
# - verbose=1 prints the inertia of every iteration of every restart.
kmeans = KMeans(n_clusters=5, n_init=10, max_iter=500, random_state=42, verbose=1)


# Fit on the min-max normalized data:
kmeans.fit(df_min_max_scaled)

Initialization complete
Iteration 0, inertia 11787.00508682668
Iteration 1, inertia 8566.056202078382
Iteration 2, inertia 8338.71778464269
Iteration 3, inertia 8237.82330971913
Iteration 4, inertia 8176.41527470928
Iteration 5, inertia 8099.540960517189
Iteration 6, inertia 8076.56013777138
Iteration 7, inertia 8068.9930112547
Iteration 8, inertia 8068.221972230656
Iteration 9, inertia 8068.076896983779
Iteration 10, inertia 8067.991083649056
Iteration 11, inertia 8067.917526159745
Iteration 12, inertia 8067.434112719851
Iteration 13, inertia 8067.300618383514
Iteration 14, inertia 8066.690183145394
Iteration 15, inertia 8065.724907535046
Iteration 16, inertia 8065.1555932972105
Iteration 17, inertia 8064.406578178977
Iteration 18, inertia 8055.777491215358
Iteration 19, inertia 8050.3019028424
Iteration 20, inertia 8042.536097506457
Iteration 21, inertia 8026.138983270792
Iteration 22, inertia 7994.538660965364
Iteration 23, inertia 7981.791935092773
Iteration 24, inertia 7972.431678248945
Iteration 25, inertia 7970.020550287187
Iteration 26, inertia 7968.48651338652
Iteration 27, inertia 7966.955938122063
Iteration 28, inertia 7964.781849553093
Iteration 29, inertia 7958.222148843893
Iteration 30, inertia 7956.01524580361
Iteration 31, inertia 7955.555539283562
Iteration 32, inertia 7955.527509697825
Iteration 33, inertia 7955.494519204406
Converged at iteration 33: strict convergence.
Initialization complete
Iteration 0, inertia 11825.274489539268
Iteration 1, inertia 8581.769457282187
Iteration 2, inertia 8468.439310863918
Iteration 3, inertia 8354.00074441653
Iteration 4, inertia 8210.581719370295
Iteration 5, inertia 8130.050202902669
Iteration 6, inertia 8093.4953218710225
Iteration 7, inertia 8076.467461725121
Iteration 8, inertia 8059.590914960909
Iteration 9, inertia 8051.452820847045
Iteration 10, inertia 8044.333927734175
Iteration 11, inertia 8039.893289001246
Iteration 12, inertia 8036.123631679592
Iteration 13, inertia 8032.046237708075
Iteration 14, inertia 8029.674990266088
Iteration 15, inertia 8026.254886825644
Iteration 16, inertia 8023.089343537067
Iteration 17, inertia 8019.880934083217
Iteration 18, inertia 8008.990523342801
Iteration 19, inertia 7969.300639772133
Iteration 20, inertia 7919.97712322982
Iteration 21, inertia 7910.686198322982
Iteration 22, inertia 7906.238208315535
Iteration 23, inertia 7899.756117486131
Iteration 24, inertia 7892.545244873981
Iteration 25, inertia 7890.019624999294
Iteration 26, inertia 7889.072892812861
Iteration 27, inertia 7888.575279501931
Iteration 28, inertia 7888.2990556955165
Iteration 29, inertia 7887.869135061676
Iteration 30, inertia 7887.622886266341
Iteration 31, inertia 7887.258246367743
Iteration 32, inertia 7886.838669332199
Iteration 33, inertia 7886.583695027855
Iteration 34, inertia 7886.48352229928
Converged at iteration 34: strict convergence.
Initialization complete
Iteration 0, inertia 12174.525656749454
Iteration 1, inertia 8507.8234001361
Iteration 2, inertia 8262.156761189228
Iteration 3, inertia 8135.565056552313
Iteration 4, inertia 8072.3981424163885
Iteration 5, inertia 8047.041186769761
Iteration 6, inertia 8036.157681658016
Iteration 7, inertia 8032.793289742063
Iteration 8, inertia 8030.945913788686
Iteration 9, inertia 8030.700347934296
Iteration 10, inertia 8030.595081706321
Iteration 11, inertia 8030.463150369052
Iteration 12, inertia 8030.425825530467
Converged at iteration 12: strict convergence.
Initialization complete
Iteration 0, inertia 12478.757883097682
Iteration 1, inertia 8699.700914801515
Iteration 2, inertia 8423.74786273518
Iteration 3, inertia 8246.866008200308
Iteration 4, inertia 8137.329970995708
Iteration 5, inertia 8103.804191725505
Iteration 6, inertia 8092.207693677892
Iteration 7, inertia 8089.336054818334
Iteration 8, inertia 8087.034378963835
Iteration 9, inertia 8086.246069705694
Iteration 10, inertia 8084.619362324052
Iteration 11, inertia 8082.67643373668
Iteration 12, inertia 8082.188515787069
Converged at iteration 12: strict convergence.
Initialization complete
Iteration 0, inertia 11832.53539722714
Iteration 1, inertia 8466.1801078797
Iteration 2, inertia 8226.758916757437
Iteration 3, inertia 8115.239236717611
Iteration 4, inertia 8062.949256711981
Iteration 5, inertia 8047.895976309511
Iteration 6, inertia 8035.457973021038
Iteration 7, inertia 8028.199810257129
Iteration 8, inertia 8007.65676812841
Iteration 9, inertia 7963.399143602098
Iteration 10, inertia 7936.853393483585
Iteration 11, inertia 7924.273139615089
Iteration 12, inertia 7918.766371499822
Iteration 13, inertia 7916.290733124256
Iteration 14, inertia 7915.13212820532
Iteration 15, inertia 7913.658090371352
Iteration 16, inertia 7912.216196347794
Iteration 17, inertia 7911.340690474468
Iteration 18, inertia 7911.151814669988
Iteration 19, inertia 7911.113667420209
Converged at iteration 19: strict convergence.
Initialization complete
Iteration 0, inertia 11823.752663657368
Iteration 1, inertia 8276.46780690895
Iteration 2, inertia 8098.68081942101
Iteration 3, inertia 8037.29250680346
Iteration 4, inertia 7997.19138403611
Iteration 5, inertia 7925.487303831875
Iteration 6, inertia 7872.633962217118
Iteration 7, inertia 7853.57013270058
Iteration 8, inertia 7849.995965259756
Iteration 9, inertia 7843.743550028235
Iteration 10, inertia 7837.019905347132
Iteration 11, inertia 7834.827151309026
Iteration 12, inertia 7833.546296354347
Iteration 13, inertia 7833.009378377764
Iteration 14, inertia 7832.554446255171
Iteration 15, inertia 7832.210277934463
Iteration 16, inertia 7831.1753466434475
Iteration 17, inertia 7831.074055893812
Iteration 18, inertia 7831.027292617523
Iteration 19, inertia 7830.977013027618
Converged at iteration 19: strict convergence.
Initialization complete
Iteration 0, inertia 11596.309798286276
Iteration 1, inertia 8646.1997670067
Iteration 2, inertia 8394.42223224813
Iteration 3, inertia 8250.441907007784
Iteration 4, inertia 8175.292252415538
Iteration 5, inertia 8131.1038719677545
Iteration 6, inertia 8088.55452146436
Iteration 7, inertia 8032.992455465169
Iteration 8, inertia 7978.52526813498
Iteration 9, inertia 7956.801794915094
Iteration 10, inertia 7947.744891687089
Iteration 11, inertia 7941.340182646654
Iteration 12, inertia 7937.882158960003
Iteration 13, inertia 7936.406248808135
Iteration 14, inertia 7934.818469379045
Iteration 15, inertia 7934.023804051918
Iteration 16, inertia 7932.807323423373
Iteration 17, inertia 7931.7472552303025
Iteration 18, inertia 7931.066262593041
Iteration 19, inertia 7930.419081526247
Iteration 20, inertia 7915.339857061065
Iteration 21, inertia 7886.689696466735
Iteration 22, inertia 7859.242618003011
Iteration 23, inertia 7850.823479552829
Iteration 24, inertia 7847.789675688788
Iteration 25, inertia 7846.48853874517
Iteration 26, inertia 7842.455320611688
Iteration 27, inertia 7839.4277804122285
Iteration 28, inertia 7836.555716423136
Iteration 29, inertia 7835.294209939629
Iteration 30, inertia 7834.188876497395
Iteration 31, inertia 7833.097135623066
Iteration 32, inertia 7832.671449816852
Iteration 33, inertia 7832.380999582051
Iteration 34, inertia 7832.259089755594
Iteration 35, inertia 7832.232553830457
Converged at iteration 35: strict convergence.
Initialization complete
Iteration 0, inertia 12864.885199156606
Iteration 1, inertia 8534.055041539128
Iteration 2, inertia 8208.402584930189
Iteration 3, inertia 8134.677983160359
Iteration 4, inertia 8061.954032765476
Iteration 5, inertia 8012.741078486168
Iteration 6, inertia 7973.675852821764
Iteration 7, inertia 7960.156905577035
Iteration 8, inertia 7955.948758245115
Iteration 9, inertia 7937.8692456423405
Iteration 10, inertia 7929.672981125681
Iteration 11, inertia 7912.395873582756
Iteration 12, inertia 7884.993609624193
Iteration 13, inertia 7867.601997438204
Iteration 14, inertia 7864.718177831521
Iteration 15, inertia 7863.772078183434
Iteration 16, inertia 7863.723388842099
Converged at iteration 16: strict convergence.
Initialization complete
Iteration 0, inertia 11871.688456665612
Iteration 1, inertia 8444.666140084128
Iteration 2, inertia 8172.684939816708
Iteration 3, inertia 8022.5123919112075
Iteration 4, inertia 7989.103278787667
Iteration 5, inertia 7979.526024439475
Iteration 6, inertia 7972.321268249555
Iteration 7, inertia 7965.892255679502
Iteration 8, inertia 7959.862725195303
Iteration 9, inertia 7948.061310377594
Iteration 10, inertia 7926.876581312946
Iteration 11, inertia 7922.760473428007
Iteration 12, inertia 7920.461356227191
Iteration 13, inertia 7918.936152811791
Iteration 14, inertia 7918.355463902865
Iteration 15, inertia 7918.230916735995
Converged at iteration 15: strict convergence.
Initialization complete
Iteration 0, inertia 12258.81732786365
Iteration 1, inertia 8490.528120624003
Iteration 2, inertia 8287.639034367927
Iteration 3, inertia 8199.024276953247
Iteration 4, inertia 8142.15513015745
Iteration 5, inertia 8082.207427076215
Iteration 6, inertia 8037.963976518285
Iteration 7, inertia 8007.417500726093
Iteration 8, inertia 7978.869398652809
Iteration 9, inertia 7937.0245485312225
Iteration 10, inertia 7903.678870109956
Iteration 11, inertia 7866.78478195413
Iteration 12, inertia 7853.215930528641
Iteration 13, inertia 7847.326673615622
Iteration 14, inertia 7845.271749303697
Iteration 15, inertia 7844.525551419671
Iteration 16, inertia 7844.433188252878
Iteration 17, inertia 7844.348811651154
Converged at iteration 17: strict convergence.

KMeans(max_iter=500, n_clusters=5, verbose=1)


# Cluster index (0-4) predicted by the fitted 5-cluster model for every
# instance of the normalized data:
clusters5 = kmeans.predict(df_min_max_scaled)


# Display the cluster assignments as a one-column table:
pd.DataFrame(clusters5, columns=["Cluster"])


def cluster_sizes(clusters):
    """Count how many instances were assigned to each cluster.

    The counts are derived from ``clusters`` alone, so the function does not
    depend on any module-level dataframe.

    Parameters
    ----------
    clusters : array-like
        Cluster label of each instance in the data.

    Returns
    -------
    dict
        Maps each distinct cluster label to the number of instances carrying
        that label, in ascending label order. Empty input gives ``{}``.
    """
    cluster_labels, counts = np.unique(np.asarray(clusters), return_counts=True)
    # tolist() converts the counts to plain Python ints.
    return dict(zip(cluster_labels, counts.tolist()))


# Number of instances in each of the 5 clusters:
size5 = cluster_sizes(clusters5)

for c5, n_members in size5.items():
    print("Size of Cluster", c5, "= ", n_members)

Size of Cluster 0 =  420
Size of Cluster 1 =  423
Size of Cluster 2 =  455
Size of Cluster 3 =  355
Size of Cluster 4 =  458


# The centroids provide an aggregate representation and a characterization of
# each cluster. Floats are displayed with a thousands separator and two
# decimals.
pd.set_option('display.float_format', '{:,.2f}'.format)

# One row per cluster centre, one column per normalized feature:
centroids5 = pd.DataFrame(
    data=kmeans.cluster_centers_,
    columns=list(df_min_max_scaled.columns),
)
centroids5


# Silhouette analysis for k = 5: compute the silhouette coefficient of every
# instance, then report the mean over all instances.
c5_silhouette = metrics.silhouette_samples(df_min_max_scaled, clusters5)
c5_silhouette_mean = c5_silhouette.mean()
print('Mean Silhouette Value :', c5_silhouette_mean)

Mean Silhouette Value : 0.12696817428675627


def plot_silhouettes(data, clusters, metric='euclidean'):
    """Draw a silhouette plot, one horizontal bar per instance grouped by cluster.

    Within each cluster the bars are sorted by silhouette coefficient, and a
    dashed red vertical line marks the mean coefficient over all instances.
    The figure is shown with ``pl.show()``.

    Parameters
    ----------
    data : array-like
        Feature matrix passed to ``sklearn.metrics.silhouette_samples``.
    clusters : array-like
        Cluster label of each instance in ``data``.
    metric : str, default 'euclidean'
        Distance metric forwarded to ``silhouette_samples``.

    Returns
    -------
    None
    """
    from matplotlib import cm
    from sklearn.metrics import silhouette_samples

    # Needed so that `clusters == k` below is an element-wise mask even when
    # a plain list is passed.
    clusters = np.asarray(clusters)
    cluster_labels = np.unique(clusters)
    n_clusters = cluster_labels.shape[0]
    # Forward the caller's metric instead of a hard-coded 'euclidean'.
    silhouette_vals = silhouette_samples(data, clusters, metric=metric)

    c_ax_lower = 0  # y position where the current cluster's bars start
    cticks = []     # y position of each cluster's tick (middle of its band)
    for i, k in enumerate(cluster_labels):
        c_silhouette_vals = np.sort(silhouette_vals[clusters == k])
        c_ax_upper = c_ax_lower + len(c_silhouette_vals)
        color = cm.jet(float(i) / n_clusters)
        pl.barh(range(c_ax_lower, c_ax_upper), c_silhouette_vals, height=1.0,
                edgecolor='none', color=color)

        cticks.append((c_ax_lower + c_ax_upper) / 2)
        # The next cluster's band starts where this one ended.
        c_ax_lower = c_ax_upper

    silhouette_avg = np.mean(silhouette_vals)
    pl.axvline(silhouette_avg, color="red", linestyle="--")

    pl.yticks(cticks, cluster_labels)
    pl.ylabel('Cluster')
    pl.xlabel('Silhouette coefficient')

    pl.tight_layout()
    pl.show()


# Plot and Evaluate the Silhouettes of the 5-cluster assignment
# (plot_silhouettes is called with its default metric):
plot_silhouettes(df_min_max_scaled, clusters5)


# k-means with k = 3 clusters.
# - n_init=10 pins the number of random restarts (newer scikit-learn releases
#   default to a single k-means++ run instead of 10).
# - random_state fixes the seed so the clustering is reproducible.
# - verbose=1 prints the inertia of every iteration of every restart.
kmeans3 = KMeans(n_clusters=3, n_init=10, max_iter=500, random_state=42, verbose=1)


# Fit on the min-max normalized data:
kmeans3.fit(df_min_max_scaled)

Initialization complete
Iteration 0, inertia 12879.623786128155
Iteration 1, inertia 9001.37311540421
Iteration 2, inertia 8858.998139520381
Iteration 3, inertia 8809.939108030303
Iteration 4, inertia 8791.272204868248
Iteration 5, inertia 8786.47670789417
Iteration 6, inertia 8785.4661617069
Iteration 7, inertia 8783.363746212328
Iteration 8, inertia 8772.54056168085
Iteration 9, inertia 8769.557983795672
Iteration 10, inertia 8768.483498001466
Iteration 11, inertia 8767.764124919666
Iteration 12, inertia 8766.35120571748
Iteration 13, inertia 8765.43867230093
Iteration 14, inertia 8765.398780834916
Converged at iteration 14: strict convergence.
Initialization complete
Iteration 0, inertia 14534.508672109176
Iteration 1, inertia 9309.441488842096
Iteration 2, inertia 9037.604181226367
Iteration 3, inertia 8938.658231693586
Iteration 4, inertia 8916.402643453937
Iteration 5, inertia 8888.331390013225
Iteration 6, inertia 8870.074074790271
Iteration 7, inertia 8861.211103290092
Iteration 8, inertia 8853.162119584285
Iteration 9, inertia 8846.868576454352
Iteration 10, inertia 8827.84407699484
Iteration 11, inertia 8799.1609794804
Iteration 12, inertia 8785.909269034124
Iteration 13, inertia 8774.601325517255
Iteration 14, inertia 8769.592571719972
Iteration 15, inertia 8767.63492356255
Iteration 16, inertia 8766.444849568958
Iteration 17, inertia 8765.971992499608
Iteration 18, inertia 8765.867441373892
Iteration 19, inertia 8765.66116613612
Iteration 20, inertia 8765.486144212005
Iteration 21, inertia 8765.461045034992
Converged at iteration 21: strict convergence.
Initialization complete
Iteration 0, inertia 13336.164818796795
Iteration 1, inertia 9567.269152448953
Iteration 2, inertia 9445.553561777502
Iteration 3, inertia 9356.780028542624
Iteration 4, inertia 9157.130002021417
Iteration 5, inertia 9032.754433029859
Iteration 6, inertia 8966.21740202369
Iteration 7, inertia 8938.469471906204
Iteration 8, inertia 8923.340927558393
Iteration 9, inertia 8916.997734419223
Iteration 10, inertia 8915.764064629098
Iteration 11, inertia 8915.37813313638
Iteration 12, inertia 8915.127678549246
Iteration 13, inertia 8914.954033413907
Iteration 14, inertia 8914.93152766286
Iteration 15, inertia 8914.917473861125
Converged at iteration 15: strict convergence.
Initialization complete
Iteration 0, inertia 12235.950953731766
Iteration 1, inertia 9177.059040690088
Iteration 2, inertia 8997.9480370336
Iteration 3, inertia 8886.478645540206
Iteration 4, inertia 8833.780310727954
Iteration 5, inertia 8820.17237743324
Iteration 6, inertia 8819.429241417094
Iteration 7, inertia 8819.16005670056
Iteration 8, inertia 8818.804519317317
Iteration 9, inertia 8818.682843171513
Iteration 10, inertia 8818.311487274063
Iteration 11, inertia 8818.159940170133
Iteration 12, inertia 8817.98907376872
Iteration 13, inertia 8817.865936421687
Iteration 14, inertia 8817.806917535885
Iteration 15, inertia 8817.776092265041
Iteration 16, inertia 8817.693711678381
Iteration 17, inertia 8817.668602183467
Converged at iteration 17: strict convergence.
Initialization complete
Iteration 0, inertia 14283.23672461032
Iteration 1, inertia 9655.242214042786
Iteration 2, inertia 9501.150166979987
Iteration 3, inertia 9385.8241565813
Iteration 4, inertia 9306.25122999164
Iteration 5, inertia 9258.611440798086
Iteration 6, inertia 9220.655830148746
Iteration 7, inertia 9205.884675342035
Iteration 8, inertia 9201.291862951928
Iteration 9, inertia 9200.842008469204
Iteration 10, inertia 9200.67414699048
Iteration 11, inertia 9200.629965113641
Converged at iteration 11: strict convergence.
Initialization complete
Iteration 0, inertia 12423.535126370873
Iteration 1, inertia 9135.382864146819
Iteration 2, inertia 9028.876396630194
Iteration 3, inertia 8968.812555470771
Iteration 4, inertia 8910.48089075226
Iteration 5, inertia 8856.621151522155
Iteration 6, inertia 8821.480910675746
Iteration 7, inertia 8818.661138265279
Iteration 8, inertia 8818.274441804033
Iteration 9, inertia 8818.095197547456
Iteration 10, inertia 8817.924137895994
Iteration 11, inertia 8817.828887772916
Iteration 12, inertia 8817.806917535885
Iteration 13, inertia 8817.776092265041
Iteration 14, inertia 8817.693711678381
Iteration 15, inertia 8817.668602183467
Converged at iteration 15: strict convergence.
Initialization complete
Iteration 0, inertia 15560.349375046955
Iteration 1, inertia 9451.504598782774
Iteration 2, inertia 9142.603376735178
Iteration 3, inertia 8984.738489060503
Iteration 4, inertia 8903.206814994957
Iteration 5, inertia 8849.255938606113
Iteration 6, inertia 8805.887151861752
Iteration 7, inertia 8787.427510335308
Iteration 8, inertia 8777.334874885435
Iteration 9, inertia 8773.33734831006
Iteration 10, inertia 8770.052840575934
Iteration 11, inertia 8768.924265588485
Iteration 12, inertia 8768.062375098845
Iteration 13, inertia 8766.037804380696
Iteration 14, inertia 8765.5613568244
Iteration 15, inertia 8765.461045034992
Converged at iteration 15: strict convergence.
Initialization complete
Iteration 0, inertia 14256.23978990368
Iteration 1, inertia 9202.001056480733
Iteration 2, inertia 8958.453465999479
Iteration 3, inertia 8866.570788484862
Iteration 4, inertia 8824.832909813438
Iteration 5, inertia 8819.837479151136
Iteration 6, inertia 8819.10078183813
Iteration 7, inertia 8819.035876469317
Iteration 8, inertia 8818.817563005117
Iteration 9, inertia 8818.646974003661
Iteration 10, inertia 8818.289586515795
Iteration 11, inertia 8818.159940170133
Iteration 12, inertia 8817.98907376872
Iteration 13, inertia 8817.865936421687
Iteration 14, inertia 8817.806917535887
Iteration 15, inertia 8817.776092265041
Iteration 16, inertia 8817.693711678381
Iteration 17, inertia 8817.668602183467
Converged at iteration 17: strict convergence.
Initialization complete
Iteration 0, inertia 14045.247730533238
Iteration 1, inertia 9577.932151433946
Iteration 2, inertia 9503.316242864512
Iteration 3, inertia 9481.915359950877
Iteration 4, inertia 9463.72742189162
Iteration 5, inertia 9441.526557119001
Iteration 6, inertia 9420.35228646382
Iteration 7, inertia 9406.373961589723
Iteration 8, inertia 9402.970170630471
Iteration 9, inertia 9400.047385253652
Iteration 10, inertia 9399.555304602607
Iteration 11, inertia 9399.498839497075
Iteration 12, inertia 9399.41018122203
Iteration 13, inertia 9399.298956532946
Iteration 14, inertia 9399.274680230332
Iteration 15, inertia 9399.252245628535
Converged at iteration 15: strict convergence.
Initialization complete
Iteration 0, inertia 13317.99475078725
Iteration 1, inertia 8977.851327856566
Iteration 2, inertia 8895.604119959466
Iteration 3, inertia 8873.687473142223
Iteration 4, inertia 8849.570415401187
Iteration 5, inertia 8805.858315057407
Iteration 6, inertia 8780.421553985545
Iteration 7, inertia 8775.53080855752
Iteration 8, inertia 8772.61887038512
Iteration 9, inertia 8769.175262861294
Iteration 10, inertia 8768.382144887562
Iteration 11, inertia 8767.704981473666
Iteration 12, inertia 8766.274753740365
Iteration 13, inertia 8765.417169729853
Iteration 14, inertia 8765.398780834916
Converged at iteration 14: strict convergence.

KMeans(max_iter=500, n_clusters=3, verbose=1)


# Label every row of df_min_max_scaled with the cluster kmeans3 predicts for it:
clusters3 = kmeans3.predict(df_min_max_scaled)


# cluster_sizes (defined earlier in the notebook) is used here as a
# mapping from cluster label to member count; print one line per cluster.
size3 = cluster_sizes(clusters3)

for c, n_members in size3.items():
    print("Size of Cluster", c, "= ", n_members)

Size of Cluster 0 =  1058
Size of Cluster 1 =  636
Size of Cluster 2 =  417


# Render floats with two decimals and thousands separators so the centroid table is easy to scan:
pd.set_option('display.float_format', '{:,.2f}'.format)

# One row per fitted cluster centre, columns named after the clustered frame's features:
centroids3 = pd.DataFrame(
    data=kmeans3.cluster_centers_,
    columns=df_min_max_scaled.columns.to_numpy(),
)
centroids3


# Silhouette analysis for the 3-cluster solution: per-sample coefficients, then their mean.
c3_silhouette = silhouette_samples(df_min_max_scaled, clusters3)
print('Mean Silhouette Value :', np.mean(c3_silhouette))

Mean Silhouette Value : 0.11634874352766442


# Draw the silhouette plot for the 3-cluster assignment:
plot_silhouettes(df_min_max_scaled, clusters3)


# k-means with n = 2. No random_state is passed, so the initialisation is
# not seeded; verbose=1 makes fit() print the per-iteration inertia log.
kmeans2 = KMeans(
    n_clusters=2,
    max_iter=500,
    verbose=1,
)


kmeans2.fit(df_min_max_scaled)

Initialization complete
Iteration 0, inertia 21141.470043487447
Iteration 1, inertia 10146.649638459037
Iteration 2, inertia 10052.393229584075
Iteration 3, inertia 9978.55949142651
Iteration 4, inertia 9931.576115637514
Iteration 5, inertia 9882.008890876072
Iteration 6, inertia 9833.033569152649
Iteration 7, inertia 9820.76522415991
Iteration 8, inertia 9810.381012107375
Iteration 9, inertia 9806.43745895125
Iteration 10, inertia 9802.52445906197
Iteration 11, inertia 9788.188974953495
Iteration 12, inertia 9784.590860736775
Iteration 13, inertia 9781.587461096071
Iteration 14, inertia 9764.62868015089
Iteration 15, inertia 9703.250240710777
Iteration 16, inertia 9654.064419604409
Iteration 17, inertia 9640.924590400316
Iteration 18, inertia 9633.269288654592
Iteration 19, inertia 9627.22637779535
Iteration 20, inertia 9626.211067157712
Iteration 21, inertia 9625.966275262466
Iteration 22, inertia 9625.900707169734
Iteration 23, inertia 9625.880356300455
Converged at iteration 23: strict convergence.
Initialization complete
Iteration 0, inertia 16459.557174738147
Iteration 1, inertia 9900.362639271163
Iteration 2, inertia 9489.377342057842
Iteration 3, inertia 9440.231621454297
Iteration 4, inertia 9439.746651906471
Iteration 5, inertia 9439.70314554615
Converged at iteration 5: strict convergence.
Initialization complete
Iteration 0, inertia 16654.425161503892
Iteration 1, inertia 9928.457371508537
Iteration 2, inertia 9838.23752341025
Iteration 3, inertia 9788.863247705292
Iteration 4, inertia 9548.861615101538
Iteration 5, inertia 9442.673615150014
Iteration 6, inertia 9439.835639639505
Iteration 7, inertia 9439.703145546147
Converged at iteration 7: strict convergence.
Initialization complete
Iteration 0, inertia 18427.311216298647
Iteration 1, inertia 10184.320163749075
Iteration 2, inertia 10006.009344174558
Iteration 3, inertia 9946.739537737863
Iteration 4, inertia 9924.402102243388
Iteration 5, inertia 9910.780896241331
Iteration 6, inertia 9905.110900156222
Iteration 7, inertia 9902.264415823509
Iteration 8, inertia 9897.72566174957
Iteration 9, inertia 9895.57672400781
Iteration 10, inertia 9894.774416642193
Iteration 11, inertia 9892.907792732634
Iteration 12, inertia 9889.535477719926
Iteration 13, inertia 9885.189090329697
Iteration 14, inertia 9882.336535503198
Iteration 15, inertia 9878.48056558997
Iteration 16, inertia 9874.201130739495
Iteration 17, inertia 9869.024206385815
Iteration 18, inertia 9859.9664768237
Iteration 19, inertia 9851.6930077618
Iteration 20, inertia 9844.315605926562
Iteration 21, inertia 9838.560305605188
Iteration 22, inertia 9828.42281943376
Iteration 23, inertia 9813.08654486762
Iteration 24, inertia 9801.820301212163
Iteration 25, inertia 9796.444293037726
Iteration 26, inertia 9794.424054751174
Iteration 27, inertia 9794.231982639427
Iteration 28, inertia 9794.184865647467
Converged at iteration 28: center shift 1.1769791973055172e-05 within tolerance 1.1825910645989139e-05.
Initialization complete
Iteration 0, inertia 16402.36648533896
Iteration 1, inertia 9994.565703554776
Iteration 2, inertia 9934.77322692749
Iteration 3, inertia 9909.912786027671
Iteration 4, inertia 9883.607078127476
Iteration 5, inertia 9850.433915715637
Iteration 6, inertia 9791.58073461284
Iteration 7, inertia 9734.449876536686
Iteration 8, inertia 9646.394815196176
Iteration 9, inertia 9627.336127098832
Iteration 10, inertia 9625.954342511643
Iteration 11, inertia 9625.884133923937
Iteration 12, inertia 9625.86108434419
Iteration 13, inertia 9625.840681351561
Converged at iteration 13: strict convergence.
Initialization complete
Iteration 0, inertia 14969.584620698479
Iteration 1, inertia 9901.195321252426
Iteration 2, inertia 9853.041301002952
Iteration 3, inertia 9828.346078346132
Iteration 4, inertia 9821.497722659567
Iteration 5, inertia 9816.797972478129
Iteration 6, inertia 9815.160587833674
Iteration 7, inertia 9813.203694536212
Iteration 8, inertia 9812.379447087993
Iteration 9, inertia 9811.952753836797
Iteration 10, inertia 9811.747610338476
Iteration 11, inertia 9811.640620134325
Iteration 12, inertia 9811.132844140293
Iteration 13, inertia 9810.651492658839
Iteration 14, inertia 9810.461157499503
Iteration 15, inertia 9809.569339539601
Iteration 16, inertia 9808.659582332328
Iteration 17, inertia 9808.2561123607
Iteration 18, inertia 9807.986336248636
Iteration 19, inertia 9807.906534765058
Iteration 20, inertia 9807.487360220597
Iteration 21, inertia 9806.667738269354
Iteration 22, inertia 9805.834740565373
Iteration 23, inertia 9800.048920420628
Iteration 24, inertia 9788.128577786762
Iteration 25, inertia 9779.417579204792
Iteration 26, inertia 9775.38432414417
Iteration 27, inertia 9761.684802413005
Iteration 28, inertia 9709.236797762258
Iteration 29, inertia 9653.799784178347
Iteration 30, inertia 9642.172114603334
Iteration 31, inertia 9636.202113008087
Iteration 32, inertia 9620.09415505929
Iteration 33, inertia 9617.977388582693
Iteration 34, inertia 9615.658558431482
Iteration 35, inertia 9610.65542615645
Iteration 36, inertia 9607.248540288658
Iteration 37, inertia 9603.493341174244
Iteration 38, inertia 9599.523654632054
Iteration 39, inertia 9597.642822643924
Iteration 40, inertia 9596.209483776676
Iteration 41, inertia 9595.35635092364
Iteration 42, inertia 9594.882428291552
Iteration 43, inertia 9594.678998923426
Iteration 44, inertia 9594.37636625157
Iteration 45, inertia 9594.115982262558
Iteration 46, inertia 9594.001524563828
Iteration 47, inertia 9592.870319464864
Iteration 48, inertia 9587.715138968875
Iteration 49, inertia 9578.793233518765
Iteration 50, inertia 9556.18315100723
Iteration 51, inertia 9517.365630351003
Iteration 52, inertia 9457.17665798308
Iteration 53, inertia 9440.017782540526
Iteration 54, inertia 9439.746651906471
Iteration 55, inertia 9439.703145546147
Converged at iteration 55: strict convergence.
Initialization complete
Iteration 0, inertia 16595.25826599754
Iteration 1, inertia 10109.37538869094
Iteration 2, inertia 9915.261803411886
Iteration 3, inertia 9789.099147804835
Iteration 4, inertia 9778.126542918279
Iteration 5, inertia 9777.915684703536
Converged at iteration 5: strict convergence.
Initialization complete
Iteration 0, inertia 15375.183674102089
Iteration 1, inertia 9952.678493621155
Iteration 2, inertia 9884.256882726944
Iteration 3, inertia 9864.875145644339
Iteration 4, inertia 9859.869224463644
Iteration 5, inertia 9858.379095829157
Iteration 6, inertia 9854.219459620635
Iteration 7, inertia 9841.672028071058
Iteration 8, inertia 9832.676859254072
Iteration 9, inertia 9818.161920484818
Iteration 10, inertia 9805.876930142955
Iteration 11, inertia 9798.29221813926
Iteration 12, inertia 9794.467009896513
Iteration 13, inertia 9794.22218892144
Iteration 14, inertia 9794.184865647467
Converged at iteration 14: center shift 1.1769791973055237e-05 within tolerance 1.1825910645989139e-05.
Initialization complete
Iteration 0, inertia 18359.985955431566
Iteration 1, inertia 10101.593413896659
Iteration 2, inertia 9982.754190681128
Iteration 3, inertia 9917.840303560613
Iteration 4, inertia 9890.496715960406
Iteration 5, inertia 9876.352494100358
Iteration 6, inertia 9865.682282638423
Iteration 7, inertia 9853.175178022406
Iteration 8, inertia 9841.405257717575
Iteration 9, inertia 9828.743052973643
Iteration 10, inertia 9812.10020465191
Iteration 11, inertia 9800.966469728079
Iteration 12, inertia 9795.474672343822
Iteration 13, inertia 9794.39208071576
Iteration 14, inertia 9794.247097906948
Iteration 15, inertia 9794.203342487108
Converged at iteration 15: center shift 1.1189193902305637e-05 within tolerance 1.1825910645989139e-05.
Initialization complete
Iteration 0, inertia 14840.19634222251
Iteration 1, inertia 10012.892987021458
Iteration 2, inertia 9937.527748973907
Iteration 3, inertia 9874.816695079277
Iteration 4, inertia 9840.194716250331
Iteration 5, inertia 9814.311264293689
Iteration 6, inertia 9800.700957677194
Iteration 7, inertia 9771.602530465227
Iteration 8, inertia 9541.236198237983
Iteration 9, inertia 9441.157699191357
Iteration 10, inertia 9439.835639639505
Iteration 11, inertia 9439.70314554615
Converged at iteration 11: strict convergence.

KMeans(max_iter=500, n_clusters=2, verbose=1)


# Label every row of df_min_max_scaled with the cluster kmeans2 predicts for it:
clusters2 = kmeans2.predict(df_min_max_scaled)


# cluster_sizes (defined earlier in the notebook) is used here as a
# mapping from cluster label to member count; print one line per cluster.
size2 = cluster_sizes(clusters2)

for c, n_members in size2.items():
    print("Size of Cluster", c, "= ", n_members)

Size of Cluster 0 =  1067
Size of Cluster 1 =  1044


# Render floats with two decimals and thousands separators so the centroid table is easy to scan:
pd.set_option('display.float_format', '{:,.2f}'.format)

# One row per fitted cluster centre, columns named after the clustered frame's features:
centroids2 = pd.DataFrame(
    data=kmeans2.cluster_centers_,
    columns=df_min_max_scaled.columns.to_numpy(),
)
centroids2


# Silhouette analysis for the 2-cluster solution: per-sample coefficients, then their mean.
c2_silhouette = silhouette_samples(df_min_max_scaled, clusters2)
print('Mean Silhouette Value :', np.mean(c2_silhouette))

Mean Silhouette Value : 0.13093478332005926


# Draw the silhouette plot for the 2-cluster assignment:
plot_silhouettes(df_min_max_scaled, clusters2)


# Youngest age in the dataset:
data_numeric['Age'].min()

14


data_numeric['Age'].max()  # oldest age in the dataset

61


# Cut Age at the 61st and 97.2nd percentiles into three interval-valued bins
# and preview the first five assignments.
age_quantiles = [0, .61, .972, 1]
age_bins = pd.qcut(data_numeric['Age'], q=age_quantiles)
age_bins.head()

0    (13.999, 24.0]
1    (13.999, 24.0]
2    (13.999, 24.0]
3      (24.0, 40.0]
4    (13.999, 24.0]
Name: Age, dtype: category
Categories (3, interval[float64]): [(13.999, 24.0] < (24.0, 40.0] < (40.0, 61.0]]


# Re-bin Age at the same quantiles, this time naming the three bins by generation:
generation_names = ['Gen-Z', 'Millenials', 'Gen-X & Boomers']
age_bins = pd.qcut(data_numeric['Age'], q=[0, .61, .972, 1], labels=generation_names)

# Put each row's generation label beside the Age column of df2 for a side-by-side check:
age_df = pd.concat([age_bins, df2['Age']], axis=1)
age_df.columns = ['Age Group', 'Age']
age_df.head(n=10)


# Build the binned dataset on an independent copy of data_numeric.
# Plain assignment would only alias it, so overwriting "Age" below would
# also replace the numeric Age column inside data_numeric itself, which
# breaks re-running the Age.min()/Age.max()/qcut cells that read it.
data_age_groups = data_numeric.copy()
# Swap the numeric age for its generation label:
data_age_groups["Age"] = age_df['Age Group']


data_age_groups.head(10)


# Expand the categorical columns of the binned dataset into dummy/indicator columns:
df_age_groups = pd.get_dummies(data=data_age_groups)
df_age_groups.head()


# K-means with n = 3 on the binned dataset. No random_state is passed, so the
# initialisation is not seeded; verbose=1 makes fit() print the inertia log.
kmeans = KMeans(
    n_clusters=3,
    max_iter=500,
    verbose=1,
)


kmeans.fit(df_age_groups)

Initialization complete
Iteration 0, inertia 266216.09575717594
Iteration 1, inertia 215521.27371160412
Iteration 2, inertia 210755.68046562248
Iteration 3, inertia 209722.10499704749
Iteration 4, inertia 209418.75170585237
Iteration 5, inertia 209024.46758980726
Iteration 6, inertia 208957.87320545508
Iteration 7, inertia 208952.94787903182
Converged at iteration 7: center shift 0.0008306717121226465 within tolerance 0.0015358503717230955.
Initialization complete
Iteration 0, inertia 502841.1735953951
Iteration 1, inertia 327560.09540897014
Iteration 2, inertia 315527.1447145528
Iteration 3, inertia 309720.53549766104
Iteration 4, inertia 300890.08942123153
Iteration 5, inertia 292303.3695465401
Iteration 6, inertia 287217.5099774456
Iteration 7, inertia 283800.36697429675
Iteration 8, inertia 278425.3310745159
Iteration 9, inertia 270545.8711724134
Iteration 10, inertia 246805.06029650217
Iteration 11, inertia 232750.70198980303
Iteration 12, inertia 226653.87489398956
Iteration 13, inertia 216920.98018306002
Iteration 14, inertia 212292.1259899532
Iteration 15, inertia 209925.16585043436
Iteration 16, inertia 209641.8566251863
Iteration 17, inertia 209612.29209838895
Iteration 18, inertia 209601.88285883892
Iteration 19, inertia 209598.80247956378
Converged at iteration 19: strict convergence.
Initialization complete
Iteration 0, inertia 326936.9440904384
Iteration 1, inertia 226724.44726938492
Iteration 2, inertia 213994.4657317486
Iteration 3, inertia 210480.97315506768
Iteration 4, inertia 209660.8099794086
Iteration 5, inertia 209614.6304476699
Iteration 6, inertia 209601.88285883892
Iteration 7, inertia 209598.80247956383
Converged at iteration 7: strict convergence.
Initialization complete
Iteration 0, inertia 244660.38351519656
Iteration 1, inertia 211950.75265903442
Iteration 2, inertia 209849.7543072008
Iteration 3, inertia 209518.7500452044
Iteration 4, inertia 209060.11326694212
Iteration 5, inertia 208957.87320545508
Iteration 6, inertia 208952.94787903182
Converged at iteration 6: center shift 0.0008306717121226465 within tolerance 0.0015358503717230955.
Initialization complete
Iteration 0, inertia 293038.3352946884
Iteration 1, inertia 213135.03302011758
Iteration 2, inertia 210104.25819821077
Iteration 3, inertia 209618.1187074107
Iteration 4, inertia 209113.16767690537
Iteration 5, inertia 208962.37801962328
Iteration 6, inertia 208952.94787903185
Converged at iteration 6: center shift 0.0008306717121226769 within tolerance 0.0015358503717230955.
Initialization complete
Iteration 0, inertia 275553.3335700919
Iteration 1, inertia 222295.79489500792
Iteration 2, inertia 212492.7176855583
Iteration 3, inertia 209869.48135591563
Iteration 4, inertia 209628.24667814613
Iteration 5, inertia 209612.29209838895
Iteration 6, inertia 209601.88285883892
Iteration 7, inertia 209598.80247956383
Converged at iteration 7: strict convergence.
Initialization complete
Iteration 0, inertia 286761.1414317467
Iteration 1, inertia 210695.01280865865
Iteration 2, inertia 209668.3036758006
Iteration 3, inertia 209614.6304476699
Iteration 4, inertia 209601.88285883892
Iteration 5, inertia 209598.80247956383
Converged at iteration 5: strict convergence.
Initialization complete
Iteration 0, inertia 383050.70537416794
Iteration 1, inertia 268032.44644019724
Iteration 2, inertia 222407.19095729562
Iteration 3, inertia 213626.56770579072
Iteration 4, inertia 210456.74923156487
Iteration 5, inertia 209703.02658441686
Iteration 6, inertia 209362.03202101542
Iteration 7, inertia 209013.56709803338
Iteration 8, inertia 208954.93374886544
Iteration 9, inertia 208952.94787903185
Converged at iteration 9: center shift 0.0008306717121226467 within tolerance 0.0015358503717230955.
Initialization complete
Iteration 0, inertia 374254.3643704856
Iteration 1, inertia 227441.0113142989
Iteration 2, inertia 210113.7182635559
Iteration 3, inertia 209144.6971247129
Iteration 4, inertia 209037.23080006722
Iteration 5, inertia 208970.75843448716
Iteration 6, inertia 208958.73649992305
Converged at iteration 6: center shift 0.0010520324381042456 within tolerance 0.0015358503717230955.
Initialization complete
Iteration 0, inertia 306059.6127144407
Iteration 1, inertia 230381.17640096927
Iteration 2, inertia 216298.96923962721
Iteration 3, inertia 211115.72555699918
Iteration 4, inertia 209886.61690318544
Iteration 5, inertia 209525.64261090878
Iteration 6, inertia 209040.15129423398
Iteration 7, inertia 208957.87320545508
Iteration 8, inertia 208952.94787903182
Converged at iteration 8: center shift 0.0008306717121226166 within tolerance 0.0015358503717230955.

KMeans(max_iter=500, n_clusters=3, verbose=1)


# Label every row of df_age_groups with the cluster kmeans predicts for it:
age_clusters = kmeans.predict(df_age_groups)


# cluster_sizes (defined earlier in the notebook) is used here as a
# mapping from cluster label to member count; print one line per cluster.
size = cluster_sizes(age_clusters)

for c, n_members in size.items():
    print("Size of Cluster", c, "= ", n_members)

Size of Cluster 0 =  789
Size of Cluster 1 =  731
Size of Cluster 2 =  591


# Render floats with two decimals and thousands separators so the centroid table is easy to scan:
pd.set_option('display.float_format', '{:,.2f}'.format)

# One row per fitted cluster centre, columns named after the clustered frame's features:
centroids = pd.DataFrame(
    data=kmeans.cluster_centers_,
    columns=df_age_groups.columns.to_numpy(),
)
centroids


# Centroid value of the Gen-Z indicator in each cluster:
centroids.loc[:, 'Age_Gen-Z']

0   0.64
1   0.43
2   0.90
Name: Age_Gen-Z, dtype: float64


centroids.loc[:, 'Age_Millenials']  # centroid value of the Millenials indicator in each cluster

0   0.31
1   0.56
2   0.10
Name: Age_Millenials, dtype: float64


centroids.loc[:, 'Age_Gen-X & Boomers']  # centroid value of the Gen-X & Boomers indicator in each cluster

0   0.04
1   0.01
2   0.00
Name: Age_Gen-X & Boomers, dtype: float64


# Silhouette analysis for the 3-cluster solution on the age-binned data:
# per-sample coefficients first, then their mean as a single summary figure.
age_silhouette = silhouette_samples(df_age_groups, age_clusters)
print('Mean Silhouette Value :', np.mean(age_silhouette))

Mean Silhouette Value : 0.5691256560102319


# Draw the silhouette plot for the age-binned cluster assignment:
plot_silhouettes(df_age_groups, age_clusters)


# Compare the cluster assignment against labels_num (treated as the true labels)
# using completeness and homogeneity, then report both scores.
complete = metrics.completeness_score(labels_true=labels_num, labels_pred=age_clusters)
homogene = metrics.homogeneity_score(labels_true=labels_num, labels_pred=age_clusters)
print(f"Completeness Score for Clusters: {complete}")
print(f"Homogeneity Score for Clusters: {homogene}")

Completeness Score for Clusters: 0.7020884578966542
Homogeneity Score for Clusters: 0.39448267211636195


# Normalize the dataset with Min-Max Scaling (each column rescaled to [0, 1]).
# Work on a float copy: pandas >= 2.0 emits boolean dummy columns from
# get_dummies, and subtracting booleans raises a TypeError. astype returns a
# new frame, so df_age_groups itself is left untouched.
df_age_groups_norm = df_age_groups.astype(float)
for column in df_age_groups_norm.columns:
    col_min = df_age_groups_norm[column].min()
    col_range = df_age_groups_norm[column].max() - col_min
    if col_range == 0:
        # A constant column has zero range; dividing would give 0/0 = NaN,
        # which KMeans.fit rejects. Map such a column to 0.0 instead.
        df_age_groups_norm[column] = 0.0
    else:
        df_age_groups_norm[column] = (df_age_groups_norm[column] - col_min) / col_range


# View normalized data:
print(df_age_groups_norm)

      Height  Weight  Gender_Female  Gender_Male  \
0       0.32    0.19           1.00         0.00   
1       0.13    0.13           1.00         0.00   
2       0.66    0.28           0.00         1.00   
3       0.66    0.36           0.00         1.00   
4       0.62    0.38           0.00         1.00   
...      ...     ...            ...          ...   
2106    0.49    0.69           1.00         0.00   
2107    0.56    0.71           1.00         0.00   
2108    0.57    0.71           1.00         0.00   
2109    0.55    0.70           1.00         0.00   
2110    0.54    0.71           1.00         0.00   

      family_history_with_overweight_no  family_history_with_overweight_yes  \
0                                  0.00                                1.00   
1                                  0.00                                1.00   
2                                  0.00                                1.00   
3                                  1.00                                0.00   
4                                  1.00                                0.00   
...                                 ...                                 ...   
2106                               0.00                                1.00   
2107                               0.00                                1.00   
2108                               0.00                                1.00   
2109                               0.00                                1.00   
2110                               0.00                                1.00   

      FAVC_no  FAVC_yes  FCVC_Always  FCVC_Never  FCVC_Sometimes  NCP_1  \
0        1.00      0.00         0.00        0.00            1.00   0.00   
1        1.00      0.00         1.00        0.00            0.00   0.00   
2        1.00      0.00         0.00        0.00            1.00   0.00   
3        1.00      0.00         1.00        0.00            0.00   0.00   
4        1.00      0.00         0.00        0.00            1.00   1.00   
...       ...       ...          ...         ...             ...    ...   
2106     0.00      1.00         1.00        0.00            0.00   0.00   
2107     0.00      1.00         1.00        0.00            0.00   0.00   
2108     0.00      1.00         1.00        0.00            0.00   0.00   
2109     0.00      1.00         1.00        0.00            0.00   0.00   
2110     0.00      1.00         1.00        0.00            0.00   0.00   

      NCP_2  NCP_3  NCP_3+  CAEC_Always  CAEC_Frequently  CAEC_Sometimes  \
0      0.00   1.00    0.00         0.00             0.00            1.00   
1      0.00   1.00    0.00         0.00             0.00            1.00   
2      0.00   1.00    0.00         0.00             0.00            1.00   
3      0.00   1.00    0.00         0.00             0.00            1.00   
4      0.00   0.00    0.00         0.00             0.00            1.00   
...     ...    ...     ...          ...              ...             ...   
2106   0.00   1.00    0.00         0.00             0.00            1.00   
2107   0.00   1.00    0.00         0.00             0.00            1.00   
2108   0.00   1.00    0.00         0.00             0.00            1.00   
2109   0.00   1.00    0.00         0.00             0.00            1.00   
2110   0.00   1.00    0.00         0.00             0.00            1.00   

      CAEC_no  SMOKE_no  SMOKE_yes  CH2O_Between 1 and 2 L  \
0        0.00      1.00       0.00                    1.00   
1        0.00      0.00       1.00                    0.00   
2        0.00      1.00       0.00                    1.00   
3        0.00      1.00       0.00                    1.00   
4        0.00      1.00       0.00                    1.00   
...       ...       ...        ...                     ...   
2106     0.00      1.00       0.00                    0.00   
2107     0.00      1.00       0.00                    1.00   
2108     0.00      1.00       0.00                    1.00   
2109     0.00      1.00       0.00                    1.00   
2110     0.00      1.00       0.00                    1.00   

      CH2O_Less than a liter  CH2O_More than 2 L  SCC_no  SCC_yes  \
0                       0.00                0.00    1.00     0.00   
1                       0.00                1.00    0.00     1.00   
2                       0.00                0.00    1.00     0.00   
3                       0.00                0.00    1.00     0.00   
4                       0.00                0.00    1.00     0.00   
...                      ...                 ...     ...      ...   
2106                    1.00                0.00    1.00     0.00   
2107                    0.00                0.00    1.00     0.00   
2108                    0.00                0.00    1.00     0.00   
2109                    0.00                0.00    1.00     0.00   
2110                    0.00                0.00    1.00     0.00   

      FAF_1 or 2 days  FAF_2 or 4 days  FAF_4 or 5 days  FAF_I do not have  \
0                0.00             0.00             0.00               1.00   
1                0.00             0.00             1.00               0.00   
2                0.00             1.00             0.00               0.00   
3                0.00             1.00             0.00               0.00   
4                0.00             0.00             0.00               1.00   
...               ...              ...              ...                ...   
2106             1.00             0.00             0.00               0.00   
2107             1.00             0.00             0.00               0.00   
2108             1.00             0.00             0.00               0.00   
2109             1.00             0.00             0.00               0.00   
2110             1.00             0.00             0.00               0.00   

      TUE_0-2 Hours  TUE_3-5 Hours  TUE_More than 5 Hours  CALC_Always  \
0              0.00           1.00                   0.00         0.00   
1              1.00           0.00                   0.00         0.00   
2              0.00           1.00                   0.00         0.00   
3              1.00           0.00                   0.00         0.00   
4              1.00           0.00                   0.00         0.00   
...             ...            ...                    ...          ...   
2106           1.00           0.00                   0.00         0.00   
2107           1.00           0.00                   0.00         0.00   
2108           1.00           0.00                   0.00         0.00   
2109           1.00           0.00                   0.00         0.00   
2110           1.00           0.00                   0.00         0.00   

      CALC_Frequently  CALC_Sometimes  CALC_no  MTRANS_Automobile  \
0                0.00            0.00     1.00               0.00   
1                0.00            1.00     0.00               0.00   
2                1.00            0.00     0.00               0.00   
3                1.00            0.00     0.00               0.00   
4                0.00            1.00     0.00               0.00   
...               ...             ...      ...                ...   
2106             0.00            1.00     0.00               0.00   
2107             0.00            1.00     0.00               0.00   
2108             0.00            1.00     0.00               0.00   
2109             0.00            1.00     0.00               0.00   
2110             0.00            1.00     0.00               0.00   

      MTRANS_Bike  MTRANS_Motorbike  MTRANS_Public_Transportation  \
0            0.00              0.00                          1.00   
1            0.00              0.00                          1.00   
2            0.00              0.00                          1.00   
3            0.00              0.00                          0.00   
4            0.00              0.00                          1.00   
...           ...               ...                           ...   
2106         0.00              0.00                          1.00   
2107         0.00              0.00                          1.00   
2108         0.00              0.00                          1.00   
2109         0.00              0.00                          1.00   
2110         0.00              0.00                          1.00   

      MTRANS_Walking  Age_Gen-Z  Age_Millenials  Age_Gen-X & Boomers  
0               0.00       1.00            0.00                 0.00  
1               0.00       1.00            0.00                 0.00  
2               0.00       1.00            0.00                 0.00  
3               1.00       0.00            1.00                 0.00  
4               0.00       1.00            0.00                 0.00  
...              ...        ...             ...                  ...  
2106            0.00       1.00            0.00                 0.00  
2107            0.00       1.00            0.00                 0.00  
2108            0.00       1.00            0.00                 0.00  
2109            0.00       1.00            0.00                 0.00  
2110            0.00       1.00            0.00                 0.00  

[2111 rows x 45 columns]


# K-means with n = 3 on the normalised age-binned data. No random_state is passed,
# so the initialisation is not seeded; verbose=1 makes fit() print the inertia log.
kmeans3 = KMeans(
    n_clusters=3,
    max_iter=500,
    verbose=1,
)


kmeans3.fit(df_age_groups_norm)

Initialization complete
Iteration 0, inertia 15293.30577388341
Iteration 1, inertia 9968.066331837832
Iteration 2, inertia 9920.180131785271
Iteration 3, inertia 9903.77236856074
Iteration 4, inertia 9896.176445738252
Iteration 5, inertia 9891.495588792406
Iteration 6, inertia 9889.624965572522
Iteration 7, inertia 9888.823257389276
Iteration 8, inertia 9888.487377405678
Iteration 9, inertia 9888.340285312179
Iteration 10, inertia 9888.20252958357
Iteration 11, inertia 9887.532993038576
Iteration 12, inertia 9887.504196402364
Converged at iteration 12: strict convergence.
Initialization complete
Iteration 0, inertia 15755.748152264905
Iteration 1, inertia 10291.16329342617
Iteration 2, inertia 10228.905860318164
Iteration 3, inertia 10188.107451127948
Iteration 4, inertia 10144.37857128053
Iteration 5, inertia 10107.001253617971
Iteration 6, inertia 10086.05688167689
Iteration 7, inertia 10077.969028399091
Iteration 8, inertia 10068.115905357135
Iteration 9, inertia 10054.640327576219
Iteration 10, inertia 10036.68992669457
Iteration 11, inertia 10012.308155223136
Iteration 12, inertia 9995.089501496252
Iteration 13, inertia 9979.257157992513
Iteration 14, inertia 9938.759373638944
Iteration 15, inertia 9911.724132573296
Iteration 16, inertia 9891.1788188847
Iteration 17, inertia 9754.311626803314
Iteration 18, inertia 9678.154213662237
Iteration 19, inertia 9667.068511677915
Iteration 20, inertia 9660.214612839805
Iteration 21, inertia 9654.574790207527
Iteration 22, inertia 9645.63794782316
Iteration 23, inertia 9644.899891129677
Iteration 24, inertia 9644.748605586845
Iteration 25, inertia 9644.595919167177
Iteration 26, inertia 9644.549083693555
Iteration 27, inertia 9641.042116346523
Iteration 28, inertia 9638.110099852393
Iteration 29, inertia 9634.979187430297
Iteration 30, inertia 9632.762887140108
Iteration 31, inertia 9632.086194242833
Iteration 32, inertia 9631.65577226331
Iteration 33, inertia 9631.569838198466
Iteration 34, inertia 9631.539746213291
Converged at iteration 34: strict convergence.
Initialization complete
Iteration 0, inertia 14748.903594063975
Iteration 1, inertia 9965.186003070134
Iteration 2, inertia 9825.169923880945
Iteration 3, inertia 9744.272576918649
Iteration 4, inertia 9667.587922972407
Iteration 5, inertia 9644.10573691932
Iteration 6, inertia 9635.267693732794
Iteration 7, inertia 9633.29914147962
Iteration 8, inertia 9632.952141350772
Iteration 9, inertia 9632.87014898023
Converged at iteration 9: strict convergence.
Initialization complete
Iteration 0, inertia 14484.898807356742
Iteration 1, inertia 9955.81303074818
Iteration 2, inertia 9713.282859132194
Iteration 3, inertia 9671.355873128363
Iteration 4, inertia 9650.513068725491
Iteration 5, inertia 9645.317685969698
Iteration 6, inertia 9642.530506536332
Iteration 7, inertia 9641.485885635826
Iteration 8, inertia 9640.28240915911
Iteration 9, inertia 9639.857937703955
Iteration 10, inertia 9639.774644478792
Iteration 11, inertia 9639.697843285941
Iteration 12, inertia 9639.504960365535
Iteration 13, inertia 9638.95769401947
Iteration 14, inertia 9638.573356540428
Iteration 15, inertia 9636.940111858781
Iteration 16, inertia 9635.980505559797
Iteration 17, inertia 9635.293549236569
Iteration 18, inertia 9633.992196284771
Iteration 19, inertia 9631.294896772604
Iteration 20, inertia 9629.157517862288
Iteration 21, inertia 9629.034229492028
Iteration 22, inertia 9628.960271079659
Iteration 23, inertia 9628.812549454027
Iteration 24, inertia 9628.623882692866
Iteration 25, inertia 9628.59305637934
Iteration 26, inertia 9628.524787673914
Iteration 27, inertia 9628.459530177464
Converged at iteration 27: strict convergence.
Initialization complete
Iteration 0, inertia 16152.65491388627
Iteration 1, inertia 10375.533418947318
Iteration 2, inertia 10144.857929200623
Iteration 3, inertia 10053.027516666378
Iteration 4, inertia 10011.007658000435
Iteration 5, inertia 9990.350407887636
Iteration 6, inertia 9980.01909365632
Iteration 7, inertia 9973.147134155095
Iteration 8, inertia 9962.93227967642
Iteration 9, inertia 9950.804233790155
Iteration 10, inertia 9929.106524971192
Iteration 11, inertia 9893.61445739094
Iteration 12, inertia 9871.997645307263
Iteration 13, inertia 9853.145440604721
Iteration 14, inertia 9818.078861860033
Iteration 15, inertia 9757.278886917615
Iteration 16, inertia 9713.510022024357
Iteration 17, inertia 9680.926171314575
Iteration 18, inertia 9674.777601266724
Iteration 19, inertia 9673.122071852886
Iteration 20, inertia 9672.685183118592
Iteration 21, inertia 9672.518137679992
Iteration 22, inertia 9672.500575556094
Converged at iteration 22: strict convergence.
Initialization complete
Iteration 0, inertia 16505.93842126068
Iteration 1, inertia 10174.540781006854
Iteration 2, inertia 9896.176365470483
Iteration 3, inertia 9755.217403382474
Iteration 4, inertia 9710.856879173405
Iteration 5, inertia 9695.291352573073
Iteration 6, inertia 9683.148926900818
Iteration 7, inertia 9667.5491075782
Iteration 8, inertia 9655.586520230188
Iteration 9, inertia 9649.747659712908
Iteration 10, inertia 9645.46312963105
Iteration 11, inertia 9644.40630542299
Iteration 12, inertia 9644.101013829977
Iteration 13, inertia 9643.642758888995
Iteration 14, inertia 9643.008226227485
Iteration 15, inertia 9642.844684153424
Iteration 16, inertia 9642.701135334233
Iteration 17, inertia 9642.57061072763
Iteration 18, inertia 9642.519519077938
Iteration 19, inertia 9642.504477800321
Converged at iteration 19: strict convergence.
Initialization complete
Iteration 0, inertia 17172.960091144152
Iteration 1, inertia 10194.671271050402
Iteration 2, inertia 10127.428286254335
Iteration 3, inertia 10110.648670434139
Iteration 4, inertia 10088.305356307605
Iteration 5, inertia 10056.98392140109
Iteration 6, inertia 10014.54147542604
Iteration 7, inertia 9984.782947200316
Iteration 8, inertia 9948.035333426422
Iteration 9, inertia 9907.185271483926
Iteration 10, inertia 9884.78043245219
Iteration 11, inertia 9863.689071376455
Iteration 12, inertia 9845.824302775172
Iteration 13, inertia 9831.477466918255
Iteration 14, inertia 9805.29705603225
Iteration 15, inertia 9788.493454238513
Iteration 16, inertia 9782.947379376854
Iteration 17, inertia 9781.810910498489
Iteration 18, inertia 9781.446129264701
Iteration 19, inertia 9781.063080143778
Iteration 20, inertia 9780.165908514351
Iteration 21, inertia 9779.376078944326
Iteration 22, inertia 9778.781046860006
Iteration 23, inertia 9777.25564144428
Iteration 24, inertia 9771.593080768085
Iteration 25, inertia 9759.993073682037
Iteration 26, inertia 9741.127172080985
Iteration 27, inertia 9725.59447725809
Iteration 28, inertia 9693.057299937595
Iteration 29, inertia 9674.633488354748
Iteration 30, inertia 9668.883857942214
Iteration 31, inertia 9668.062939210902
Iteration 32, inertia 9667.795522508974
Converged at iteration 32: strict convergence.
Initialization complete
Iteration 0, inertia 16121.151887575434
Iteration 1, inertia 10180.975017955254
Iteration 2, inertia 10012.790506392183
Iteration 3, inertia 9945.149592230093
Iteration 4, inertia 9902.19445401729
Iteration 5, inertia 9854.551471824738
Iteration 6, inertia 9796.629809393482
Iteration 7, inertia 9733.824033247713
Iteration 8, inertia 9709.60496866079
Iteration 9, inertia 9701.710992691354
Iteration 10, inertia 9698.331774194536
Iteration 11, inertia 9696.490391355273
Iteration 12, inertia 9695.20652681061
Iteration 13, inertia 9695.015903631236
Iteration 14, inertia 9694.98725574485
Converged at iteration 14: strict convergence.
Initialization complete
Iteration 0, inertia 15719.84699885392
Iteration 1, inertia 10093.886021577151
Iteration 2, inertia 9883.872126085509
Iteration 3, inertia 9824.269228240664
Iteration 4, inertia 9779.058685068434
Iteration 5, inertia 9725.75686585067
Iteration 6, inertia 9686.098533273536
Iteration 7, inertia 9677.796926090543
Iteration 8, inertia 9674.731965248277
Iteration 9, inertia 9673.142492501147
Iteration 10, inertia 9672.65337922592
Iteration 11, inertia 9672.51584737018
Iteration 12, inertia 9672.498249204844
Converged at iteration 12: strict convergence.
Initialization complete
Iteration 0, inertia 15272.42924127074
Iteration 1, inertia 10146.7544705174
Iteration 2, inertia 10060.416621065493
Iteration 3, inertia 10009.051773331206
Iteration 4, inertia 9916.53892424252
Iteration 5, inertia 9817.043864074043
Iteration 6, inertia 9800.317052776645
Iteration 7, inertia 9790.614521302858
Iteration 8, inertia 9786.58080875628
Iteration 9, inertia 9785.526700118644
Iteration 10, inertia 9783.706006793813
Iteration 11, inertia 9782.330522508842
Iteration 12, inertia 9781.663433153548
Iteration 13, inertia 9781.595470810445
Iteration 14, inertia 9781.360992187556
Iteration 15, inertia 9781.307176755947
Converged at iteration 15: strict convergence.

KMeans(max_iter=500, n_clusters=3, verbose=1)


# Label every row of the normalized age-group data using the fitted kmeans3 model:
clusters_norm3 = kmeans3.predict(df_age_groups_norm)


# Tally the rows per cluster (cluster_sizes is a helper defined elsewhere in this notebook)
# and report one line per cluster:
size3 = cluster_sizes(clusters_norm3)

for cluster_id, n_rows in size3.items():
    print("Size of Cluster", cluster_id, "= ", n_rows)

Size of Cluster 0 =  541
Size of Cluster 1 =  1018
Size of Cluster 2 =  552


# Inspect the centroids: one row per cluster, one column per feature, giving an
# aggregate characterization of each cluster.
pd.set_option('display.float_format', '{:,.2f}'.format)  # two-decimal floats keep the wide table readable

feature_names = df_age_groups_norm.columns.values
centroids3 = pd.DataFrame(kmeans3.cluster_centers_, columns=feature_names)
centroids3


centroids3.loc[:, 'Age_Gen-Z']  # centroid value of the Age_Gen-Z feature for each cluster (normalized data)

0   0.62
1   0.99
2   0.01
Name: Age_Gen-Z, dtype: float64


centroids3.loc[:, 'Age_Millenials']  # centroid value of the Age_Millenials feature for each cluster (normalized data)

0   0.37
1   0.01
2   0.92
Name: Age_Millenials, dtype: float64


centroids3.loc[:, 'Age_Gen-X & Boomers']  # centroid value of the Age_Gen-X & Boomers feature for each cluster (normalized data)

0   0.01
1   0.00
2   0.07
Name: Age_Gen-X & Boomers, dtype: float64


# Silhouette analysis for the n = 3 solution: score every sample individually,
# then report the average over all samples.
age_norm_silhouette = silhouette_samples(df_age_groups_norm, clusters_norm3)
mean_silhouette = np.mean(age_norm_silhouette)
print('Mean Silhouette Value :', mean_silhouette)

Mean Silhouette Value : 0.11905854619225616


# Plot and Evaluate the Silhouettes:
# plot_silhouettes is a helper defined elsewhere in this notebook; it is called with the
# normalized age-group data and the cluster labels predicted by kmeans3.
plot_silhouettes(df_age_groups_norm, clusters_norm3)


# Compare the 3-cluster assignment against labels_num (passed as the reference labels;
# presumably the numerically encoded class labels - confirm where labels_num is built).
# Both scores are computed first and then reported.
complete_norm = completeness_score(labels_num, clusters_norm3)
homogene_norm = homogeneity_score(labels_num, clusters_norm3)

print(f"Completeness Score for Clusters: {complete_norm}")
print(f"Homogeneity Score for Clusters: {homogene_norm}")

Completeness Score for Clusters: 0.3552093808452009
Homogeneity Score for Clusters: 0.19224754943375816


# Create a copy of the data with the Age Groups.
# .copy() is required here: plain assignment would only bind a second name to the same
# DataFrame, so replacing "Age" with the age-group labels would also overwrite the
# Age column of data_numeric.
data_age_groups = data_numeric.copy()
data_age_groups["Age"] = age_df['Age Group']


data_age_groups


# Add the class labels as a column to the dataset:
# The column is written in place on data_age_groups under the name 'NObeyesdad'.
# NOTE(review): pandas aligns the assigned values on the row index - presumably labels_df
# shares data_age_groups' index; confirm where labels_df is created.
data_age_groups['NObeyesdad'] = labels_df
data_age_groups


# Keep only the rows whose age group is Gen-Z:
is_genz = data_age_groups["Age"] == 'Gen-Z'
genz_df = data_age_groups[is_genz]
genz_df


# Write the Gen-Z subset to CSV without the row index:
genz_df.to_csv('/Users/cl/genz_dataframe.csv', index=False)


# Keep only the rows whose age group is Millenials:
is_millenial = data_age_groups["Age"] == 'Millenials'
millen_df = data_age_groups[is_millenial]
millen_df


# Write the Millenials subset to CSV without the row index:
millen_df.to_csv('/Users/cl/millenials_dataframe.csv', index=False)


# Keep only the rows whose age group is Gen-X & Boomers:
is_genx_or_boomer = data_age_groups["Age"] == 'Gen-X & Boomers'
genxboomers_df = data_age_groups[is_genx_or_boomer]
genxboomers_df


# Write the Gen-X and Boomers subset to CSV without the row index:
genxboomers_df.to_csv('/Users/cl/genxboomers_dataframe.csv', index=False)

Preprocessing and Clustering Analysis Exploration: Obesity Level Analysis¶

Clustering Exploration with K-Means:¶

Next, we will create age groups and separate individuals by generation based on their age. Exploring age groups will allow us to re-evaluate the clusters and determine whether a pattern also exists within each age group for classification.¶

Discretize the Age attribute into 4 separate age groups and re-run K-Means Clustering:¶

Gen-Z (1997 – 2012), Age: 9 – 24¶

Millennials (1981 – 1996), Age: 25 – 40¶

Gen-X (1965 – 1980), Age: 41 – 56¶

Boomers (1955 - 1964), Age: 57 - 66¶

Perform K-Means with Normalized Data on Age Groups for Comparison:¶

Save Output of Data-Set (non-normalized) based on Age-Groups for Classifier Use:¶

	Gender	Age	Height	Weight	family_history_with_overweight	FAVC	FCVC	NCP	CAEC	SMOKE	CH2O	SCC	FAF	TUE	CALC	MTRANS	NObeyesdad
0	Female	21.000000	1.620000	64.000000	yes	no	2.0	3.0	Sometimes	no	2.000000	no	0.000000	1.000000	no	Public_Transportation	Normal_Weight
1	Female	21.000000	1.520000	56.000000	yes	no	3.0	3.0	Sometimes	yes	3.000000	yes	3.000000	0.000000	Sometimes	Public_Transportation	Normal_Weight
2	Male	23.000000	1.800000	77.000000	yes	no	2.0	3.0	Sometimes	no	2.000000	no	2.000000	1.000000	Frequently	Public_Transportation	Normal_Weight
3	Male	27.000000	1.800000	87.000000	no	no	3.0	3.0	Sometimes	no	2.000000	no	2.000000	0.000000	Frequently	Walking	Overweight_Level_I
4	Male	22.000000	1.780000	89.800000	no	no	2.0	1.0	Sometimes	no	2.000000	no	0.000000	0.000000	Sometimes	Public_Transportation	Overweight_Level_II
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
2106	Female	20.976842	1.710730	131.408528	yes	yes	3.0	3.0	Sometimes	no	1.728139	no	1.676269	0.906247	Sometimes	Public_Transportation	Obesity_Type_III
2107	Female	21.982942	1.748584	133.742943	yes	yes	3.0	3.0	Sometimes	no	2.005130	no	1.341390	0.599270	Sometimes	Public_Transportation	Obesity_Type_III
2108	Female	22.524036	1.752206	133.689352	yes	yes	3.0	3.0	Sometimes	no	2.054193	no	1.414209	0.646288	Sometimes	Public_Transportation	Obesity_Type_III
2109	Female	24.361936	1.739450	133.346641	yes	yes	3.0	3.0	Sometimes	no	2.852339	no	1.139107	0.586035	Sometimes	Public_Transportation	Obesity_Type_III
2110	Female	23.664709	1.738836	133.472641	yes	yes	3.0	3.0	Sometimes	no	2.863513	no	1.026452	0.714137	Sometimes	Public_Transportation	Obesity_Type_III

	Age	Height	Weight	Gender_Female	Gender_Male	family_history_with_overweight_no	family_history_with_overweight_yes	FAVC_no	FAVC_yes	FCVC_Always	FCVC_Never	FCVC_Sometimes	NCP_1	NCP_2	NCP_3	NCP_3+	CAEC_Always	CAEC_Frequently	CAEC_Sometimes	CAEC_no	SMOKE_no	SMOKE_yes	CH2O_Between 1 and 2 L	CH2O_Less than a liter	CH2O_More than 2 L	SCC_no	SCC_yes	FAF_1 or 2 days	FAF_2 or 4 days	FAF_4 or 5 days	FAF_I do not have	TUE_0-2 Hours	TUE_3-5 Hours	TUE_More than 5 Hours	CALC_Always	CALC_Frequently	CALC_Sometimes	CALC_no	MTRANS_Automobile	MTRANS_Bike	MTRANS_Motorbike	MTRANS_Public_Transportation	MTRANS_Walking
0	0.19	0.43	0.52	1.00	0.00	0.00	1.00	0.05	0.95	0.90	0.06	0.04	0.03	0.01	0.96	0.00	0.01	0.05	0.92	0.02	0.99	0.01	0.65	0.32	0.03	0.98	0.02	0.36	0.03	0.01	0.60	0.90	0.08	0.02	0.00	0.01	0.96	0.03	0.02	-0.00	0.00	0.97	0.01
1	0.37	0.51	0.36	0.33	0.67	0.06	0.94	0.05	0.95	0.13	0.07	0.80	0.18	0.19	0.59	0.05	0.02	0.04	0.94	0.00	0.98	0.02	0.54	0.40	0.06	0.98	0.02	0.27	0.19	0.02	0.52	0.78	0.20	0.01	-0.00	0.06	0.59	0.35	0.99	0.00	0.00	-0.00	0.00
2	0.16	0.41	0.30	0.55	0.45	0.00	1.00	0.16	0.84	0.08	0.06	0.86	0.35	0.20	0.43	0.02	0.02	0.14	0.83	0.01	0.99	0.01	0.53	0.38	0.10	0.95	0.05	0.32	0.17	0.05	0.46	0.46	0.44	0.10	0.00	0.04	0.11	0.85	0.00	0.00	0.00	0.96	0.03
3	0.15	0.37	0.14	0.65	0.35	0.98	0.02	0.29	0.71	0.34	0.11	0.55	0.30	0.07	0.54	0.10	0.05	0.34	0.51	0.10	0.98	0.02	0.39	0.48	0.13	0.86	0.14	0.31	0.21	0.07	0.41	0.53	0.40	0.07	0.00	0.04	0.72	0.24	0.07	0.00	0.01	0.85	0.06
4	0.18	0.63	0.42	0.00	1.00	0.02	0.98	0.05	0.95	0.14	0.17	0.69	0.09	0.19	0.71	0.02	0.02	0.05	0.93	0.01	0.96	0.04	0.66	0.27	0.08	0.98	0.02	0.45	0.12	0.02	0.41	0.67	0.27	0.05	-0.00	0.02	0.97	0.01	-0.00	0.01	0.00	0.95	0.03

	Age	Height	Weight	Gender_Female	Gender_Male	family_history_with_overweight_no	family_history_with_overweight_yes	FAVC_no	FAVC_yes	FCVC_Always	FCVC_Never	FCVC_Sometimes	NCP_1	NCP_2	NCP_3	NCP_3+	CAEC_Always	CAEC_Frequently	CAEC_Sometimes	CAEC_no	SMOKE_no	SMOKE_yes	CH2O_Between 1 and 2 L	CH2O_Less than a liter	CH2O_More than 2 L	SCC_no	SCC_yes	FAF_1 or 2 days	FAF_2 or 4 days	FAF_4 or 5 days	FAF_I do not have	TUE_0-2 Hours	TUE_3-5 Hours	TUE_More than 5 Hours	CALC_Frequently	CALC_Sometimes	CALC_no	MTRANS_Automobile	MTRANS_Bike	MTRANS_Motorbike	MTRANS_Public_Transportation	MTRANS_Walking
0	0.22	0.58	0.39	-0.00	1.00	0.14	0.86	0.09	0.91	0.13	0.11	0.76	0.16	0.18	0.61	0.04	0.03	0.07	0.86	0.03	0.97	0.03	0.61	0.28	0.11	0.98	0.02	0.38	0.17	0.05	0.40	0.63	0.31	0.06	0.04	0.64	0.31	0.28	0.01	0.01	0.68	0.03
1	0.21	0.33	0.19	0.99	0.01	0.34	0.66	0.19	0.81	0.19	0.10	0.71	0.35	0.13	0.48	0.03	0.03	0.23	0.73	0.01	0.98	0.02	0.38	0.55	0.07	0.90	0.10	0.27	0.16	0.03	0.55	0.58	0.36	0.07	0.04	0.50	0.46	0.23	-0.00	0.00	0.74	0.03
2	0.19	0.43	0.52	0.99	0.01	0.04	0.96	0.06	0.94	0.95	0.05	0.01	0.01	0.01	0.97	0.00	0.01	0.04	0.93	0.02	0.99	0.01	0.69	0.29	0.01	0.98	0.02	0.36	0.04	0.01	0.59	0.91	0.08	0.01	0.00	0.97	0.03	0.05	-0.00	0.00	0.94	0.01

	Age Group	Age
0	Gen-Z	21
1	Gen-Z	21
2	Gen-Z	23
3	Millenials	27
4	Gen-Z	22
5	Millenials	29
6	Gen-Z	23
7	Gen-Z	22
8	Gen-Z	24
9	Gen-Z	22

	Age	Height	Weight	Gender_Female	Gender_Male	family_history_with_overweight_no	family_history_with_overweight_yes	FAVC_no	FAVC_yes	FCVC_Always	FCVC_Sometimes	NCP_1	NCP_3	CAEC_Sometimes	SMOKE_no	SMOKE_yes	CH2O_Between 1 and 2 L	CH2O_More than 2 L	SCC_no	SCC_yes	FAF_1 or 2 days	FAF_2 or 4 days	FAF_4 or 5 days	FAF_I do not have	TUE_0-2 Hours	TUE_3-5 Hours	CALC_Frequently	CALC_Sometimes	CALC_no	MTRANS_Automobile	MTRANS_Motorbike	MTRANS_Public_Transportation	MTRANS_Walking
0	Gen-Z	1.62	64.00	1	0	0	1	1	0	0	1	0	1	1	1	0	1	0	1	0	0	0	0	1	0	1	0	0	1	0	0	1	0
1	Gen-Z	1.52	56.00	1	0	0	1	1	0	1	0	0	1	1	0	1	0	1	0	1	0	0	1	0	1	0	0	1	0	0	0	1	0
2	Gen-Z	1.80	77.00	0	1	0	1	1	0	0	1	0	1	1	1	0	1	0	1	0	0	1	0	0	0	1	1	0	0	0	0	1	0
3	Millenials	1.80	87.00	0	1	1	0	1	0	1	0	0	1	1	1	0	1	0	1	0	0	1	0	0	1	0	1	0	0	0	0	0	1
4	Gen-Z	1.78	89.80	0	1	1	0	1	0	0	1	1	0	1	1	0	1	0	1	0	0	0	0	1	1	0	0	1	0	0	0	1	0
5	Millenials	1.62	53.00	0	1	1	0	0	1	0	1	0	1	1	1	0	1	0	1	0	0	0	0	1	1	0	0	1	0	1	0	0	0
6	Gen-Z	1.50	55.00	1	0	0	1	0	1	1	0	0	1	1	1	0	1	0	1	0	1	0	0	0	1	0	0	1	0	0	1	0	0
7	Gen-Z	1.64	53.00	0	1	1	0	1	0	0	1	0	1	1	1	0	1	0	1	0	0	0	1	0	1	0	0	1	0	0	0	1	0
8	Gen-Z	1.78	64.00	0	1	0	1	0	1	1	0	0	1	1	1	0	1	0	1	0	1	0	0	0	0	1	1	0	0	0	0	1	0
9	Gen-Z	1.72	68.00	0	1	0	1	0	1	0	1	0	1	1	1	0	1	0	1	0	1	0	0	0	0	1	0	0	1	0	0	1	0

	Height	Weight	Gender_Female	Gender_Male	family_history_with_overweight_no	family_history_with_overweight_yes	FAVC_no	FAVC_yes	FCVC_Always	FCVC_Never	FCVC_Sometimes	NCP_1	NCP_2	NCP_3	NCP_3+	CAEC_Always	CAEC_Frequently	CAEC_Sometimes	CAEC_no	SMOKE_no	SMOKE_yes	CH2O_Between 1 and 2 L	CH2O_Less than a liter	CH2O_More than 2 L	SCC_no	SCC_yes	FAF_1 or 2 days	FAF_2 or 4 days	FAF_4 or 5 days	FAF_I do not have	TUE_0-2 Hours	TUE_3-5 Hours	TUE_More than 5 Hours	CALC_Frequently	CALC_Sometimes	CALC_no	MTRANS_Automobile	MTRANS_Bike	MTRANS_Motorbike	MTRANS_Public_Transportation	MTRANS_Walking	Age_Gen-Z	Age_Millenials	Age_Gen-X & Boomers
0	1.70	82.18	0.39	0.61	0.13	0.87	0.12	0.88	0.15	0.11	0.75	0.26	0.19	0.52	0.02	0.02	0.07	0.86	0.04	0.98	0.02	0.53	0.35	0.12	0.97	0.03	0.31	0.17	0.05	0.47	0.65	0.29	0.06	0.06	0.56	0.38	0.31	0.00	0.01	0.64	0.03	0.64	0.31	0.04
1	1.74	116.59	0.44	0.56	0.00	1.00	0.01	0.99	0.50	0.07	0.44	0.08	0.13	0.79	0.00	0.01	0.01	0.99	0.00	0.98	0.02	0.62	0.35	0.03	1.00	0.00	0.43	0.03	0.00	0.54	0.83	0.15	0.01	0.01	0.85	0.14	0.16	0.00	0.00	0.83	0.00	0.43	0.56	0.01
2	1.65	55.37	0.69	0.31	0.48	0.52	0.23	0.77	0.29	0.12	0.59	0.21	0.07	0.63	0.09	0.05	0.30	0.62	0.03	0.98	0.02	0.52	0.41	0.07	0.87	0.13	0.28	0.25	0.05	0.42	0.50	0.42	0.09	0.02	0.57	0.41	0.15	0.01	0.01	0.79	0.05	0.90	0.10	0.00

	Age	Height	Weight	Gender_Female	Gender_Male	family_history_with_overweight_no	family_history_with_overweight_yes	FAVC_no	FAVC_yes	FCVC_Always	FCVC_Sometimes	NCP_1	NCP_2	NCP_3	NCP_3+	CAEC_Always	CAEC_Frequently	CAEC_Sometimes	SMOKE_no	SMOKE_yes	CH2O_Between 1 and 2 L	CH2O_Less than a liter	CH2O_More than 2 L	SCC_no	SCC_yes	FAF_1 or 2 days	FAF_2 or 4 days	FAF_4 or 5 days	FAF_I do not have	TUE_0-2 Hours	TUE_3-5 Hours	CALC_Frequently	CALC_Sometimes	CALC_no	MTRANS_Automobile	MTRANS_Bike	MTRANS_Motorbike	MTRANS_Public_Transportation	MTRANS_Walking	NObeyesdad
13	Gen-X & Boomers	1.80	99.00	0	1	1	0	0	1	0	1	0	0	1	0	0	0	1	1	0	1	0	0	1	0	0	1	0	0	0	1	1	0	0	1	0	0	0	0	Obesity_Type_I
21	Gen-X & Boomers	1.69	87.00	1	0	0	1	0	1	1	0	1	0	0	0	0	0	1	0	1	1	0	0	1	0	0	0	0	1	1	0	0	0	1	1	0	0	0	0	Obesity_Type_I
92	Gen-X & Boomers	1.78	84.00	0	1	0	1	1	0	1	0	0	0	0	1	0	1	0	1	0	0	0	1	0	1	0	0	1	0	1	0	1	0	0	0	0	0	0	1	Overweight_Level_I
133	Gen-X & Boomers	1.65	66.00	1	0	1	0	0	1	1	0	0	0	1	0	1	0	0	1	0	1	0	0	1	0	1	0	0	0	0	1	1	0	0	0	0	0	1	0	Normal_Weight
137	Gen-X & Boomers	1.60	80.00	0	1	0	1	1	0	0	1	0	0	1	0	0	0	1	0	1	0	0	1	1	0	0	0	0	1	1	0	0	0	1	0	0	1	0	0	Obesity_Type_I
161	Gen-X & Boomers	1.65	80.00	0	1	1	0	0	1	0	1	0	0	1	0	0	0	1	1	0	1	0	0	1	0	1	0	0	0	1	0	0	0	1	1	0	0	0	0	Overweight_Level_II
169	Gen-X & Boomers	1.63	77.00	1	0	0	1	0	1	0	1	0	0	1	0	0	1	0	1	0	0	1	0	1	0	0	0	0	1	1	0	0	0	1	1	0	0	0	0	Overweight_Level_II
197	Gen-X & Boomers	1.75	118.00	0	1	0	1	0	1	0	1	0	0	1	0	0	0	1	1	0	1	0	0	1	0	0	0	0	1	1	0	0	1	0	0	1	0	0	0	Obesity_Type_II
201	Gen-X & Boomers	1.54	80.00	1	0	0	1	0	1	0	1	0	0	1	0	1	0	0	1	0	0	1	0	1	0	0	0	0	1	1	0	0	1	0	1	0	0	0	0	Obesity_Type_I
232	Gen-X & Boomers	1.59	50.00	1	0	0	1	1	0	1	0	0	0	1	0	0	0	1	0	1	0	0	1	0	1	0	1	0	0	1	0	0	0	1	0	0	0	1	0	Normal_Weight
252	Gen-X & Boomers	1.79	90.00	0	1	0	1	1	0	0	1	0	0	1	0	0	0	1	0	1	1	0	0	1	0	1	0	0	0	1	0	1	0	0	1	0	0	0	0	Overweight_Level_II
358	Gen-X & Boomers	1.75	110.00	0	1	0	1	1	0	0	1	1	0	0	0	0	0	1	1	0	0	1	0	1	0	1	0	0	0	1	0	1	0	0	1	0	0	0	0	Obesity_Type_II
375	Gen-X & Boomers	1.80	92.00	0	1	0	1	0	1	0	1	1	0	0	0	0	1	0	1	0	0	0	1	1	0	1	0	0	0	1	0	0	1	0	1	0	0	0	0	Overweight_Level_II
492	Gen-X & Boomers	1.70	86.00	0	1	1	0	0	1	1	0	0	0	1	0	0	1	0	1	0	0	1	0	1	0	0	0	0	1	1	0	0	0	1	1	0	0	0	0	Overweight_Level_II
751	Gen-X & Boomers	1.72	82.92	1	0	1	0	0	1	0	1	0	0	1	0	0	0	1	1	0	1	0	0	1	0	0	0	1	0	1	0	0	1	0	1	0	0	0	0	Overweight_Level_I
813	Gen-X & Boomers	1.77	75.63	1	0	0	1	0	1	1	0	0	1	0	0	0	0	1	1	0	1	0	0	1	0	0	0	0	1	1	0	0	1	0	1	0	0	0	0	Overweight_Level_I
1013	Gen-X & Boomers	1.77	80.49	0	1	1	0	0	1	0	1	0	0	1	0	0	0	1	1	0	1	0	0	1	0	1	0	0	0	1	0	0	0	1	1	0	0	0	0	Overweight_Level_II
1017	Gen-X & Boomers	1.65	79.17	1	0	0	1	0	1	0	1	0	0	1	0	0	0	1	1	0	0	1	0	1	0	0	0	0	1	0	1	0	0	1	1	0	0	0	0	Overweight_Level_II
1034	Gen-X & Boomers	1.75	82.13	0	1	0	1	0	1	0	1	0	0	1	0	0	0	1	1	0	0	1	0	1	0	0	0	0	1	1	0	0	0	1	1	0	0	0	0	Overweight_Level_II
1062	Gen-X & Boomers	1.73	86.95	1	0	0	1	0	1	0	1	0	1	0	0	0	0	1	1	0	0	1	0	1	0	0	0	0	1	1	0	0	0	1	1	0	0	0	0	Overweight_Level_II
1063	Gen-X & Boomers	1.68	79.67	1	0	0	1	0	1	0	1	0	0	1	0	0	0	1	1	0	0	1	0	1	0	0	0	0	1	1	0	0	0	1	1	0	0	0	0	Overweight_Level_II
1088	Gen-X & Boomers	1.66	80.99	0	1	0	1	0	1	0	1	0	0	1	0	0	0	1	1	0	1	0	0	1	0	1	0	0	0	1	0	0	0	1	1	0	0	0	0	Overweight_Level_II
1101	Gen-X & Boomers	1.72	88.60	0	1	0	1	0	1	0	1	0	0	1	0	0	0	1	1	0	0	1	0	1	0	0	0	0	1	1	0	0	0	1	1	0	0	0	0	Overweight_Level_II
1158	Gen-X & Boomers	1.67	80.40	0	1	0	1	0	1	0	1	0	0	1	0	0	0	1	1	0	1	0	0	1	0	1	0	0	0	1	0	0	0	1	1	0	0	0	0	Overweight_Level_II
1162	Gen-X & Boomers	1.68	79.85	1	0	0	1	0	1	0	1	0	0	1	0	0	0	1	1	0	0	1	0	1	0	0	0	0	1	0	1	0	0	1	1	0	0	0	0	Overweight_Level_II
1179	Gen-X & Boomers	1.74	84.73	0	1	0	1	0	1	0	1	0	0	1	0	0	0	1	1	0	0	1	0	1	0	0	0	0	1	1	0	0	0	1	1	0	0	0	0	Overweight_Level_II
1208	Gen-X & Boomers	1.69	80.41	1	0	0	1	0	1	0	1	0	0	1	0	0	0	1	1	0	0	1	0	1	0	0	0	0	1	1	0	0	0	1	1	0	0	0	0	Overweight_Level_II
1215	Gen-X & Boomers	1.57	81.83	1	0	0	1	0	1	0	1	1	0	0	0	0	0	1	1	0	1	0	0	1	0	1	0	0	0	1	0	0	0	1	1	0	0	0	0	Obesity_Type_I
1216	Gen-X & Boomers	1.58	81.94	1	0	0	1	0	1	0	1	0	1	0	0	0	0	1	1	0	0	1	0	1	0	0	0	0	1	1	0	0	0	1	1	0	0	0	0	Obesity_Type_I
1267	Gen-X & Boomers	1.59	76.13	1	0	0	1	0	1	0	1	0	0	1	0	0	0	1	1	0	1	0	0	1	0	0	0	0	1	1	0	0	1	0	1	0	0	0	0	Obesity_Type_I
1285	Gen-X & Boomers	1.65	86.64	1	0	0	1	0	1	0	1	0	1	0	0	0	0	1	1	0	0	1	0	1	0	0	0	0	1	1	0	0	0	1	1	0	0	0	0	Obesity_Type_I
1286	Gen-X & Boomers	1.64	81.98	1	0	0	1	0	1	0	1	1	0	0	0	0	0	1	1	0	0	1	0	1	0	0	0	0	1	1	0	0	0	1	1	0	0	0	0	Obesity_Type_I
1305	Gen-X & Boomers	1.60	77.35	1	0	0	1	0	1	0	1	0	0	1	0	0	0	1	1	0	1	0	0	1	0	0	0	0	1	1	0	0	0	1	1	0	0	0	0	Obesity_Type_I
1325	Gen-X & Boomers	1.57	81.06	1	0	0	1	0	1	0	1	0	1	0	0	0	0	1	1	0	0	1	0	1	0	0	0	0	1	1	0	0	1	0	1	0	0	0	0	Obesity_Type_I
1385	Gen-X & Boomers	1.57	81.92	1	0	0	1	0	1	0	1	1	0	0	0	0	0	1	1	0	0	1	0	1	0	1	0	0	0	1	0	0	0	1	1	0	0	0	0	Obesity_Type_I
1386	Gen-X & Boomers	1.58	80.99	1	0	0	1	0	1	0	1	0	1	0	0	0	0	1	1	0	1	0	0	1	0	0	0	0	1	1	0	0	0	1	1	0	0	0	0	Obesity_Type_I
1387	Gen-X & Boomers	1.58	81.92	1	0	0	1	0	1	0	1	1	0	0	0	0	0	1	1	0	0	1	0	1	0	0	0	0	1	1	0	0	0	1	1	0	0	0	0	Obesity_Type_I
1489	Gen-X & Boomers	1.54	77.05	1	0	0	1	0	1	0	1	0	0	1	0	0	0	1	1	0	1	0	0	1	0	0	0	0	1	1	0	0	1	0	1	0	0	0	0	Obesity_Type_I
1490	Gen-X & Boomers	1.59	77.00	1	0	0	1	0	1	0	1	0	0	1	0	0	0	1	1	0	1	0	0	1	0	0	0	0	1	1	0	0	1	0	1	0	0	0	0	Obesity_Type_I
1529	Gen-X & Boomers	1.75	116.59	0	1	0	1	0	1	0	1	0	1	0	0	0	0	1	1	0	0	1	0	1	0	0	0	0	1	1	0	0	1	0	1	0	0	0	0	Obesity_Type_II
1618	Gen-X & Boomers	1.75	115.81	0	1	0	1	0	1	0	1	0	1	0	0	0	0	1	1	0	0	1	0	1	0	0	0	0	1	1	0	0	0	1	1	0	0	0	0	Obesity_Type_II