import numpy as np
import pylab as pl
import pandas as pd
import importlib
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import model_selection
from sklearn import tree
from sklearn import feature_selection
from sklearn import preprocessing
from sklearn import metrics


%pwd

'/Users/cl'


# Directory holding every CSV used in this notebook. It is defined once so the
# analysis can be pointed at another location by editing a single line.
DATA_DIR = '/Users/cl'

# Original dataset; its 'NObeyesdad' column is used below as the class label.
df = pd.read_csv(DATA_DIR + '/ObesityDataset.csv', header=0)
# Transformed dataset with numeric values only.
data_numeric = pd.read_csv(DATA_DIR + '/Obesity_numeric.csv', header=0)
# Gen-Z subset.
genz_df = pd.read_csv(DATA_DIR + '/genz_dataframe.csv', header=0)
# Millennials subset.
millen_df = pd.read_csv(DATA_DIR + '/millenials_dataframe.csv', header=0)
# Gen-X and Boomers subset.
genxboomers_df = pd.read_csv(DATA_DIR + '/genxboomers_dataframe.csv', header=0)


# Take the class-label column ('NObeyesdad') from the original dataset.
# The bare name on the last line displays it as the notebook cell output.
labels_df =  df['NObeyesdad']
labels_df

0             Normal_Weight
1             Normal_Weight
2             Normal_Weight
3        Overweight_Level_I
4       Overweight_Level_II
               ...         
2106       Obesity_Type_III
2107       Obesity_Type_III
2108       Obesity_Type_III
2109       Obesity_Type_III
2110       Obesity_Type_III
Name: NObeyesdad, Length: 2111, dtype: object


# Encode the string class labels as integer codes; `le` is kept so the codes
# can be mapped back to class names afterwards.
le = preprocessing.LabelEncoder()
labels_num = le.fit_transform(labels_df)
labels_num

array([1, 1, 1, ..., 4, 4, 4])


# Map every numeric code to its class name.
# The code of a class is its position in `le.classes_`, so enumerating that
# array yields the same mapping without encoding the classes a second time,
# and the keys are plain Python ints rather than numpy scalars.
label_names = dict(enumerate(le.classes_))
print(label_names)

{0: 'Insufficient_Weight', 1: 'Normal_Weight', 2: 'Obesity_Type_I', 3: 'Obesity_Type_II', 4: 'Obesity_Type_III', 5: 'Overweight_Level_I', 6: 'Overweight_Level_II'}


# Display the numeric feature table.
data_numeric


# Hold out 20% of the rows for testing; the fixed seed makes the split
# repeatable from run to run.
x_train, x_test, label_train, label_test = train_test_split(
    data_numeric,
    labels_num,
    test_size=0.2,
    random_state=1,
)


# Display the training features.
x_train


# Display the test features.
x_test


# Display the training labels.
label_train

array([1, 6, 4, ..., 6, 1, 6])


# Display the encoded labels of the held-out test rows.
label_test

array([0, 1, 5, 4, 3, 4, 2, 4, 2, 2, 1, 3, 2, 2, 5, 2, 5, 3, 0, 6, 2, 2,
       6, 2, 2, 2, 2, 5, 6, 3, 2, 4, 6, 5, 0, 2, 0, 0, 6, 6, 1, 2, 5, 1,
       0, 4, 0, 0, 5, 2, 4, 2, 0, 5, 4, 2, 0, 2, 0, 1, 0, 3, 4, 6, 1, 5,
       4, 2, 6, 3, 2, 0, 4, 4, 3, 3, 0, 6, 3, 4, 5, 5, 4, 2, 0, 6, 1, 4,
       1, 4, 6, 6, 4, 5, 0, 3, 0, 5, 4, 4, 0, 2, 5, 1, 4, 6, 1, 3, 2, 6,
       2, 1, 0, 0, 6, 6, 4, 6, 0, 0, 2, 2, 2, 2, 4, 4, 5, 3, 4, 5, 1, 5,
       2, 2, 6, 2, 1, 4, 6, 3, 3, 0, 6, 6, 0, 6, 6, 5, 4, 4, 2, 2, 0, 6,
       5, 2, 4, 0, 6, 3, 2, 4, 1, 3, 4, 1, 5, 0, 6, 0, 5, 4, 5, 5, 4, 3,
       6, 3, 2, 2, 5, 5, 6, 1, 6, 3, 3, 2, 4, 3, 1, 2, 3, 1, 2, 2, 4, 2,
       0, 2, 6, 2, 5, 5, 1, 2, 0, 0, 2, 3, 6, 5, 5, 3, 4, 1, 2, 0, 1, 5,
       1, 5, 2, 5, 3, 6, 4, 4, 5, 0, 3, 5, 4, 6, 5, 1, 1, 2, 4, 3, 2, 0,
       6, 6, 3, 0, 4, 0, 5, 0, 2, 5, 6, 5, 2, 5, 6, 3, 3, 0, 3, 5, 2, 4,
       2, 4, 5, 4, 4, 4, 0, 0, 2, 3, 1, 0, 0, 1, 1, 2, 3, 5, 6, 2, 2, 1,
       6, 5, 6, 1, 0, 3, 2, 3, 3, 2, 6, 0, 0, 0, 2, 6, 6, 5, 3, 5, 1, 0,
       6, 4, 4, 5, 5, 2, 4, 5, 3, 5, 5, 3, 1, 0, 6, 6, 3, 3, 2, 1, 1, 3,
       5, 0, 5, 1, 3, 5, 4, 0, 5, 1, 1, 4, 3, 6, 6, 4, 5, 4, 6, 3, 5, 1,
       2, 6, 0, 4, 2, 6, 2, 4, 6, 0, 5, 2, 6, 5, 5, 0, 4, 4, 5, 6, 5, 3,
       0, 0, 4, 4, 1, 0, 3, 6, 4, 0, 1, 2, 3, 4, 2, 3, 2, 0, 6, 2, 2, 3,
       3, 2, 2, 2, 4, 6, 1, 0, 4, 0, 4, 6, 1, 3, 1, 1, 2, 1, 4, 2, 3, 2,
       6, 1, 6, 0, 3])


# Train a decision tree classifier (default hyper-parameters) on the training data.
# NOTE(review): scikit-learn's fit() returns the estimator it was called on, so
# `dt_all` and `d_tree` are the same object; any later `d_tree.fit(...)` also
# changes what `dt_all` predicts.
d_tree = tree.DecisionTreeClassifier()
dt_all = d_tree.fit(x_train, label_train)


# Helper that prints evaluation metrics for a fitted classifier.
def measure_performance(X, y, clf, show_accuracy=True, show_classification_report=True, show_confussion_matrix=True):
    """Predict on ``X`` with ``clf`` and print the requested metrics against ``y``.

    Parameters:
        X: samples handed to ``clf.predict``.
        y: true labels the predictions are compared with.
        clf: object exposing ``predict(X)``.
        show_accuracy: print the accuracy score with three decimals.
        show_classification_report: print the classification report
            (``zero_division=0`` is passed to the report).
        show_confussion_matrix: print the confusion matrix.

    Returns:
        None; all results are written with ``print``.
    """
    predictions = clf.predict(X)

    if show_accuracy:
        accuracy = metrics.accuracy_score(y, predictions)
        print("Accuracy:{0:.3f}".format(accuracy), "\n")

    if show_classification_report:
        print("Classification report")
        report = metrics.classification_report(y, predictions, zero_division=0)
        print(report, "\n")

    if show_confussion_matrix:
        print("Confussion matrix")
        matrix = metrics.confusion_matrix(y, predictions)
        print(matrix, "\n")


# Score the full-feature tree on the held-out test set: accuracy,
# per-class report, and confusion matrix.
measure_performance(
    x_test,
    label_test,
    dt_all,
    show_classification_report=True,
    show_confussion_matrix=True,
)

Accuracy:0.941 

Classification report
              precision    recall  f1-score   support

           0       0.97      0.98      0.98        61
           1       0.91      0.89      0.90        45
           2       0.89      0.95      0.92        79
           3       0.95      0.96      0.95        54
           4       1.00      1.00      1.00        63
           5       0.95      0.90      0.92        61
           6       0.93      0.88      0.91        60

    accuracy                           0.94       423
   macro avg       0.94      0.94      0.94       423
weighted avg       0.94      0.94      0.94       423
 

Confussion matrix
[[60  1  0  0  0  0  0]
 [ 2 40  0  0  0  3  0]
 [ 0  0 75  3  0  0  1]
 [ 0  0  2 52  0  0  0]
 [ 0  0  0  0 63  0  0]
 [ 0  3  0  0  0 55  3]
 [ 0  0  7  0  0  0 53]]


# Compare accuracy on unseen rows with accuracy on the rows the tree was
# trained on; a large gap between the two suggests overfitting.
print('Average Test Accuracy: ', d_tree.score(X=x_test, y=label_test))
print('Average Train Accuracy: ', d_tree.score(X=x_train, y=label_train))

Average Test Accuracy:  0.9408983451536643
Average Train Accuracy:  1.0


# Keep the 15% of features with the highest chi-squared score against the
# class label. The selector is fitted on the training split only.
# NOTE(review): the printed scores for Age/Weight are far larger than those of
# the 0/1 indicator columns — confirm chi2 is appropriate for unscaled
# continuous features.
fs = feature_selection.SelectPercentile(
    score_func=feature_selection.chi2,
    percentile=15,
)
x_train_fs = fs.fit_transform(x_train, label_train)


# Show the names of the features the selector kept.
print(data_numeric.columns[fs.get_support()].values)

['Age' 'Weight' 'Gender_Female' 'Gender_Male'
 'family_history_with_overweight_no' 'FCVC_Always' 'CAEC_Frequently']


# Print the chi-squared score of every selected feature.
# Names, support mask, and scores are walked in lockstep, so the mask is
# computed once instead of on every iteration.
for feature_name, is_selected, score in zip(data_numeric.columns.values, fs.get_support(), fs.scores_):
    if is_selected:
        print(feature_name, '\t\t\t\t', score)

Age 				 470.510134679508
Weight 				 11390.601482312912
Gender_Female 				 274.57777589368993
Gender_Male 				 262.4874450895646
family_history_with_overweight_no 				 405.00183379903723
FCVC_Always 				 542.9949158091111
CAEC_Frequently 				 348.88961093191773


# Evaluate a tree trained on the selected (top 15%) features only.
# A fresh estimator is bound to `d_tree` before fitting: refitting the existing
# object in place would also overwrite any model still bound to it (such as
# the full-feature tree), because fit() returns the estimator itself.
d_tree = tree.DecisionTreeClassifier()
d_tree.fit(x_train_fs, label_train)
# Reduce the test set to the same columns the selector chose on the training set.
x_test_fs = fs.transform(x_test)
measure_performance(x_test_fs, label_test, d_tree, show_confussion_matrix=True, show_classification_report=True)

Accuracy:0.863 

Classification report
              precision    recall  f1-score   support

           0       0.95      0.93      0.94        61
           1       0.71      0.78      0.74        45
           2       0.84      0.84      0.84        79
           3       0.91      0.98      0.95        54
           4       1.00      1.00      1.00        63
           5       0.81      0.79      0.80        61
           6       0.78      0.72      0.75        60

    accuracy                           0.86       423
   macro avg       0.86      0.86      0.86       423
weighted avg       0.86      0.86      0.86       423
 

Confussion matrix
[[57  4  0  0  0  0  0]
 [ 3 35  0  0  0  6  1]
 [ 0  0 66  5  0  1  7]
 [ 0  0  1 53  0  0  0]
 [ 0  0  0  0 63  0  0]
 [ 0  9  0  0  0 48  4]
 [ 0  1 12  0  0  4 43]]


# Display the Gen-Z dataset.
genz_df


# Keep the columns at positions 1 through 42 as the Gen-Z feature table.
# Presumably position 0 is the age column and the class label sits at
# position 43 or later — TODO confirm against the CSV layout.
data_genz = genz_df.iloc[:,1:43]
data_genz


# Take the class-label column for Gen-Z and display it.
labels_genz =  genz_df['NObeyesdad']
labels_genz

0             Normal_Weight
1             Normal_Weight
2             Normal_Weight
3       Overweight_Level_II
4             Normal_Weight
               ...         
1348       Obesity_Type_III
1349       Obesity_Type_III
1350       Obesity_Type_III
1351       Obesity_Type_III
1352       Obesity_Type_III
Name: NObeyesdad, Length: 1353, dtype: object


# Encode the Gen-Z class labels as integer codes with a dedicated encoder.
le_z = preprocessing.LabelEncoder()
genz_labels = le_z.fit_transform(labels_genz)
genz_labels

array([1, 1, 1, ..., 4, 4, 4])


# Hold out 20% of the Gen-Z rows for testing (fixed seed for a repeatable split).
genz_train, genz_test, genz_label_train, genz_label_test = train_test_split(
    data_genz,
    genz_labels,
    test_size=0.2,
    random_state=1,
)


# Display the Gen-Z training features.
genz_train


# Display the Gen-Z test features.
genz_test


# Display the Gen-Z training labels.
genz_label_train

array([4, 0, 1, ..., 2, 1, 2])


# Display the encoded labels of the Gen-Z test rows.
genz_label_test

array([1, 0, 2, 5, 6, 1, 6, 5, 6, 0, 5, 5, 2, 1, 2, 0, 0, 2, 1, 1, 4, 3,
       5, 0, 1, 0, 1, 1, 6, 5, 4, 5, 5, 5, 2, 1, 2, 5, 5, 2, 5, 6, 1, 2,
       1, 3, 2, 6, 2, 2, 1, 3, 0, 0, 2, 6, 6, 4, 0, 5, 5, 1, 2, 2, 5, 6,
       2, 4, 0, 6, 5, 5, 4, 2, 2, 4, 4, 0, 3, 1, 0, 5, 6, 2, 1, 6, 2, 0,
       5, 0, 1, 0, 4, 5, 3, 3, 0, 5, 3, 4, 1, 0, 6, 1, 6, 3, 4, 2, 2, 2,
       1, 1, 3, 2, 0, 2, 1, 2, 4, 1, 2, 2, 5, 0, 5, 6, 5, 4, 0, 5, 0, 0,
       5, 0, 3, 2, 3, 0, 2, 5, 6, 0, 1, 6, 6, 1, 2, 4, 6, 6, 0, 2, 5, 1,
       4, 1, 0, 2, 1, 2, 3, 5, 3, 0, 1, 2, 1, 4, 2, 4, 0, 0, 2, 2, 5, 5,
       5, 1, 5, 3, 2, 0, 1, 0, 1, 2, 5, 2, 1, 6, 5, 6, 5, 1, 5, 4, 0, 2,
       1, 0, 5, 5, 5, 5, 6, 0, 2, 6, 5, 2, 5, 2, 0, 3, 5, 1, 0, 6, 0, 2,
       0, 2, 2, 5, 2, 5, 3, 0, 6, 4, 5, 5, 5, 1, 5, 4, 1, 6, 5, 4, 1, 1,
       2, 0, 3, 2, 1, 0, 5, 1, 1, 0, 6, 3, 5, 0, 5, 1, 2, 1, 0, 5, 6, 1,
       0, 0, 1, 0, 0, 1, 4])


# Train a decision tree on the Gen-Z training data.
# A new estimator is created for this cohort: fit() returns the estimator it
# was called on, so refitting the previous `d_tree` object would silently
# overwrite every earlier model still bound to that object.
d_tree = tree.DecisionTreeClassifier()
dt_genz = d_tree.fit(genz_train, genz_label_train)


# Score the Gen-Z tree on its held-out test set: accuracy, per-class report,
# and confusion matrix.
measure_performance(genz_test, genz_label_test, dt_genz, show_confussion_matrix=True, show_classification_report=True)

Accuracy:0.919 

Classification report
              precision    recall  f1-score   support

           0       0.96      1.00      0.98        49
           1       0.93      0.83      0.88        48
           2       0.91      1.00      0.95        51
           3       1.00      0.72      0.84        18
           4       0.87      0.95      0.91        21
           5       0.92      0.89      0.91        55
           6       0.84      0.93      0.89        29

    accuracy                           0.92       271
   macro avg       0.92      0.90      0.91       271
weighted avg       0.92      0.92      0.92       271
 

Confussion matrix
[[49  0  0  0  0  0  0]
 [ 2 40  0  0  0  3  3]
 [ 0  0 51  0  0  0  0]
 [ 0  0  2 13  3  0  0]
 [ 0  0  1  0 20  0  0]
 [ 0  3  1  0  0 49  2]
 [ 0  0  1  0  0  1 27]]


# Compare Gen-Z accuracy on unseen rows with accuracy on the training rows.
print('Average Test Accuracy: ', d_tree.score(X=genz_test, y=genz_label_test))
print('Average Train Accuracy: ', d_tree.score(X=genz_train, y=genz_label_train))


# Keep the 15% of Gen-Z features with the highest chi-squared score,
# fitting the selector on the Gen-Z training split only.
fs_genz = feature_selection.SelectPercentile(
    score_func=feature_selection.chi2,
    percentile=15,
)
genz_train_fs = fs_genz.fit_transform(genz_train, genz_label_train)


# Show the names of the features kept for Gen-Z.
print(data_genz.columns[fs_genz.get_support()].values)

['Weight' 'Gender_Male' 'family_history_with_overweight_no' 'FAVC_no'
 'FCVC_Always' 'NCP_2' 'CAEC_Frequently']


# Print the chi-squared score of every feature selected for Gen-Z.
# Names, support mask, and scores are walked in lockstep, so the mask is
# computed once instead of on every iteration.
for feature_name, is_selected, score in zip(data_genz.columns.values, fs_genz.get_support(), fs_genz.scores_):
    if is_selected:
        print(feature_name, '\t\t\t\t', score)

Weight 				 9715.93028639861
Gender_Male 				 119.37576369900033
family_history_with_overweight_no 				 230.20882848759723
FAVC_no 				 135.28900924705363
FCVC_Always 				 292.28229522019336
NCP_2 				 167.46598969723757
CAEC_Frequently 				 202.1009043591279


# Evaluate a tree trained on the selected (top 15%) Gen-Z features only.
# A fresh estimator is bound to `d_tree` before fitting: refitting the existing
# object in place would also overwrite any model still bound to it (such as
# the full-feature Gen-Z tree), because fit() returns the estimator itself.
d_tree = tree.DecisionTreeClassifier()
d_tree.fit(genz_train_fs, genz_label_train)
# Reduce the test set to the same columns the selector chose on the training set.
genz_test_fs = fs_genz.transform(genz_test)
measure_performance(genz_test_fs, genz_label_test, d_tree, show_confussion_matrix=True, show_classification_report=True)

Accuracy:0.808 

Classification report
              precision    recall  f1-score   support

           0       0.91      0.88      0.90        49
           1       0.76      0.77      0.76        48
           2       0.86      0.86      0.86        51
           3       0.80      0.67      0.73        18
           4       0.95      1.00      0.98        21
           5       0.79      0.82      0.80        55
           6       0.57      0.59      0.58        29

    accuracy                           0.81       271
   macro avg       0.81      0.80      0.80       271
weighted avg       0.81      0.81      0.81       271
 

Confussion matrix
[[43  6  0  0  0  0  0]
 [ 4 37  0  0  0  3  4]
 [ 0  0 44  3  0  1  3]
 [ 0  0  5 12  1  0  0]
 [ 0  0  0  0 21  0  0]
 [ 0  4  0  0  0 45  6]
 [ 0  2  2  0  0  8 17]]


# Display the Millennials dataset.
millen_df


# Keep the columns at positions 1 through 42 as the Millennials feature table.
# Presumably position 0 is the age column and the class label sits at
# position 43 or later — TODO confirm against the CSV layout.
data_millen = millen_df.iloc[:,1:43]
data_millen


# Take the class-label column for Millennials and display it.
labels_millen =  millen_df['NObeyesdad']
labels_millen

0       Overweight_Level_I
1            Normal_Weight
2           Obesity_Type_I
3      Overweight_Level_II
4           Obesity_Type_I
              ...         
712       Obesity_Type_III
713       Obesity_Type_III
714       Obesity_Type_III
715       Obesity_Type_III
716       Obesity_Type_III
Name: NObeyesdad, Length: 717, dtype: object


# Encode the Millennials class labels as integer codes with a dedicated encoder.
le_m = preprocessing.LabelEncoder()
millen_labels = le_m.fit_transform(labels_millen)
millen_labels

array([5, 1, 2, 6, 2, 6, 5, 6, 6, 3, 6, 6, 6, 3, 1, 2, 1, 1, 6, 2, 1, 1,
       1, 1, 5, 2, 1, 2, 1, 1, 5, 6, 1, 1, 5, 6, 1, 6, 3, 1, 1, 1, 5, 2,
       2, 2, 1, 5, 4, 2, 1, 1, 3, 6, 3, 6, 6, 1, 5, 0, 6, 1, 1, 1, 5, 1,
       1, 1, 1, 1, 5, 1, 5, 1, 1, 5, 1, 2, 5, 6, 5, 6, 1, 6, 1, 2, 1, 1,
       6, 4, 1, 1, 1, 1, 6, 2, 5, 2, 6, 5, 1, 1, 1, 6, 6, 1, 4, 4, 4, 4,
       0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6,
       6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
       6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
       6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
       6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4])


# Hold out 20% of the Millennials rows for testing (fixed seed for a
# repeatable split).
mi_train, mi_test, mi_label_train, mi_label_test = train_test_split(
    data_millen,
    millen_labels,
    test_size=0.2,
    random_state=1,
)


# Display the Millennials training features.
mi_train


# Display the Millennials test features.
mi_test


# Display the Millennials training labels.
mi_label_train

array([4, 3, 4, 4, 3, 5, 1, 3, 2, 4, 4, 1, 4, 4, 6, 6, 3, 2, 3, 6, 3, 6,
       6, 3, 3, 5, 3, 3, 2, 4, 2, 6, 5, 2, 6, 2, 6, 4, 3, 6, 4, 3, 5, 2,
       4, 2, 4, 3, 3, 2, 6, 4, 1, 3, 4, 4, 3, 5, 3, 3, 2, 3, 3, 3, 2, 1,
       4, 4, 1, 3, 2, 3, 3, 3, 4, 4, 3, 4, 5, 3, 4, 4, 4, 6, 3, 5, 4, 6,
       4, 5, 4, 5, 3, 4, 4, 3, 6, 6, 2, 3, 3, 4, 2, 4, 6, 3, 3, 3, 3, 3,
       3, 2, 6, 6, 4, 3, 3, 3, 3, 3, 4, 3, 2, 5, 1, 5, 1, 2, 1, 4, 5, 3,
       6, 2, 3, 4, 2, 4, 3, 6, 4, 4, 2, 2, 4, 3, 6, 4, 5, 3, 5, 5, 4, 4,
       6, 3, 3, 2, 6, 4, 3, 3, 3, 3, 3, 5, 4, 3, 4, 3, 2, 3, 1, 3, 4, 3,
       3, 2, 3, 5, 6, 0, 3, 4, 2, 3, 5, 4, 5, 2, 4, 6, 3, 5, 6, 4, 4, 6,
       4, 2, 3, 2, 3, 6, 1, 4, 6, 6, 6, 6, 2, 6, 4, 3, 2, 2, 4, 3, 4, 3,
       1, 6, 6, 2, 2, 3, 4, 5, 3, 4, 3, 1, 4, 3, 3, 6, 6, 6, 6, 3, 4, 4,
       3, 2, 6, 5, 4, 5, 4, 4, 4, 6, 6, 3, 2, 4, 5, 3, 4, 6, 3, 4, 3, 5,
       2, 6, 5, 4, 1, 3, 2, 4, 2, 1, 6, 3, 6, 3, 4, 4, 3, 4, 1, 4, 3, 4,
       5, 3, 4, 3, 2, 3, 6, 4, 3, 3, 4, 3, 3, 6, 0, 3, 4, 2, 3, 5, 3, 1,
       4, 3, 3, 6, 2, 3, 5, 3, 6, 4, 6, 2, 3, 3, 5, 5, 4, 1, 6, 0, 2, 2,
       2, 2, 6, 6, 1, 2, 3, 4, 3, 6, 6, 5, 2, 6, 3, 3, 2, 5, 5, 3, 5, 5,
       3, 6, 6, 2, 2, 5, 5, 6, 6, 6, 6, 2, 4, 4, 4, 3, 6, 4, 4, 6, 3, 1,
       2, 3, 4, 6, 3, 4, 4, 6, 3, 3, 2, 2, 0, 5, 6, 6, 3, 1, 4, 5, 2, 6,
       1, 2, 6, 4, 4, 4, 4, 3, 3, 3, 1, 3, 3, 4, 2, 3, 6, 4, 3, 3, 4, 2,
       4, 6, 4, 2, 1, 3, 2, 6, 3, 5, 3, 6, 4, 4, 4, 3, 4, 6, 3, 4, 2, 6,
       4, 4, 3, 2, 1, 6, 3, 4, 4, 6, 2, 6, 3, 4, 2, 3, 5, 2, 4, 3, 4, 6,
       5, 3, 3, 5, 4, 6, 2, 5, 6, 4, 4, 3, 4, 3, 4, 2, 5, 2, 2, 5, 4, 3,
       4, 3, 4, 3, 6, 3, 2, 5, 2, 6, 4, 2, 6, 1, 4, 3, 2, 5, 4, 4, 5, 3,
       3, 2, 5, 1, 4, 2, 3, 3, 2, 6, 5, 2, 3, 2, 4, 2, 1, 4, 2, 3, 3, 4,
       4, 3, 3, 1, 3, 3, 4, 3, 4, 4, 2, 6, 2, 3, 2, 3, 2, 6, 3, 5, 1, 3,
       3, 6, 4, 4, 3, 4, 3, 6, 3, 3, 6, 2, 6, 2, 3, 3, 4, 5, 5, 4, 5, 6,
       6])


# Display the encoded labels of the Millennials test rows.
mi_label_test

array([0, 6, 3, 5, 6, 1, 2, 6, 4, 6, 3, 3, 3, 3, 2, 2, 5, 6, 4, 3, 1, 4,
       3, 3, 5, 6, 4, 2, 3, 6, 1, 1, 4, 3, 6, 3, 3, 3, 4, 3, 3, 3, 4, 3,
       3, 4, 4, 6, 6, 3, 4, 3, 3, 5, 6, 4, 2, 5, 2, 4, 3, 3, 6, 2, 2, 4,
       2, 3, 1, 3, 4, 4, 2, 1, 3, 3, 2, 2, 2, 4, 6, 6, 2, 4, 3, 1, 1, 3,
       3, 4, 4, 5, 4, 6, 1, 3, 6, 1, 3, 6, 2, 4, 6, 4, 3, 3, 2, 1, 5, 6,
       2, 4, 3, 3, 5, 6, 4, 0, 5, 3, 1, 3, 4, 4, 5, 4, 1, 3, 4, 5, 3, 3,
       4, 6, 1, 3, 1, 1, 2, 4, 5, 2, 3, 5])


# Train a decision tree on the Millennials training data.
# A new estimator is created for this cohort: fit() returns the estimator it
# was called on, so refitting the previous `d_tree` object would silently
# overwrite every earlier model still bound to that object.
d_tree = tree.DecisionTreeClassifier()
dt_mi = d_tree.fit(mi_train, mi_label_train)


# Score the Millennials tree on its held-out test set: accuracy, per-class
# report, and confusion matrix.
measure_performance(mi_test, mi_label_test, dt_mi, show_confussion_matrix=True, show_classification_report=True)

Accuracy:0.896 

Classification report
              precision    recall  f1-score   support

           0       0.50      1.00      0.67         2
           1       0.83      0.62      0.71        16
           2       0.77      0.89      0.83        19
           3       1.00      0.91      0.95        43
           4       1.00      1.00      1.00        30
           5       0.69      0.85      0.76        13
           6       0.95      0.95      0.95        21

    accuracy                           0.90       144
   macro avg       0.82      0.89      0.84       144
weighted avg       0.91      0.90      0.90       144
 

Confussion matrix
[[ 2  0  0  0  0  0  0]
 [ 2 10  0  0  0  3  1]
 [ 0  0 17  0  0  2  0]
 [ 0  0  4 39  0  0  0]
 [ 0  0  0  0 30  0  0]
 [ 0  1  1  0  0 11  0]
 [ 0  1  0  0  0  0 20]]


# Compare Millennials accuracy on unseen rows with accuracy on the training rows.
print('Average Test Accuracy: ', d_tree.score(X=mi_test, y=mi_label_test))
print('Average Train Accuracy: ', d_tree.score(X=mi_train, y=mi_label_train))

Average Test Accuracy:  0.8958333333333334
Average Train Accuracy:  1.0


# Keep the 15% of Millennials features with the highest chi-squared score,
# fitting the selector on the Millennials training split only.
fs_mi = feature_selection.SelectPercentile(
    score_func=feature_selection.chi2,
    percentile=15,
)
mi_train_fs = fs_mi.fit_transform(mi_train, mi_label_train)


# Show the names of the features kept for Millennials.
print(data_millen.columns[fs_mi.get_support()].values)

['Weight' 'Gender_Female' 'Gender_Male'
 'family_history_with_overweight_no' 'FCVC_Always' 'CAEC_Frequently'
 'MTRANS_Automobile']


# Print the chi-squared score of every feature selected for Millennials.
# Names, support mask, and scores are walked in lockstep, so the mask is
# computed once instead of on every iteration.
for feature_name, is_selected, score in zip(data_millen.columns.values, fs_mi.get_support(), fs_mi.scores_):
    if is_selected:
        print(feature_name, '\t\t\t\t', score)

Weight 				 1599.4360572768592
Gender_Female 				 173.42692440859898
Gender_Male 				 146.10242506447887
family_history_with_overweight_no 				 169.8920540299759
FCVC_Always 				 249.67035685056916
CAEC_Frequently 				 166.34169934064465
MTRANS_Automobile 				 110.75284152840752


# Evaluate a tree trained on the selected (top 15%) Millennials features only.
# A fresh estimator is bound to `d_tree` before fitting: refitting the existing
# object in place would also overwrite any model still bound to it (such as
# the full-feature Millennials tree), because fit() returns the estimator itself.
d_tree = tree.DecisionTreeClassifier()
d_tree.fit(mi_train_fs, mi_label_train)
# Reduce the test set to the same columns the selector chose on the training set.
mi_test_fs = fs_mi.transform(mi_test)
measure_performance(mi_test_fs, mi_label_test, d_tree, show_confussion_matrix=True, show_classification_report=True)

Accuracy:0.799 

Classification report
              precision    recall  f1-score   support

           0       0.40      1.00      0.57         2
           1       0.82      0.56      0.67        16
           2       0.58      0.74      0.65        19
           3       0.95      0.86      0.90        43
           4       1.00      1.00      1.00        30
           5       0.56      0.77      0.65        13
           6       0.76      0.62      0.68        21

    accuracy                           0.80       144
   macro avg       0.72      0.79      0.73       144
weighted avg       0.83      0.80      0.80       144
 

Confussion matrix
[[ 2  0  0  0  0  0  0]
 [ 3  9  0  0  0  3  1]
 [ 0  0 14  2  0  2  1]
 [ 0  0  4 37  0  1  1]
 [ 0  0  0  0 30  0  0]
 [ 0  1  1  0  0 10  1]
 [ 0  1  5  0  0  2 13]]


# Display the Gen-X and Boomers dataset.
genxboomers_df


# Keep the columns at positions 1 through 42 as the Gen-X & Boomers feature table.
# Presumably position 0 is the age column and the class label sits at
# position 43 or later — TODO confirm against the CSV layout.
data_genxb = genxboomers_df.iloc[:,1:43]
data_genxb


# Take the class-label column for Gen-X & Boomers and display it.
labels_genxb =  genxboomers_df['NObeyesdad']
labels_genxb

0          Obesity_Type_I
1          Obesity_Type_I
2      Overweight_Level_I
3           Normal_Weight
4          Obesity_Type_I
5     Overweight_Level_II
6     Overweight_Level_II
7         Obesity_Type_II
8          Obesity_Type_I
9           Normal_Weight
10    Overweight_Level_II
11        Obesity_Type_II
12    Overweight_Level_II
13    Overweight_Level_II
14     Overweight_Level_I
15     Overweight_Level_I
16    Overweight_Level_II
17    Overweight_Level_II
18    Overweight_Level_II
19    Overweight_Level_II
20    Overweight_Level_II
21    Overweight_Level_II
22    Overweight_Level_II
23    Overweight_Level_II
24    Overweight_Level_II
25    Overweight_Level_II
26    Overweight_Level_II
27         Obesity_Type_I
28         Obesity_Type_I
29         Obesity_Type_I
30         Obesity_Type_I
31         Obesity_Type_I
32         Obesity_Type_I
33         Obesity_Type_I
34         Obesity_Type_I
35         Obesity_Type_I
36         Obesity_Type_I
37         Obesity_Type_I
38         Obesity_Type_I
39        Obesity_Type_II
40        Obesity_Type_II
Name: NObeyesdad, dtype: object


# Encode the Gen-X & Boomers class labels as integer codes.
# The labels are fitted with this cohort's own encoder `le_x`, so the
# Millennials encoder `le_m` keeps the classes it was fitted on.
# NOTE(review): the codes are assigned from the classes present in this cohort
# only, so an integer here need not denote the same class as in the
# full-dataset encoding — verify before comparing reports across cohorts.
le_x = preprocessing.LabelEncoder()
genxb_labels = le_x.fit_transform(labels_genxb)
genxb_labels

array([1, 1, 3, 0, 1, 4, 4, 2, 1, 0, 4, 2, 4, 4, 3, 3, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2])


# Hold out 20% of the Gen-X & Boomers rows for testing (fixed seed for a
# repeatable split).
xb_train, xb_test, xb_label_train, xb_label_test = train_test_split(
    data_genxb,
    genxb_labels,
    test_size=0.2,
    random_state=1,
)


# Display the Gen-X & Boomers training features.
xb_train


# Display the Gen-X & Boomers test features.
xb_test


# Display the Gen-X & Boomers training labels.
xb_label_train

array([4, 4, 1, 1, 1, 1, 1, 2, 1, 3, 4, 1, 4, 4, 1, 4, 4, 4, 4, 4, 2, 2,
       1, 4, 1, 3, 4, 2, 0, 1, 4, 1])


# Display the encoded labels of the Gen-X & Boomers test rows.
xb_label_test

array([0, 3, 4, 1, 4, 1, 4, 1, 1])


# Train a decision tree on the Gen-X & Boomers training data.
# A new estimator is created for this cohort: fit() returns the estimator it
# was called on, so refitting the previous `d_tree` object would silently
# overwrite every earlier model still bound to that object.
d_tree = tree.DecisionTreeClassifier()
dt_xb = d_tree.fit(xb_train, xb_label_train)


# Score the Gen-X & Boomers tree on its held-out test set: accuracy,
# per-class report, and confusion matrix.
measure_performance(xb_test, xb_label_test, dt_xb, show_confussion_matrix=True, show_classification_report=True)

Accuracy:0.667 

Classification report
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.80      1.00      0.89         4
           3       0.00      0.00      0.00         1
           4       0.50      0.67      0.57         3

    accuracy                           0.67         9
   macro avg       0.33      0.42      0.37         9
weighted avg       0.52      0.67      0.59         9
 

Confussion matrix
[[0 0 0 1]
 [0 4 0 0]
 [0 0 0 1]
 [0 1 0 2]]


# Compare Gen-X & Boomers accuracy on unseen rows with accuracy on the
# training rows.
print('Average Test Accuracy: ', d_tree.score(X=xb_test, y=xb_label_test))
print('Average Train Accuracy: ', d_tree.score(X=xb_train, y=xb_label_train))

Average Test Accuracy:  0.6666666666666666
Average Train Accuracy:  1.0


# Keep the 15% of Gen-X & Boomers features with the highest chi-squared score,
# fitting the selector on this cohort's training split only.
fs_xb = feature_selection.SelectPercentile(
    score_func=feature_selection.chi2,
    percentile=15,
)
xb_train_fs = fs_xb.fit_transform(xb_train, xb_label_train)


# Show the names of the features kept for Gen-X & Boomers.
print(data_genxb.columns[fs_xb.get_support()].values)

['Weight' 'FCVC_Always' 'CH2O_More than 2 L' 'SCC_yes' 'FAF_2 or 4 days'
 'FAF_4 or 5 days' 'MTRANS_Public_Transportation']


# Print the chi-squared score of every feature selected for Gen-X & Boomers.
# Names, support mask, and scores are walked in lockstep, so the mask is
# computed once instead of on every iteration.
for feature_name, is_selected, score in zip(data_genxb.columns.values, fs_xb.get_support(), fs_xb.scores_):
    if is_selected:
        print(feature_name, '\t\t\t\t', score)

Weight 				 59.13977455691617
FCVC_Always 				 9.282051282051283
CH2O_More than 2 L 				 9.376068376068377
SCC_yes 				 31.0
FAF_2 or 4 days 				 15.333333333333334
FAF_4 or 5 days 				 15.0
MTRANS_Public_Transportation 				 31.0


# Evaluate a tree trained on the selected (top 15%) Gen-X & Boomers features only.
# A fresh estimator is bound to `d_tree` before fitting: refitting the existing
# object in place would also overwrite any model still bound to it (such as
# the full-feature Gen-X & Boomers tree), because fit() returns the estimator itself.
d_tree = tree.DecisionTreeClassifier()
d_tree.fit(xb_train_fs, xb_label_train)
# Reduce the test set to the same columns the selector chose on the training set.
xb_test_fs = fs_xb.transform(xb_test)
measure_performance(xb_test_fs, xb_label_test, d_tree, show_confussion_matrix=True, show_classification_report=True)

Accuracy:0.667 

Classification report
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       1.00      0.50      0.67         4
           3       0.50      1.00      0.67         1
           4       0.60      1.00      0.75         3

    accuracy                           0.67         9
   macro avg       0.53      0.62      0.52         9
weighted avg       0.70      0.67      0.62         9
 

Confussion matrix
[[0 0 1 0]
 [0 2 0 2]
 [0 0 1 0]
 [0 0 0 3]]

	Height	Weight	Gender_Female	Gender_Male	family_history_with_overweight_no	family_history_with_overweight_yes	FAVC_no	FAVC_yes	FCVC_Always	...	CALC_Frequently	CALC_Sometimes	CALC_no	MTRANS_Automobile	MTRANS_Bike	MTRANS_Motorbike	MTRANS_Public_Transportation	MTRANS_Walking
0	1.800000	99.000000	0	1	1	0	0	1	0	...	1	0	0	1	0	0	0	0
1	1.690000	87.000000	1	0	0	1	0	1	1	...	0	0	1	1	0	0	0	0
2	1.780000	84.000000	0	1	0	1	1	0	1	...	1	0	0	0	0	0	0	1
3	1.650000	66.000000	1	0	1	0	0	1	1	...	1	0	0	0	0	0	1	0
4	1.600000	80.000000	0	1	0	1	1	0	0	...	0	0	1	0	0	1	0	0
5	1.650000	80.000000	0	1	1	0	0	1	0	...	0	0	1	1	0	0	0	0
6	1.630000	77.000000	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0
7	1.750000	118.000000	0	1	0	1	0	1	0	...	0	1	0	0	1	0	0	0
8	1.540000	80.000000	1	0	0	1	0	1	0	...	0	1	0	1	0	0	0	0
9	1.590000	50.000000	1	0	0	1	1	0	1	...	0	0	1	0	0	0	1	0
10	1.790000	90.000000	0	1	0	1	1	0	0	...	1	0	0	1	0	0	0	0
11	1.750000	110.000000	0	1	0	1	1	0	0	...	1	0	0	1	0	0	0	0
12	1.800000	92.000000	0	1	0	1	0	1	0	...	0	1	0	1	0	0	0	0
13	1.700000	86.000000	0	1	1	0	0	1	1	...	0	0	1	1	0	0	0	0
14	1.721854	82.919584	1	0	1	0	0	1	0	...	0	1	0	1	0	0	0	0
15	1.768231	75.629310	1	0	0	1	0	1	1	...	0	1	0	1	0	0	0	0
16	1.769269	80.491339	0	1	1	0	0	1	0	...	0	0	1	1	0	0	0	0
17	1.647768	79.165306	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0
18	1.745528	82.130728	0	1	0	1	0	1	0	...	0	0	1	1	0	0	0	0
19	1.733875	86.945380	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0
20	1.675953	79.668320	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0
21	1.657221	80.993213	0	1	0	1	0	1	0	...	0	0	1	1	0	0	0	0
22	1.718097	88.600878	0	1	0	1	0	1	0	...	0	0	1	1	0	0	0	0
23	1.673394	80.400306	0	1	0	1	0	1	0	...	0	0	1	1	0	0	0	0
24	1.678610	79.849252	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0
25	1.743935	84.729197	0	1	0	1	0	1	0	...	0	0	1	1	0	0	0	0
26	1.687326	80.413997	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0
27	1.569234	81.827288	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0
28	1.583943	81.936398	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0
29	1.587546	76.126112	1	0	0	1	0	1	0	...	0	1	0	1	0	0	0	0
30	1.646390	86.639861	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0
31	1.643786	81.978743	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0
32	1.595165	77.354744	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0
33	1.567973	81.056851	1	0	0	1	0	1	0	...	0	1	0	1	0	0	0	0
34	1.571417	81.918809	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0
35	1.584322	80.986496	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0
36	1.582523	81.919454	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0
37	1.544937	77.053948	1	0	0	1	0	1	0	...	0	1	0	1	0	0	0	0
38	1.592316	77.001030	1	0	0	1	0	1	0	...	0	1	0	1	0	0	0	0
39	1.750000	116.594351	0	1	0	1	0	1	0	...	0	1	0	1	0	0	0	0
40	1.750000	115.806977	0	1	0	1	0	1	0	...	0	0	1	1	0	0	0	0

	Height	Weight	Gender_Female	Gender_Male	family_history_with_overweight_no	family_history_with_overweight_yes	FAVC_no	FAVC_yes	FCVC_Always	...	CALC_Frequently	CALC_Sometimes	CALC_no	MTRANS_Automobile	MTRANS_Bike	MTRANS_Motorbike	MTRANS_Public_Transportation
22	1.718097	88.600878	0	1	0	1	0	1	0	...	0	0	1	1	0	0	0
21	1.657221	80.993213	0	1	0	1	0	1	0	...	0	0	1	1	0	0	0
32	1.595165	77.354744	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0
27	1.569234	81.827288	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0
33	1.567973	81.056851	1	0	0	1	0	1	0	...	0	1	0	1	0	0	0
29	1.587546	76.126112	1	0	0	1	0	1	0	...	0	1	0	1	0	0	0
31	1.643786	81.978743	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0
40	1.750000	115.806977	0	1	0	1	0	1	0	...	0	0	1	1	0	0	0
4	1.600000	80.000000	0	1	0	1	1	0	0	...	0	0	1	0	0	1	0
14	1.721854	82.919584	1	0	1	0	0	1	0	...	0	1	0	1	0	0	0
10	1.790000	90.000000	0	1	0	1	1	0	0	...	1	0	0	1	0	0	0
36	1.582523	81.919454	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0
24	1.678610	79.849252	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0
26	1.687326	80.413997	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0
35	1.584322	80.986496	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0
20	1.675953	79.668320	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0
18	1.745528	82.130728	0	1	0	1	0	1	0	...	0	0	1	1	0	0	0
25	1.743935	84.729197	0	1	0	1	0	1	0	...	0	0	1	1	0	0	0
6	1.630000	77.000000	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0
13	1.700000	86.000000	0	1	1	0	0	1	1	...	0	0	1	1	0	0	0
7	1.750000	118.000000	0	1	0	1	0	1	0	...	0	1	0	0	1	0	0
39	1.750000	116.594351	0	1	0	1	0	1	0	...	0	1	0	1	0	0	0
1	1.690000	87.000000	1	0	0	1	0	1	1	...	0	0	1	1	0	0	0
16	1.769269	80.491339	0	1	1	0	0	1	0	...	0	0	1	1	0	0	0
0	1.800000	99.000000	0	1	1	0	0	1	0	...	1	0	0	1	0	0	0
15	1.768231	75.629310	1	0	0	1	0	1	1	...	0	1	0	1	0	0	0
5	1.650000	80.000000	0	1	1	0	0	1	0	...	0	0	1	1	0	0	0
11	1.750000	110.000000	0	1	0	1	1	0	0	...	1	0	0	1	0	0	0
9	1.590000	50.000000	1	0	0	1	1	0	1	...	0	0	1	0	0	0	1
8	1.540000	80.000000	1	0	0	1	0	1	0	...	0	1	0	1	0	0	0
12	1.800000	92.000000	0	1	0	1	0	1	0	...	0	1	0	1	0	0	0
37	1.544937	77.053948	1	0	0	1	0	1	0	...	0	1	0	1	0	0	0

	Height	Weight	Gender_Female	Gender_Male	family_history_with_overweight_no	family_history_with_overweight_yes	FAVC_no	FAVC_yes	FCVC_Always	...	CALC_Frequently	CALC_Sometimes	CALC_no	MTRANS_Automobile	MTRANS_Public_Transportation	MTRANS_Walking
3	1.650000	66.000000	1	0	1	0	0	1	1	...	1	0	0	0	1	0
2	1.780000	84.000000	0	1	0	1	1	0	1	...	1	0	0	0	0	1
23	1.673394	80.400306	0	1	0	1	0	1	0	...	0	0	1	1	0	0
38	1.592316	77.001030	1	0	0	1	0	1	0	...	0	1	0	1	0	0
17	1.647768	79.165306	1	0	0	1	0	1	0	...	0	0	1	1	0	0
28	1.583943	81.936398	1	0	0	1	0	1	0	...	0	0	1	1	0	0
19	1.733875	86.945380	1	0	0	1	0	1	0	...	0	0	1	1	0	0
34	1.571417	81.918809	1	0	0	1	0	1	0	...	0	0	1	1	0	0
30	1.646390	86.639861	1	0	0	1	0	1	0	...	0	0	1	1	0	0

Feature Selection with Decision Tree: Obesity Level Analysis¶

Decision Tree and Feature Selection with Full Dataset:¶

Decision Tree and Feature Selection with Gen-Z Dataset:¶

Decision Tree and Feature Selection with Millennials Dataset:¶

Decision Tree and Feature Selection with Gen-X & Boomers Dataset:¶

Comparison of Results:¶

	Age	Height	Weight	Gender_Female	Gender_Male	family_history_with_overweight_no	family_history_with_overweight_yes	FAVC_no	FAVC_yes	FCVC_Always	...	TUE_More than 5 Hours	CALC_Always	CALC_Frequently	CALC_Sometimes	CALC_no	MTRANS_Automobile	MTRANS_Bike	MTRANS_Motorbike	MTRANS_Public_Transportation	MTRANS_Walking
0	21	1.620000	64.000000	1	0	0	1	1	0	0	...	0	0	0	0	1	0	0	0	1	0
1	21	1.520000	56.000000	1	0	0	1	1	0	1	...	0	0	0	1	0	0	0	0	1	0
2	23	1.800000	77.000000	0	1	0	1	1	0	0	...	0	0	1	0	0	0	0	0	1	0
3	27	1.800000	87.000000	0	1	1	0	1	0	1	...	0	0	1	0	0	0	0	0	0	1
4	22	1.780000	89.800000	0	1	1	0	1	0	0	...	0	0	0	1	0	0	0	0	1	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
2106	20	1.710730	131.408528	1	0	0	1	0	1	1	...	0	0	0	1	0	0	0	0	1	0
2107	21	1.748584	133.742943	1	0	0	1	0	1	1	...	0	0	0	1	0	0	0	0	1	0
2108	22	1.752206	133.689352	1	0	0	1	0	1	1	...	0	0	0	1	0	0	0	0	1	0
2109	24	1.739450	133.346641	1	0	0	1	0	1	1	...	0	0	0	1	0	0	0	0	1	0
2110	23	1.738836	133.472641	1	0	0	1	0	1	1	...	0	0	0	1	0	0	0	0	1	0

	Age	Height	Weight	Gender_Female	Gender_Male	family_history_with_overweight_no	family_history_with_overweight_yes	FAVC_no	FAVC_yes	FCVC_Always	...	TUE_More than 5 Hours	CALC_Always	CALC_Frequently	CALC_Sometimes	CALC_no	MTRANS_Automobile	MTRANS_Bike	MTRANS_Motorbike	MTRANS_Public_Transportation	MTRANS_Walking
53	23	1.630000	55.000000	1	0	0	1	1	0	1	...	0	0	0	0	1	0	0	0	1	0
267	38	1.700000	78.000000	0	1	1	0	0	1	1	...	0	0	1	0	0	1	0	0	0	0
1825	18	1.821566	142.102468	1	0	0	1	0	1	1	...	0	0	0	1	0	0	0	0	1	0
386	18	1.590000	53.000000	1	0	1	0	1	0	0	...	1	0	0	0	1	0	0	0	1	0
1413	40	1.559005	77.601483	1	0	0	1	0	1	0	...	0	0	0	1	0	1	0	0	0	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
960	17	1.618683	67.193585	1	0	0	1	0	1	0	...	0	0	0	0	1	0	0	0	1	0
905	20	1.849425	85.228116	0	1	0	1	0	1	0	...	0	0	0	1	0	0	0	0	1	0
1096	39	1.688354	79.278896	1	0	0	1	0	1	1	...	0	0	0	0	1	1	0	0	0	0
235	19	1.690000	70.000000	1	0	0	1	1	0	0	...	0	0	0	0	1	0	0	0	1	0
1061	23	1.725587	82.480214	0	1	0	1	0	1	0	...	0	0	0	1	0	0	0	0	1	0

	Age	Height	Weight	Gender_Female	Gender_Male	family_history_with_overweight_no	family_history_with_overweight_yes	FAVC_no	FAVC_yes	FCVC_Always	...	TUE_More than 5 Hours	CALC_Always	CALC_Frequently	CALC_Sometimes	CALC_no	MTRANS_Automobile	MTRANS_Bike	MTRANS_Motorbike	MTRANS_Public_Transportation	MTRANS_Walking
553	16	1.752755	50.000000	0	1	1	0	0	1	0	...	0	0	0	1	0	0	0	0	1	0
331	17	1.740000	56.000000	0	1	0	1	0	1	0	...	0	0	0	0	1	0	0	0	1	0
241	22	1.600000	66.000000	0	1	1	0	0	1	1	...	0	0	0	0	1	0	1	0	0	0
1957	26	1.641209	111.856492	1	0	0	1	0	1	1	...	0	0	0	1	0	0	0	0	1	0
1691	30	1.779325	120.751656	0	1	0	1	0	1	0	...	0	0	0	1	0	1	0	0	0	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
1201	24	1.789193	89.393589	0	1	0	1	0	1	0	...	0	0	0	1	0	0	0	0	1	0
363	19	1.800000	80.000000	0	1	1	0	0	1	0	...	0	0	0	1	0	0	0	0	1	0
11	21	1.720000	80.000000	1	0	0	1	0	1	0	...	0	0	0	1	0	0	0	0	1	0
510	22	1.675446	51.154201	1	0	0	1	0	1	1	...	0	0	0	0	1	0	0	0	1	0
1711	28	1.758618	113.501549	0	1	0	1	0	1	0	...	0	0	0	1	0	1	0	0	0	0

	Age	Height	Weight	Gender_Female	Gender_Male	family_history_with_overweight_no	family_history_with_overweight_yes	FAVC_no	FAVC_yes	FCVC_Always	...	CALC_Always	CALC_Frequently	CALC_Sometimes	CALC_no	MTRANS_Automobile	MTRANS_Bike	MTRANS_Motorbike	MTRANS_Public_Transportation	MTRANS_Walking	NObeyesdad
0	Gen-Z	1.620000	64.000000	1	0	0	1	1	0	0	...	0	0	0	1	0	0	0	1	0	Normal_Weight
1	Gen-Z	1.520000	56.000000	1	0	0	1	1	0	1	...	0	0	1	0	0	0	0	1	0	Normal_Weight
2	Gen-Z	1.800000	77.000000	0	1	0	1	1	0	0	...	0	1	0	0	0	0	0	1	0	Normal_Weight
3	Gen-Z	1.780000	89.800000	0	1	1	0	1	0	0	...	0	0	1	0	0	0	0	1	0	Overweight_Level_II
4	Gen-Z	1.500000	55.000000	1	0	0	1	0	1	1	...	0	0	1	0	0	0	1	0	0	Normal_Weight
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
1348	Gen-Z	1.710730	131.408528	1	0	0	1	0	1	1	...	0	0	1	0	0	0	0	1	0	Obesity_Type_III
1349	Gen-Z	1.748584	133.742943	1	0	0	1	0	1	1	...	0	0	1	0	0	0	0	1	0	Obesity_Type_III
1350	Gen-Z	1.752206	133.689352	1	0	0	1	0	1	1	...	0	0	1	0	0	0	0	1	0	Obesity_Type_III
1351	Gen-Z	1.739450	133.346641	1	0	0	1	0	1	1	...	0	0	1	0	0	0	0	1	0	Obesity_Type_III
1352	Gen-Z	1.738836	133.472641	1	0	0	1	0	1	1	...	0	0	1	0	0	0	0	1	0	Obesity_Type_III

	Height	Weight	Gender_Female	Gender_Male	family_history_with_overweight_no	family_history_with_overweight_yes	FAVC_no	FAVC_yes	FCVC_Always	FCVC_Never	...	TUE_More than 5 Hours	CALC_Always	CALC_Frequently	CALC_Sometimes	CALC_no	MTRANS_Automobile	MTRANS_Bike	MTRANS_Motorbike	MTRANS_Public_Transportation	MTRANS_Walking
382	1.770612	133.963349	1	0	0	1	0	1	1	0	...	0	0	0	1	0	0	0	0	1	0
584	1.524926	42.000000	1	0	1	0	0	1	0	0	...	0	0	0	1	0	0	0	0	1	0
6	1.780000	64.000000	0	1	0	1	0	1	1	0	...	0	0	1	0	0	0	0	0	1	0
699	1.712061	75.000000	0	1	0	1	0	1	0	0	...	0	0	0	1	0	0	0	0	1	0
705	1.456346	55.523481	1	0	1	0	0	1	0	0	...	0	0	0	1	0	0	0	0	1	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
715	1.624831	69.975607	1	0	0	1	0	1	0	0	...	0	0	0	0	1	0	0	0	1	0
905	1.589100	72.713611	1	0	0	1	1	0	1	0	...	0	0	0	1	0	0	0	0	1	0
1096	1.769328	105.000576	0	1	0	1	0	1	0	0	...	0	0	0	1	0	0	0	0	1	0
235	1.600000	57.000000	1	0	1	0	0	1	1	0	...	0	0	0	0	1	0	0	0	1	0
1061	1.607182	82.368441	1	0	0	1	0	1	0	1	...	0	0	0	0	1	0	0	0	1	0

	Height	Weight	Gender_Female	Gender_Male	family_history_with_overweight_no	family_history_with_overweight_yes	FAVC_no	FAVC_yes	FCVC_Always	FCVC_Never	...	TUE_More than 5 Hours	CALC_Always	CALC_Frequently	CALC_Sometimes	CALC_no	MTRANS_Automobile	MTRANS_Bike	MTRANS_Motorbike	MTRANS_Public_Transportation	MTRANS_Walking
91	1.560000	51.000000	1	0	0	1	0	1	0	0	...	0	0	0	1	0	0	0	0	1	0
442	1.759358	55.010450	1	0	0	1	0	1	0	0	...	0	0	0	0	1	0	0	0	1	0
1078	1.738397	93.890682	1	0	0	1	0	1	0	1	...	0	0	0	1	0	0	0	0	1	0
686	1.800000	85.000000	0	1	0	1	0	1	0	0	...	0	0	0	1	0	0	0	0	1	0
857	1.717722	81.929910	0	1	0	1	0	1	0	0	...	0	0	0	1	0	0	0	0	1	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
78	1.660000	60.000000	1	0	0	1	1	0	0	0	...	0	0	0	1	0	0	0	0	1	0
561	1.757958	52.094320	0	1	1	0	0	1	0	0	...	0	0	0	1	0	0	0	0	1	0
292	1.700000	50.000000	1	0	1	0	1	0	0	0	...	0	0	0	1	0	0	0	0	0	1
35	1.820000	72.000000	0	1	0	1	0	1	0	1	...	0	0	0	1	0	0	0	0	1	0
1316	1.682594	127.427458	1	0	0	1	0	1	1	0	...	0	0	0	1	0	0	0	0	1	0

	Age	Height	Weight	Gender_Female	Gender_Male	family_history_with_overweight_no	family_history_with_overweight_yes	FAVC_no	FAVC_yes	FCVC_Always	...	CALC_Always	CALC_Frequently	CALC_Sometimes	CALC_no	MTRANS_Automobile	MTRANS_Bike	MTRANS_Motorbike	MTRANS_Public_Transportation	MTRANS_Walking	NObeyesdad
0	Millenials	1.800000	87.000000	0	1	1	0	1	0	1	...	0	1	0	0	0	0	0	0	1	Overweight_Level_I
1	Millenials	1.620000	53.000000	0	1	1	0	0	1	0	...	0	0	1	0	1	0	0	0	0	Normal_Weight
2	Millenials	1.850000	105.000000	0	1	0	1	0	1	1	...	0	0	1	0	0	0	0	1	0	Obesity_Type_I
3	Millenials	1.930000	102.000000	0	1	0	1	0	1	0	...	0	0	1	0	0	0	0	1	0	Overweight_Level_II
4	Millenials	1.530000	78.000000	1	0	1	0	0	1	0	...	0	0	0	1	1	0	0	0	0	Obesity_Type_I
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
712	Millenials	1.606474	104.954291	1	0	0	1	0	1	1	...	0	0	1	0	0	0	0	1	0	Obesity_Type_III
713	Millenials	1.628855	108.090006	1	0	0	1	0	1	1	...	0	0	1	0	0	0	0	1	0	Obesity_Type_III
714	Millenials	1.628205	107.378702	1	0	0	1	0	1	1	...	0	0	1	0	0	0	0	1	0	Obesity_Type_III
715	Millenials	1.628470	107.218949	1	0	0	1	0	1	1	...	0	0	1	0	0	0	0	1	0	Obesity_Type_III
716	Millenials	1.627839	108.107360	1	0	0	1	0	1	1	...	0	0	1	0	0	0	0	1	0	Obesity_Type_III

	Height	Weight	Gender_Female	Gender_Male	family_history_with_overweight_no	family_history_with_overweight_yes	FAVC_no	FAVC_yes	FCVC_Always	FCVC_Never	...	TUE_More than 5 Hours	CALC_Always	CALC_Frequently	CALC_Sometimes	CALC_no	MTRANS_Automobile	MTRANS_Bike	MTRANS_Motorbike	MTRANS_Public_Transportation	MTRANS_Walking
654	1.624950	111.004920	1	0	0	1	0	1	1	0	...	0	0	0	1	0	0	0	0	1	0
544	1.836592	118.377601	0	1	0	1	0	1	0	1	...	0	0	0	1	0	0	0	0	1	0
711	1.626580	105.037203	1	0	0	1	0	1	1	0	...	0	0	0	1	0	0	0	0	1	0
666	1.611452	102.363149	1	0	0	1	0	1	1	0	...	0	0	0	1	0	0	0	0	1	0
516	1.805445	119.484614	0	1	0	1	0	1	0	0	...	0	0	0	1	0	1	0	0	0	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
144	1.550000	62.877347	1	0	1	0	0	1	0	0	...	0	0	0	1	0	0	0	0	1	0
645	1.607734	102.305767	1	0	0	1	0	1	1	0	...	0	0	0	1	0	0	0	0	1	0
72	1.650000	71.000000	1	0	0	1	0	1	0	0	...	0	0	0	1	0	0	0	0	1	0
235	1.837399	95.952027	0	1	0	1	0	1	0	1	...	0	0	0	1	0	0	0	0	1	0
37	1.770000	85.000000	0	1	0	1	1	0	1	0	...	0	0	0	1	0	1	0	0	0	0

	Age	Height	Weight	Gender_Female	Gender_Male	family_history_with_overweight_no	family_history_with_overweight_yes	FAVC_no	FAVC_yes	FCVC_Always	...	CALC_Frequently	CALC_Sometimes	CALC_no	MTRANS_Automobile	MTRANS_Bike	MTRANS_Motorbike	MTRANS_Public_Transportation	MTRANS_Walking	NObeyesdad
0	Gen-X & Boomers	1.800000	99.000000	0	1	1	0	0	1	0	...	1	0	0	1	0	0	0	0	Obesity_Type_I
1	Gen-X & Boomers	1.690000	87.000000	1	0	0	1	0	1	1	...	0	0	1	1	0	0	0	0	Obesity_Type_I
2	Gen-X & Boomers	1.780000	84.000000	0	1	0	1	1	0	1	...	1	0	0	0	0	0	0	1	Overweight_Level_I
3	Gen-X & Boomers	1.650000	66.000000	1	0	1	0	0	1	1	...	1	0	0	0	0	0	1	0	Normal_Weight
4	Gen-X & Boomers	1.600000	80.000000	0	1	0	1	1	0	0	...	0	0	1	0	0	1	0	0	Obesity_Type_I
5	Gen-X & Boomers	1.650000	80.000000	0	1	1	0	0	1	0	...	0	0	1	1	0	0	0	0	Overweight_Level_II
6	Gen-X & Boomers	1.630000	77.000000	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0	Overweight_Level_II
7	Gen-X & Boomers	1.750000	118.000000	0	1	0	1	0	1	0	...	0	1	0	0	1	0	0	0	Obesity_Type_II
8	Gen-X & Boomers	1.540000	80.000000	1	0	0	1	0	1	0	...	0	1	0	1	0	0	0	0	Obesity_Type_I
9	Gen-X & Boomers	1.590000	50.000000	1	0	0	1	1	0	1	...	0	0	1	0	0	0	1	0	Normal_Weight
10	Gen-X & Boomers	1.790000	90.000000	0	1	0	1	1	0	0	...	1	0	0	1	0	0	0	0	Overweight_Level_II
11	Gen-X & Boomers	1.750000	110.000000	0	1	0	1	1	0	0	...	1	0	0	1	0	0	0	0	Obesity_Type_II
12	Gen-X & Boomers	1.800000	92.000000	0	1	0	1	0	1	0	...	0	1	0	1	0	0	0	0	Overweight_Level_II
13	Gen-X & Boomers	1.700000	86.000000	0	1	1	0	0	1	1	...	0	0	1	1	0	0	0	0	Overweight_Level_II
14	Gen-X & Boomers	1.721854	82.919584	1	0	1	0	0	1	0	...	0	1	0	1	0	0	0	0	Overweight_Level_I
15	Gen-X & Boomers	1.768231	75.629310	1	0	0	1	0	1	1	...	0	1	0	1	0	0	0	0	Overweight_Level_I
16	Gen-X & Boomers	1.769269	80.491339	0	1	1	0	0	1	0	...	0	0	1	1	0	0	0	0	Overweight_Level_II
17	Gen-X & Boomers	1.647768	79.165306	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0	Overweight_Level_II
18	Gen-X & Boomers	1.745528	82.130728	0	1	0	1	0	1	0	...	0	0	1	1	0	0	0	0	Overweight_Level_II
19	Gen-X & Boomers	1.733875	86.945380	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0	Overweight_Level_II
20	Gen-X & Boomers	1.675953	79.668320	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0	Overweight_Level_II
21	Gen-X & Boomers	1.657221	80.993213	0	1	0	1	0	1	0	...	0	0	1	1	0	0	0	0	Overweight_Level_II
22	Gen-X & Boomers	1.718097	88.600878	0	1	0	1	0	1	0	...	0	0	1	1	0	0	0	0	Overweight_Level_II
23	Gen-X & Boomers	1.673394	80.400306	0	1	0	1	0	1	0	...	0	0	1	1	0	0	0	0	Overweight_Level_II
24	Gen-X & Boomers	1.678610	79.849252	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0	Overweight_Level_II
25	Gen-X & Boomers	1.743935	84.729197	0	1	0	1	0	1	0	...	0	0	1	1	0	0	0	0	Overweight_Level_II
26	Gen-X & Boomers	1.687326	80.413997	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0	Overweight_Level_II
27	Gen-X & Boomers	1.569234	81.827288	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0	Obesity_Type_I
28	Gen-X & Boomers	1.583943	81.936398	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0	Obesity_Type_I
29	Gen-X & Boomers	1.587546	76.126112	1	0	0	1	0	1	0	...	0	1	0	1	0	0	0	0	Obesity_Type_I
30	Gen-X & Boomers	1.646390	86.639861	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0	Obesity_Type_I
31	Gen-X & Boomers	1.643786	81.978743	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0	Obesity_Type_I
32	Gen-X & Boomers	1.595165	77.354744	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0	Obesity_Type_I
33	Gen-X & Boomers	1.567973	81.056851	1	0	0	1	0	1	0	...	0	1	0	1	0	0	0	0	Obesity_Type_I
34	Gen-X & Boomers	1.571417	81.918809	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0	Obesity_Type_I
35	Gen-X & Boomers	1.584322	80.986496	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0	Obesity_Type_I
36	Gen-X & Boomers	1.582523	81.919454	1	0	0	1	0	1	0	...	0	0	1	1	0	0	0	0	Obesity_Type_I
37	Gen-X & Boomers	1.544937	77.053948	1	0	0	1	0	1	0	...	0	1	0	1	0	0	0	0	Obesity_Type_I
38	Gen-X & Boomers	1.592316	77.001030	1	0	0	1	0	1	0	...	0	1	0	1	0	0	0	0	Obesity_Type_I
39	Gen-X & Boomers	1.750000	116.594351	0	1	0	1	0	1	0	...	0	1	0	1	0	0	0	0	Obesity_Type_II
40	Gen-X & Boomers	1.750000	115.806977	0	1	0	1	0	1	0	...	0	0	1	1	0	0	0	0	Obesity_Type_II