import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

activity=pd.read_csv("C:/Users/Rajesh Prabhakar/Desktop/Hackathon/Human Activity Recognition/train.csv")

activity.shape # First is No of Rows(obs) & Second No of Columns(variables)

(7352, 563)

activity.dtypes

tBodyAcc-mean()-X                       float64
tBodyAcc-mean()-Y                       float64
tBodyAcc-mean()-Z                       float64
tBodyAcc-std()-X                        float64
tBodyAcc-std()-Y                        float64
tBodyAcc-std()-Z                        float64
tBodyAcc-mad()-X                        float64
tBodyAcc-mad()-Y                        float64
tBodyAcc-mad()-Z                        float64
tBodyAcc-max()-X                        float64
tBodyAcc-max()-Y                        float64
tBodyAcc-max()-Z                        float64
tBodyAcc-min()-X                        float64
tBodyAcc-min()-Y                        float64
tBodyAcc-min()-Z                        float64
tBodyAcc-sma()                          float64
tBodyAcc-energy()-X                     float64
tBodyAcc-energy()-Y                     float64
tBodyAcc-energy()-Z                     float64
tBodyAcc-iqr()-X                        float64
tBodyAcc-iqr()-Y                        float64
tBodyAcc-iqr()-Z                        float64
tBodyAcc-entropy()-X                    float64
tBodyAcc-entropy()-Y                    float64
tBodyAcc-entropy()-Z                    float64
tBodyAcc-arCoeff()-X,1                  float64
tBodyAcc-arCoeff()-X,2                  float64
tBodyAcc-arCoeff()-X,3                  float64
tBodyAcc-arCoeff()-X,4                  float64
tBodyAcc-arCoeff()-Y,1                  float64
                                         ...   
fBodyBodyGyroMag-sma()                  float64
fBodyBodyGyroMag-energy()               float64
fBodyBodyGyroMag-iqr()                  float64
fBodyBodyGyroMag-entropy()              float64
fBodyBodyGyroMag-maxInds                float64
fBodyBodyGyroMag-meanFreq()             float64
fBodyBodyGyroMag-skewness()             float64
fBodyBodyGyroMag-kurtosis()             float64
fBodyBodyGyroJerkMag-mean()             float64
fBodyBodyGyroJerkMag-std()              float64
fBodyBodyGyroJerkMag-mad()              float64
fBodyBodyGyroJerkMag-max()              float64
fBodyBodyGyroJerkMag-min()              float64
fBodyBodyGyroJerkMag-sma()              float64
fBodyBodyGyroJerkMag-energy()           float64
fBodyBodyGyroJerkMag-iqr()              float64
fBodyBodyGyroJerkMag-entropy()          float64
fBodyBodyGyroJerkMag-maxInds            float64
fBodyBodyGyroJerkMag-meanFreq()         float64
fBodyBodyGyroJerkMag-skewness()         float64
fBodyBodyGyroJerkMag-kurtosis()         float64
angle(tBodyAccMean,gravity)             float64
angle(tBodyAccJerkMean),gravityMean)    float64
angle(tBodyGyroMean,gravityMean)        float64
angle(tBodyGyroJerkMean,gravityMean)    float64
angle(X,gravityMean)                    float64
angle(Y,gravityMean)                    float64
angle(Z,gravityMean)                    float64
subject                                   int64
Activity                                 object
Length: 563, dtype: object

activity.head(5)

activity.Activity.unique() # MULTI CLASS LABEL OR TARGET VARIBLE or 'y'

array(['STANDING', 'SITTING', 'LAYING', 'WALKING', 'WALKING_DOWNSTAIRS',
       'WALKING_UPSTAIRS'], dtype=object)

from sklearn.preprocessing import LabelEncoder

pd.value_counts(activity.Activity)

LAYING                1407
STANDING              1374
SITTING               1286
WALKING               1226
WALKING_UPSTAIRS      1073
WALKING_DOWNSTAIRS     986
Name: Activity, dtype: int64

y=activity.Activity

le=LabelEncoder()

y=le.fit_transform(y)

pd.value_counts(y)

0    1407
2    1374
1    1286
3    1226
5    1073
4     986
dtype: int64

X=activity.drop(['Activity','subject'],axis=1) # axis=1 column del axis=0 row del

y.shape

(7352,)

X.shape

(7352, 561)

from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2
                                               ,random_state=123)

X_train.shape

(5881, 561)

X_test.shape

(1471, 561)

y_train.shape

(5881,)

y_test.shape

(1471,)

from sklearn.linear_model import LogisticRegression

LogReg=LogisticRegression(multi_class="multinomial",solver="lbfgs")

LogRegmodel=LogReg.fit(X_train,y_train)

LogRegmodel.score(X_train,y_train)

0.9908178881142663

LogRegmodel.intercept_

array([-0.01177632, -0.11015127, -0.13925476,  0.12843795,  0.09740395,
        0.03534045])

LogRegmodel.coef_

array([[-0.14997069, -0.01990212,  0.05000961, ...,  1.43248441,
        -0.61225066, -0.17356775],
       [-0.06134427, -0.00510195, -0.00553526, ..., -0.50384596,
        -1.09958986, -0.2451083 ],
       [ 0.00191191,  0.01771874, -0.01079328, ..., -0.46493668,
         1.37262331,  0.23666297],
       [ 0.01382296, -0.0210481 , -0.00609633, ..., -0.32121973,
         0.00264182,  0.09715364],
       [ 0.24492967,  0.08885722,  0.12877737, ..., -0.41043606,
        -0.15687894, -0.11921278],
       [-0.04934958, -0.06052378, -0.15636212, ...,  0.26795401,
         0.49345432,  0.20407222]])

logitpredict=LogRegmodel.predict(X_train)

pd.crosstab(y_train,logitpredict)

(31+25)/(X_train.shape[0]) # 31 & 25 are misclassifications

0.009522190103723856

1-0.009522190103723856

0.9904778098962761

LogRegmodel.fit(X_test,y_test)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='multinomial',
          n_jobs=1, penalty='l2', random_state=None, solver='lbfgs',
          tol=0.0001, verbose=0, warm_start=False)

LogRegmodel.score(X_test,y_test)

0.99524133242692048

testlogitpredict=LogRegmodel.predict(X_test)

pd.crosstab(y_test,testlogitpredict)

X_train.corr().to_excel("Xtraincorr.xlsx")

from sklearn.decomposition import PCA # Dimensionality Reduction

pca=PCA(n_components=280) # Reducing no of variables to half

trainpca=pca.fit(X_train)

trainvariance=trainpca.explained_variance_ratio_

variance1=np.cumsum(np.round(trainvariance,decimals=4)*100)

plt.plot(variance1)

[<matplotlib.lines.Line2D at 0x25327446080>]

variance1

array([ 62.42,  67.34,  71.52,  73.39,  75.06,  76.34,  77.51,  78.58,
        79.56,  80.43,  81.2 ,  81.88,  82.46,  83.02,  83.52,  84.  ,
        84.47,  84.91,  85.34,  85.75,  86.14,  86.51,  86.86,  87.2 ,
        87.54,  87.87,  88.18,  88.48,  88.77,  89.05,  89.33,  89.59,
        89.85,  90.09,  90.32,  90.54,  90.76,  90.97,  91.18,  91.39,
        91.58,  91.77,  91.95,  92.13,  92.3 ,  92.46,  92.62,  92.78,
        92.93,  93.08,  93.23,  93.37,  93.51,  93.64,  93.76,  93.88,
        94.  ,  94.12,  94.23,  94.34,  94.45,  94.56,  94.66,  94.76,
        94.86,  94.96,  95.06,  95.15,  95.24,  95.33,  95.42,  95.5 ,
        95.58,  95.66,  95.74,  95.82,  95.9 ,  95.98,  96.05,  96.12,
        96.19,  96.26,  96.33,  96.4 ,  96.47,  96.54,  96.6 ,  96.66,
        96.72,  96.78,  96.84,  96.9 ,  96.96,  97.02,  97.08,  97.14,
        97.19,  97.24,  97.29,  97.34,  97.39,  97.44,  97.49,  97.54,
        97.59,  97.64,  97.69,  97.73,  97.77,  97.81,  97.85,  97.89,
        97.93,  97.97,  98.01,  98.05,  98.09,  98.13,  98.16,  98.19,
        98.22,  98.25,  98.28,  98.31,  98.34,  98.37,  98.4 ,  98.43,
        98.46,  98.49,  98.52,  98.55,  98.58,  98.61,  98.64,  98.66,
        98.68,  98.7 ,  98.72,  98.74,  98.76,  98.78,  98.8 ,  98.82,
        98.84,  98.86,  98.88,  98.9 ,  98.92,  98.94,  98.96,  98.98,
        99.  ,  99.02,  99.04,  99.06,  99.08,  99.1 ,  99.12,  99.14,
        99.16,  99.18,  99.19,  99.2 ,  99.21,  99.22,  99.23,  99.24,
        99.25,  99.26,  99.27,  99.28,  99.29,  99.3 ,  99.31,  99.32,
        99.33,  99.34,  99.35,  99.36,  99.37,  99.38,  99.39,  99.4 ,
        99.41,  99.42,  99.43,  99.44,  99.45,  99.46,  99.47,  99.48,
        99.49,  99.5 ,  99.51,  99.52,  99.53,  99.54,  99.55,  99.56,
        99.57,  99.58,  99.59,  99.6 ,  99.61,  99.62,  99.63,  99.64,
        99.65,  99.66,  99.67,  99.68,  99.69,  99.7 ,  99.71,  99.72,
        99.73,  99.74,  99.75,  99.76,  99.77,  99.78,  99.79,  99.8 ,
        99.81,  99.82,  99.83,  99.83,  99.83,  99.83,  99.83,  99.83,
        99.83,  99.83,  99.83,  99.83,  99.83,  99.83,  99.83,  99.83,
        99.83,  99.83,  99.83,  99.83,  99.83,  99.83,  99.83,  99.83,
        99.83,  99.83,  99.83,  99.83,  99.83,  99.83,  99.83,  99.83,
        99.83,  99.83,  99.83,  99.83,  99.83,  99.83,  99.83,  99.83,
        99.83,  99.83,  99.83,  99.83,  99.83,  99.83,  99.83,  99.83,
        99.83,  99.83,  99.83,  99.83,  99.83,  99.83,  99.83,  99.83])

pcafinal=PCA(n_components=218)

pcafinalmodel=pcafinal.fit(X_train)

X_train1=pcafinalmodel.transform(X_train)

X_train1.shape

(5881, 218)

LogRegmodel2=LogReg.fit(X_train1,y_train)

LogRegmodel2.score(X_train1,y_train)

0.99268831831321203

X_test1=pcafinalmodel.transform(X_test)

LogRegmodel2.score(X_test1,y_test)

0.9843643779741672

from keras.models import Sequential

Using TensorFlow backend.

from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils.np_utils import to_categorical

from sklearn.preprocessing import StandardScaler

scaler=StandardScaler()

scaler.fit(X_train)

StandardScaler(copy=True, with_mean=True, with_std=True)

X_train=scaler.transform(X_train)
X_test=scaler.transform(X_test)

y_train=to_categorical(y_train)
y_test=to_categorical(y_test)

n_input=X_train.shape[1]
n_output=6
n_samples=X_train[0]
n_hidden_units=40

def create_model():
    model=Sequential()
    model.add(Dense(n_hidden_units,input_dim=n_input,activation="sigmoid"))
    model.add(Dense(n_hidden_units,input_dim=n_input,activation="sigmoid"))
    model.add(Dense(n_hidden_units,input_dim=n_input,activation="sigmoid"))
    model.add(Dense(n_output,activation="softmax"))
    model.compile(loss="categorical_crossentropy",optimizer="adam", 
                  metrics=['accuracy'])
    return model

estimator=KerasClassifier(build_fn=create_model,epochs=20,batch_size=10,
                          verbose=True)

estimator.fit(X_train,y_train)

Epoch 1/20
5881/5881 [==============================] - ETA: 0s - loss: 0.9930 - acc: 0.658 - 1s 217us/step - loss: 0.9768 - acc: 0.6642
Epoch 2/20
5881/5881 [==============================] - 1s 138us/step - loss: 0.3121 - acc: 0.8995
Epoch 3/20
5881/5881 [==============================] - 1s 148us/step - loss: 0.1426 - acc: 0.9612
Epoch 4/20
5881/5881 [==============================] - 1s 145us/step - loss: 0.0875 - acc: 0.9742
Epoch 5/20
5881/5881 [==============================] - 1s 144us/step - loss: 0.0649 - acc: 0.9796
Epoch 6/20
5881/5881 [==============================] - 1s 141us/step - loss: 0.0498 - acc: 0.9842
Epoch 7/20
5881/5881 [==============================] - 1s 139us/step - loss: 0.0403 - acc: 0.9876
Epoch 8/20
5881/5881 [==============================] - 1s 144us/step - loss: 0.0350 - acc: 0.9881
Epoch 9/20
5881/5881 [==============================] - 1s 147us/step - loss: 0.0313 - acc: 0.9903
Epoch 10/20
5881/5881 [==============================] - 1s 146us/step - loss: 0.0322 - acc: 0.9886
Epoch 11/20
5881/5881 [==============================] - 1s 143us/step - loss: 0.0222 - acc: 0.9927
Epoch 12/20
5881/5881 [==============================] - 1s 146us/step - loss: 0.0213 - acc: 0.9918
Epoch 13/20
5881/5881 [==============================] - 1s 147us/step - loss: 0.0246 - acc: 0.9913
Epoch 14/20
5881/5881 [==============================] - 1s 155us/step - loss: 0.0238 - acc: 0.9912
Epoch 15/20
5881/5881 [==============================] - 1s 152us/step - loss: 0.0186 - acc: 0.9934
Epoch 16/20
5881/5881 [==============================] - 1s 139us/step - loss: 0.0165 - acc: 0.9956
Epoch 17/20
5881/5881 [==============================] - 1s 141us/step - loss: 0.0092 - acc: 0.9969
Epoch 18/20
5881/5881 [==============================] - 1s 142us/step - loss: 0.0086 - acc: 0.9971
Epoch 19/20
5881/5881 [==============================] - 1s 138us/step - loss: 0.0117 - acc: 0.9966
Epoch 20/20
5881/5881 [==============================] - 1s 139us/step - loss: 0.0083 - acc: 0.9971

<keras.callbacks.History at 0x25325f54a90>

estimator.score(X_test,y_test)

1471/1471 [==============================] - 0s 79us/step

0.98028551697487898

	tBodyAcc-mean()-X	tBodyAcc-mean()-Y	tBodyAcc-mean()-Z	tBodyAcc-std()-X	tBodyAcc-std()-Y	tBodyAcc-std()-Z	tBodyAcc-mad()-X	tBodyAcc-mad()-Y	tBodyAcc-mad()-Z	tBodyAcc-max()-X	...	fBodyBodyGyroJerkMag-kurtosis()	angle(tBodyAccMean,gravity)	angle(tBodyAccJerkMean),gravityMean)	angle(tBodyGyroMean,gravityMean)	angle(tBodyGyroJerkMean,gravityMean)	angle(X,gravityMean)	angle(Y,gravityMean)	angle(Z,gravityMean)	subject	Activity
0	0.288585	-0.020294	-0.132905	-0.995279	-0.983111	-0.913526	-0.995112	-0.983185	-0.923527	-0.934724	...	-0.710304	-0.112754	0.030400	-0.464761	-0.018446	-0.841247	0.179941	-0.058627	1	STANDING
1	0.278419	-0.016411	-0.123520	-0.998245	-0.975300	-0.960322	-0.998807	-0.974914	-0.957686	-0.943068	...	-0.861499	0.053477	-0.007435	-0.732626	0.703511	-0.844788	0.180289	-0.054317	1	STANDING
2	0.279653	-0.019467	-0.113462	-0.995380	-0.967187	-0.978944	-0.996520	-0.963668	-0.977469	-0.938692	...	-0.760104	-0.118559	0.177899	0.100699	0.808529	-0.848933	0.180637	-0.049118	1	STANDING
3	0.279174	-0.026201	-0.123283	-0.996091	-0.983403	-0.990675	-0.997099	-0.982750	-0.989302	-0.938692	...	-0.482845	-0.036788	-0.012892	0.640011	-0.485366	-0.848649	0.181935	-0.047663	1	STANDING
4	0.276629	-0.016570	-0.115362	-0.998139	-0.980817	-0.990482	-0.998321	-0.979672	-0.990441	-0.942469	...	-0.699205	0.123320	0.122542	0.693578	-0.615971	-0.847865	0.185151	-0.043892	1	STANDING

col_0	0	1	2	3	4	5
row_0
0	1125	0	0	0	0	0
1	0	1016	26	0	0	0
2	0	28	1068	0	0	0
3	0	0	0	993	0	0
4	0	0	0	0	778	0
5	0	0	0	0	0	847

col_0	0	1	2	3	4	5
row_0
0	282	0	0	0	0	0
1	0	240	4	0	0	0
2	0	3	275	0	0	0
3	0	0	0	233	0	0
4	0	0	0	0	208	0
5	0	0	0	0	0	226

DataAdvanceR Labs

Thursday, October 11, 2018

Human Activity Recognition Multinomial Classification Models using Keras Python

No comments:

Post a Comment