In [2]:
# Imports for the whole notebook: pandas for tabular data, numpy for
# numerics, matplotlib for plotting (rendered inline in the notebook).
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [3]:
# Load the Human Activity Recognition training set into a DataFrame.
# NOTE(review): hardcoded absolute Windows path — this breaks on any other
# machine; prefer a relative or configurable path (e.g. pathlib.Path / DATA_DIR).
activity=pd.read_csv("C:/Users/Rajesh Prabhakar/Desktop/Hackathon/Human Activity Recognition/train.csv")
# --- Exploration ---

# (rows/observations, columns/variables)
activity.shape

# Column dtypes — all features are float; 'Activity' is the string target.
activity.dtypes

activity.head(5)

# Multi-class target variable ('y') — the six activity labels.
activity.Activity.unique()

from sklearn.preprocessing import LabelEncoder

# Class distribution of the target.
# FIX: pd.value_counts(...) (top-level function) is deprecated since
# pandas 2.0 and removed in 2.2 — use the Series method instead.
activity.Activity.value_counts()

# --- Encode the target ---

y = activity.Activity

# Map the six string labels to integers 0..5.
le = LabelEncoder()
y = le.fit_transform(y)

# Encoded class distribution; y is now a numpy array, so wrap it in a
# Series to use the (non-deprecated) value_counts method.
pd.Series(y).value_counts()

# Feature matrix: drop the target and the subject identifier.
# axis=1 drops columns; axis=0 would drop rows.
X = activity.drop(['Activity', 'subject'], axis=1)
# Sanity-check feature/target dimensions before splitting.
y.shape
X.shape

from sklearn.model_selection import train_test_split

# Hold out 20% of the observations for evaluation; the fixed seed makes
# the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

# Confirm the 80/20 split on both features and labels.
X_train.shape
X_test.shape
y_train.shape
y_test.shape
from sklearn.linear_model import LogisticRegression

# Multinomial (softmax) logistic regression; 'lbfgs' supports the
# multinomial loss directly.
# NOTE(review): the features are unscaled here, so lbfgs may stop at its
# default max_iter=100 with a ConvergenceWarning — consider max_iter=1000
# or standardizing the features first.
LogReg = LogisticRegression(multi_class="multinomial", solver="lbfgs")

# Fit on the training split only.
LogRegmodel = LogReg.fit(X_train, y_train)

# Training accuracy.
LogRegmodel.score(X_train, y_train)

# One intercept and one coefficient vector per class (6 x n_features).
LogRegmodel.intercept_
LogRegmodel.coef_

# Confusion matrix on the training data (rows = true, cols = predicted).
logitpredict = LogRegmodel.predict(X_train)
pd.crosstab(y_train, logitpredict)

# FIX: the training error rate was hardcoded as (31+25)/n — numbers copied
# by hand from one run's confusion matrix. Compute it from the predictions
# so it stays correct on any re-run.
train_error = (y_train != logitpredict).mean()
train_error
1 - train_error  # equals the training accuracy reported above

# BUG FIX: the original called LogRegmodel.fit(X_test, y_test) here,
# refitting the model on the TEST data before scoring it. That leaks the
# test labels into the model and makes the reported "test accuracy"
# meaningless. Score the train-fitted model on the held-out set instead.
LogRegmodel.score(X_test, y_test)

# Confusion matrix on the held-out test data.
testlogitpredict = LogRegmodel.predict(X_test)
pd.crosstab(y_test, testlogitpredict)

# Export the training-feature correlation matrix for offline inspection
# (to_excel requires the openpyxl package).
X_train.corr().to_excel("Xtraincorr.xlsx")
from sklearn.decomposition import PCA  # dimensionality reduction

# First pass: keep 280 components (roughly half of the ~561 features) so we
# can inspect how much variance each component explains.
pca = PCA(n_components=280)
trainpca = pca.fit(X_train)

# Fraction of total variance explained by each component, in order.
trainvariance = trainpca.explained_variance_ratio_

# Cumulative explained variance as a percentage (rounded per component).
rounded_pct = np.round(trainvariance, decimals=4) * 100
variance1 = np.cumsum(rounded_pct)

# Scree-style plot: look for where the curve flattens out.
plt.plot(variance1)

# Inspect the cumulative percentages directly.
variance1

# Second pass: 218 components, chosen from the cumulative-variance curve
# above. Fit on the training data, then apply the SAME projection to both
# splits (never refit PCA on the test data).
pcafinal = PCA(n_components=218)
pcafinalmodel = pcafinal.fit(X_train)

X_train1 = pcafinalmodel.transform(X_train)
X_train1.shape

# Refit the logistic regression on the reduced feature space.
LogRegmodel2 = LogReg.fit(X_train1, y_train)
LogRegmodel2.score(X_train1, y_train)

# Project the test data and evaluate on it.
X_test1 = pcafinalmodel.transform(X_test)
LogRegmodel2.score(X_test1, y_test)
from keras.models import Sequential
from keras.layers import Dense
# NOTE(review): keras.wrappers.scikit_learn was removed in recent Keras/TF
# releases — the maintained replacement is the `scikeras` package
# (SciKeras.wrappers.KerasClassifier). Kept here to match the environment
# this notebook was run in.
from keras.wrappers.scikit_learn import KerasClassifier
# FIX: keras.utils.np_utils is a removed module path in modern Keras;
# to_categorical has long been exported from keras.utils directly.
from keras.utils import to_categorical

from sklearn.preprocessing import StandardScaler

# Standardize features for the neural network: fit the scaler on the
# TRAINING data only, then apply that same transform to the test data
# (prevents test-set leakage).
scaler = StandardScaler()
scaler.fit(X_train)

# NOTE(review): this overwrites the raw X_train/X_test in place — re-running
# this cell would scale already-scaled data. A stage-suffixed name
# (X_train_scaled) would make the cell idempotent.
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# One-hot encode the integer class labels for the softmax output layer.
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

# Network dimensions.
n_input = X_train.shape[1]    # number of input features
n_output = 6                  # number of activity classes
n_samples = X_train.shape[0]  # BUG FIX: was X_train[0] — that indexes the
                              # first ROW of the array, not the row count
n_hidden_units = 40
In [61]:
def create_model():
    """Build and compile a 3-hidden-layer MLP for 6-class activity recognition.

    Uses the module-level n_input / n_hidden_units / n_output constants.
    Returns a compiled keras Sequential model.
    """
    model = Sequential()
    # Only the first layer needs input_dim; the original repeated it on the
    # 2nd and 3rd hidden layers, where Keras silently ignores it — removed
    # here because it is misleading.
    model.add(Dense(n_hidden_units, input_dim=n_input, activation="sigmoid"))
    model.add(Dense(n_hidden_units, activation="sigmoid"))
    model.add(Dense(n_hidden_units, activation="sigmoid"))
    # Softmax output over the 6 classes, paired with categorical cross-entropy.
    model.add(Dense(n_output, activation="softmax"))
    model.compile(loss="categorical_crossentropy", optimizer="adam",
                  metrics=['accuracy'])
    return model
# Wrap the Keras model builder so it exposes the scikit-learn estimator API
# (fit / score / predict).
estimator = KerasClassifier(
    build_fn=create_model,
    epochs=20,
    batch_size=10,
    verbose=True,
)

# Train on the standardized features and one-hot labels.
estimator.fit(X_train, y_train)

# Accuracy on the held-out (standardized) test set.
estimator.score(X_test, y_test)
In [ ]:
No comments:
Post a Comment