Data Loading and Exploration

Load essential libraries

import pandas as pd
import numpy as np
import matplotlib as mpl
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# Raw tri-axial accelerometer samples and the activity labels recorded for them.
X = pd.read_csv("data/har/time_series.csv")
y = pd.read_csv("data/har/labels.csv").label

# Human-readable names for the integer activity codes stored in the labels.
activities = {1:'standing', 2:'walking', 3:'stairs-down', 4:'stairs-up'}

# Expand every entry of `y` into 10 consecutive per-sample labels in a single
# vectorized call (previously: a Python loop repeating each value 9 times and
# appending it once more).
labels = list(np.repeat(y.to_numpy(), 10))

# The expanded sequence is 6 entries longer than the frame it is attached to,
# so the trailing 6 are dropped; the column assignment raises if the lengths
# still disagree.
X['label'] = labels[:-6]
# From here on `y` is the per-sample label column rather than the raw labels.
y = X.label

# Preview the first rows to sanity-check the loaded columns and the attached label.
X.head()

	Unnamed: 0	timestamp	UTC time	accuracy	x	y	z	label
0	20586	1565109930787	2019-08-06T16:45:30.787	unknown	-0.006485	-0.934860	-0.069046	1
1	20587	1565109930887	2019-08-06T16:45:30.887	unknown	-0.066467	-1.015442	0.089554	1
2	20588	1565109930987	2019-08-06T16:45:30.987	unknown	-0.043488	-1.021255	0.178467	1
3	20589	1565109931087	2019-08-06T16:45:31.087	unknown	-0.053802	-0.987701	0.068985	1
4	20590	1565109931188	2019-08-06T16:45:31.188	unknown	-0.054031	-1.003616	0.126450	1

# Summarize column dtypes and non-null counts of the labelled frame.
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3744 entries, 0 to 3743
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  3744 non-null   int64  
 1   timestamp   3744 non-null   int64  
 2   UTC time    3744 non-null   object 
 3   accuracy    3744 non-null   object 
 4   x           3744 non-null   float64
 5   y           3744 non-null   float64
 6   z           3744 non-null   float64
 7   label       3744 non-null   int64  
dtypes: float64(3), int64(3), object(2)
memory usage: 234.1+ KB

0       1
1       1
2       1
3       1
4       1
       ..
3739    4
3740    4
3741    4
3742    4
3743    4
Name: label, Length: 3744, dtype: int64

# Boolean row masks, one per activity code (1..4, in the order of `activities`).
standing, walking, stairs_down, stairs_up = (
    X.label == code for code in (1, 2, 3, 4)
)

# Horizontal plotting axis: as many evenly spaced points as there are labels
# left after the trailing 6 are discarded.
x = np.linspace(0, len(labels) - 6, num=len(labels) - 6)

mpl.style.use("fivethirtyeight")
%matplotlib notebook

fig, ax = plt.subplots(2, 2, figsize=(12, 8))
ax[0, 0].plot(x[standing], X.x[standing], x[standing],
              X.y[standing], x[standing], X.z[standing], '-', alpha=0.4)
ax[0, 0].set_title(activities[1])

ax[0, 1].plot(x[walking], X.x[walking], x[walking],
              X.y[walking], x[walking], X.z[walking], '-', alpha=0.4)
ax[0, 1].set_title(activities[2])

ax[1, 0].plot(x[stairs_down],
              X.x[stairs_down], x[stairs_down],
              X.y[stairs_down], x[stairs_down],
              X.z[stairs_down], '-', alpha=0.4)
ax[1, 0].set_title(activities[3])

ax[1, 1].plot(X.timestamp[stairs_up], X.x[stairs_up], X.timestamp[stairs_up],
              X.y[stairs_up], X.timestamp[stairs_up], X.z[stairs_up], '-', alpha=0.4)
ax[1, 1].set_title(activities[4])

fig.suptitle("Tri-Axial Linear Acceleration", fontsize=25)
plt.gcf().autofmt_xdate()
fig.text(0.5, 0.05, 'time', ha='center', fontsize=16)
fig.text(0.01, 0.5, 'acceleration', va='center', rotation='vertical', fontsize=16)
fig.show()

mpl.style.use("fivethirtyeight")
# Overlay the three acceleration components of the whole recording against
# the timestamp column, one solid semi-transparent line per component.
for component in ("x", "y", "z"):
    plt.plot(X.timestamp, X[component], '-', alpha=0.4)
plt.title("Tri-Axial Linear Acceleration")
plt.xlabel("time")
plt.ylabel("acceleration")
plt.gcf().autofmt_xdate()
plt.show()

walking = X.label == 1
standing = X.label == 2
stairs_down = X.label == 3
stairs_up = X.label == 4
%matplotlib notebook
fig,axs = plt.subplots(4,1, figsize = (16,12), sharex=True)
sns.kdeplot(X.x[walking], shade=True, ax=axs[0])
sns.kdeplot(X.y[walking], shade=True, ax=axs[0])
sns.kdeplot(X.z[walking], shade=True, ax=axs[0])

sns.kdeplot(X.x[standing], shade=True, ax=axs[1])
sns.kdeplot(X.y[standing], shade=True, ax=axs[1])
sns.kdeplot(X.z[standing], shade=True, ax=axs[1])

sns.kdeplot(X.x[stairs_down], shade=True, ax=axs[2])
sns.kdeplot(X.y[stairs_down], shade=True, ax=axs[2])
sns.kdeplot(X.z[stairs_down], shade=True, ax=axs[2])

sns.kdeplot(X.x[stairs_up], shade=True, ax=axs[3])
sns.kdeplot(X.y[stairs_up], shade=True, ax=axs[3])
sns.kdeplot(X.z[stairs_up], shade=True, ax=axs[3])

axs[0].set_title(activities[1])
axs[1].set_title(activities[2])
axs[2].set_title(activities[3])
axs[3].set_title(activities[4])

axs[0].set_xlim((-3,2))
fig.suptitle("Tri-Axial Acceralometer Data", fontsize=20)
fig.show()

Modelling

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score

# Depth-limited random forest with a fixed seed so repeated runs are reproducible.
clf = RandomForestClassifier(random_state=0, max_depth=10)
# Target: the per-sample activity code.
target = X['label']
# Features: the three raw acceleration components.
train_covariates = X.loc[:, ['x', 'y', 'z']]

def correlation(estimator, X, y):
    """Score ``estimator`` on ``(X, y)`` with ``r2_score``.

    The signature matches a scikit-learn scorer callable
    (``scorer(estimator, X, y)``), so it can be passed as ``scoring=``.

    Args:
        estimator: Object exposing ``fit`` and ``predict``.
        X: Feature matrix to predict on.
        y: True target values for ``X``.

    Returns:
        The value of ``r2_score(y, estimator.predict(X))``.

    Note:
        The estimator is fitted on ``(X, y)`` only when it has not been
        fitted yet. Refitting unconditionally (the previous behaviour) meant
        that, when used as a scorer, the model handed over already fitted was
        retrained on the very data it was about to be scored on.
    """
    # Imported locally so the function is self-contained within this cell.
    from sklearn.exceptions import NotFittedError
    from sklearn.utils.validation import check_is_fitted

    try:
        check_is_fitted(estimator)
    except NotFittedError:
        # Direct use with a fresh estimator: fit on the supplied data first.
        estimator.fit(X, y)
    y_pred = estimator.predict(X)
    return r2_score(y, y_pred)

def accuracy(estimator, X, y):
    """Score ``estimator`` on ``(X, y)`` with ``accuracy_score``.

    The signature matches a scikit-learn scorer callable
    (``scorer(estimator, X, y)``), so it can be passed as ``scoring=``.

    Args:
        estimator: Object exposing ``fit`` and ``predict``.
        X: Feature matrix to predict on.
        y: True labels for ``X``.

    Returns:
        The value of ``accuracy_score(y, estimator.predict(X))``.

    Note:
        The estimator is fitted on ``(X, y)`` only when it has not been
        fitted yet. Refitting unconditionally (the previous behaviour) meant
        that, when used as a scorer, the model handed over already fitted was
        retrained on the very data it was about to be scored on, so the
        reported score was an in-sample score.
    """
    # Imported locally so the function is self-contained within this cell.
    from sklearn.exceptions import NotFittedError
    from sklearn.utils.validation import check_is_fitted

    try:
        check_is_fitted(estimator)
    except NotFittedError:
        # Direct use with a fresh estimator: fit on the supplied data first.
        estimator.fit(X, y)
    y_pred = estimator.predict(X)
    return accuracy_score(y, y_pred)

# NOTE(review): `accuracy` is called here with the full dataset and a fresh
# classifier, so it fits and predicts on the same rows; despite its name,
# `test_score` is an in-sample score, not a held-out one.
test_score = accuracy(clf, train_covariates, target)

# 10-fold cross-validation, scored with the custom `accuracy` callable.
val_scores = cross_val_score(
    clf, train_covariates, target, cv=10, scoring=accuracy
)

# Per-fold cross-validation scores. The result of cross_val_score is bound to
# `val_scores`; the bare name `scores` is never assigned in this notebook.
val_scores

array([0.97066667, 0.968     , 0.96      , 0.976     , 0.93582888,
       0.9973262 , 0.94919786, 0.98930481, 0.98128342, 0.99197861])

# Display the score computed on the full dataset.
test_score

0.7641559829059829

# Mean of the per-fold cross-validation scores (held in `val_scores`; the
# bare name `scores` is never assigned in this notebook).
val_scores.mean()

0.9719586452762924