import pandas as pd
import numpy as np
import matplotlib as mpl
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# Data Loading and Exploration
load essential libraries
# Accelerometer samples and the activity labels that accompany them.
X = pd.read_csv("data/har/time_series.csv")
y = pd.read_csv("data/har/labels.csv").label

# Activity code -> human-readable name.
activities = {1: 'standing', 2: 'walking', 3: 'stairs-down', 4: 'stairs-up'}

# Every entry of `y` is repeated this many times so that the label sequence
# can be attached to `X` row by row.
SAMPLES_PER_LABEL = 10

# One vectorized repeat replaces the per-element repeat/extend loop; wrapping
# it in `list` keeps `labels` a plain list, as before.
labels = list(np.repeat(y.values, SAMPLES_PER_LABEL))

# The expanded sequence is slightly longer than the recording, so only the
# first len(X) labels are used (pandas raises if it is too short).
X['label'] = labels[:len(X)]
y = X.label
X.head()

| | Unnamed: 0 | timestamp | UTC time | accuracy | x | y | z | label |
|---|---|---|---|---|---|---|---|---|
| 0 | 20586 | 1565109930787 | 2019-08-06T16:45:30.787 | unknown | -0.006485 | -0.934860 | -0.069046 | 1 |
| 1 | 20587 | 1565109930887 | 2019-08-06T16:45:30.887 | unknown | -0.066467 | -1.015442 | 0.089554 | 1 |
| 2 | 20588 | 1565109930987 | 2019-08-06T16:45:30.987 | unknown | -0.043488 | -1.021255 | 0.178467 | 1 |
| 3 | 20589 | 1565109931087 | 2019-08-06T16:45:31.087 | unknown | -0.053802 | -0.987701 | 0.068985 | 1 |
| 4 | 20590 | 1565109931188 | 2019-08-06T16:45:31.188 | unknown | -0.054031 | -1.003616 | 0.126450 | 1 |
X.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3744 entries, 0 to 3743
Data columns (total 8 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Unnamed: 0 3744 non-null int64
1 timestamp 3744 non-null int64
2 UTC time 3744 non-null object
3 accuracy 3744 non-null object
4 x 3744 non-null float64
5 y 3744 non-null float64
6 z 3744 non-null float64
7 label 3744 non-null int64
dtypes: float64(3), int64(3), object(2)
memory usage: 234.1+ KB
y
0 1
1 1
2 1
3 1
4 1
..
3739 4
3740 4
3741 4
3742 4
3743 4
Name: label, Length: 3744, dtype: int64
standing = X.label == 1
walking = X.label == 2
stairs_down = X.label == 3
stairs_up = X.label == 4
x = np.linspace(0, len(labels)-6, len(labels)-6)
mpl.style.use("fivethirtyeight")
%matplotlib notebook
fig, ax = plt.subplots(2, 2, figsize=(12, 8))
ax[0, 0].plot(x[standing], X.x[standing], x[standing],
X.y[standing], x[standing], X.z[standing], '-', alpha=0.4)
ax[0, 0].set_title(activities[1])
ax[0, 1].plot(x[walking], X.x[walking], x[walking],
X.y[walking], x[walking], X.z[walking], '-', alpha=0.4)
ax[0, 1].set_title(activities[2])
ax[1, 0].plot(x[stairs_down],
X.x[stairs_down], x[stairs_down],
X.y[stairs_down], x[stairs_down],
X.z[stairs_down], '-', alpha=0.4)
ax[1, 0].set_title(activities[3])
ax[1, 1].plot(X.timestamp[stairs_up], X.x[stairs_up], X.timestamp[stairs_up],
X.y[stairs_up], X.timestamp[stairs_up], X.z[stairs_up], '-', alpha=0.4)
ax[1, 1].set_title(activities[4])
fig.suptitle("Tri-Axial Linear Acceleration", fontsize=25)
plt.gcf().autofmt_xdate()
fig.text(0.5, 0.05, 'time', ha='center', fontsize=16)
fig.text(0.01, 0.5, 'acceleration', va='center', rotation='vertical', fontsize=16)
fig.show()

# Whole-recording overview: the three acceleration axes against the timestamp.
mpl.style.use("fivethirtyeight")
overview = plt.gca()
for axis_name in ("x", "y", "z"):
    overview.plot(X.timestamp, X[axis_name], '-', alpha=0.4)
overview.set_title("Tri-Axial Linear Acceleration")
overview.set_xlabel("time")
overview.set_ylabel("acceleration")
plt.gcf().autofmt_xdate()
plt.show()walking = X.label == 1
standing = X.label == 2
stairs_down = X.label == 3
stairs_up = X.label == 4
%matplotlib notebook
fig,axs = plt.subplots(4,1, figsize = (16,12), sharex=True)
sns.kdeplot(X.x[walking], shade=True, ax=axs[0])
sns.kdeplot(X.y[walking], shade=True, ax=axs[0])
sns.kdeplot(X.z[walking], shade=True, ax=axs[0])
sns.kdeplot(X.x[standing], shade=True, ax=axs[1])
sns.kdeplot(X.y[standing], shade=True, ax=axs[1])
sns.kdeplot(X.z[standing], shade=True, ax=axs[1])
sns.kdeplot(X.x[stairs_down], shade=True, ax=axs[2])
sns.kdeplot(X.y[stairs_down], shade=True, ax=axs[2])
sns.kdeplot(X.z[stairs_down], shade=True, ax=axs[2])
sns.kdeplot(X.x[stairs_up], shade=True, ax=axs[3])
sns.kdeplot(X.y[stairs_up], shade=True, ax=axs[3])
sns.kdeplot(X.z[stairs_up], shade=True, ax=axs[3])
axs[0].set_title(activities[1])
axs[1].set_title(activities[2])
axs[2].set_title(activities[3])
axs[3].set_title(activities[4])
axs[0].set_xlim((-3,2))
fig.suptitle("Tri-Axial Acceralometer Data", fontsize=20)
fig.show()

# Modelling
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score
# Model inputs: the three acceleration columns; the target is the label column.
feature_columns = ['x', 'y', 'z']
train_covariates = X.loc[:, feature_columns]
target = X['label']

# Depth-limited random forest with a fixed seed so results are repeatable.
clf = RandomForestClassifier(random_state=0, max_depth=10)
def correlation(estimator, X, y):
    """Fit ``estimator`` on ``(X, y)`` and return its R^2 on the same data.

    The estimator is refitted in place on every call and then scored on the
    rows it was just trained on, so the result is an in-sample score.

    NOTE(review): ``r2_score`` is a regression metric; confirm it is the
    intended measure if ``y`` holds class labels.
    """
    estimator.fit(X, y)
    predictions = estimator.predict(X)
    return r2_score(y_true=y, y_pred=predictions)
def accuracy(estimator, X, y):
    """Fit ``estimator`` on ``(X, y)`` and return its accuracy on the same data.

    The estimator is refitted in place on every call and then scored on the
    rows it was just trained on, so the result is an in-sample accuracy.
    Because of that refit, using this function as a ``scoring`` callable for
    cross-validation scores a model retrained on each held-out fold rather
    than the model fitted on the training folds.
    """
    estimator.fit(X, y)
    predictions = estimator.predict(X)
    return accuracy_score(y_true=y, y_pred=predictions)
# In-sample accuracy: `accuracy` fits `clf` on the full data set and scores it
# on those same rows, so despite the variable name this is a training score.
test_score = accuracy(clf, train_covariates, target)

# 10-fold cross-validated accuracy. The built-in "accuracy" scorer evaluates
# the model fitted on the training folds against the held-out fold. A scorer
# that calls `fit` itself (such as `accuracy` above) would instead retrain on
# the held-out fold and report in-sample accuracy.
scores = cross_val_score(
    clf,
    train_covariates,
    target,
    cv=10,
    scoring="accuracy",
)
# The result used to be bound to `val_scores` only, while the lines below read
# `scores`; both names are kept.
val_scores = scores

# NOTE: the recorded output below predates the scorer change; re-run to refresh.
scores
array([0.97066667, 0.968 , 0.96 , 0.976 , 0.93582888,
0.9973262 , 0.94919786, 0.98930481, 0.98128342, 0.99197861])
test_score
0.7641559829059829
scores.mean()
0.9719586452762924