Skip to content
Snippets Groups Projects
Commit d9ec44fe authored by Raymond Chia's avatar Raymond Chia
Browse files

subject plots

parent cf1ea305
Branches
No related merge requests found
S01cal.png

240 KiB

python regress_rr.py -s 2 -f tsfresh -d imu+bvp
python regress_rr.py -s 3 -f tsfresh -d imu
python regress_rr.py -s 3 -f tsfresh -d imu -ts 0
# python regress_rr.py -s 4 -f tsfresh -d imu+bvp
python regress_rr.py -s 5 -f tsfresh -d imu+bvp
python regress_rr.py -s 6 -f tsfresh -d imu+bvp
python regress_rr.py -s -1 -f tsfresh -d bvp
python regress_rr.py -s -1 -f tsfresh -d imu
python regress_rr.py -s -1 -f tsfresh -d imu+bvp -ts 0
python regress_rr.py -s -1 -f tsfresh -d bvp -ts 0
python regress_rr.py -s -1 -f tsfresh -d imu -ts 0
......@@ -31,6 +31,7 @@ from sklearn.metrics import accuracy_score
from tsfresh.feature_extraction import extract_features
from tsfresh.feature_extraction import settings as tsfresh_settings
from tsfresh.feature_selection import relevance
from tsfresh.utilities.string_manipulation import get_config_from_string
from modules.datapipeline import get_file_list, load_and_snip, load_data, \
......@@ -295,7 +296,7 @@ def load_e4_file(e4_file:str):
dfs = {csv_file.filename: pd.read_csv(zip_file.open(csv_file.filename)
,header=None)
for csv_file in zip_file.infolist()
if csv_file.filename.endswith('.csv')}
if csv_file.filename.endswith('.csv') and csv_file.file_size>0}
bvp = dfs["BVP.csv"]
# First row is the initial time of the session as unix time.
# Second row is the sample rate in Hz
......@@ -303,8 +304,10 @@ def load_e4_file(e4_file:str):
fs = bvp.iloc[1].values[0]
nsamples = len(bvp) - 2
t0_datetime = datetime.utcfromtimestamp(t0)
t0_local = datetime_from_utc_to_local(t0_datetime)
# t0_datetime = datetime.utcfromtimestamp(t0)
# t0_local = datetime_from_utc_to_local(t0_datetime)
t0_datetime = datetime.fromtimestamp(t0, tz=timezone.utc)
t0_local = t0_datetime.astimezone(pytz.timezone('Australia/Sydney'))
time = [t0_local.timestamp() + ind*(1/fs) for ind in
range(nsamples)]
tmp = [np.nan, np.nan]
......@@ -341,7 +344,7 @@ def load_e4_files(f_list:list):
for d, h in tmp:
data.append(d)
hdr.append(h)
data_df = pd.concat(data, axis=0)
data_df = pd.concat(data, axis=0).sort_values(by='sec')
return data_df, hdr
# Synchronising data
......@@ -416,7 +419,7 @@ def df_win_task(w_inds, df, i, cols):
for col in cols:
data = w_df[col].values
# DSP
if sum(np.abs(data)) > 0:
if sum(np.abs(data)) > 0 and np.std(data) > 0:
sd_data = (data - np.mean(data, axis=0))/np.std(data, axis=0)
else:
sd_data = data.copy()
......@@ -562,7 +565,7 @@ def load_and_sync_xsens(subject, sens_list:list=['imu', 'bvp']):
if 'imu' in sens_list and 'bvp' in sens_list:
br_df, imu_df = sync_to_ref(br_df, imu_df_all.copy())
pss_df, _ = sync_to_ref(pss_df, imu_df_all.copy())
bvp_df, _ = sync_to_ref(bvp_df_all.copy(), pss_df.copy())
bvp_df, _ = sync_to_ref(bvp_df_all.copy(), imu_df_all.copy())
elif 'imu' in sens_list and not 'bvp' in sens_list:
br_df, imu_df = sync_to_ref(br_df, imu_df_all.copy())
pss_df, _ = sync_to_ref(pss_df, imu_df_all.copy())
......@@ -615,6 +618,7 @@ def load_and_sync_xsens(subject, sens_list:list=['imu', 'bvp']):
if len(xsens_list) > 1:
xsens_df = pd.concat(xsens_list, axis=0, ignore_index=True)
xsens_df.sort_values(by='sec', inplace=True)
xsens_df.reset_index(drop=True, inplace=True)
else:
xsens_df = xsens_list[0]
......@@ -623,7 +627,7 @@ def load_and_sync_xsens(subject, sens_list:list=['imu', 'bvp']):
def load_tsfresh(xsens_df, home_dir,
window_size=12, window_shift=0.2, fs=IMU_FS,
overwrite=False, data_cols=None, prefix=None):
overwrite=False, data_cols=None, prefix=None, **kwargs):
"""
Loads the tsfresh pickle file, or generates if it does not exist for the
given configuration
......@@ -654,7 +658,6 @@ def load_tsfresh(xsens_df, home_dir,
if exists(pkl_file) and not overwrite:
return pd.read_pickle(pkl_file)
assert 'acc_x' in xsens_df.columns.tolist() and \
'gyro_x' in xsens_df.columns.tolist() and \
'bvp' in xsens_df.columns.tolist(), \
......@@ -671,6 +674,7 @@ def load_tsfresh(xsens_df, home_dir,
x_df, column_sort='sec',
column_id='id',
# default_fc_parameters=tsfresh_settings.MinimalFCParameters(),
**kwargs
)
x_features_df.fillna(0, inplace=True)
x_features_df.reset_index(drop=True, inplace=True)
......@@ -755,7 +759,7 @@ def get_cal_data(event_df, xsens_df):
start_sec = sec
continue
elif event == 'Stop':
stop_sec = sec
stop_sec = min(sec, xsens_df['sec'].values[-1])
dsync = DataSynchronizer()
dsync.set_bounds(xsens_df['sec'].values, start_sec, stop_sec)
......@@ -967,7 +971,7 @@ class EvalHandler():
if len(index_list) == 0:
print("adding new entry")
eval_hist = eval_hist._append(self.entry, ignore_index=True)
elif index_list is not None and self.overwrite:
elif index_list is not None:
eval_hist.loc[index_list[0]] = self.entry
self.eval_hist = eval_hist
......@@ -1184,6 +1188,7 @@ def sens_rr_model(subject,
train_len:int=3,
test_standing=False,
data_input:str='imu+bvp',
ntop_features:int=5
):
"""Loads, preprocesses, and trains a select model using the configured
settings.
......@@ -1281,17 +1286,10 @@ def sens_rr_model(subject,
test_df_tmp = get_test_data(cal_df, activity_df, xsens_df, test_standing)
test_df = pd.concat([df for df in test_df_tmp['data']], axis=0)
y_cols = ['sec', 'br', 'pss', 'cpm']
if use_tsfresh:
cal_df_list = []
test_df = load_tsfresh(test_df,
pfh.home_directory,
window_size=window_size,
window_shift=window_shift,
fs=fs,
overwrite=overwrite_tsfresh,
data_cols=data_cols,
prefix='test',
)
top_cpm_features = {}
for index, row in cal_df.iterrows():
data = load_tsfresh(row['data'],
pfh.home_directory,
......@@ -1302,9 +1300,47 @@ def sens_rr_model(subject,
data_cols=data_cols,
prefix=f"calcpm_{row['cpm']}"
)
m_data_cols = data.columns.values[3:]
rel_df = relevance.calculate_relevance_table(
data[m_data_cols], data['pss'])
params = []
if 'imu' in sens_list:
tmp = [
val for val in rel_df['feature'] if 'acc' in val or
'gyr' in val
]
params += tmp[:ntop_features]
if 'bvp' in sens_list:
tmp = [val for val in rel_df['feature'] if 'bvp' in val]
params += tmp[:ntop_features]
assert params is not None, "Invalid selection of parameters"
top_cpm_features[row['cpm']] = params
cal_df_list.append({'cpm': row['cpm'], 'data': data})
param_df = pd.DataFrame.from_dict(top_cpm_features, orient='index')
top_cpm_params = param_df.columns.values
arr = param_df[top_cpm_params].values.flatten()
hist_df = pd.DataFrame.from_dict(Counter(arr), orient='index')
feature_strings = get_parameters_from_feature_string(
hist_df.index.values.tolist())
cal_df = pd.DataFrame(cal_df_list)
# Get the relevance for tables cal and extract top 20
test_df = load_tsfresh(test_df,
pfh.home_directory,
window_size=window_size,
window_shift=window_shift,
fs=fs,
overwrite=overwrite_tsfresh,
data_cols=data_cols,
prefix='test',
kind_to_fc_parameters=feature_strings
)
chosen_cols = [col for col in test_df.columns if col in
hist_df.index.tolist()+y_cols]
test_df = test_df[chosen_cols]
else:
x_test_df, y_test_df = get_df_windows(
test_df, df_win_task, window_size=window_size,
......@@ -1374,12 +1410,12 @@ def sens_rr_model(subject,
elif use_tsfresh:
y_cols = ['sec', 'br', 'pss', 'cpm']
x_cols = [col for col in train_df.columns.values if col not in y_cols]
x_train = train_df[x_cols].values
y_train = train_df['cpm'].values.reshape(-1, 1)
x_cols = [col for col in test_df.columns.values if col not in y_cols]
x_test = test_df[x_cols].values
y_test = test_df[lbl_str].values.reshape(-1, 1)
y_test_df = test_df[y_cols[:-1]]
x_train = train_df[x_cols].values
y_train = train_df['cpm'].values.reshape(-1, 1)
else:
x_train_df, y_train_df = get_df_windows(train_df,
df_win_task,
......@@ -1555,10 +1591,10 @@ def arg_parser():
default=0,
)
parser.add_argument('--win_size', type=int,
default=12,
default=20,
)
parser.add_argument('--win_shift', type=float,
default=0.2,
default=0.05,
)
parser.add_argument('-l', '--lbl_str', type=str,
default='pss',
......@@ -1664,6 +1700,9 @@ if __name__ == '__main__':
elif subject <= 0 and method == 'ml':
subjects = [subject_pre_string+str(i).zfill(2) for i in \
range(1, N_SUBJECT_MAX+1)]
if 'bvp' in data_input:
# Missing bvp for S03, S04
subjects = [sbf for sbj in subjects if sbj in ['S03']]
rr_func = partial(sens_rr_model,
window_size=window_size,
......
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from regress_rr import load_tsfresh
from os.path import join
from os import makedirs
import ipdb
import joblib
import pandas as pd
from config import *
from regress_rr import get_activity_log
from modules.evaluations import Evaluation
from modules.digitalsignalprocessing import (
movingaverage, butter_lowpass_filter)
from sklearn.preprocessing import PolynomialFeatures, LabelEncoder
plt.close('all')
plt.rcParams.update({'figure.titlesize' : 8,
'axes.titlesize' : 7,
'axes.labelsize' : 7,
'xtick.labelsize' : 6,
'ytick.labelsize' : 6,
'legend.fontsize' : 6,
'legend.title_fontsize' : 6,
})
window_size = 20
window_shift = 0.05
# get the test data for the subject
# load imu, bvp, and imu+bvp data for the subject
# get the lin reg data for the subject
# plot the pss vs the linear regression data for the subject
# highlight standing periods
sbj = 'S01'
sens_list = ['imu', 'bvp', 'imu-bvp']
# cfg_ids = [4, 0, 0]
cfg_ids = [5, 11, 4]
lbl_str = 'pss'
mdl_dir = join(DATA_DIR, 'subject_specific', sbj)
mdl_file = 'linr_model.joblib'
tsfresh_dir = join(
mdl_dir,
'tsfresh__winsize_{0}__winshift_{1}'.format(window_size, window_shift)
)
combi_strs = ['combi7.0-10.0-12.0-15.0-17.0']*3
def get_model(sens, sbj, cfg_id, combi):
m_dir = f'linreg_{sens}_rr_{sbj}_id{cfg_id}_{combi}'
mdl_fname = join(mdl_dir, sens+'_rr', str(cfg_id).zfill(2), m_dir, mdl_file)
return joblib.load(mdl_fname)
def get_test_data():
test_fname = 'test__winsize_{0}__winshift_{1}__tsfresh.pkl'.format(
window_size, window_shift)
test_df = pd.read_pickle(join(tsfresh_dir, test_fname))
y_cols = ['sec', 'br', 'pss', 'cpm']
x_cols = [col for col in test_df.columns.values if col not in y_cols]
x_test = test_df[x_cols]
y_test = test_df[lbl_str].values.reshape(-1, 1)
time = test_df['sec']
return x_test, y_test, time
if __name__ == '__main__':
activity_log = get_activity_log(sbj)
standing_mask = activity_log['Activity'] == 'standing'
standing_log = activity_log[standing_mask]
x_test, y_test, time = get_test_data()
y_out = {}
metrics_dict = {}
x_test_cols = x_test.columns
y_test = movingaverage(y_test, 8)
cm = 1/2.54
fig, axs = plt.subplots(figsize=(14*cm, 7.5*cm), dpi=300)
axs.plot(y_test, label='ground-truth', linewidth=1, c='k');
standing_wins = 10*60
for sens, cfg_id, combi_str in zip(sens_list, cfg_ids, combi_strs):
model = get_model(sens, sbj, cfg_id, combi_str)
if sens == 'imu':
cols = [col for col in x_test_cols if 'acc' in col or 'gyr' in col]
elif sens == 'bvp':
cols = [col for col in x_test_cols if 'bvp' in col]
else:
cols = x_test.columns
poly = PolynomialFeatures(1)
x_test_sens = x_test[cols]
x_test_sens = poly.fit_transform(x_test_sens)
y_pred = model.predict(x_test_sens)
y_pred = movingaverage(y_pred, 64)
evals = Evaluation(y_test.flatten(), y_pred.flatten())
metrics_dict[sens]=evals.get_evals()
axs.plot(y_pred, label=sens.upper(), linewidth=1)
# axs.plot(np.abs(y_test - y_pred), label=sens.upper())
y0 = axs.get_ylim()[-1]
# y1 = axs[1].get_ylim()[-1]
fmt = "%d/%m/%Y %H:%M:%S"
x = np.arange(len(y_test))
for start_, end_ in zip(
standing_log.iloc[::2].iterrows(), standing_log.iloc[1::2].iterrows()
):
start = start_[1]
end = end_[1]
start_sec = datetime.strptime(start['Timestamps'], fmt).timestamp()
end_sec = datetime.strptime(end['Timestamps'], fmt).timestamp()
mask = (time >= start_sec) & (time <= end_sec)
axs.fill_between(x, y0, where=mask, facecolor='k', alpha=0.2)
# axs[1].fill_between(x, y1, where=mask, facecolor='k', alpha=0.2)
diff = np.diff(time)
diff = np.insert(diff, 0, 0)
new_day_idx = diff.argmax()
axs.axvline(new_day_idx, c='r', alpha=0.6, linewidth=1)
# axs[1].axvline(new_day_idx, c='r')
print(metrics_dict)
axs.legend(ncols=4, prop={'size': 6})
# fig.suptitle("RR Predictions during Rest Standing and Sitting")
axs.set_title("Multi-Modal RR Predictions during Rest Standing and Sitting")
# axs[1].set_title("Absolute Error over Time")
axs.set_xlabel("Time (indices)")
axs.set_ylabel("Respiration Rate (CPM)")
# axs[1].set_ylabel("Abs. Error")
fig.savefig('S01cal.png')
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment