Skip to content
Snippets Groups Projects
Commit d9ec44fe authored by Raymond Chia's avatar Raymond Chia
Browse files

subject plots

parent cf1ea305
Branches
No related merge requests found
S01cal.png

240 KiB

python regress_rr.py -s 2 -f tsfresh -d imu+bvp
python regress_rr.py -s 3 -f tsfresh -d imu
python regress_rr.py -s 3 -f tsfresh -d imu -ts 0
# python regress_rr.py -s 4 -f tsfresh -d imu+bvp
python regress_rr.py -s 5 -f tsfresh -d imu+bvp
python regress_rr.py -s 6 -f tsfresh -d imu+bvp
python regress_rr.py -s -1 -f tsfresh -d bvp
python regress_rr.py -s -1 -f tsfresh -d imu
python regress_rr.py -s -1 -f tsfresh -d imu+bvp -ts 0
python regress_rr.py -s -1 -f tsfresh -d bvp -ts 0
python regress_rr.py -s -1 -f tsfresh -d imu -ts 0
...@@ -31,6 +31,7 @@ from sklearn.metrics import accuracy_score ...@@ -31,6 +31,7 @@ from sklearn.metrics import accuracy_score
from tsfresh.feature_extraction import extract_features from tsfresh.feature_extraction import extract_features
from tsfresh.feature_extraction import settings as tsfresh_settings from tsfresh.feature_extraction import settings as tsfresh_settings
from tsfresh.feature_selection import relevance
from tsfresh.utilities.string_manipulation import get_config_from_string from tsfresh.utilities.string_manipulation import get_config_from_string
from modules.datapipeline import get_file_list, load_and_snip, load_data, \ from modules.datapipeline import get_file_list, load_and_snip, load_data, \
...@@ -295,7 +296,7 @@ def load_e4_file(e4_file:str): ...@@ -295,7 +296,7 @@ def load_e4_file(e4_file:str):
dfs = {csv_file.filename: pd.read_csv(zip_file.open(csv_file.filename) dfs = {csv_file.filename: pd.read_csv(zip_file.open(csv_file.filename)
,header=None) ,header=None)
for csv_file in zip_file.infolist() for csv_file in zip_file.infolist()
if csv_file.filename.endswith('.csv')} if csv_file.filename.endswith('.csv') and csv_file.file_size>0}
bvp = dfs["BVP.csv"] bvp = dfs["BVP.csv"]
# First row is the initial time of the session as unix time. # First row is the initial time of the session as unix time.
# Second row is the sample rate in Hz # Second row is the sample rate in Hz
...@@ -303,8 +304,10 @@ def load_e4_file(e4_file:str): ...@@ -303,8 +304,10 @@ def load_e4_file(e4_file:str):
fs = bvp.iloc[1].values[0] fs = bvp.iloc[1].values[0]
nsamples = len(bvp) - 2 nsamples = len(bvp) - 2
t0_datetime = datetime.utcfromtimestamp(t0) # t0_datetime = datetime.utcfromtimestamp(t0)
t0_local = datetime_from_utc_to_local(t0_datetime) # t0_local = datetime_from_utc_to_local(t0_datetime)
t0_datetime = datetime.fromtimestamp(t0, tz=timezone.utc)
t0_local = t0_datetime.astimezone(pytz.timezone('Australia/Sydney'))
time = [t0_local.timestamp() + ind*(1/fs) for ind in time = [t0_local.timestamp() + ind*(1/fs) for ind in
range(nsamples)] range(nsamples)]
tmp = [np.nan, np.nan] tmp = [np.nan, np.nan]
...@@ -341,7 +344,7 @@ def load_e4_files(f_list:list): ...@@ -341,7 +344,7 @@ def load_e4_files(f_list:list):
for d, h in tmp: for d, h in tmp:
data.append(d) data.append(d)
hdr.append(h) hdr.append(h)
data_df = pd.concat(data, axis=0) data_df = pd.concat(data, axis=0).sort_values(by='sec')
return data_df, hdr return data_df, hdr
# Synchronising data # Synchronising data
...@@ -416,7 +419,7 @@ def df_win_task(w_inds, df, i, cols): ...@@ -416,7 +419,7 @@ def df_win_task(w_inds, df, i, cols):
for col in cols: for col in cols:
data = w_df[col].values data = w_df[col].values
# DSP # DSP
if sum(np.abs(data)) > 0: if sum(np.abs(data)) > 0 and np.std(data) > 0:
sd_data = (data - np.mean(data, axis=0))/np.std(data, axis=0) sd_data = (data - np.mean(data, axis=0))/np.std(data, axis=0)
else: else:
sd_data = data.copy() sd_data = data.copy()
...@@ -562,7 +565,7 @@ def load_and_sync_xsens(subject, sens_list:list=['imu', 'bvp']): ...@@ -562,7 +565,7 @@ def load_and_sync_xsens(subject, sens_list:list=['imu', 'bvp']):
if 'imu' in sens_list and 'bvp' in sens_list: if 'imu' in sens_list and 'bvp' in sens_list:
br_df, imu_df = sync_to_ref(br_df, imu_df_all.copy()) br_df, imu_df = sync_to_ref(br_df, imu_df_all.copy())
pss_df, _ = sync_to_ref(pss_df, imu_df_all.copy()) pss_df, _ = sync_to_ref(pss_df, imu_df_all.copy())
bvp_df, _ = sync_to_ref(bvp_df_all.copy(), pss_df.copy()) bvp_df, _ = sync_to_ref(bvp_df_all.copy(), imu_df_all.copy())
elif 'imu' in sens_list and not 'bvp' in sens_list: elif 'imu' in sens_list and not 'bvp' in sens_list:
br_df, imu_df = sync_to_ref(br_df, imu_df_all.copy()) br_df, imu_df = sync_to_ref(br_df, imu_df_all.copy())
pss_df, _ = sync_to_ref(pss_df, imu_df_all.copy()) pss_df, _ = sync_to_ref(pss_df, imu_df_all.copy())
...@@ -615,6 +618,7 @@ def load_and_sync_xsens(subject, sens_list:list=['imu', 'bvp']): ...@@ -615,6 +618,7 @@ def load_and_sync_xsens(subject, sens_list:list=['imu', 'bvp']):
if len(xsens_list) > 1: if len(xsens_list) > 1:
xsens_df = pd.concat(xsens_list, axis=0, ignore_index=True) xsens_df = pd.concat(xsens_list, axis=0, ignore_index=True)
xsens_df.sort_values(by='sec', inplace=True)
xsens_df.reset_index(drop=True, inplace=True) xsens_df.reset_index(drop=True, inplace=True)
else: else:
xsens_df = xsens_list[0] xsens_df = xsens_list[0]
...@@ -623,7 +627,7 @@ def load_and_sync_xsens(subject, sens_list:list=['imu', 'bvp']): ...@@ -623,7 +627,7 @@ def load_and_sync_xsens(subject, sens_list:list=['imu', 'bvp']):
def load_tsfresh(xsens_df, home_dir, def load_tsfresh(xsens_df, home_dir,
window_size=12, window_shift=0.2, fs=IMU_FS, window_size=12, window_shift=0.2, fs=IMU_FS,
overwrite=False, data_cols=None, prefix=None): overwrite=False, data_cols=None, prefix=None, **kwargs):
""" """
Loads the tsfresh pickle file, or generates if it does not exist for the Loads the tsfresh pickle file, or generates if it does not exist for the
given configuration given configuration
...@@ -654,7 +658,6 @@ def load_tsfresh(xsens_df, home_dir, ...@@ -654,7 +658,6 @@ def load_tsfresh(xsens_df, home_dir,
if exists(pkl_file) and not overwrite: if exists(pkl_file) and not overwrite:
return pd.read_pickle(pkl_file) return pd.read_pickle(pkl_file)
assert 'acc_x' in xsens_df.columns.tolist() and \ assert 'acc_x' in xsens_df.columns.tolist() and \
'gyro_x' in xsens_df.columns.tolist() and \ 'gyro_x' in xsens_df.columns.tolist() and \
'bvp' in xsens_df.columns.tolist(), \ 'bvp' in xsens_df.columns.tolist(), \
...@@ -671,6 +674,7 @@ def load_tsfresh(xsens_df, home_dir, ...@@ -671,6 +674,7 @@ def load_tsfresh(xsens_df, home_dir,
x_df, column_sort='sec', x_df, column_sort='sec',
column_id='id', column_id='id',
# default_fc_parameters=tsfresh_settings.MinimalFCParameters(), # default_fc_parameters=tsfresh_settings.MinimalFCParameters(),
**kwargs
) )
x_features_df.fillna(0, inplace=True) x_features_df.fillna(0, inplace=True)
x_features_df.reset_index(drop=True, inplace=True) x_features_df.reset_index(drop=True, inplace=True)
...@@ -755,7 +759,7 @@ def get_cal_data(event_df, xsens_df): ...@@ -755,7 +759,7 @@ def get_cal_data(event_df, xsens_df):
start_sec = sec start_sec = sec
continue continue
elif event == 'Stop': elif event == 'Stop':
stop_sec = sec stop_sec = min(sec, xsens_df['sec'].values[-1])
dsync = DataSynchronizer() dsync = DataSynchronizer()
dsync.set_bounds(xsens_df['sec'].values, start_sec, stop_sec) dsync.set_bounds(xsens_df['sec'].values, start_sec, stop_sec)
...@@ -967,7 +971,7 @@ class EvalHandler(): ...@@ -967,7 +971,7 @@ class EvalHandler():
if len(index_list) == 0: if len(index_list) == 0:
print("adding new entry") print("adding new entry")
eval_hist = eval_hist._append(self.entry, ignore_index=True) eval_hist = eval_hist._append(self.entry, ignore_index=True)
elif index_list is not None and self.overwrite: elif index_list is not None:
eval_hist.loc[index_list[0]] = self.entry eval_hist.loc[index_list[0]] = self.entry
self.eval_hist = eval_hist self.eval_hist = eval_hist
...@@ -1184,6 +1188,7 @@ def sens_rr_model(subject, ...@@ -1184,6 +1188,7 @@ def sens_rr_model(subject,
train_len:int=3, train_len:int=3,
test_standing=False, test_standing=False,
data_input:str='imu+bvp', data_input:str='imu+bvp',
ntop_features:int=5
): ):
"""Loads, preprocesses, and trains a select model using the configured """Loads, preprocesses, and trains a select model using the configured
settings. settings.
...@@ -1281,17 +1286,10 @@ def sens_rr_model(subject, ...@@ -1281,17 +1286,10 @@ def sens_rr_model(subject,
test_df_tmp = get_test_data(cal_df, activity_df, xsens_df, test_standing) test_df_tmp = get_test_data(cal_df, activity_df, xsens_df, test_standing)
test_df = pd.concat([df for df in test_df_tmp['data']], axis=0) test_df = pd.concat([df for df in test_df_tmp['data']], axis=0)
y_cols = ['sec', 'br', 'pss', 'cpm']
if use_tsfresh: if use_tsfresh:
cal_df_list = [] cal_df_list = []
test_df = load_tsfresh(test_df, top_cpm_features = {}
pfh.home_directory,
window_size=window_size,
window_shift=window_shift,
fs=fs,
overwrite=overwrite_tsfresh,
data_cols=data_cols,
prefix='test',
)
for index, row in cal_df.iterrows(): for index, row in cal_df.iterrows():
data = load_tsfresh(row['data'], data = load_tsfresh(row['data'],
pfh.home_directory, pfh.home_directory,
...@@ -1302,9 +1300,47 @@ def sens_rr_model(subject, ...@@ -1302,9 +1300,47 @@ def sens_rr_model(subject,
data_cols=data_cols, data_cols=data_cols,
prefix=f"calcpm_{row['cpm']}" prefix=f"calcpm_{row['cpm']}"
) )
m_data_cols = data.columns.values[3:]
rel_df = relevance.calculate_relevance_table(
data[m_data_cols], data['pss'])
params = []
if 'imu' in sens_list:
tmp = [
val for val in rel_df['feature'] if 'acc' in val or
'gyr' in val
]
params += tmp[:ntop_features]
if 'bvp' in sens_list:
tmp = [val for val in rel_df['feature'] if 'bvp' in val]
params += tmp[:ntop_features]
assert params is not None, "Invalid selection of parameters"
top_cpm_features[row['cpm']] = params
cal_df_list.append({'cpm': row['cpm'], 'data': data}) cal_df_list.append({'cpm': row['cpm'], 'data': data})
param_df = pd.DataFrame.from_dict(top_cpm_features, orient='index')
top_cpm_params = param_df.columns.values
arr = param_df[top_cpm_params].values.flatten()
hist_df = pd.DataFrame.from_dict(Counter(arr), orient='index')
feature_strings = get_parameters_from_feature_string(
hist_df.index.values.tolist())
cal_df = pd.DataFrame(cal_df_list) cal_df = pd.DataFrame(cal_df_list)
# Get the relevance for tables cal and extract top 20
test_df = load_tsfresh(test_df,
pfh.home_directory,
window_size=window_size,
window_shift=window_shift,
fs=fs,
overwrite=overwrite_tsfresh,
data_cols=data_cols,
prefix='test',
kind_to_fc_parameters=feature_strings
)
chosen_cols = [col for col in test_df.columns if col in
hist_df.index.tolist()+y_cols]
test_df = test_df[chosen_cols]
else: else:
x_test_df, y_test_df = get_df_windows( x_test_df, y_test_df = get_df_windows(
test_df, df_win_task, window_size=window_size, test_df, df_win_task, window_size=window_size,
...@@ -1374,12 +1410,12 @@ def sens_rr_model(subject, ...@@ -1374,12 +1410,12 @@ def sens_rr_model(subject,
elif use_tsfresh: elif use_tsfresh:
y_cols = ['sec', 'br', 'pss', 'cpm'] y_cols = ['sec', 'br', 'pss', 'cpm']
x_cols = [col for col in train_df.columns.values if col not in y_cols] x_cols = [col for col in test_df.columns.values if col not in y_cols]
x_train = train_df[x_cols].values
y_train = train_df['cpm'].values.reshape(-1, 1)
x_test = test_df[x_cols].values x_test = test_df[x_cols].values
y_test = test_df[lbl_str].values.reshape(-1, 1) y_test = test_df[lbl_str].values.reshape(-1, 1)
y_test_df = test_df[y_cols[:-1]] y_test_df = test_df[y_cols[:-1]]
x_train = train_df[x_cols].values
y_train = train_df['cpm'].values.reshape(-1, 1)
else: else:
x_train_df, y_train_df = get_df_windows(train_df, x_train_df, y_train_df = get_df_windows(train_df,
df_win_task, df_win_task,
...@@ -1555,10 +1591,10 @@ def arg_parser(): ...@@ -1555,10 +1591,10 @@ def arg_parser():
default=0, default=0,
) )
parser.add_argument('--win_size', type=int, parser.add_argument('--win_size', type=int,
default=12, default=20,
) )
parser.add_argument('--win_shift', type=float, parser.add_argument('--win_shift', type=float,
default=0.2, default=0.05,
) )
parser.add_argument('-l', '--lbl_str', type=str, parser.add_argument('-l', '--lbl_str', type=str,
default='pss', default='pss',
...@@ -1664,6 +1700,9 @@ if __name__ == '__main__': ...@@ -1664,6 +1700,9 @@ if __name__ == '__main__':
elif subject <= 0 and method == 'ml': elif subject <= 0 and method == 'ml':
subjects = [subject_pre_string+str(i).zfill(2) for i in \ subjects = [subject_pre_string+str(i).zfill(2) for i in \
range(1, N_SUBJECT_MAX+1)] range(1, N_SUBJECT_MAX+1)]
if 'bvp' in data_input:
# Missing bvp for S03, S04
subjects = [sbf for sbj in subjects if sbj in ['S03']]
rr_func = partial(sens_rr_model, rr_func = partial(sens_rr_model,
window_size=window_size, window_size=window_size,
......
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from regress_rr import load_tsfresh
from os.path import join
from os import makedirs
import ipdb
import joblib
import pandas as pd
from config import *
from regress_rr import get_activity_log
from modules.evaluations import Evaluation
from modules.digitalsignalprocessing import (
movingaverage, butter_lowpass_filter)
from sklearn.preprocessing import PolynomialFeatures, LabelEncoder
plt.close('all')
plt.rcParams.update({'figure.titlesize' : 8,
'axes.titlesize' : 7,
'axes.labelsize' : 7,
'xtick.labelsize' : 6,
'ytick.labelsize' : 6,
'legend.fontsize' : 6,
'legend.title_fontsize' : 6,
})
window_size = 20
window_shift = 0.05
# get the test data for the subject
# load imu, bvp, and imu+bvp data for the subject
# get the lin reg data for the subject
# plot the pss vs the linear regression data for the subject
# highlight standing periods
sbj = 'S01'
sens_list = ['imu', 'bvp', 'imu-bvp']
# cfg_ids = [4, 0, 0]
cfg_ids = [5, 11, 4]
lbl_str = 'pss'
mdl_dir = join(DATA_DIR, 'subject_specific', sbj)
mdl_file = 'linr_model.joblib'
tsfresh_dir = join(
mdl_dir,
'tsfresh__winsize_{0}__winshift_{1}'.format(window_size, window_shift)
)
combi_strs = ['combi7.0-10.0-12.0-15.0-17.0']*3
def get_model(sens, sbj, cfg_id, combi):
m_dir = f'linreg_{sens}_rr_{sbj}_id{cfg_id}_{combi}'
mdl_fname = join(mdl_dir, sens+'_rr', str(cfg_id).zfill(2), m_dir, mdl_file)
return joblib.load(mdl_fname)
def get_test_data():
test_fname = 'test__winsize_{0}__winshift_{1}__tsfresh.pkl'.format(
window_size, window_shift)
test_df = pd.read_pickle(join(tsfresh_dir, test_fname))
y_cols = ['sec', 'br', 'pss', 'cpm']
x_cols = [col for col in test_df.columns.values if col not in y_cols]
x_test = test_df[x_cols]
y_test = test_df[lbl_str].values.reshape(-1, 1)
time = test_df['sec']
return x_test, y_test, time
if __name__ == '__main__':
activity_log = get_activity_log(sbj)
standing_mask = activity_log['Activity'] == 'standing'
standing_log = activity_log[standing_mask]
x_test, y_test, time = get_test_data()
y_out = {}
metrics_dict = {}
x_test_cols = x_test.columns
y_test = movingaverage(y_test, 8)
cm = 1/2.54
fig, axs = plt.subplots(figsize=(14*cm, 7.5*cm), dpi=300)
axs.plot(y_test, label='ground-truth', linewidth=1, c='k');
standing_wins = 10*60
for sens, cfg_id, combi_str in zip(sens_list, cfg_ids, combi_strs):
model = get_model(sens, sbj, cfg_id, combi_str)
if sens == 'imu':
cols = [col for col in x_test_cols if 'acc' in col or 'gyr' in col]
elif sens == 'bvp':
cols = [col for col in x_test_cols if 'bvp' in col]
else:
cols = x_test.columns
poly = PolynomialFeatures(1)
x_test_sens = x_test[cols]
x_test_sens = poly.fit_transform(x_test_sens)
y_pred = model.predict(x_test_sens)
y_pred = movingaverage(y_pred, 64)
evals = Evaluation(y_test.flatten(), y_pred.flatten())
metrics_dict[sens]=evals.get_evals()
axs.plot(y_pred, label=sens.upper(), linewidth=1)
# axs.plot(np.abs(y_test - y_pred), label=sens.upper())
y0 = axs.get_ylim()[-1]
# y1 = axs[1].get_ylim()[-1]
fmt = "%d/%m/%Y %H:%M:%S"
x = np.arange(len(y_test))
for start_, end_ in zip(
standing_log.iloc[::2].iterrows(), standing_log.iloc[1::2].iterrows()
):
start = start_[1]
end = end_[1]
start_sec = datetime.strptime(start['Timestamps'], fmt).timestamp()
end_sec = datetime.strptime(end['Timestamps'], fmt).timestamp()
mask = (time >= start_sec) & (time <= end_sec)
axs.fill_between(x, y0, where=mask, facecolor='k', alpha=0.2)
# axs[1].fill_between(x, y1, where=mask, facecolor='k', alpha=0.2)
diff = np.diff(time)
diff = np.insert(diff, 0, 0)
new_day_idx = diff.argmax()
axs.axvline(new_day_idx, c='r', alpha=0.6, linewidth=1)
# axs[1].axvline(new_day_idx, c='r')
print(metrics_dict)
axs.legend(ncols=4, prop={'size': 6})
# fig.suptitle("RR Predictions during Rest Standing and Sitting")
axs.set_title("Multi-Modal RR Predictions during Rest Standing and Sitting")
# axs[1].set_title("Absolute Error over Time")
axs.set_xlabel("Time (indices)")
axs.set_ylabel("Respiration Rate (CPM)")
# axs[1].set_ylabel("Abs. Error")
fig.savefig('S01cal.png')
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment