diff --git a/S01cal.png b/S01cal.png
new file mode 100644
index 0000000000000000000000000000000000000000..8cde599e24528f83cbc125db36a91a808b3b1ddc
Binary files /dev/null and b/S01cal.png differ
diff --git a/regress.sh b/regress.sh
new file mode 100644
index 0000000000000000000000000000000000000000..0e46b88e226e801f799044fc22fc3a78d51d290d
--- /dev/null
+++ b/regress.sh
@@ -0,0 +1,11 @@
+python regress_rr.py -s 2 -f tsfresh -d imu+bvp
+python regress_rr.py -s 3 -f tsfresh -d imu
+python regress_rr.py -s 3 -f tsfresh -d imu -ts 0
+# python regress_rr.py -s 4 -f tsfresh -d imu+bvp
+python regress_rr.py -s 5 -f tsfresh -d imu+bvp
+python regress_rr.py -s 6 -f tsfresh -d imu+bvp
+python regress_rr.py -s -1 -f tsfresh -d bvp
+python regress_rr.py -s -1 -f tsfresh -d imu
+python regress_rr.py -s -1 -f tsfresh -d imu+bvp -ts 0
+python regress_rr.py -s -1 -f tsfresh -d bvp -ts 0
+python regress_rr.py -s -1 -f tsfresh -d imu -ts 0
diff --git a/regress_rr.py b/regress_rr.py
index 66b7ab2805282d5258d048fb35540dff7f477ee5..c6c560e59f613a150fce774e09282662053135a4 100644
--- a/regress_rr.py
+++ b/regress_rr.py
@@ -31,6 +31,7 @@ from sklearn.metrics import accuracy_score
 
 from tsfresh.feature_extraction import extract_features
 from tsfresh.feature_extraction import settings as tsfresh_settings
+from tsfresh.feature_selection import relevance
 from tsfresh.utilities.string_manipulation import get_config_from_string
 
 from modules.datapipeline import get_file_list, load_and_snip, load_data, \
@@ -295,7 +296,7 @@ def load_e4_file(e4_file:str):
     dfs = {csv_file.filename: pd.read_csv(zip_file.open(csv_file.filename)
                                           ,header=None)
            for csv_file in zip_file.infolist()
-           if csv_file.filename.endswith('.csv')}
+           if csv_file.filename.endswith('.csv') and csv_file.file_size>0}
     bvp = dfs["BVP.csv"]
     # First row is the initial time of the session as unix time.
     # Second row is the sample rate in Hz
@@ -303,8 +304,10 @@
     fs = bvp.iloc[1].values[0]
     nsamples = len(bvp) - 2
 
-    t0_datetime = datetime.utcfromtimestamp(t0)
-    t0_local = datetime_from_utc_to_local(t0_datetime)
+    # t0_datetime = datetime.utcfromtimestamp(t0)
+    # t0_local = datetime_from_utc_to_local(t0_datetime)
+    t0_datetime = datetime.fromtimestamp(t0, tz=timezone.utc)
+    t0_local = t0_datetime.astimezone(pytz.timezone('Australia/Sydney'))
     time = [t0_local.timestamp() + ind*(1/fs) for ind in range(nsamples)]
 
     tmp = [np.nan, np.nan]
@@ -341,7 +344,7 @@ def load_e4_files(f_list:list):
     for d, h in tmp:
         data.append(d)
         hdr.append(h)
-    data_df = pd.concat(data, axis=0)
+    data_df = pd.concat(data, axis=0).sort_values(by='sec')
     return data_df, hdr
 
 # Synchronising data
@@ -416,7 +419,7 @@ def df_win_task(w_inds, df, i, cols):
     for col in cols:
         data = w_df[col].values
         # DSP
-        if sum(np.abs(data)) > 0:
+        if sum(np.abs(data)) > 0 and np.std(data) > 0:
             sd_data = (data - np.mean(data, axis=0))/np.std(data, axis=0)
         else:
             sd_data = data.copy()
@@ -562,7 +565,7 @@ def load_and_sync_xsens(subject, sens_list:list=['imu', 'bvp']):
     if 'imu' in sens_list and 'bvp' in sens_list:
         br_df, imu_df = sync_to_ref(br_df, imu_df_all.copy())
         pss_df, _ = sync_to_ref(pss_df, imu_df_all.copy())
-        bvp_df, _ = sync_to_ref(bvp_df_all.copy(), pss_df.copy())
+        bvp_df, _ = sync_to_ref(bvp_df_all.copy(), imu_df_all.copy())
     elif 'imu' in sens_list and not 'bvp' in sens_list:
         br_df, imu_df = sync_to_ref(br_df, imu_df_all.copy())
         pss_df, _ = sync_to_ref(pss_df, imu_df_all.copy())
@@ -615,6 +618,7 @@ def load_and_sync_xsens(subject, sens_list:list=['imu', 'bvp']):
 
     if len(xsens_list) > 1:
         xsens_df = pd.concat(xsens_list, axis=0, ignore_index=True)
+        xsens_df.sort_values(by='sec', inplace=True)
         xsens_df.reset_index(drop=True, inplace=True)
     else:
         xsens_df = xsens_list[0]
@@ -623,7 +627,7 @@ def load_and_sync_xsens(subject, sens_list:list=['imu', 'bvp']):
 
 
 def load_tsfresh(xsens_df, home_dir,
                  window_size=12, window_shift=0.2, fs=IMU_FS,
-                 overwrite=False, data_cols=None, prefix=None):
+                 overwrite=False, data_cols=None, prefix=None, **kwargs):
     """ Loads the tsfresh pickle file, or generates if it does not exist
     for the given configuration
@@ -654,7 +658,6 @@
 
     if exists(pkl_file) and not overwrite:
         return pd.read_pickle(pkl_file)
-
     assert 'acc_x' in xsens_df.columns.tolist() and \
             'gyro_x' in xsens_df.columns.tolist() and \
             'bvp' in xsens_df.columns.tolist(), \
@@ -671,6 +674,7 @@
         x_df, column_sort='sec',
         column_id='id',
         # default_fc_parameters=tsfresh_settings.MinimalFCParameters(),
+        **kwargs
     )
     x_features_df.fillna(0, inplace=True)
     x_features_df.reset_index(drop=True, inplace=True)
@@ -755,7 +759,7 @@ def get_cal_data(event_df, xsens_df):
             start_sec = sec
             continue
         elif event == 'Stop':
-            stop_sec = sec
+            stop_sec = min(sec, xsens_df['sec'].values[-1])
 
             dsync = DataSynchronizer()
             dsync.set_bounds(xsens_df['sec'].values, start_sec, stop_sec)
@@ -967,7 +971,7 @@ class EvalHandler():
         if len(index_list) == 0:
             print("adding new entry")
             eval_hist = eval_hist._append(self.entry, ignore_index=True)
-        elif index_list is not None and self.overwrite:
+        elif index_list is not None:
             eval_hist.loc[index_list[0]] = self.entry
 
         self.eval_hist = eval_hist
@@ -1184,6 +1188,7 @@ def sens_rr_model(subject,
                   train_len:int=3,
                   test_standing=False,
                   data_input:str='imu+bvp',
+                  ntop_features:int=5
                   ):
     """Loads, preprocesses, and trains a select model using the configured
     settings.
@@ -1281,17 +1286,10 @@ def sens_rr_model(subject,
     test_df_tmp = get_test_data(cal_df, activity_df, xsens_df, test_standing)
     test_df = pd.concat([df for df in test_df_tmp['data']], axis=0)
+    y_cols = ['sec', 'br', 'pss', 'cpm']
 
     if use_tsfresh:
         cal_df_list = []
-        test_df = load_tsfresh(test_df,
-                               pfh.home_directory,
-                               window_size=window_size,
-                               window_shift=window_shift,
-                               fs=fs,
-                               overwrite=overwrite_tsfresh,
-                               data_cols=data_cols,
-                               prefix='test',
-                               )
+        top_cpm_features = {}
         for index, row in cal_df.iterrows():
             data = load_tsfresh(row['data'],
                                 pfh.home_directory,
@@ -1302,9 +1300,47 @@ def sens_rr_model(subject,
                                 data_cols=data_cols,
                                 prefix=f"calcpm_{row['cpm']}"
                                 )
 
+            m_data_cols = data.columns.values[3:]
+            rel_df = relevance.calculate_relevance_table(
+                data[m_data_cols], data['pss'])
+            params = []
+            if 'imu' in sens_list:
+                tmp = [
+                    val for val in rel_df['feature'] if 'acc' in val or
+                    'gyr' in val
+                ]
+                params += tmp[:ntop_features]
+            if 'bvp' in sens_list:
+                tmp = [val for val in rel_df['feature'] if 'bvp' in val]
+                params += tmp[:ntop_features]
+
+            assert params is not None, "Invalid selection of parameters"
+
+            top_cpm_features[row['cpm']] = params
             cal_df_list.append({'cpm': row['cpm'], 'data': data})
 
+        param_df = pd.DataFrame.from_dict(top_cpm_features, orient='index')
+        top_cpm_params = param_df.columns.values
+        arr = param_df[top_cpm_params].values.flatten()
+        hist_df = pd.DataFrame.from_dict(Counter(arr), orient='index')
+        feature_strings = get_parameters_from_feature_string(
+            hist_df.index.values.tolist())
+        cal_df = pd.DataFrame(cal_df_list)
+        # Get the relevance for tables cal and extract top 20
+        test_df = load_tsfresh(test_df,
+                               pfh.home_directory,
+                               window_size=window_size,
+                               window_shift=window_shift,
+                               fs=fs,
+                               overwrite=overwrite_tsfresh,
+                               data_cols=data_cols,
+                               prefix='test',
+                               kind_to_fc_parameters=feature_strings
+                               )
+        chosen_cols = [col for col in test_df.columns if col in
+                       hist_df.index.tolist()+y_cols]
+        test_df = test_df[chosen_cols]
     else:
         x_test_df, y_test_df = get_df_windows(
             test_df, df_win_task, window_size=window_size,
@@ -1374,12 +1410,12 @@ def sens_rr_model(subject,
 
     elif use_tsfresh:
         y_cols = ['sec', 'br', 'pss', 'cpm']
-        x_cols = [col for col in train_df.columns.values if col not in y_cols]
-        x_train = train_df[x_cols].values
-        y_train = train_df['cpm'].values.reshape(-1, 1)
+        x_cols = [col for col in test_df.columns.values if col not in y_cols]
         x_test = test_df[x_cols].values
        y_test = test_df[lbl_str].values.reshape(-1, 1)
         y_test_df = test_df[y_cols[:-1]]
+        x_train = train_df[x_cols].values
+        y_train = train_df['cpm'].values.reshape(-1, 1)
     else:
         x_train_df, y_train_df = get_df_windows(train_df,
                                                 df_win_task,
@@ -1555,10 +1591,10 @@ def arg_parser():
                         default=0,
                         )
     parser.add_argument('--win_size', type=int,
-                        default=12,
+                        default=20,
                         )
     parser.add_argument('--win_shift', type=float,
-                        default=0.2,
+                        default=0.05,
                         )
     parser.add_argument('-l', '--lbl_str', type=str,
                         default='pss',
@@ -1664,6 +1700,9 @@ if __name__ == '__main__':
     elif subject <= 0 and method == 'ml':
         subjects = [subject_pre_string+str(i).zfill(2) for i in \
                     range(1, N_SUBJECT_MAX+1)]
+        if 'bvp' in data_input:
+            # Missing bvp for S03, S04
+            subjects = [sbj for sbj in subjects if sbj not in ['S03', 'S04']]
 
         rr_func = partial(sens_rr_model,
                           window_size=window_size,
diff --git a/single_subject_cal_plot.py b/single_subject_cal_plot.py
new file mode 100644
index 0000000000000000000000000000000000000000..c5017ab4f6f4c884bcca043db802fd9ac7ba96f2
--- /dev/null
+++ b/single_subject_cal_plot.py
@@ -0,0 +1,144 @@
+from datetime import datetime
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+from regress_rr import load_tsfresh
+from os.path import join
+from os import makedirs
+import ipdb
+import joblib
+
+import pandas as pd
+
+from config import *
+from regress_rr import get_activity_log
+from modules.evaluations import Evaluation
+from modules.digitalsignalprocessing import (
+    movingaverage, butter_lowpass_filter)
+from sklearn.preprocessing import PolynomialFeatures, LabelEncoder
+
+plt.close('all')
+plt.rcParams.update({'figure.titlesize' : 8,
+                     'axes.titlesize' : 7,
+                     'axes.labelsize' : 7,
+                     'xtick.labelsize' : 6,
+                     'ytick.labelsize' : 6,
+                     'legend.fontsize' : 6,
+                     'legend.title_fontsize' : 6,
+                     })
+window_size = 20
+window_shift = 0.05
+
+# get the test data for the subject
+
+# load imu, bvp, and imu+bvp data for the subject
+
+# get the lin reg data for the subject
+
+# plot the pss vs the linear regression data for the subject
+
+# highlight standing periods
+
+sbj = 'S01'
+sens_list = ['imu', 'bvp', 'imu-bvp']
+# cfg_ids = [4, 0, 0]
+cfg_ids = [5, 11, 4]
+
+lbl_str = 'pss'
+
+mdl_dir = join(DATA_DIR, 'subject_specific', sbj)
+mdl_file = 'linr_model.joblib'
+
+tsfresh_dir = join(
+    mdl_dir,
+    'tsfresh__winsize_{0}__winshift_{1}'.format(window_size, window_shift)
+)
+
+combi_strs = ['combi7.0-10.0-12.0-15.0-17.0']*3
+
+def get_model(sens, sbj, cfg_id, combi):
+    m_dir = f'linreg_{sens}_rr_{sbj}_id{cfg_id}_{combi}'
+    mdl_fname = join(mdl_dir, sens+'_rr', str(cfg_id).zfill(2), m_dir, mdl_file)
+    return joblib.load(mdl_fname)
+
+def get_test_data():
+    test_fname = 'test__winsize_{0}__winshift_{1}__tsfresh.pkl'.format(
+        window_size, window_shift)
+    test_df = pd.read_pickle(join(tsfresh_dir, test_fname))
+    y_cols = ['sec', 'br', 'pss', 'cpm']
+    x_cols = [col for col in test_df.columns.values if col not in y_cols]
+    x_test = test_df[x_cols]
+    y_test = test_df[lbl_str].values.reshape(-1, 1)
+    time = test_df['sec']
+    return x_test, y_test, time
+
+if __name__ == '__main__':
+    activity_log = get_activity_log(sbj)
+    standing_mask = activity_log['Activity'] == 'standing'
+    standing_log = activity_log[standing_mask]
+
+    x_test, y_test, time = get_test_data()
+    y_out = {}
+    metrics_dict = {}
+    x_test_cols = x_test.columns
+    y_test = movingaverage(y_test, 8)
+    cm = 1/2.54
+    fig, axs = plt.subplots(figsize=(14*cm, 7.5*cm), dpi=300)
+    axs.plot(y_test, label='ground-truth', linewidth=1, c='k');
+
+    standing_wins = 10*60
+    for sens, cfg_id, combi_str in zip(sens_list, cfg_ids, combi_strs):
+        model = get_model(sens, sbj, cfg_id, combi_str)
+        if sens == 'imu':
+            cols = [col for col in x_test_cols if 'acc' in col or 'gyr' in col]
+        elif sens == 'bvp':
+            cols = [col for col in x_test_cols if 'bvp' in col]
+        else:
+            cols = x_test.columns
+
+        poly = PolynomialFeatures(1)
+        x_test_sens = x_test[cols]
+        x_test_sens = poly.fit_transform(x_test_sens)
+
+        y_pred = model.predict(x_test_sens)
+        y_pred = movingaverage(y_pred, 64)
+
+        evals = Evaluation(y_test.flatten(), y_pred.flatten())
+        metrics_dict[sens]=evals.get_evals()
+
+        axs.plot(y_pred, label=sens.upper(), linewidth=1)
+        # axs.plot(np.abs(y_test - y_pred), label=sens.upper())
+
+    y0 = axs.get_ylim()[-1]
+    # y1 = axs[1].get_ylim()[-1]
+    fmt = "%d/%m/%Y %H:%M:%S"
+    x = np.arange(len(y_test))
+    for start_, end_ in zip(
+        standing_log.iloc[::2].iterrows(), standing_log.iloc[1::2].iterrows()
+    ):
+        start = start_[1]
+        end = end_[1]
+        start_sec = datetime.strptime(start['Timestamps'], fmt).timestamp()
+        end_sec = datetime.strptime(end['Timestamps'], fmt).timestamp()
+        mask = (time >= start_sec) & (time <= end_sec)
+        axs.fill_between(x, y0, where=mask, facecolor='k', alpha=0.2)
+        # axs[1].fill_between(x, y1, where=mask, facecolor='k', alpha=0.2)
+
+    diff = np.diff(time)
+    diff = np.insert(diff, 0, 0)
+    new_day_idx = diff.argmax()
+
+    axs.axvline(new_day_idx, c='r', alpha=0.6, linewidth=1)
+    # axs[1].axvline(new_day_idx, c='r')
+
+    print(metrics_dict)
+    axs.legend(ncols=4, prop={'size': 6})
+
+    # fig.suptitle("RR Predictions during Rest Standing and Sitting")
+    axs.set_title("Multi-Modal RR Predictions during Rest Standing and Sitting")
+    # axs[1].set_title("Absolute Error over Time")
+
+    axs.set_xlabel("Time (indices)")
+    axs.set_ylabel("Respiration Rate (CPM)")
+    # axs[1].set_ylabel("Abs. Error")
+    fig.savefig('S01cal.png')