From cf1ea305dab81f6f1f0f4833dd0291f36e306c28 Mon Sep 17 00:00:00 2001 From: Raymond Chia <rqchia@mars2.ihpc.uts.edu.au> Date: Wed, 24 Jul 2024 12:51:31 +1000 Subject: [PATCH] minor edit --- modules/digitalsignalprocessing.py | 14 +- modules/utils.py | 8 +- regress_rr.py | 187 +++++++-- s02_check.py | 595 +++++++++++++++++++++++++++++ 4 files changed, 768 insertions(+), 36 deletions(-) create mode 100644 s02_check.py diff --git a/modules/digitalsignalprocessing.py b/modules/digitalsignalprocessing.py index d03d9b5..a0612f2 100644 --- a/modules/digitalsignalprocessing.py +++ b/modules/digitalsignalprocessing.py @@ -254,7 +254,7 @@ def acc_signal_processing(data, fs:int=100): def imu_signal_processing(data, fs:int=IMU_FS): bp = butter_bandpass_filter(data, 3/60, - 70/60, fs=fs, order=2) + 45/60, fs=fs, order=2) ma = movingaverage(bp, 8, axis=0) return ma @@ -332,7 +332,7 @@ def hernandez_sp(data=None, fs:int=IMU_FS): return accel, bp -def pressure_signal_processing(pressure_data, fs=BR_FS): +def pressure_signal_processing(pressure_data, fs=BR_FS, movmean_win=32): ''' Run pressure signal through the following steps: * Moving average with 8 samples * Standard scaler @@ -341,10 +341,10 @@ def pressure_signal_processing(pressure_data, fs=BR_FS): # Normalize data_sd = std_scaler(pressure_data) - data_ma = movingaverage(data_sd, 16) + data_ma = movingaverage(data_sd, movmean_win) # bandpass filter the lbls - bp_data = butter_bandpass_filter(data_ma, 4/60, 70/60, fs=fs, order=5) + bp_data = butter_bandpass_filter(data_ma, 4/60, 45/60, fs=fs, order=5) return bp_data def vectorized_slide_win(array, max_time, sub_window_size=3, @@ -362,13 +362,17 @@ def vectorized_slide_win(array, max_time, sub_window_size=3, np.expand_dims(np.arange(sub_window_size), 0) + np.expand_dims(np.arange(max_time, step=stride_size), 0).T ) + i = [i for i, win in enumerate(sub_windows) if max_time in win][0] + """ pad_len = int(np.max(sub_windows) - max_time + 1) # pad right side data array with zero arr = np.pad(array,(0, pad_len), 'constant', constant_values=0) - return arr[sub_windows] + """ + + return array[sub_windows[:i]] def generate_noisy_sine_windows(sig=None, window_size=WINDOW_SIZE*FS_RESAMPLE, diff --git a/modules/utils.py b/modules/utils.py index f633da5..3abf058 100644 --- a/modules/utils.py +++ b/modules/utils.py @@ -376,10 +376,10 @@ def get_df_windows(df, func, window_size=15, window_shift=0.2, fs=IMU_FS, args = zip(wins.tolist(), repeat(df, N), i_list, [cols]*N) out_data = [] - # with Pool(cpu_count()) as p: - # out_data = p.starmap(func, args) - for i, win in enumerate(wins): - out_data.append(func(win, df, i, cols)) + with Pool(cpu_count()) as p: + out_data = p.starmap(func, args) + # for i, win in enumerate(wins): + # out_data.append(func(win, df, i, cols)) x, y = [], [] for out in out_data: diff --git a/regress_rr.py b/regress_rr.py index b4dce1c..66b7ab2 100644 --- a/regress_rr.py +++ b/regress_rr.py @@ -10,6 +10,7 @@ import pickle import sys import time from zipfile import ZipFile +from joblib import dump, load import argparse from datetime import datetime, timedelta, timezone, timedelta @@ -265,12 +266,16 @@ def load_imu_files(f_list:list): """ data, hdr = [], [] tmp = [] - for f in f_list: - tmp.append(load_imu_file(f)) - for l in tmp: - data.append(l[0]) - hdr.append(l[1]) + with Pool(int(cpu_count()//1.5)) as p: + tmp = p.map(load_imu_file, f_list) + data, hdr = zip(*tmp) + # for f in f_list: + # tmp.append(load_imu_file(f)) + # for l in tmp: + # data.append(l[0]) + # hdr.append(l[1]) data_df = 
pd.concat(data, axis=0) + data_df.sort_values(by='sec', inplace=True) return data_df, hdr def load_e4_file(e4_file:str): @@ -392,7 +397,7 @@ def df_win_task(w_inds, df, i, cols): pandas.DataFrame, pandas.DataFrame """ time = df['sec'].values - if w_inds[-1] == 0: return + # if w_inds[-1] == 0: return w_df = df.iloc[w_inds] t0, t1 = time[w_inds][0], time[w_inds][-1] diff = time[w_inds[1:]] - time[w_inds[0:-1]] @@ -429,9 +434,9 @@ def df_win_task(w_inds, df, i, cols): x_vec_time = np.median(time[w_inds]) - fs = 1/np.mean(diff) + # fs = 1/np.mean(diff) ps_out = pressure_signal_processing(ps_out, fs=fs) - ps_freq = int(get_max_frequency(ps_out, fs=fs)) + ps_freq = get_max_frequency(ps_out, fs=fs) y_tmp = np.array([x_vec_time, np.nanmedian(sm_out), ps_freq]) x_out['sec'] = x_vec_time @@ -711,7 +716,7 @@ def get_respiration_log(subject): """ log_list = get_file_list('*.json', sbj=subject) - log_dfs = [pd.read_json(f) for f in log_list] + log_dfs = [pd.read_json(f, convert_dates=False) for f in log_list] return pd.concat(log_dfs, axis=0) def get_cal_data(event_df, xsens_df): @@ -731,7 +736,7 @@ def get_cal_data(event_df, xsens_df): pd.DataFrame """ - fmt ="%Y-%m-%d %H.%M.%S" + fmt ="%d/%m/%Y %I:%M:%S %p" cal_list = [] cpms = [] start_sec = 0 @@ -744,7 +749,7 @@ def get_cal_data(event_df, xsens_df): cpm = np.round( 60/(inhalePeriod + exhalePeriod) ) - sec = timestamp.to_pydatetime().timestamp() + sec = datetime.strptime(str(timestamp), fmt).timestamp() if event == 'Start': start_sec = sec @@ -849,6 +854,10 @@ def dsp_win_func(w_inds, df, i, cols): data = w_df[cols].values + fs_est = 1/np.median(diff) + if fs_est > 70 and ('acc_x' in cols or 'gyro_x' in cols): fs = IMU_FS + elif fs_est < 70 and 'bvp' in cols: fs = PPG_FS + if reject_artefact((data-np.mean(data,axis=0))/np.std(data,axis=0)): return @@ -872,10 +881,10 @@ def dsp_win_func(w_inds, df, i, cols): x_vec_time = np.median(time[w_inds]) - fs = 1/np.mean(diff) + # fs = 1/np.mean(diff) ps_out = pressure_signal_processing(ps_out, fs=fs) - ps_freq = int(get_max_frequency(ps_out, fs=IMU_FS)) + ps_freq = get_max_frequency(ps_out, fs=IMU_FS) y_tmp = np.array([x_vec_time, np.nanmedian(sm_out), ps_freq]) @@ -1052,8 +1061,6 @@ def imu_rr_dsp(subject, train_len:int=3, test_standing=False, ): - # TODO: - # implement evaluation saving """Loads, preprocesses, and performs Hernandez digital signal processing pipeline on the selected subject. Uses the specified parameters. Runs on both accelerometer and gyroscope. 
@@ -1154,9 +1161,11 @@ def imu_rr_dsp(subject, pp.pprint(acc_eval.load_eval_history()) fig, ax = plt.subplots(2, 1) - ax[0].plot(acc_y_dsp_df[lbl_str]); plt.plot(acc_dsp_df['pred']) + ax[0].plot(acc_y_dsp_df[lbl_str].values); + ax[0].plot(acc_dsp_df['pred'].values) ax[0].set_title("ACC") - ax[1].plot(gyr_y_dsp_df[lbl_str]); plt.plot(gyr_dsp_df['pred']) + ax[1].plot(gyr_y_dsp_df[lbl_str].values); + ax[1].plot(gyr_dsp_df['pred'].values) ax[1].set_title("GYRO") ax[1].legend([lbl_str, 'estimate']) fig_dir = join(project_dir, 'figures') @@ -1232,6 +1241,7 @@ def sens_rr_model(subject, use_tsfresh = False overwrite_tsfresh = overwrite train_size = int(train_len) + minirocket = None if feature_method == 'tsfresh': use_tsfresh = True @@ -1262,7 +1272,7 @@ def sens_rr_model(subject, project_dir = pfh.project_directory xsens_df = load_and_sync_xsens(subject, sens_list=sens_list) - activity_df = get_activity_log(subject) + activity_df = get_activity_log(subject).reset_index(drop=True) event_df = get_respiration_log(subject) cal_df = get_cal_data(event_df, xsens_df) @@ -1339,12 +1349,29 @@ def sens_rr_model(subject, # x_train = y_train_df['hr_est'].values.reshape(-1, 1) # x_test = y_test_df['hr_est'].values.reshape(-1, 1) - print("minirocket transforming...") x_train = np.swapaxes(x_train, 1, 2) x_test = np.swapaxes(x_test, 1, 2) - minirocket = MiniRocketMultivariate() - x_train = minirocket.fit_transform(x_train) - x_test = minirocket.transform(x_test) + + directory = join(project_dir, '_'.join([mdl_str, marker])) + minirocket_fname = join(directory, "minirocket.joblib") + + if not overwrite and exists(minirocket_fname): + with open(minirocket_fname, 'rb') as mfile: + minirocket = load(mfile) + x_train = minirocket.transform(x_train) + print("loaded minirocket...") + else: + minirocket = MiniRocketMultivariate() + x_train = minirocket.fit_transform(x_train) + + x_test = minirocket.transform(x_test) + + if overwrite or not exists(minirocket_fname): + if not exists(directory): makedirs(directory) + with open(minirocket_fname, 'wb') as mfile: + dump(minirocket, mfile) + print("saved new minirocket...") + elif use_tsfresh: y_cols = ['sec', 'br', 'pss', 'cpm'] x_cols = [col for col in train_df.columns.values if col not in y_cols] @@ -1398,9 +1425,11 @@ def sens_rr_model(subject, if lbl_str == 'pss': br = y_test_df['br'].values - ax[1].plot(movingaverage(y_test, 12), color='tab:blue') + ax[1].plot(movingaverage(y_test, 12), + color='tab:blue') ax[1].plot(br, 'k') - ax[1].plot(movingaverage(preds, 12), color='tab:orange') + ax[1].plot(movingaverage(preds, 12), + color='tab:orange') ax[1].legend([lbl_str, 'br', 'pred']) else: ax[1].plot(y_test, 'k') @@ -1409,8 +1438,100 @@ def sens_rr_model(subject, ax[1].set_title('smoothened') fig_dir = join(project_dir, 'figures') if not exists(fig_dir): mkdir(fig_dir) + + if subject == 'S02': + sec = y_test_df['sec'].values + activity_df2 = get_activity_log(subject+'-repeat') + + event_df2 = get_respiration_log(subject+'-repeat') + second_cal_df = get_cal_data(event_df2, xsens_df)\ + .reset_index(drop=True) + sec = y_test_df['sec'].values + activity_df2 = get_activity_log(subject+'-repeat') + + event_df2 = get_respiration_log(subject+'-repeat') + second_cal_df = get_cal_data(event_df2, xsens_df)\ + .reset_index(drop=True) + + fmt = "%d/%m/%Y %H:%M:%S" + activity_sec0 = datetime.strptime( + activity_df2['Timestamps'].iloc[0], fmt).timestamp() + activity_sec1 = datetime.strptime( + activity_df2['Timestamps'].iloc[-1], fmt).timestamp() + + ind0 = np.argmin(np.abs(sec - 
activity_sec0)) + ind1 = np.argmin(np.abs(sec - activity_sec1)) + + ax[0].axvline(ind0, c='r', linestyle='--') + ax[0].axvline(ind1, c='r', linestyle='--') + ax[1].axvline(ind0, c='r', linestyle='--') + ax[1].axvline(ind1, c='r', linestyle='--') + + fig2, ax2 = plt.subplots(2, 1, sharex=True) + cal_sec0 = second_cal_df.iloc[-7]['data']['sec'].iloc[0] + cal_sec1 = second_cal_df.iloc[-1]['data']['sec'].iloc[-1] + calind0 = np.argmin(np.abs(sec - cal_sec0)) + calind1 = np.argmin(np.abs(sec - cal_sec1)) + + ax2[0].plot(sec[calind0:], y_test[calind0:]) + ax2[0].plot(sec[calind0:],preds[calind0:]) + ax2[0].set_title('raw') + ax2[1].plot(sec[calind0:], + movingaverage(y_test, 12)[calind0:], + color='tab:blue') + ax2[1].plot(sec[calind0:], br[calind0:], 'k') + ax2[1].plot(sec[calind0:], + movingaverage(preds, 12)[calind0:], + color='tab:orange') + ax2[1].legend([lbl_str, 'br', 'pred']) + + ll = len(second_cal_df) + for i in range(ll-7, ll): + cal_sec0 = second_cal_df.iloc[i]['data']['sec'].iloc[0] + cal_sec1 = second_cal_df.iloc[i]['data']['sec'].iloc[-1] + calind0 = np.argmin(np.abs(sec - cal_sec0)) + calind1 = np.argmin(np.abs(sec - cal_sec1)) + + ax2[0].axvline(sec[calind0], c='g', linestyle='--') + ax2[0].axvline(sec[calind1], c='g', linestyle='--') + ax2[1].axvline(sec[calind0], c='g', linestyle='--') + ax2[1].axvline(sec[calind1], c='g', linestyle='--') + + def pressure_ax_plot(func, win_size=12, win_shift=0.2, fs=120): + dt = test_df.sec + t = dt.map(lambda x: datetime.fromtimestamp(x)) + pss_sig = test_df.PSS.values + processed_wins = vsw(pss_sig, len(pss_sig), + sub_window_size=int(win_size*fs), + stride_size=int(win_size*win_shift*fs)) + processed_wins = map(func, processed_wins) + ps_freq = [get_max_frequency(ps_out, fs=fs) for ps_out in + processed_wins] + + ft = vsw(dt.values, len(dt), + sub_window_size=int(win_size*fs), + stride_size=int(win_size*win_shift*fs)) + + fig, axs = plt.subplots(3, 1, sharex=True) + # axs[0].get_shared_x_axes().join(axs[0], axs[1]) + + axs[0].plot(t, pss_sig) + axs[1].plot(t, func(pss_sig)) + mtime = map(lambda x: datetime.fromtimestamp(x), + ft.mean(axis=1)) + freqdt = np.array([t for t in mtime]) + tt = y_test_df.sec.map(lambda x: + datetime.fromtimestamp(x)).values + axs[2].plot(freqdt, ps_freq) + axs[2].plot(tt, y_test_df.pss.values) + # fig.savefig(join(fig_dir, fig_title+"cal_check.png")) + + fig2.savefig(join(fig_dir, fig_title+"cal_check.png")) + # func = partial(pressure_signal_processing, fs=fs) + # pressure_ax_plot(func) + fig.savefig(join(fig_dir, fig_title+".png")) - plt.close() + plt.close('all') def arg_parser(): """Returns arguments in a Namespace to configure the subject specific model @@ -1459,7 +1580,7 @@ def arg_parser(): parser.add_argument('--method', type=str, default='ml', help="choose between 'ml' or 'dsp' methods for"\ - " regression", + "regression", choices=['ml', 'dsp'] ) args = parser.parse_args() @@ -1484,6 +1605,7 @@ if __name__ == '__main__': print(args) assert train_len>0,"--train_len must be an integer greater than 0" + assert train_len <= 7,"--train_len must be an integer less than 8" subject_pre_string = 'S' # Pilot / S @@ -1528,9 +1650,20 @@ if __name__ == '__main__': overwrite=overwrite, train_len=train_len, test_standing=test_standing) + elif subject <= 0 and method == 'dsp': + subjects = [subject_pre_string+str(i).zfill(2) for i in \ + range(1, N_SUBJECT_MAX+1)] + func = partial(imu_rr_dsp, window_size=window_size, + window_shift=window_shift, + lbl_str=lbl_str, + overwrite=overwrite, + train_len=train_len, + 
test_standing=test_standing) + for subject in subjects: + func(subject) elif subject <= 0 and method == 'ml': subjects = [subject_pre_string+str(i).zfill(2) for i in \ - range(1, n_subject_max+1)] + range(1, N_SUBJECT_MAX+1)] rr_func = partial(sens_rr_model, window_size=window_size, diff --git a/s02_check.py b/s02_check.py new file mode 100644 index 0000000..593948b --- /dev/null +++ b/s02_check.py @@ -0,0 +1,595 @@ +# TODO: + # + +import glob +from os import makedirs, mkdir +from os.path import join, exists +import pandas as pd +import numpy as np +import json +import ipdb +import re +import pickle +import sys +import time +from zipfile import ZipFile +from joblib import dump, load + +import argparse +from datetime import datetime, timedelta, timezone, timedelta +import pytz + +import matplotlib.pyplot as plt +from functools import partial +from collections import Counter +from itertools import repeat, chain, combinations +from multiprocessing import Pool, cpu_count +import tensorflow as tf + +from sklearn.decomposition import PCA +from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder +from sklearn.preprocessing import PolynomialFeatures, LabelEncoder +from sklearn.model_selection import KFold, train_test_split +from sklearn.metrics import accuracy_score + +from tsfresh.feature_extraction import extract_features +from tsfresh.feature_extraction import settings as tsfresh_settings +from tsfresh.utilities.string_manipulation import get_config_from_string + +from modules.datapipeline import get_file_list, load_and_snip, load_data, \ + load_split_data, load_harness_data +from modules.digitalsignalprocessing import vectorized_slide_win as vsw +from modules.digitalsignalprocessing import imu_signal_processing +from modules.digitalsignalprocessing import bvp_signal_processing +from modules.digitalsignalprocessing import hernandez_sp, reject_artefact +from modules.digitalsignalprocessing import do_pad_fft,\ + pressure_signal_processing, infer_frequency, movingaverage +from modules.utils import * + +from modules.evaluations import Evaluation +from modules.datapipeline import get_windowed_data, DataSynchronizer,\ + parallelize_dataframe +from modules.datapipeline import ProjectFileHandler +from models.ardregression import ARDRegressionClass +from models.knn import KNNClass +from models.svm import SVMClass +from models.lda import LDAClass +from models.svr import SVRClass +from models.logisticregression import LogisticRegressionClass +from models.linearregression import LinearRegressionClass +from models.neuralnet import FNN_HyperModel, LSTM_HyperModel, TunerClass,\ + CNN1D_HyperModel +from models.ridgeclass import RidgeClass +from models.resnet import Regressor_RESNET, Classifier_RESNET +from models.xgboostclass import XGBoostClass + +from pprint import PrettyPrinter + +from sktime.transformations.panel.rocket import ( + MiniRocket, + MiniRocketMultivariate, + MiniRocketMultivariateVariable, +) + +from config import WINDOW_SIZE, WINDOW_SHIFT, IMU_FS, DATA_DIR, BR_FS\ + , FS_RESAMPLE, PPG_FS + +from regress_rr import * + +N_SUBJECT_MAX = 6 +IMU_COLS = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z'] + + +def load_and_sync_xsens(subject, sens_list:list=['imu', 'bvp']): + """ + Loads requested sensors from the subject folder and synchronises each to + the beginning and end timestamps. Linearly interpolates the data and + timestamps to match the higher frequency data. + + Arguments + --------- + subject : str + subject to extract data from (i.e. 
'Pilot02', 'S02') + sens_list : list + a list that contains either or both 'imu' and 'bvp' + + Returns + ------- + pd.DataFrame + """ + assert 'imu' in sens_list or 'bvp' in sens_list, \ + f"{sens_list} is not supported, must contain"\ + "'imu', 'bvp' or 'imu, bvp'" + + pss_df, br_df, imu_df, bvp_df = None, None, None, None + acc_data, gyr_data, bvp_data = None, None, None + # load imu + if 'imu' in sens_list: + imu_list = get_file_list('imudata.gz', sbj=subject) + imu_df_all, imu_hdr_df_all = load_imu_files(imu_list) + + # load bioharness + pss_list = get_file_list('*Breathing.csv', sbj=subject) + if len(pss_list) == 0: + pss_list = get_file_list('BR*.csv', sbj=subject) + + br_list = get_file_list('*Summary*.csv', sbj=subject) + + # load e4 wristband + if 'bvp' in sens_list: + e4_list = get_file_list('*.zip', sbj=subject) + bvp_df_all, bvp_hdr = load_e4_files(e4_list) + bvp_fs = bvp_hdr[0]['fs'] + + xsens_list = [] + # skip the first and last x minute(s) + minutes_to_skip = .5 + br_skiprows = br_skipfooter = int(minutes_to_skip*60) + pss_skiprows = pss_skipfooter = int(minutes_to_skip*60*BR_FS) + + # load each bioharness file and sync the imu to it + for pss_file, br_file in zip(pss_list, br_list): + xsens_data = {} + + pss_df = load_bioharness_file(pss_file, skiprows=pss_skiprows, + skipfooter=pss_skipfooter, + engine='python') + pss_time = pss_df['Time'].map(bioharness_datetime_to_seconds).values\ + .reshape(-1, 1) + pss_df['sec'] = pss_time + + br_df = load_bioharness_file(br_file, skiprows=br_skiprows, + skipfooter=br_skipfooter, + engine='python') + br_time = br_df['Time'].map(bioharness_datetime_to_seconds).values\ + .reshape(-1, 1) + br_df['sec'] = br_time + + # sync + if 'imu' in sens_list and 'bvp' in sens_list: + br_df, imu_df = sync_to_ref(br_df, imu_df_all.copy()) + pss_df, _ = sync_to_ref(pss_df, imu_df_all.copy()) + bvp_df, _ = sync_to_ref(bvp_df_all.copy(), pss_df.copy()) + elif 'imu' in sens_list and not 'bvp' in sens_list: + br_df, imu_df = sync_to_ref(br_df, imu_df_all.copy()) + pss_df, _ = sync_to_ref(pss_df, imu_df_all.copy()) + elif not 'imu' in sens_list and 'bvp' in sens_list: + br_df, bvp_df = sync_to_ref(br_df, bvp_df_all.copy()) + pss_df, _ = sync_to_ref(pss_df, bvp_df_all.copy()) + + # extract relevant data + if 'imu' in sens_list: + axes = ['x', 'y', 'z'] + acc_data = np.stack(imu_df['accelerometer'].values) + gyr_data = np.stack(imu_df['gyroscope'].values) + x_time = imu_df['sec'].values.reshape(-1, 1) + + if 'bvp' in sens_list and 'imu' in sens_list: + bvp_data = bvp_df['bvp'].values + bvp_data = np.interp(x_time, bvp_df['sec'].values, bvp_data)\ + .reshape(-1, 1) + elif 'bvp' in sens_list and not 'imu' in sens_list: + bvp_data = bvp_df['bvp'].values + x_time = bvp_df['sec'].values + + xsens_data['sec'] = x_time.flatten() + + br_col = [col for col in pss_df.columns.values if\ + 'breathing' in col.lower()][0] + pss_data = pss_df[br_col].values + pss_data = np.interp(x_time, pss_df['sec'].values, pss_data)\ + .reshape(-1, 1) + xsens_data['PSS'] = pss_data.flatten() + + br_lbl = [col for col in br_df.columns.values if\ + 'br' in col.lower()][0] + br_data = br_df['BR'].values + br_data = np.interp(x_time, br_df['sec'].values, br_data)\ + .reshape(-1, 1) + xsens_data['BR'] = br_data.flatten() + + if 'imu' in sens_list: + for i, axis in enumerate(axes): + xsens_data['acc_'+axis] = acc_data.T[i].flatten() + xsens_data['gyro_'+axis] = gyr_data.T[i].flatten() + + if 'bvp' in sens_list: + xsens_data['bvp'] = bvp_data.flatten() + + xsens_df_tmp = 
pd.DataFrame(xsens_data) + + xsens_list.append(xsens_df_tmp) + + if len(xsens_list) > 1: + xsens_df = pd.concat(xsens_list, axis=0, ignore_index=True) + xsens_df.reset_index(drop=True, inplace=True) + else: + xsens_df = xsens_list[0] + + return xsens_df + +def get_test_data(cal_df, activity_df, xsens_df, test_standing): + """ + Loads and retrieves the activity timestamps from sitting and standing + events + + Arguments + --------- + cal_df : pandas.DataFrame + synchronised and frequency matched respiration calibration data + activity_df : pandas.DataFrame + timestamps of activity events + xsens_df : pandas.DataFrame + synchronised and frequency matched DataFrame with all data and labels + test_standing : bool + list of column str + + Returns + ------- + pd.DataFrame + """ + + fmt = "%d/%m/%Y %H:%M:%S" + start_time = cal_df.iloc[0]['data'].sec.values[0] + data_df = xsens_df[xsens_df.sec < start_time] + activity_start = 0 + activity_end = 0 + + activity_list = [] + + for index, row in activity_df.iterrows(): + sec = datetime.strptime(row['Timestamps'], fmt).timestamp() + if not test_standing and row['Activity'] == 'standing': + continue + if row['Event'] == 'start': + activity_start = sec + elif row['Event'] == 'end': + activity_stop = sec + + dsync = DataSynchronizer() + dsync.set_bounds(data_df['sec'].values, activity_start, + activity_stop) + + sync_df = dsync.sync_df(data_df.copy()) + activity_data = {'activity': row['Activity'], 'data': sync_df} + activity_list.append(activity_data) + + return pd.DataFrame(activity_list) + +def sens_rr_model(subject='S02-repeat', + window_size=12, + window_shift=0.2, + lbl_str='pss', + mdl_str='linreg', + overwrite=False, + feature_method='minirocket', + train_len:int=5, + test_standing=True, + data_input:str='imu', + ): + """Loads, preprocesses, and trains a select model using the configured + settings. + Attributes + ---------- + subject: str + specify the subject code (i.e. 
'Pilot02', 'S02') + window_size : float + a numpy array of the respiration rate ground truth values from the + bioharness + window_shift : float + a portion of the window size between 0 and 1 + mdl_str : str + a string to infoa portion of the window size between 0 and 1rm what model was used + overwrite : bool + overwrites the evaluations, models, and graphs (default False) + feature_method : str + choose between 'minirocket', 'tsfresh', or 'None' + train_len : int + number of minutes to sample from, choose between 1 to 7 + test_standing : bool + boolean to use standing data + data_input : str + sensors to use, choose from 'imu', 'bvp', 'imu+bvp' + + Returns + ------ + None + """ + cal_str = 'cpm' + tmp = [] + imu_cols = IMU_COLS + bvp_cols = ['bvp'] + if 'imu' in data_input and 'bvp' in data_input: + data_cols = ['acc_x', 'acc_y', 'acc_z', + 'gyro_x', 'gyro_y', 'gyro_z', + 'bvp'] + parent_directory_string = "imu-bvp_rr" + data_input = 'imu+bvp' + sens_list = ['imu', 'bvp'] + fs = IMU_FS + elif 'imu' in data_input and not 'bvp' in data_input: + data_cols = ['acc_x', 'acc_y', 'acc_z', + 'gyro_x', 'gyro_y', 'gyro_z',] + parent_directory_string = "imu_rr" + sens_list = ['imu'] + fs = IMU_FS + elif not 'imu' in data_input and 'bvp' in data_input: + data_cols = ['bvp'] + parent_directory_string = "bvp_rr" + sens_list = ['bvp'] + fs = PPG_FS + + do_minirocket = False + use_tsfresh = False + overwrite_tsfresh = overwrite + train_size = int(train_len) + minirocket = None + + if feature_method == 'tsfresh': + use_tsfresh = True + elif feature_method == 'minirocket': + do_minirocket = True + + config = {'window_size' : window_size, + 'window_shift' : window_shift, + 'lbl_str' : lbl_str, + 'do_minirocket' : do_minirocket, + 'use_tsfresh' : use_tsfresh, + 'train_len' : train_len, + 'test_standing' : test_standing, + 'sens_list' : data_input + } + + pfh = ProjectFileHandler(config) + pfh.set_home_directory(join(DATA_DIR, 'subject_specific', '_'+subject)) + pfh.set_parent_directory(parent_directory_string) + id_check = pfh.get_id_from_config() + if id_check is None: + pfh.set_project_directory() + pfh.save_metafile() + else: + pfh.set_id(int(id_check)) + pfh.set_project_directory() + print('Using pre-set data id: ', pfh.fset_id) + project_dir = pfh.project_directory + + xsens_df = load_and_sync_xsens(subject, sens_list=sens_list) + activity_df = get_activity_log(subject).reset_index(drop=True) + activity_df.drop([16, 17], inplace=True) + event_df = get_respiration_log(subject).iloc[-14:] + cal_df = get_cal_data(event_df, xsens_df) + + day0_event_df = get_respiration_log("S02") + second_cal_df = get_cal_data(day0_event_df, xsens_df) + + # include standing or not + test_df_tmp = get_test_data(cal_df, activity_df, xsens_df, test_standing) + test_df = pd.concat([df for df in test_df_tmp['data']], axis=0) + + if use_tsfresh: + cal_df_list = [] + test_df = load_tsfresh(test_df, + pfh.home_directory, + window_size=window_size, + window_shift=window_shift, + fs=fs, + overwrite=overwrite_tsfresh, + data_cols=data_cols, + prefix='test', + ) + for index, row in cal_df.iterrows(): + data = load_tsfresh(row['data'], + pfh.home_directory, + window_size=window_size, + window_shift=window_shift, + fs=fs, + overwrite=overwrite_tsfresh, + data_cols=data_cols, + prefix=f"calcpm_{row['cpm']}" + ) + cal_df_list.append({'cpm': row['cpm'], 'data': data}) + + cal_df = pd.DataFrame(cal_df_list) + else: + x_test_df, y_test_df = get_df_windows( + test_df, df_win_task, window_size=window_size, + window_shift=window_shift, fs=fs, 
cols=data_cols) + + my_df = [] + cal_inds = {} + for i in range(len(second_cal_df)): + df = second_cal_df.iloc[i] + data_df = df['data'] + data_df['cpm'] = df.cpm + + my_df.append(data_df) + my_df = pd.concat(my_df) + + + for combi in combinations(cal_df[cal_str].values, train_len): + combi_str = "-".join([str(x) for x in combi]) + pfh.config[cal_str] = combi_str + marker = f'{parent_directory_string}_{subject}_id{pfh.fset_id}'\ + f'_combi{combi_str}' + print(marker) + + train_df_list = [] + for cpm in combi: + df = cal_df[cal_df[cal_str] == cpm] + data_df = df['data'].iloc[0] + data_df['cpm'] = cpm + train_df_list.append(data_df) + train_df = pd.concat(train_df_list) + + assert np.isin(train_df.sec.values, test_df.sec.values).any()==False,\ + "overlapping test and train data" + + print("train") + print(train_df.shape) + print("test") + print(test_df.shape) + + if do_minirocket: + x_train_df, y_train_df = get_df_windows(train_df, + df_win_task, + window_size=window_size, + window_shift=window_shift, + fs=fs, + cols=data_cols + ) + + x_train = make_windows_from_id(x_train_df, data_cols) + y_train = y_train_df['cpm'].values.reshape(-1, 1) + x_test = make_windows_from_id(x_test_df, data_cols) + y_test = y_test_df[lbl_str].values.reshape(-1, 1) + + # x_train = y_train_df['hr_est'].values.reshape(-1, 1) + # x_test = y_test_df['hr_est'].values.reshape(-1, 1) + x_train = np.swapaxes(x_train, 1, 2) + x_test = np.swapaxes(x_test, 1, 2) + + directory = join(project_dir, '_'.join([mdl_str, marker])) + minirocket_fname = join(directory, "minirocket.joblib") + + if not overwrite and exists(minirocket_fname): + with open(minirocket_fname, 'rb') as mfile: + minirocket = load(mfile) + x_train = minirocket.transform(x_train) + print("loaded minirocket...") + else: + minirocket = MiniRocketMultivariate() + x_train = minirocket.fit_transform(x_train) + + x_test = minirocket.transform(x_test) + + if overwrite or not exists(minirocket_fname): + if not exists(directory): makedirs(directory) + with open(minirocket_fname, 'wb') as mfile: + dump(minirocket, mfile) + print("saved new minirocket...") + + elif use_tsfresh: + y_cols = ['sec', 'br', 'pss', 'cpm'] + x_cols = [col for col in train_df.columns.values if col not in y_cols] + x_train = train_df[x_cols].values + y_train = train_df['cpm'].values.reshape(-1, 1) + x_test = test_df[x_cols].values + y_test = test_df[lbl_str].values.reshape(-1, 1) + y_test_df = test_df[y_cols[:-1]] + else: + x_train_df, y_train_df = get_df_windows(train_df, + df_win_task, + window_size=window_size, + window_shift=window_shift, + fs=fs, + cols=data_cols, + ) + x_train = make_windows_from_id(x_train_df, data_cols) + x_test = make_windows_from_id(x_test_df, data_cols) + y_train = y_train_df['cpm'].values.reshape(-1, 1) + y_test = y_test_df[lbl_str].values.reshape(-1, 1) + + transforms, model = model_training(mdl_str, x_train, y_train, + marker, validation_data=None, + overwrite=overwrite, + is_regression=True, + project_directory=project_dir, + window_size=int(window_size*fs), + extra_train=200, + poly_deg=1 + ) + + if transforms is not None: + x_test = transforms.transform(x_test) + + preds = model.predict(x_test) + + eval_handle = EvalHandler(y_test.flatten(), preds.flatten(), subject, + pfh, mdl_str, overwrite=overwrite) + eval_handle.update_eval_history() + eval_handle.save_eval_history() + + pp = PrettyPrinter() + pp.pprint(eval_handle.load_eval_history()) + + fig, ax = plt.subplots(2, 1, figsize=(7.3, 4.5)) + fig_title = '_'.join([mdl_str, data_input, subject]+[combi_str]) + 
fig.suptitle(fig_title) + ax[0].plot(y_test) + ax[0].plot(preds) + ax[0].set_title('raw') + + if lbl_str == 'pss': + br = y_test_df['br'].values + ax[1].plot(movingaverage(y_test, 12), color='tab:blue') + ax[1].plot(br, 'k') + ax[1].plot(movingaverage(preds, 12), color='tab:orange') + ax[1].legend([lbl_str, 'br', 'pred']) + else: + ax[1].plot(y_test, 'k') + ax[1].plot(movingaverage(preds, 12), color='tab:orange') + ax[1].legend([lbl_str, 'pred']) + ax[1].set_title('smoothened') + fig_dir = join(project_dir, 'figures') + if not exists(fig_dir): mkdir(fig_dir) + fig.savefig(join(fig_dir, fig_title+".png")) + + # make PSS to lbls and preprocess data + if do_minirocket: + x_df, y_df = get_df_windows(my_df, + df_win_task, + window_size=window_size, + window_shift=window_shift, + fs=fs, + cols=data_cols + ) + + x = make_windows_from_id(x_df, data_cols) + y = y_df['pss'].values.reshape(-1, 1) + x = np.swapaxes(x, 1, 2) + + # load and process + x = minirocket.transform(x) + + elif use_tsfresh: + y_cols = ['sec', 'br', 'pss', 'cpm'] + x_cols = [col for col in train_df.columns.values if col not in y_cols] + x = my_df[x_cols].values + y = my_df['pss'].values.reshape(-1, 1) + else: + x_df, y_df = get_df_windows(my_df, + df_win_task, + window_size=window_size, + window_shift=window_shift, + fs=fs, + cols=data_cols, + ) + x = make_windows_from_id(x_df, data_cols) + y = y_df['pss'].values.reshape(-1, 1) + + if transforms is not None: + x = transforms.transform(x) + preds = model.predict(x) + + fig2, ax2 = plt.subplots() + ax2.plot(y) + ax2.plot(preds) + ax2.plot(y_df.cpm.values, '--') + ax2.legend(['label', 'pred', 'cpm']) + ax2.set_title("second day mapped to first day cal") + fig2.savefig(join(fig_dir, fig_title+"cal_check.png")) + + plt.close('all') + +sens_rr_model(subject='S02-repeat', + window_size=12, + window_shift=0.2, + lbl_str='pss', + mdl_str='cnn1d', + overwrite=False, + feature_method=None, + # feature_method='minirocket', + train_len=5, + test_standing=True, + data_input='bvp', + ) -- GitLab
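For reference, a minimal standalone sketch of the window-truncation idea this patch swaps in for the old zero-padding inside vectorized_slide_win: window start indices are broadcast against within-window offsets, and any window whose last index would run past the end of the signal is dropped rather than padded. The helper name, the dummy signal, and the 12 s window with 20 % shift are illustrative assumptions, not code from the repository.

import numpy as np

def sliding_window_indices(n_samples, win_len, stride):
    """Return a (n_windows, win_len) index matrix over a 1-D signal.

    Windows that would extend past the end of the signal are dropped
    instead of zero-padded, mirroring the truncation behaviour the patch
    introduces in vectorized_slide_win.
    """
    starts = np.arange(0, n_samples, stride).reshape(-1, 1)   # one row per window start
    offsets = np.arange(win_len).reshape(1, -1)               # within-window sample offsets
    windows = starts + offsets                                # broadcast to (n_windows, win_len)
    return windows[windows[:, -1] < n_samples]                # keep only fully in-bounds windows

if __name__ == "__main__":
    fs = 120                               # assumed example sample rate, not necessarily IMU_FS
    sig = np.random.randn(60 * fs)         # one minute of dummy data
    inds = sliding_window_indices(len(sig), win_len=12 * fs, stride=int(12 * 0.2 * fs))
    segments = sig[inds]                   # shape: (n_windows, 12 * fs)
    print(segments.shape)

The fitted-MiniRocket caching added to sens_rr_model can likewise be read as a single fit-or-load step. The condensed sketch below assumes a 3-D input of shape (n_windows, n_channels, n_timepoints), i.e. the arrays produced after the np.swapaxes(x, 1, 2) call; the helper name and cache path are illustrative only and do not reproduce the repository's directory layout.

from os.path import exists
from joblib import dump, load
from sktime.transformations.panel.rocket import MiniRocketMultivariate

def fit_or_load_minirocket(x_train, cache_path, overwrite=False):
    """Fit MiniRocketMultivariate once, then reuse the cached transformer."""
    if exists(cache_path) and not overwrite:
        minirocket = load(cache_path)            # reuse previously fitted kernels
        features = minirocket.transform(x_train)
    else:
        minirocket = MiniRocketMultivariate()
        features = minirocket.fit_transform(x_train)
        dump(minirocket, cache_path)             # cache for later sessions (e.g. a repeat run)
    return minirocket, features

if __name__ == "__main__":
    import numpy as np
    x = np.random.randn(20, 6, 512)              # 20 windows, 6 IMU channels, 512 samples each
    _, feats = fit_or_load_minirocket(x, "minirocket.joblib", overwrite=True)
    print(feats.shape)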