# TODO:
# import glob
from os import makedirs, mkdir
from os.path import join, exists
import pandas as pd
import numpy as np
import json
import ipdb
import re
import pickle
import sys
import time
from zipfile import ZipFile
from joblib import dump, load
import argparse
from datetime import datetime, timedelta, timezone
import pytz
import matplotlib.pyplot as plt
from functools import partial
from collections import Counter
from itertools import repeat, chain, combinations
from multiprocessing import Pool, cpu_count

import tensorflow as tf
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
from sklearn.preprocessing import PolynomialFeatures, LabelEncoder
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import accuracy_score
from tsfresh.feature_extraction import extract_features
from tsfresh.feature_extraction import settings as tsfresh_settings
from tsfresh.utilities.string_manipulation import get_config_from_string

from modules.datapipeline import get_file_list, load_and_snip, load_data, \
        load_split_data, load_harness_data
from modules.digitalsignalprocessing import vectorized_slide_win as vsw
from modules.digitalsignalprocessing import imu_signal_processing
from modules.digitalsignalprocessing import bvp_signal_processing
from modules.digitalsignalprocessing import hernandez_sp, reject_artefact
from modules.digitalsignalprocessing import do_pad_fft, \
        pressure_signal_processing, infer_frequency, movingaverage
from modules.utils import *
from modules.evaluations import Evaluation
from modules.datapipeline import get_windowed_data, DataSynchronizer, \
        parallelize_dataframe
from modules.datapipeline import ProjectFileHandler
from models.ardregression import ARDRegressionClass
from models.knn import KNNClass
from models.svm import SVMClass
from models.lda import LDAClass
from models.svr import SVRClass
from models.logisticregression import LogisticRegressionClass
from models.linearregression import LinearRegressionClass
from models.neuralnet import FNN_HyperModel, LSTM_HyperModel, TunerClass, \
        CNN1D_HyperModel
from models.ridgeclass import RidgeClass
from models.resnet import Regressor_RESNET, Classifier_RESNET
from models.xgboostclass import XGBoostClass

from pprint import PrettyPrinter

from sktime.transformations.panel.rocket import (
    MiniRocket,
    MiniRocketMultivariate,
    MiniRocketMultivariateVariable,
)

from config import WINDOW_SIZE, WINDOW_SHIFT, IMU_FS, DATA_DIR, BR_FS, \
        FS_RESAMPLE, PPG_FS
from regress_rr import *

N_SUBJECT_MAX = 6
IMU_COLS = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']


def load_and_sync_xsens(subject, sens_list: list = ['imu', 'bvp']):
    """ Loads the requested sensors from the subject folder and synchronises
    each stream to the shared start and end timestamps. Lower frequency data
    and timestamps are linearly interpolated to match the higher frequency
    data.

    Arguments
    ---------
    subject : str
        subject to extract data from (e.g. 'Pilot02', 'S02')
    sens_list : list
        a list that contains either or both of 'imu' and 'bvp'

    Returns
    -------
    pd.DataFrame
    """
    assert 'imu' in sens_list or 'bvp' in sens_list, \
        f"{sens_list} is not supported, must contain "\
        "'imu', 'bvp' or 'imu, bvp'"

    pss_df, br_df, imu_df, bvp_df = None, None, None, None
    acc_data, gyr_data, bvp_data = None, None, None

    # load imu
    if 'imu' in sens_list:
        imu_list = get_file_list('imudata.gz', sbj=subject)
        imu_df_all, imu_hdr_df_all = load_imu_files(imu_list)

    # load bioharness
    pss_list = get_file_list('*Breathing.csv', sbj=subject)
    if len(pss_list) == 0:
        pss_list = get_file_list('BR*.csv', sbj=subject)
    br_list = get_file_list('*Summary*.csv', sbj=subject)

    # load e4 wristband
    if 'bvp' in sens_list:
        e4_list = get_file_list('*.zip', sbj=subject)
        bvp_df_all, bvp_hdr = load_e4_files(e4_list)
        bvp_fs = bvp_hdr[0]['fs']

    xsens_list = []

    # skip the first and last x minute(s)
    minutes_to_skip = .5
    br_skiprows = br_skipfooter = int(minutes_to_skip*60)
    pss_skiprows = pss_skipfooter = int(minutes_to_skip*60*BR_FS)

    # load each bioharness file and sync the imu to it
    for pss_file, br_file in zip(pss_list, br_list):
        xsens_data = {}
        pss_df = load_bioharness_file(pss_file, skiprows=pss_skiprows,
                                      skipfooter=pss_skipfooter,
                                      engine='python')
        pss_time = pss_df['Time'].map(bioharness_datetime_to_seconds).values\
                .reshape(-1, 1)
        pss_df['sec'] = pss_time

        br_df = load_bioharness_file(br_file, skiprows=br_skiprows,
                                     skipfooter=br_skipfooter,
                                     engine='python')
        br_time = br_df['Time'].map(bioharness_datetime_to_seconds).values\
                .reshape(-1, 1)
        br_df['sec'] = br_time

        # sync
        if 'imu' in sens_list and 'bvp' in sens_list:
            br_df, imu_df = sync_to_ref(br_df, imu_df_all.copy())
            pss_df, _ = sync_to_ref(pss_df, imu_df_all.copy())
            bvp_df, _ = sync_to_ref(bvp_df_all.copy(), pss_df.copy())
        elif 'imu' in sens_list and not 'bvp' in sens_list:
            br_df, imu_df = sync_to_ref(br_df, imu_df_all.copy())
            pss_df, _ = sync_to_ref(pss_df, imu_df_all.copy())
        elif not 'imu' in sens_list and 'bvp' in sens_list:
            br_df, bvp_df = sync_to_ref(br_df, bvp_df_all.copy())
            pss_df, _ = sync_to_ref(pss_df, bvp_df_all.copy())

        # extract relevant data
        if 'imu' in sens_list:
            axes = ['x', 'y', 'z']
            acc_data = np.stack(imu_df['accelerometer'].values)
            gyr_data = np.stack(imu_df['gyroscope'].values)
            x_time = imu_df['sec'].values.reshape(-1, 1)

        if 'bvp' in sens_list and 'imu' in sens_list:
            bvp_data = bvp_df['bvp'].values
            bvp_data = np.interp(x_time, bvp_df['sec'].values, bvp_data)\
                    .reshape(-1, 1)
        elif 'bvp' in sens_list and not 'imu' in sens_list:
            bvp_data = bvp_df['bvp'].values
            x_time = bvp_df['sec'].values

        xsens_data['sec'] = x_time.flatten()

        br_col = [col for col in pss_df.columns.values if
                  'breathing' in col.lower()][0]
        pss_data = pss_df[br_col].values
        pss_data = np.interp(x_time, pss_df['sec'].values, pss_data)\
                .reshape(-1, 1)
        xsens_data['PSS'] = pss_data.flatten()

        br_lbl = [col for col in br_df.columns.values if
                  'br' in col.lower()][0]
        br_data = br_df['BR'].values
        br_data = np.interp(x_time, br_df['sec'].values, br_data)\
                .reshape(-1, 1)
        xsens_data['BR'] = br_data.flatten()

        if 'imu' in sens_list:
            for i, axis in enumerate(axes):
                xsens_data['acc_'+axis] = acc_data.T[i].flatten()
                xsens_data['gyro_'+axis] = gyr_data.T[i].flatten()

        if 'bvp' in sens_list:
            xsens_data['bvp'] = bvp_data.flatten()

        xsens_df_tmp = pd.DataFrame(xsens_data)
        xsens_list.append(xsens_df_tmp)

    if len(xsens_list) > 1:
        xsens_df = pd.concat(xsens_list, axis=0, ignore_index=True)
        xsens_df.reset_index(drop=True, inplace=True)
    else:
        xsens_df = xsens_list[0]

    return xsens_df
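

# Example usage of the loader above (commented out; the subject code is
# illustrative only and assumes the raw IMU/BioHarness/E4 files exist under
# DATA_DIR):
# xsens_df = load_and_sync_xsens('Pilot02', sens_list=['imu', 'bvp'])
# print(xsens_df[['sec', 'PSS', 'BR', 'bvp'] + IMU_COLS].head())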


def get_test_data(cal_df, activity_df, xsens_df, test_standing):
    """ Retrieves the activity timestamps of the sitting and standing events
    and extracts the corresponding data windows.

    Arguments
    ---------
    cal_df : pandas.DataFrame
        synchronised and frequency matched respiration calibration data
    activity_df : pandas.DataFrame
        timestamps of activity events
    xsens_df : pandas.DataFrame
        synchronised and frequency matched DataFrame with all data and labels
    test_standing : bool
        whether to include standing activity data

    Returns
    -------
    pd.DataFrame
    """
    fmt = "%d/%m/%Y %H:%M:%S"
    start_time = cal_df.iloc[0]['data'].sec.values[0]
    data_df = xsens_df[xsens_df.sec < start_time]

    activity_start = 0
    activity_end = 0

    activity_list = []
    for index, row in activity_df.iterrows():
        sec = datetime.strptime(row['Timestamps'], fmt).timestamp()

        if not test_standing and row['Activity'] == 'standing':
            continue

        if row['Event'] == 'start':
            activity_start = sec
        elif row['Event'] == 'end':
            activity_end = sec

            dsync = DataSynchronizer()
            dsync.set_bounds(data_df['sec'].values, activity_start,
                             activity_end)
            sync_df = dsync.sync_df(data_df.copy())

            activity_data = {'activity': row['Activity'], 'data': sync_df}
            activity_list.append(activity_data)

    return pd.DataFrame(activity_list)
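

# For reference, get_test_data expects activity_df rows shaped roughly like
# the sketch below (values are illustrative only; the timestamp format is the
# "%d/%m/%Y %H:%M:%S" string parsed above):
#     Timestamps               Event   Activity
#     01/01/2023 10:00:00      start   sitting
#     01/01/2023 10:05:00      end     sitting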


def sens_rr_model(subject='S02-repeat',
                  window_size=12,
                  window_shift=0.2,
                  lbl_str='pss',
                  mdl_str='linreg',
                  overwrite=False,
                  feature_method='minirocket',
                  train_len: int = 5,
                  test_standing=True,
                  data_input: str = 'imu',
                  ):
    """Loads, preprocesses, and trains the selected model using the configured
    settings.

    Arguments
    ---------
    subject : str
        specify the subject code (e.g. 'Pilot02', 'S02')
    window_size : float
        length of the sliding window in seconds
    window_shift : float
        a portion of the window size between 0 and 1
    lbl_str : str
        ground truth label column to regress against (e.g. 'pss' or 'br')
    mdl_str : str
        a string to inform what model is used
    overwrite : bool
        overwrites the evaluations, models, and graphs (default False)
    feature_method : str
        choose between 'minirocket', 'tsfresh', or 'None'
    train_len : int
        number of minutes to sample from, choose between 1 to 7
    test_standing : bool
        boolean to use standing data
    data_input : str
        sensors to use, choose from 'imu', 'bvp', 'imu+bvp'

    Returns
    -------
    None
    """
    cal_str = 'cpm'
    tmp = []
    imu_cols = IMU_COLS
    bvp_cols = ['bvp']

    if 'imu' in data_input and 'bvp' in data_input:
        data_cols = ['acc_x', 'acc_y', 'acc_z',
                     'gyro_x', 'gyro_y', 'gyro_z',
                     'bvp']
        parent_directory_string = "imu-bvp_rr"
        data_input = 'imu+bvp'
        sens_list = ['imu', 'bvp']
        fs = IMU_FS
    elif 'imu' in data_input and not 'bvp' in data_input:
        data_cols = ['acc_x', 'acc_y', 'acc_z',
                     'gyro_x', 'gyro_y', 'gyro_z']
        parent_directory_string = "imu_rr"
        sens_list = ['imu']
        fs = IMU_FS
    elif not 'imu' in data_input and 'bvp' in data_input:
        data_cols = ['bvp']
        parent_directory_string = "bvp_rr"
        sens_list = ['bvp']
        fs = PPG_FS
    else:
        # guard against unsupported sensor selections
        raise ValueError(f"unsupported data_input: {data_input}")

    do_minirocket = False
    use_tsfresh = False
    overwrite_tsfresh = overwrite
    train_size = int(train_len)
    minirocket = None

    if feature_method == 'tsfresh':
        use_tsfresh = True
    elif feature_method == 'minirocket':
        do_minirocket = True

    config = {'window_size'   : window_size,
              'window_shift'  : window_shift,
              'lbl_str'       : lbl_str,
              'do_minirocket' : do_minirocket,
              'use_tsfresh'   : use_tsfresh,
              'train_len'     : train_len,
              'test_standing' : test_standing,
              'sens_list'     : data_input,
              }

    pfh = ProjectFileHandler(config)
    pfh.set_home_directory(join(DATA_DIR, 'subject_specific', '_'+subject))
    pfh.set_parent_directory(parent_directory_string)
    id_check = pfh.get_id_from_config()
    if id_check is None:
        pfh.set_project_directory()
        pfh.save_metafile()
    else:
        pfh.set_id(int(id_check))
        pfh.set_project_directory()
        print('Using pre-set data id: ', pfh.fset_id)
    project_dir = pfh.project_directory

    xsens_df = load_and_sync_xsens(subject, sens_list=sens_list)
    activity_df = get_activity_log(subject).reset_index(drop=True)
    activity_df.drop([16, 17], inplace=True)
    event_df = get_respiration_log(subject).iloc[-14:]

    cal_df = get_cal_data(event_df, xsens_df)

    day0_event_df = get_respiration_log("S02")
    second_cal_df = get_cal_data(day0_event_df, xsens_df)

    # include standing or not
    test_df_tmp = get_test_data(cal_df, activity_df, xsens_df, test_standing)
    test_df = pd.concat([df for df in test_df_tmp['data']], axis=0)

    if use_tsfresh:
        cal_df_list = []
        test_df = load_tsfresh(test_df,
                               pfh.home_directory,
                               window_size=window_size,
                               window_shift=window_shift,
                               fs=fs,
                               overwrite=overwrite_tsfresh,
                               data_cols=data_cols,
                               prefix='test',
                               )
        for index, row in cal_df.iterrows():
            data = load_tsfresh(row['data'],
                                pfh.home_directory,
                                window_size=window_size,
                                window_shift=window_shift,
                                fs=fs,
                                overwrite=overwrite_tsfresh,
                                data_cols=data_cols,
                                prefix=f"calcpm_{row['cpm']}"
                                )
            cal_df_list.append({'cpm': row['cpm'], 'data': data})
        cal_df = pd.DataFrame(cal_df_list)
    else:
        x_test_df, y_test_df = get_df_windows(
            test_df, df_win_task, window_size=window_size,
            window_shift=window_shift, fs=fs, cols=data_cols)

    # stack the second-day calibration data with its cpm label
    my_df = []
    cal_inds = {}
    for i in range(len(second_cal_df)):
        df = second_cal_df.iloc[i]
        data_df = df['data']
        data_df['cpm'] = df.cpm
        my_df.append(data_df)
    my_df = pd.concat(my_df)

    for combi in combinations(cal_df[cal_str].values, train_len):
        combi_str = "-".join([str(x) for x in combi])
        pfh.config[cal_str] = combi_str
        marker = f'{parent_directory_string}_{subject}_id{pfh.fset_id}'\
                 f'_combi{combi_str}'
        print(marker)

        train_df_list = []
        for cpm in combi:
            df = cal_df[cal_df[cal_str] == cpm]
            data_df = df['data'].iloc[0]
            data_df['cpm'] = cpm
            train_df_list.append(data_df)
        train_df = pd.concat(train_df_list)

        assert not np.isin(train_df.sec.values, test_df.sec.values).any(), \
            "overlapping test and train data"

        print("train")
        print(train_df.shape)
        print("test")
        print(test_df.shape)

        if do_minirocket:
            x_train_df, y_train_df = get_df_windows(train_df,
                                                    df_win_task,
                                                    window_size=window_size,
                                                    window_shift=window_shift,
                                                    fs=fs,
                                                    cols=data_cols,
                                                    )
            x_train = make_windows_from_id(x_train_df, data_cols)
            y_train = y_train_df['cpm'].values.reshape(-1, 1)
            x_test = make_windows_from_id(x_test_df, data_cols)
            y_test = y_test_df[lbl_str].values.reshape(-1, 1)
            # x_train = y_train_df['hr_est'].values.reshape(-1, 1)
            # x_test = y_test_df['hr_est'].values.reshape(-1, 1)

            x_train = np.swapaxes(x_train, 1, 2)
            x_test = np.swapaxes(x_test, 1, 2)

            directory = join(project_dir, '_'.join([mdl_str, marker]))
            minirocket_fname = join(directory, "minirocket.joblib")
            if not overwrite and exists(minirocket_fname):
                with open(minirocket_fname, 'rb') as mfile:
                    minirocket = load(mfile)
                x_train = minirocket.transform(x_train)
                print("loaded minirocket...")
            else:
                minirocket = MiniRocketMultivariate()
                x_train = minirocket.fit_transform(x_train)
            x_test = minirocket.transform(x_test)

            if overwrite or not exists(minirocket_fname):
                if not exists(directory):
                    makedirs(directory)
                with open(minirocket_fname, 'wb') as mfile:
                    dump(minirocket, mfile)
                print("saved new minirocket...")
        elif use_tsfresh:
            y_cols = ['sec', 'br', 'pss', 'cpm']
            x_cols = [col for col in train_df.columns.values
                      if col not in y_cols]
            x_train = train_df[x_cols].values
            y_train = train_df['cpm'].values.reshape(-1, 1)
            x_test = test_df[x_cols].values
            y_test = test_df[lbl_str].values.reshape(-1, 1)
            y_test_df = test_df[y_cols[:-1]]
        else:
            x_train_df, y_train_df = get_df_windows(train_df,
                                                    df_win_task,
                                                    window_size=window_size,
                                                    window_shift=window_shift,
                                                    fs=fs,
                                                    cols=data_cols,
                                                    )
            x_train = make_windows_from_id(x_train_df, data_cols)
            x_test = make_windows_from_id(x_test_df, data_cols)
            y_train = y_train_df['cpm'].values.reshape(-1, 1)
            y_test = y_test_df[lbl_str].values.reshape(-1, 1)

        transforms, model = model_training(mdl_str, x_train, y_train,
                                           marker,
                                           validation_data=None,
                                           overwrite=overwrite,
                                           is_regression=True,
                                           project_directory=project_dir,
                                           window_size=int(window_size*fs),
                                           extra_train=200,
                                           poly_deg=1,
                                           )

        if transforms is not None:
            x_test = transforms.transform(x_test)

        preds = model.predict(x_test)

        eval_handle = EvalHandler(y_test.flatten(), preds.flatten(), subject,
                                  pfh, mdl_str, overwrite=overwrite)
        eval_handle.update_eval_history()
        eval_handle.save_eval_history()

        pp = PrettyPrinter()
        pp.pprint(eval_handle.load_eval_history())

        fig, ax = plt.subplots(2, 1, figsize=(7.3, 4.5))
        fig_title = '_'.join([mdl_str, data_input, subject] + [combi_str])
        fig.suptitle(fig_title)
        ax[0].plot(y_test)
        ax[0].plot(preds)
        ax[0].set_title('raw')
        if lbl_str == 'pss':
            br = y_test_df['br'].values
            ax[1].plot(movingaverage(y_test, 12), color='tab:blue')
            ax[1].plot(br, 'k')
            ax[1].plot(movingaverage(preds, 12), color='tab:orange')
            ax[1].legend([lbl_str, 'br', 'pred'])
        else:
            ax[1].plot(y_test, 'k')
            ax[1].plot(movingaverage(preds, 12), color='tab:orange')
            ax[1].legend([lbl_str, 'pred'])
        ax[1].set_title('smoothed')

        fig_dir = join(project_dir, 'figures')
        if not exists(fig_dir):
            mkdir(fig_dir)
        fig.savefig(join(fig_dir, fig_title+".png"))

        # make PSS to lbls and preprocess data
        if do_minirocket:
            x_df, y_df = get_df_windows(my_df,
                                        df_win_task,
                                        window_size=window_size,
                                        window_shift=window_shift,
                                        fs=fs,
                                        cols=data_cols,
                                        )
            x = make_windows_from_id(x_df, data_cols)
            y = y_df['pss'].values.reshape(-1, 1)
            x = np.swapaxes(x, 1, 2)
            # load and process
            x = minirocket.transform(x)
        elif use_tsfresh:
            y_cols = ['sec', 'br', 'pss', 'cpm']
            x_cols = [col for col in train_df.columns.values
                      if col not in y_cols]
            x = my_df[x_cols].values
            y = my_df['pss'].values.reshape(-1, 1)
        else:
            x_df, y_df = get_df_windows(my_df,
                                        df_win_task,
                                        window_size=window_size,
                                        window_shift=window_shift,
                                        fs=fs,
                                        cols=data_cols,
                                        )
            x = make_windows_from_id(x_df, data_cols)
            y = y_df['pss'].values.reshape(-1, 1)

        if transforms is not None:
            x = transforms.transform(x)
        preds = model.predict(x)

        fig2, ax2 = plt.subplots()
        ax2.plot(y)
        ax2.plot(preds)
        ax2.plot(y_df.cpm.values, '--')
        ax2.legend(['label', 'pred', 'cpm'])
        ax2.set_title("second day mapped to first day cal")
        fig2.savefig(join(fig_dir, fig_title+"cal_check.png"))

        plt.close('all')
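

# How the window parameters translate to samples, assuming window_shift is a
# fraction of the window (the interpretation used when calling get_df_windows
# and model_training above); a sketch only, not part of the pipeline:
#   window_samples = int(window_size * fs)                 # e.g. a 12 s window
#   hop_samples    = int(window_size * window_shift * fs)  # 0.2 -> 80 % overlap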


sens_rr_model(subject='S02-repeat',
              window_size=12,
              window_shift=0.2,
              lbl_str='pss',
              mdl_str='cnn1d',
              overwrite=False,
              feature_method=None,
              # feature_method='minirocket',
              train_len=5,
              test_standing=True,
              data_input='bvp',
              )
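
# An alternative configuration can be run by swapping the arguments, e.g.
# MiniRocket features on the fused IMU + BVP input (commented out):
# sens_rr_model(subject='S02-repeat',
#               window_size=12,
#               window_shift=0.2,
#               lbl_str='pss',
#               mdl_str='linreg',
#               overwrite=False,
#               feature_method='minirocket',
#               train_len=5,
#               test_standing=True,
#               data_input='imu+bvp',
#               )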