From 01d376bb159496d8c883f725b8fec7409e4ab447 Mon Sep 17 00:00:00 2001 From: Raymond Chia <rqchia@janus0.ihpc.uts.edu.au> Date: Tue, 21 Nov 2023 20:19:58 +1100 Subject: [PATCH] bvp implementation --- 3 | 855 ------------------ __pycache__/config.cpython-38.pyc | Bin 1072 -> 1077 bytes config.py | 2 +- models/__pycache__/neuralnet.cpython-38.pyc | Bin 12764 -> 12764 bytes .../digitalsignalprocessing.cpython-38.pyc | Bin 27016 -> 27238 bytes modules/__pycache__/utils.cpython-38.pyc | Bin 11310 -> 11269 bytes modules/digitalsignalprocessing.py | 12 +- modules/utils.py | 11 +- regress_rr.py | 464 +++++++--- 9 files changed, 359 insertions(+), 985 deletions(-) delete mode 100644 3 diff --git a/3 b/3 deleted file mode 100644 index c7387fd..0000000 --- a/3 +++ /dev/null @@ -1,855 +0,0 @@ -import glob -from os import makedirs, mkdir -from os.path import join, exists -import pandas as pd -import numpy as np -import json -import ipdb -import re -import pickle -import sys -import time -from zipfile import ZipFile - -import argparse -from datetime import datetime, timedelta, timezone, timedelta -import pytz - -import matplotlib.pyplot as plt -from functools import partial -from collections import Counter -from itertools import repeat, chain, combinations -from multiprocessing import Pool, cpu_count -import tensorflow as tf - -from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder -from sklearn.preprocessing import PolynomialFeatures, LabelEncoder -from sklearn.model_selection import KFold, train_test_split -from sklearn.metrics import accuracy_score - -from tsfresh.feature_extraction import extract_features -from tsfresh.feature_extraction import settings as tsfresh_settings -from tsfresh.utilities.string_manipulation import get_config_from_string - -from modules.datapipeline import get_file_list, load_and_snip, load_data, \ - load_split_data, load_harness_data -from modules.digitalsignalprocessing import vectorized_slide_win as vsw -from modules.digitalsignalprocessing import imu_signal_processing -from modules.digitalsignalprocessing import do_pad_fft,\ - pressure_signal_processing, infer_frequency -from modules.utils import * - -from modules.evaluations import Evaluation -from modules.datapipeline import get_windowed_data, DataSynchronizer,\ - parallelize_dataframe -from modules.datapipeline import ProjectFileHandler -from models.ardregression import ARDRegressionClass -from models.knn import KNNClass -from models.svm import SVMClass -from models.lda import LDAClass -from models.svr import SVRClass -from models.logisticregression import LogisticRegressionClass -from models.linearregression import LinearRegressionClass -from models.neuralnet import FNN_HyperModel, LSTM_HyperModel, TunerClass,\ - CNN1D_HyperModel -from models.ridgeclass import RidgeClass -from models.resnet import Regressor_RESNET, Classifier_RESNET -from models.xgboostclass import XGBoostClass - -from pprint import PrettyPrinter - -from sktime.transformations.panel.rocket import ( - MiniRocket, - MiniRocketMultivariate, - MiniRocketMultivariateVariable, -) - -from config import WINDOW_SIZE, WINDOW_SHIFT, IMU_FS, DATA_DIR, BR_FS - -IMU_COLS = ['acc_x', 'acc_y', 'acc_z', 'gyr_x', 'gyr_y', 'gyr_z'] - -def utc_to_local(utc_dt, tz=None): - return utc_dt.replace(tzinfo=timezone.utc).astimezone(tz=tz) - -def datetime_from_utc_to_local(utc_datetime): - now_timestamp = time.time() - offset = datetime.fromtimestamp(now_timestamp) - datetime.utcfromtimestamp(now_timestamp) - return utc_datetime + offset - -# Load data -def load_bioharness_file(f:str, skiprows=0, skipfooter=0, **kwargs): - df_list = [] - method = partial(pd.read_csv, skipinitialspace=True, - skiprows=list(range(1, skiprows+1)), - skipfooter=skipfooter, - header=0, - **kwargs - ) - df = method(f) - if 'Time' not in df.columns.values: - df['Time'] = pd.to_datetime( - df.rename(columns={'Date':'Day'})[ - ['Day','Month','Year']]) \ - + pd.to_timedelta(df['ms'], unit='ms') - if pd.isna(df['Time']).any(): - df['Time'].interpolate(inplace=True) - df['Time'] = pd.to_datetime(df['Time'], format="%d/%m/%Y %H:%M:%S.%f") - df['Time'] = df['Time'].dt.strftime("%d/%m/%Y %H:%M:%S.%f") - return df - -def load_bioharness_files(f_list:list, skiprows=0, skipfooter=0, **kwargs): - df_list = [] - method = partial(pd.read_csv, skipinitialspace=True, - skiprows=list(range(1, skiprows+1)), - skipfooter=skipfooter, - header=0, **kwargs) - for f in f_list: - df_list.append(load_bioharness_file(f)) - - df = pd.concat(df_list, ignore_index=True) - return df - -def bioharness_datetime_to_seconds(val): - fmt = "%d/%m/%Y %H:%M:%S.%f" - dstr = datetime.strptime(val, fmt) - seconds = dstr.timestamp() - return seconds - -def load_imu_file(imu_file:str): - hdr_file = imu_file.replace('imudata.gz', 'recording.g3') - - df = pd.read_json(imu_file, lines=True, compression='gzip') - hdr = pd.read_json(hdr_file, orient='index') - hdr = hdr.to_dict().pop(0) - - if df.empty: return df, hdr - - data_df = pd.DataFrame(df['data'].tolist()) - df = pd.concat([df.drop('data', axis=1), data_df], axis=1) - - iso_tz = hdr['created'] - tzinfo = pytz.timezone(hdr['timezone']) - # adjust for UTC - start_time = datetime.fromisoformat(iso_tz[:-1]) - start_time = utc_to_local(start_time, tz=tzinfo).astimezone(tzinfo) - - na_inds = df.loc[pd.isna(df['accelerometer']), :].index.values - df.drop(index=na_inds, inplace=True) - - imu_times = df['timestamp'].values - df['timestamp_interp'] = imu_times - df['timestamp_interp'] = df['timestamp_interp'].interpolate() - imu_times = df['timestamp_interp'].values - imu_datetimes = [start_time + timedelta(seconds=val) \ - for val in imu_times] - imu_s = np.array([time.timestamp() for time in imu_datetimes]) - df['sec'] = imu_s - - time_check_thold = df['sec'].min() + 3*3600 - mask = df['sec'] > time_check_thold - if np.any(mask): - df = df[np.logical_not(mask)] - - return df, hdr - -def load_imu_files(f_list:list): - data, hdr = [], [] - tmp = [] - for f in f_list: - tmp.append(load_imu_file(f)) - for l in tmp: - data.append(l[0]) - hdr.append(l[1]) - data_df = pd.concat(data, axis=0) - return data_df, hdr - -def load_e4_file(e4_file:str): - ''' First row is the initial time of the session as unix time. - Second row is the sample rate in Hz''' - zip_file = ZipFile(e4_file) - dfs = {csv_file.filename: pd.read_csv(zip_file.open(csv_file.filename) - ,header=None) - for csv_file in zip_file.infolist() - if csv_file.filename.endswith('.csv')} - bvp = dfs["BVP.csv"] - t0 = bvp.iloc[0].values[0] - fs = bvp.iloc[1].values[0] - nsamples = len(bvp) - 2 - - t0_datetime = datetime.utcfromtimestamp(t0) - t0_local = datetime_from_utc_to_local(t0_datetime) - time = [t0_local.timestamp() + ind*(1/fs) for ind in - range(nsamples)] - tmp = [np.nan, np.nan] - time = tmp + time - bvp.rename(columns={0: "bvp"}, inplace=True) - bvp['sec'] = np.array(time) - - head = bvp.iloc[[0, 1]] - bvp.drop(inplace=True, index=[0, 1]) - - hdr = {'start_time': head.iloc[0,0], - 'fs': head.iloc[0,1]} - - return bvp, hdr - -def load_e4_files(f_list:list): - tmp = [] - data = [] - hdr = [] - for f in f_list: - tmp.append(load_e4_file(f)) - for d, h in tmp: - data.append(d) - hdr.append(h) - data_df = pd.concat(data, axis=0) - return data_df, hdr - -# Synchronising data -def sync_to_ref(df0, df1): - dsync0 = DataSynchronizer() - dsync1 = DataSynchronizer() - - time0 = df0['sec'].values - time1 = df1['sec'].values - - t0 = max((time0[0], time1[0])) - t1 = min((time0[-1], time1[-1])) - dsync0.set_bounds(time0, t0, t1) - dsync1.set_bounds(time1, t0, t1) - - return dsync0.sync_df(df0), dsync1.sync_df(df1) - -def pss_br_calculations(win, pss_df=None, br_df=None): - n_out = 5 - if win[-1] == 0: return [None]*n_out - - dsync = DataSynchronizer() - pss_fs = BR_FS - pss_col = [col for col in pss_df.columns.values if\ - 'breathing' in col.lower()][0] - pss_ms = pss_df['ms'].values - br_ms = br_df['ms'].values - t0, t1 = pss_ms[win][0], pss_ms[win][-1] - - diff = pss_ms[win][1:] - pss_ms[win][:-1] - mask = np.abs(diff/1e3) > 60 - diff_chk = np.any(mask) - if diff_chk: return [None]*n_out - - # Get pressure estimate for window - pss_win = pss_df.iloc[win] - pss_data = pss_win[pss_col] - pss_filt = pressure_signal_processing(pss_data, fs=pss_fs) - xf, yf = do_pad_fft(pss_filt, fs=pss_fs) - pss_est = xf[yf.argmax()]*60 - - # Sync and get summary br output - dsync.set_bounds(br_ms, t0, t1) - br_win = dsync.sync_df(br_df) - - br_out = np.median(br_win['BR'].values) - - # Get subject and condition - sbj_out = pss_win['subject'].values[0] - time_out = np.median(pss_win['sec'].values) - return time_out, pss_est, br_out, sbj_out, cond_out - -def get_pss_br_estimates(pss_df, br_df, window_size=12, window_shift=1): - pss_fs = BR_FS - # pss_col = [col for col in pss_df.columns.values if\ - # 'breathing' in col.lower()][0] - pss_ms = pss_df['sec'].values - br_ms = br_df['sec'].values - - inds = np.arange(0, len(pss_ms)) - vsw_out = vsw(inds, len(inds), sub_window_size=int(window_size*pss_fs), - stride_size=int(window_shift*pss_fs)) - - # dsync = DataSynchronizer() - pss_est, br_out = [], [] - cond_out, sbj_out = [], [] - func = partial(pss_br_calculations, pss_df=pss_df, br_df=br_df) - # for i, win in enumerate(vsw_out): - # tmp = func(win) - - with Pool(cpu_count()) as p: - tmp = p.map(func, vsw_out) - - time_out, pss_est, br_out, sbj_out, cond_out = zip(*tmp) - - time_array = np.array(time_out) - pss_est_array = np.array(pss_est) - br_out_array = np.array(br_out) - sbj_out_array = np.array(sbj_out) - cond_out_array = np.array(cond_out) - - df = pd.DataFrame( - np.array( - [time_array, sbj_out_array, cond_out_array, - pss_est_array, br_out_array] - ).T, - columns=['ms', 'subject', 'condition', 'pss', 'br']) - df.dropna(inplace=True) - - return df - -# Multiprocessing task for windowing dataframe -def imu_df_win_task(w_inds, df, i, cols): - time = df['sec'].values - if w_inds[-1] == 0: return - w_df = df.iloc[w_inds] - t0, t1 = time[w_inds][0], time[w_inds][-1] - diff = time[w_inds[1:]] - time[w_inds[0:-1]] - mask = np.abs(diff)>20 - diff_chk = np.any(mask) - if diff_chk: - return - - # sbj = w_df['subject'].values.astype(int) - # sbj_mask = np.any((sbj[1:] - sbj[:-1])>0) - # if sbj_mask: - # return - - if cols is None: - cols = ['acc_x', 'acc_y', 'acc_z', - 'gyr_x', 'gyr_y', 'gyr_z'] - - data = w_df[cols].values - - # DSP - sd_data = (data - np.mean(data, axis=0))/np.std(data, axis=0) - # ys = cubic_interp(sd_data, BR_FS, FS_RESAMPLE) - filt_data = imu_signal_processing(sd_data, IMU_FS) - x_out = pd.DataFrame(filt_data, - columns=[ - 'acc_x', 'acc_y', 'acc_z', - 'gyro_x', 'gyro_y', 'gyro_z', - ]) - - sm_out = w_df['BR'].values - ps_out = w_df['PSS'].values - - x_vec_time = np.median(time[w_inds]) - - fs = 1/np.mean(diff) - ps_freq = int(get_max_frequency(ps_out, fs=fs)) - - y_tmp = np.array([x_vec_time, np.nanmedian(sm_out), ps_freq]) - - x_out['sec'] = x_vec_time - x_out['id'] = i - y_out = pd.DataFrame([y_tmp], columns=['sec', 'br', 'pss']) - - return x_out, y_out - -def get_max_frequency(data, fs=IMU_FS): - data = pressure_signal_processing(data, fs=fs) - - xf, yf = do_pad_fft(data, fs=fs) - max_freq = xf[yf.argmax()]*60 - return max_freq - -def convert_to_float(df): - cols = df.columns.values - if 'sec' in cols: - df['sec'] = df['sec'].astype(float) - if 'pss' in cols: - df['pss'] = df['pss'].astype(float) - if 'br' in cols: - df['br'] = df['br'].astype(float) - if 'subject' in cols: - df['subject'] = df['subject'].astype(float) - -def load_and_sync_xsens(subject): - # load imu - imu_list = get_file_list('imudata.gz', sbj=subject) - imu_df_all, imu_hdr_df_all = load_imu_files(imu_list) - - # load bioharness - pss_list = get_file_list('*Breathing.csv', sbj=subject) - if len(pss_list) == 0: - pss_list = get_file_list('BR*.csv', sbj=subject) - - br_list = get_file_list('*Summary*', sbj=subject) - - # load e4 wristband - e4_list = get_file_list('*.zip', sbj=subject) - bvp_df_all, bvp_hdr = load_e4_files(e4_list) - bvp_fs = bvp_hdr[0]['fs'] - - xsens_list = [] - # skip the first and last x minute(s) - minutes_to_skip = .5 - br_skiprows = br_skipfooter = int(minutes_to_skip*60) - pss_skiprows = pss_skipfooter = int(minutes_to_skip*60*BR_FS) - # load each bioharness file and sync the imu to it - for pss_file, br_file in zip(pss_list, br_list): - pss_df = load_bioharness_file(pss_file, skiprows=pss_skiprows, - skipfooter=pss_skipfooter, - engine='python') - pss_time = pss_df['Time'].map(bioharness_datetime_to_seconds).values\ - .reshape(-1, 1) - pss_df['sec'] = pss_time - - br_df = load_bioharness_file(br_file, skiprows=br_skiprows, - skipfooter=br_skipfooter, - engine='python') - br_time = br_df['Time'].map(bioharness_datetime_to_seconds).values\ - .reshape(-1, 1) - br_df['sec'] = br_time - - # sync - br_df, imu_df = sync_to_ref(br_df, imu_df_all.copy()) - pss_df, _ = sync_to_ref(pss_df, imu_df_all.copy()) - bvp_df, _ = sync_to_ref(bvp_df_all.copy(), pss_df.copy()) - - # extract relevant data - acc_data = np.stack(imu_df['accelerometer'].values) - gyr_data = np.stack(imu_df['gyroscope'].values) - x_time = imu_df['sec'].values.reshape(-1, 1) - - br_col = [col for col in pss_df.columns.values if\ - 'breathing' in col.lower()][0] - pss_data = pss_df[br_col].values - pss_data = np.interp(x_time, pss_df['sec'].values, pss_data)\ - .reshape(-1, 1) - - br_lbl = [col for col in br_df.columns.values if\ - 'br' in col.lower()][0] - br_data = br_df['BR'].values - br_data = np.interp(x_time, br_df['sec'].values, br_data)\ - .reshape(-1, 1) - - bvp_data = bvp_df['bvp'].values - bvp_data = np.interp(x_time, bvp_df['sec'].values, bvp_data)\ - .reshape(-1, 1) - - xsens_data = np.concatenate( - (x_time, br_data, pss_data, bvp_data, acc_data, gyr_data), - axis=1) - - columns=['sec' , 'BR' , 'PSS' , 'BVP' , - 'acc_x' , 'acc_y' , 'acc_z' , - 'gyr_x' , 'gyr_y' , 'gyr_z' , ] - xsens_df_tmp = pd.DataFrame(xsens_data, columns=columns) - - ''' - print("{:.2f}\t{:.2f}\t{:.2f}".format(br_df.sec.iloc[0], - pss_df.sec.iloc[0], - imu_df.sec.iloc[0])) - print("{:.2f}\t{:.2f}\t{:.2f}".format(br_df.sec.iloc[-1], - pss_df.sec.iloc[-1], - imu_df.sec.iloc[-1])) - print(xsens_df_tmp.head()) - ''' - xsens_list.append(xsens_df_tmp) - - if len(xsens_list) > 1: - xsens_df = pd.concat(xsens_list, axis=0, ignore_index=True) - xsens_df.reset_index(drop=True, inplace=True) - else: - xsens_df = xsens_list[0] - - return xsens_df - - -def load_tsfresh(subject, project_dir, - window_size=12, window_shift=0.2, fs=IMU_FS, - overwrite=False): - cols = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z'] - pkl_file = join(project_dir, 'tsfresh.pkl') - if exists(pkl_file) and not overwrite: - return pd.read_pickle(pkl_file) - - xsens_df = load_and_sync_xsens(subject) - x_df, y_df = get_df_windows(xsens_df, - imu_df_win_task, - window_size=window_size, - window_shift=window_shift, - fs=fs, - ) - x_features_df = extract_features( - x_df, column_sort='sec', - column_id='id', - # default_fc_parameters=tsfresh_settings.MinimalFCParameters(), - ) - x_features_df.fillna(0, inplace=True) - - cols = x_features_df.columns.values - - df_out = pd.concat([y_df, x_features_df], axis=1) - df_out.to_pickle(pkl_file) - return df_out - -def get_activity_log(subject): - activity_list = get_file_list('activity*.csv', sbj=subject) - activity_dfs = [pd.read_csv(f) for f in activity_list] - return pd.concat(activity_dfs, axis=0) - -def get_respiration_log(subject): - log_list = get_file_list('*.json', sbj=subject) - log_dfs = [pd.read_json(f) for f in log_list] - return pd.concat(log_dfs, axis=0) - -def get_cal_data(event_df, xsens_df): - fmt ="%Y-%m-%d %H.%M.%S" - cal_list = [] - cpms = [] - start_sec = 0 - stop_sec = 0 - for index, row in event_df.iterrows(): - event = row['eventTag'] - timestamp = row['timestamp'] - inhalePeriod = row['inhalePeriod'] - exhalePeriod = row['exhalePeriod'] - - cpm = np.round( 60/(inhalePeriod + exhalePeriod) ) - - sec = timestamp.to_pydatetime().timestamp() - - if event == 'Start': - start_sec = sec - continue - elif event == 'Stop': - stop_sec = sec - - dsync = DataSynchronizer() - dsync.set_bounds(xsens_df['sec'].values, start_sec, stop_sec) - - sync_df = dsync.sync_df(xsens_df.copy()) - cal_data = {'cpm': cpm, 'data': sync_df} - cal_list.append(cal_data) - - assert np.round(sync_df.sec.iloc[0])==np.round(start_sec), \ - "error with start sync" - assert np.round(sync_df.sec.iloc[-1])==np.round(stop_sec), \ - "error with stop sync" - - return pd.DataFrame(cal_list) - -def get_test_data(cal_df, activity_df, xsens_df): - fmt = "%d/%m/%Y %H:%M:%S" - start_time = cal_df.iloc[-1]['data'].sec.values[-1] - data_df = xsens_df[xsens_df.sec > start_time] - activity_start = 0 - activity_end = 0 - - activity_list = [] - - for index, row in activity_df.iterrows(): - sec = datetime.strptime(row['Timestamps'], fmt) - if row['Event'] == 'start': - activity_start = sec - elif row['Event'] == 'stop': - activity_stop = sec - - dsync = DataSynchronizer() - dsync.set_bounds(data_df['sec'].values, activity_start, - activity_stop) - - sync_df = dsync.sync_df(data_df.copy()) - activity_data = {'activity': row['Activity'], 'data': sync_df} - activity_list.append(activity_data) - - return pd.DataFrame(activity_list) - -# save evaluation metrics in single file that handles the models for the -# subject and config -class EvalHandler(): - def __init__(self, y_true, y_pred, subject, pfh, mdl_str, overwrite=False): - self.subject = subject - self.config = pfh.config - self.parent_directory = join(DATA_DIR, 'subject_specific') - self.fset_id = pfh.fset_id - self.mdl_str = mdl_str - self.overwrite = overwrite - - self.evals = Evaluation(y_true, y_pred) - - entry = {'subject': self.subject, - 'config_id': self.fset_id, - 'mdl_str': self.mdl_str, - } - self.entry = {**entry, **self.config, **self.evals.get_evals()} - - self.eval_history_file = join(self.parent_directory, - 'eval_history.csv') - self.eval_hist = self.load_eval_history() - - def load_eval_history(self): - if not exists(self.eval_history_file): - return None - else: - return pd.read_csv(self.eval_history_file) - - def update_eval_history(self): - eval_hist = self.eval_hist - if eval_hist is None: - eval_hist = pd.DataFrame([self.entry]) - else: - index_list = eval_hist[ - (eval_hist['subject'] == self.entry['subject']) &\ - (eval_hist['config_id'] == self.entry['config_id']) &\ - (eval_hist['mdl_str'] == self.entry['mdl_str'])\ - ].index.tolist() - if len(index_list) == 0: - print("adding new entry") - eval_hist = eval_hist._append(self.entry, ignore_index=True) - elif index_list is not None and self.overwrite: - eval_hist.loc[index_list[0]] = self.entry - self.eval_hist = eval_hist - - def save_eval_history(self): - self.eval_hist.to_csv(self.eval_history_file, index=False) - -# Train IMU - RR models across subjects -def imu_rr_model(subject, - window_size=12, - window_shift=0.2, - lbl_str='pss', - mdl_str='knn', - overwrite=False, - feature_method='tsfresh', - train_len:int=3, - test_standing=False, - ): - # window_size, window_shift, intra, inter - cal_str = 'cpm' - fs = IMU_FS - tmp = [] - imu_cols = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z'] - - do_minirocket = False - use_tsfresh = False - overwrite_tsfresh = True - train_size = int(train_len) - - if feature_method == 'tsfresh': - use_tsfresh = True - elif feature_method == 'minirocket': - do_minirocket = True - - config = {'window_size' : window_size, - 'window_shift' : window_shift, - 'lbl_str' : lbl_str, - 'do_minirocket' : do_minirocket, - 'use_tsfresh' : use_tsfresh, - 'train_len' : train_len, - } - - pfh = ProjectFileHandler(config) - pfh.set_home_directory(join(DATA_DIR, 'subject_specific', subject)) - pfh.set_parent_directory('imu_rr') - id_check = pfh.get_id_from_config() - if id_check is None: - pfh.set_project_directory() - pfh.save_metafile() - else: - pfh.set_id(int(id_check)) - pfh.set_project_directory() - print('Using pre-set data id: ', pfh.fset_id) - project_dir = pfh.project_directory - - marker = f'imu_rr_{subject}_id{pfh.fset_id}' - - if not use_tsfresh: - xsens_df = load_and_sync_xsens(subject) - else: - xsens_df = load_tsfresh(subject, - project_dir, - window_size=window_size, - window_shift=window_shift, - fs=IMU_FS, - overwrite=overwrite_tsfresh) - - activity_df = get_activity_log(subject) - event_df = get_respiration_log(subject) - - cal_df = get_cal_data(event_df, xsens_df) - - # include standing or not - test_df = get_test_data(cal_df, activity_df, xsens_df) - ipdb.set_trace() - - for combi in combinations(cal_df[cal_str].values, train_len): - config[cal_cpm] = combi - train_df = pd.concat( - [cal_df[cal_df[cal_cpm] == cpm]['data'] for cpm in combi], - axis=0 - ) - - assert np.isin(train_df.index.values, test_df.index.values).any()==False,\ - "overlapping test and train data" - - print("train") - print(train_df.shape) - print("test") - print(test_df.shape) - - if do_minirocket: - x_train_df, y_train_df = get_df_windows(train_df, - imu_df_win_task, - window_size=window_size, - window_shift=window_shift, - fs=fs, - ) - x_test_df, y_test_df = get_df_windows(test_df, - imu_df_win_task, - window_size=window_size, - window_shift=window_shift, - fs=fs, - ) - - x_train = make_windows_from_id(x_train_df, imu_cols) - x_test = make_windows_from_id(x_test_df, imu_cols) - y_train = y_train_df[lbl_str].values.reshape(-1, 1) - y_test = y_test_df[lbl_str].values.reshape(-1, 1) - - print("minirocket transforming...") - x_train = np.swapaxes(x_train, 1, 2) - x_test = np.swapaxes(x_test, 1, 2) - minirocket = MiniRocketMultivariate() - x_train = minirocket.fit_transform(x_train) - x_test = minirocket.transform(x_test) - elif use_tsfresh: - x_train = train_df.iloc[:, 3:].values - y_train = train_df[lbl_str].values.reshape(-1, 1) - x_test = test_df.iloc[:, 3:].values - y_test = test_df[lbl_str].values.reshape(-1, 1) - else: - x_train_df, y_train_df = get_df_windows(train_df, - imu_df_win_task, - window_size=window_size, - window_shift=window_shift, - fs=fs, - ) - x_test_df, y_test_df = get_df_windows(test_df, - imu_df_win_task, - window_size=window_size, - window_shift=window_shift, - fs=fs, - ) - - x_train = make_windows_from_id(x_train_df, imu_cols) - x_test = make_windows_from_id(x_test_df, imu_cols) - y_train = y_train_df[lbl_str].values.reshape(-1, 1) - y_test = y_test_df[lbl_str].values.reshape(-1, 1) - - transforms, model = model_training(mdl_str, x_train, y_train, - marker, validation_data=None, - overwrite=overwrite, - is_regression=True, - project_directory=project_dir, - window_size=int(window_size*fs), - extra_train=200, - ) - - if transforms is not None: - x_test = transforms.transform(x_test) - - preds = model.predict(x_test) - - eval_handle = EvalHandler(y_test.flatten(), preds.flatten(), subject, - pfh, mdl_str, overwrite=overwrite) - eval_handle.update_eval_history() - eval_handle.save_eval_history() - - pp = PrettyPrinter() - pp.pprint(eval_handle.load_eval_history()) - - fig, ax = plt.subplots() - fig_title = ' '.join([mdl_str, subject, combi]) - ax.plot(y_test) - ax.plot(preds) - ax.set_title(fig_title) - ax.legend([lbl_str, 'pred']) - fig_dir = join(project_dir, 'figures',) - if not exists(fig_dir): mkdir(fig_dir) - fig.savefig(join(fig_dir, mdl_str)) - -def arg_parser(): - parser = argparse.ArgumentParser() - parser.add_argument("-m", '--model', type=str, - default='linreg', - choices=['linreg', 'ard', 'xgboost', 'knn', - 'svr', 'cnn1d', 'fnn', 'lstm', 'ridge', - 'elastic'], - ) - parser.add_argument("-s", '--subject', type=int, - default=2, - choices=list(range(1,3))+[-1], - ) - parser.add_argument("-f", '--feature_method', type=str, - default='minirocket', - choices=['tsfresh', 'minirocket', 'None'] - ) - parser.add_argument("-o", '--overwrite', type=int, - default=0, - ) - parser.add_argument('--win_size', type=int, - default=12, - ) - parser.add_argument('--win_shift', type=float, - default=0.2, - ) - parser.add_argument('-l', '--lbl_str', type=str, - default='pss', - ) - parser.add_argument('-tl', '--train_len', type=int, - default=3, - help='minutes of data to use for calibration' - ) - args = parser.parse_args() - return args - -if __name__ == '__main__': - # choose either intra or inter subject features to use for model training - # '[!M]*' - np.random.seed(100) - n_subject_max = 2 - args = arg_parser() - - mdl_str = args.model - subject = args.subject - feature_method = args.feature_method - window_size = args.win_size - window_shift = args.win_shift - lbl_str = args.lbl_str - train_len = args.train_len - overwrite = args.overwrite - - print(args) - assert train_len>0,"--train_len must be an integer greater than 0" - - subject_pre_string = 'Pilot' - - if subject > 0: - subject = subject_pre_string+str(subject).zfill(2) - - imu_rr_model(subject, window_size=window_size, window_shift=window_shift, - lbl_str=lbl_str, mdl_str=mdl_str, overwrite=overwrite, - feature_method=feature_method, train_len=train_len - ) - else: - subjects = [subject_pre_string+str(i).zfill(2) for i in \ - range(1, n_subject_max+1) if i not in imu_issues] - imu_rr_func = partial(imu_rr_model, - window_size=window_size, - window_shift=window_shift, - lbl_str=lbl_str, - mdl_str=mdl_str, - overwrite=overwrite, - feature_method=feature_method, - train_len=train_len - ) - - if mdl_str in ['fnn', 'lstm', 'cnn1d', 'elastic', 'ard', 'xgboost']: - for subject in subjects: - imu_rr_func(subject) - else: - ncpu = min(len(subjects), cpu_count()) - with Pool(ncpu) as p: - p.map(imu_rr_func, subjects) - - print(args) diff --git a/__pycache__/config.cpython-38.pyc b/__pycache__/config.cpython-38.pyc index 5bb18e1613b8d7cef640aae6b04dcc3e7fb98408..d8e616259cf79c05b9bfeadef59abd66c919dd7c 100644 GIT binary patch delta 271 zcmdnMv6X{2l$V!_0SL-<Vp8W%<Ta{iiDF4%jbcq<i(*S*k77^Zh~h}$jN(k;isDM) zj^a+?iQ-A&P2o%7PZ3CGiQ-KWOc6>EN@tGZOA!XrOi}zPA}OLd0#Sk~VyS{zLMh@l z3{aK?kR=HglY)y$!&x#AmJd*iERZP|C7dE3B?2@|AxAV<EJ}=#Aw@A&ESs%pM~YIE zcpguRGMINHMI}l+MKqW}Q`POI5YQtpQ$R!oh;Z1<%D9)2@z&(SOd3G)4byUQVW9Xe Z4j0D|$9Nadpp^_oDnNlEmC37^+W?0_LB{|9 delta 266 zcmdnWv4Mj(l$V!_0SF$S3{71yk=Ll6DT*b9C5knLHHs~TEs8ybJ&GfRBZ@PHGm0yP zD~daXJBlZTCxthKFNHsyC5ksiAVn}mFr7JyFGUDQGez;I2&ahT2t)~{h^7i=38jeH zFhE)2K$ZkpOcE|81!qY^SnfbAGC-zmlyHh%lnBr;`5e()u_!S{h7^TVv23=Y9Vv=Y z;(0tNN?_iR6y+%K6p>&CO%=D7LO_qaOaT!Uo4FYGGBQR@KEb2`BtJ4O7Y9k+;&5>c Wag2BI3|h%hqzDu!Qk=Y=xeWjk=Rke{ diff --git a/config.py b/config.py index 0e64588..67b023a 100644 --- a/config.py +++ b/config.py @@ -2,7 +2,7 @@ DEBUG = False BR_FS = 18 ACC_FS = 100 IMU_FS = 120 -PPG_FS = 100 +PPG_FS = 64 ACC_THOLD = 10 WIN_THOLD = 0.03 diff --git a/models/__pycache__/neuralnet.cpython-38.pyc b/models/__pycache__/neuralnet.cpython-38.pyc index 02fcc1fffdf3521fed55f79a6cb176a8e820878c..cbe052be930dd43036f0f60dbcc2069a4a430de5 100644 GIT binary patch delta 20 acmcbUd?%SZl$V!_0SLNFVm5MLGz0)cGX@+0 delta 20 acmcbUd?%SZl$V!_0SK0?3f;(k(GUPd0|sOO diff --git a/modules/__pycache__/digitalsignalprocessing.cpython-38.pyc b/modules/__pycache__/digitalsignalprocessing.cpython-38.pyc index fca6f3076758c0f03a274c04595a7f65e80cc225..d945ad7ecb679cd424f019071cad0fa8b6368f90 100644 GIT binary patch delta 6752 zcma)A4NzRik=}Xx_J>^-B!mzk5CU00D?kE)gg`&o5<)VPkz`3At$tRIH*c3UyKh(h z76Wn|&UZT5u_H_J_~&FhxwG#Qg;b)us!dgLmmm9n?EKoX6UV80sVnEll@uxG$CXOz zRCTegdmdt!wXf=+_})xUPfvGG_e>9u{E+?fEmn3c6biWT=S#;<8M*tfmesKhSDXJl zoob7;F&B67P;yPGJ<^`q6xoFRPw;ZS=mLv$@JIP#UV*Jn{up1vE3vhif08fdRoL3X zFY#r(8e3ibabCk~v9*;i=XDock#2sO*Yg!f>ET!SO5TXAZM=!E!m;f}YmD(b`05MO zEV6^I;cJn&ldt1p{PyzoycxfD@~ga+Z@_7H@r}F<DZ6+(--O@Yyn}b*w~ue;TkyMw zKf$|s4|4W%!FTYT*xJjV<hywvw)XKoydSyl=6m@*{0{J^jQxDzdB*qi1Jf5gkpra7 z9O)iDh?IMbwt31yeh4XpoFSJo$90(Bi?o9aXh--_q#ZIgqGiYU5H5U}Kh2Nx6WF>J zIoBB*DqTjHFy`hFeiG?NjA=>6DW~{pBpl_>@UvXQ)-nD!e1z-R8p6$CNj2iZFHZ(j z!;xXRVMg+J>O|xOwy*K~_!x3V__N&PX>6V38Ga6JJ;i}r_&v?Dd>p@L_;Y-UpGS@; ze}I4Nf~zNTmOscpjzb!s=6`|T5&jVWOZ@8md4ux{=%&Bs7tu0<e~LfMAHk_H&W^ak zpU&-bw=?Du=hZtrw)fDXz2Z6b8=*Te#9%eTj0-l3m?z8H#bMuFgEQ{IFoRYaV797F zCakam?ZoUs2>Wc6YgrANiPAzqtk6k7(rh*b8>u6qP2A^Q&$`6R>cD<jh2(x)9ZkeX zB`d5UzV!%Uuk9nUKAtU*xq*ha5|H2pQrG*|upU%**Ik=vx&l4K;V1~3sohQR5&FRv z8YtB8EnhR+_Jn`<(42lFn#qgx^E>ty36%;;KYp~9*Zu3*4)KP+uYXR<9rIfH9WA97 zi=@uG^24e|N}AbDaj|578`)@k$0jt0TZPt0a&nWIh7`HPQV?7Wb_*6b-A9tcimmGD z%oLPU$0OCSgA)l1$BC$wIBz&xxeG^aAMJEBVHPeRUJR^b1LC`ZBdkR<1lM_`w_C(L z!L`9cuTBRW*-`OA@a6tq66H@OOe>@5hV9lyELemzSWFuD97oVj*=*2eM{PD~D^oFB znT#!1_j+lQ_bhVhide*}#ji>Sf^vHq(x6Xl4{ct3oSJeucShs3&pJ157-$hq!w_>@ zi8!=~r$gZdntmtL$P(hc(Ee^|3RxcU=@Z##8l$nmB&6Fu7&oIhHiR8ZMQ|yl43cPP zrTCqvL|iIMt~rL(K_Im-E~PdB)FEv}lZi27-d@pAzH)&Y1LdvE%1xF>*fdF)y@fwg z${-9={{!rYy+AhF9&1#~7^saQ8*&v$(MXA5j;9RJvPL+V_biw+=9k-<G)zci7cvCr zXh4~4!@qX?+XUcpg%;+=nZ<qH2Z=X?K~yPz+FBxBS$z2(Qqoo}EoI<w(T|;Q07Ar8 zN>C<8O`b?OF7SepF|;h)PlFQ#bjRVAjHf6F#S0aU3%v1K#TM^H<iOp2KeR;DE!p1N zf!!I8V{1fC*32n*NVZbi<c!;iWlwfAo#eKf(yXz<?Gw)|Y4$!|C=$19$;#S?kpzzr zJVsEk;UZkw!Y+%U%8$kF!y(jjKEU7v@wyRgcf!n~=`wa?6nkS<)HEh+UpAUeXSF1p zCSE#rY(=+hZ%os(X+URN80QO;S8QF{?0p6qb+=fts#-j>bZV$*$txtri%S}i376zo zg{!p469n_i!IRW|ir{I2Lemaat!LN7163!7DY%?yWkAx=%qjfXN(|)rX?x5O%-x8` z#GG27E=}YJpT|Fow$sJ@td#j(Dg!QBm&JOdDu0Iq@B#s)R-qQywM*hgHj1G!X8jX( zZ=Y8$ir1FCdYV*=KTvsN@6XO>d*jbuqo13-VP%#a4-|lN#Mzf_H+nG`_uL*u2T}gX z&9c>OB8jJul`w53lYmp=_tllv6pomZ_u?m_VrGxRVVck)>S~%6#N*vHt?UM-dA)eD z=GpzUw9HD!aY5<0e011uJ#7KG#rA5TnQ_Bb)23nBYAl(^$Q-ae8h|#1YE0GcTCmcq zwVT*i#Le17Ql1@Z%Fs+yJIn2WshJp3QOt148PY)>(@JXGRxA!*B}&02)A|9N7`1dQ zX~1`}Z}|~itPFqg+<FPyv$TVt<z8R@E}Nc@1)yYu&(b)Zax+T7RZ2-d!{D#*gAS%} zg?a*#20IB>5;PJ>_tU`yl*|hP_LE4kLs>|mf1=L)Bf3^BJO%U>eeB!fb1P2nk(&J* zPMIYOq}!#AsF_@H0I4%-K|Xww#49PS&>>=9Lnr$_%G);$pTY4s{^5^$r?ZzN9{!bp z5l9z&k6N!06ghV3$o(0-FXN_`jAkIM8x}@Z+=DZ%Mab?_RHiV+g})zo-DM2kK$@=5 z2>s+9Z7`Ls(x~g$%`Z3?$N$_bt0hMpsTYOZpmV_fyJ%}#I!zWAdtbU+o{cK0=D0?> zUmhbA?Y2h-q}&gPAz{Hi@gF3Vb_QM{c%9%?f=dMCQ}`A^#?^pW<dPtyIZwn5h+3JD z_(juZ_9OB7^7`DCRsZOO)@&=y%{!OkD{o@B6y7Al4+x6FYThvTA&pAg>9ESj+lOK+ z;>AlLIs%R*2lG})qogTC`j?IxfGNg}^JqXP?PMh`;qs}9ss!Anc+;1$DtE0?tH6(Q zxz&4F*LO&TCIZ@{u-nN2sU1aS*b@n5@Q${|!HCMLnJx%;RkW|I+Fi7G?L6a$g6bft zA`{o=M@1?a{3nUI?h<bX>cnT)zR)Yrqdp>z(6T<~&J7xq88jz~m#_tNy22>}>9<Fz zWjM?^adBP4+C$ih1g-Irxn#6p6e(D)0Tgbu_};pj`ZJ_a1rEDP7qO~a$}L5$!XQ^} zdEM|%P6=OR({w!9CG#GYyN>nHe<2xYDaaDY44_2A&A)*k+nX}*{^sx_8vQYW+<K|K zjNx~PHc9Xj0R_8^iEtTEI>SB$yL6&ruAm)F#Inx4^F>k9+|Zf9;fVi!L&vg~Tagko zov;j9>B*?iM@hl<h-C9hHZCqTce8iJH=C^<sr64$QvXq`$f%}(T7oKq%LFw9KSRL1 zpiHvWbK_VE>=W6R&iVq;G*qgO8v0mNmMzvb@k&dh_m?z%SiILVHcies+8<W)w~H)! z;1fv4_;PAGJU(UC+P;hdx?yJFBuRGbqYFwPqLqVjpP^@?8e|QsL_SAb8Nd;`ylYv2 zF7IDsZ|<xTZ?_K1dQ|QgMtD9G{{^|i9=cbt;FR72q&O#qjPoKp%M)e{&xRNnSeBdm z)Q$PDX4RNk7}oERL}q&?jHEt_jQ~Cxz=-F*u;FcWi97(Y(2@$sl_KJ)j>hVTa1u_z zNW+2{iwP~81&pTC;`NTRjYTRf(nskwj^bvRR1R2gi-VobOwB#e*{hDw;ccsllopp^ zg;%s0PZ~#25jQvDc;ylU_z3b|@M9O_nkpU4m3X``SBMtW)2U3-n2h3Bov7;`nwE1H z(U=M`j7|z`+dZfaVrhuaHZ6-WBMYYh>EqGUMl1$@MkeRg6eOw}PnF`8%28NUawqfz z#(-PFoh`+AuBD5VQpF!wNq7F@-22^MV1qK3q$VMf=fCX{++k@7JLfF9(1B2a%<D<6 zJdT0^lo()8A>Qw<%-!1dKb~q5Y5Pnrs~g*=Sx%XATrAnuw5CYdpOCj_Uf4Pkg$n*D zcXHP|N*`@E)^h)ipBcqgHnhq2%8c7C(MQM4u_!%bZsMThDk-@QyLMoEGGS%O={v{8 z$M<w^d5f5?5J=rr-AE**&*w~aq%@#mjzd>at4;i7PtB@0Dd)whuCg+v`U7YpS9!6b zG5}^bk;q4L;r`b=+ZK9L^1*LuTrN&FKpD=(J4!lNr5FXd<W13jVE>206k`Qni>9GL zGz*5w&x=nT=wL0mHxG2H`66u)<yDflavzsc>kqh;?LA^Z0^_5KD0^seN>w0tV-QOX z+(`^_iYc36IjnaS#S-9u8eKQYdU*pj1~5AV%!}LOSNtm5a7%SVTkh)N47*b*K+8fa z&cO%am@$FXA+1s@v>h3KGVjuW44wAeT}Sq~{m+qZ3=W9LhqktsliswWm{;{A)T^h> z7{1+$;%6m}eIWiY^w<h=F4Wzg=I~K80I}Q>&kyhIlJc0`c&r(vSWHb!Ehkwh$upCa zMN_?QQG2|vEx+z&l!vS#F!;D`a3|f0Ci&dr@#9rH$uYJ{!3e)WRcv<(53_ur$P1Wq zZ!X(yPX?a~DSRT}`3U9W=f}I&&-tJWxnL`SjPMF8GnS3YGcw8z(@JEgX2tFk&8#<< zKH>FL$jnh?$kV$4pjSM6roQI?a?eKb^)s+{IcYNY?+Q;c9dB=&^Ac>Nh4UVmcWVbe z!sNd|yjbMQ#~65-xIDB>T#7dJ%(+_z-foi6F)wCb!sbE=xHxj4ATb+V-Yg?p#&3>@ z5rVG~P(s;$Jq<vchu7k1vGHtcP2aq9xsF_dXcc1o>?+omyLR@ba`#8%iVRvOQRM4M z?n)0%ynQ&_O@b*=3SA(6qSr6kLE6h>u+m8n7!X0O2j3^=J80T<F~tYjYVj@nXK;Z7 zMX#}p4T)i+VVWvC4ErXWg7(wLsgwS{B%DqY_tQivgvl2IlAa}!%qj&wP3^xWRu%r9 z+W$cC8JBC?dE%i4FVm=kB_$3JU|SYBzoIsi4?&##JM8tt?}>@5L?(G7ddUhmtRkDG z;p9b}wNKLfrEo{RI9{(AK4LG%TP~AKi)^ehB#$f)zG?A)lD*=^*q7M>5s9~X$7q>O zaV5TOx{;dg1YHC@1ltI95Xkr6ZfXq>93?nQkRV7BSOgCeTqKYc+huA!LvW3N6TC=3 zbtrs=;3a}9=q49@o0{Jzc!MBcp2|}E9iqKUAS<_DQ;UijC-Z}rp2$pXu&^|-oNGZP zS1l%>3ss)X&ZW^pT{?V$4om1I=-Wd00}?ktEee`~Xq1sf0dM=ar_y{pY3xJ>m-U4B T<7gwR6U!1^N`+qxCVKuCIqNJ! delta 6376 zcma)AeQ;dWb>I8;L(;A!+p=tZA<L4D(IUx`ZP`Y$En{1jFv7xMVOeX!dbRgychP?N zyj3Lut=!Zm5CX=%31H_7lGamqLNjHPp(!mmO`&v}P#Drb`qH$JrWsV+WJ;TMm`reg z=RR556=bHG(eK@N?z!iluY2x!`?DX4N8T2d_f%B)UHF@Q?H=>nQ|Bvh5Vhyqemy5# z(j_aBn^Ik&uGE&$7QBB(u8=Fwh|pH~h+HMBFxn;`m8)enM&0tWa*bSz(RTTmtdX@C z?U0|7b+R6#ow7mRaK;t7MLsSY<vLJ$<P&neY{sZpw#W^b+hul42)R+-2-<Erw@Kav z;vU&5gLwAIn`IlGd*wOVAvc5NR@o`LK-nj^$gOzpm)m4Fo(JT1xdYFB`K0WTz2H13 z&&xfs52FG3`|^P7$7oO<lmp<pO%BT2@H`}ydAq#*OF|CGJI;7RLvyUn0_m_E2I&qd zKr<F-cgiE69WJ8XCGQ4p*zANIN972%xl?{#9+RUO9RX*nxw+b91{o*Z^0*uW{Vs=! z6(M;7?{~{D$opjY%$x`vl~2i&G6LcVLLri#kQdMKWFU1<=pG%OX!2NUG&G9w(=sj- z;5;s$kx7}tXiTPM29||nR-VH1gd}O<d9S=6r{xSdJ|$=61EAa|ACwQ_8J52-e+SQ# za!#J!h(SbNG{@zq;gsK#XJA=Weo>y44`Zp6;;1Y5rTl&FEkbzIlb((0FFY?*Y(N^( zM!;nkZB}QzInkvuCHsaiyN82<+Id3Qp2$qX3L4bK%w9mS#P-NYE<!gmrHz4EX&VDe zv&AfJ<`)f}>Ji_~Vu$)?&+S9h0`h?Ei6>%l%}O^iUl0)V*(FSlPUQ+@cJTC01{Pc- z^|q2tq6g~k-?xRA8`Q%bj)K(9<69U$K|k8g6NMUnP|_xP)w<HrBMS<SN78b+f~AhZ z%cN=zwIe@S%~wlX#cuWU(*A)3HFqznS?Z{%-MCEZqAx$L>r7dj*rQI4R;xcN+u6m2 z+P=x@2*s>IlQcQGHJzbUFUFPua0%M4n*5*YXX!!1_Czz;Sz5s_Uag89m`)%Ir^8m_ z0n>@+KFr!B9PV%;UATd|;%^m))V%+wXj6Lvtv>DYHZ>WzDNyLzF9({%QT0mTis)0P z%bPYiY%Em_+hW8PGqy21VH-0OMbcj_ZxLbjUU@8_11PY74yaJY_8Uie*vA6g*@(1D ztW#5_33bCDA&J~pB1UcMTNS|~HQuXe7IEdT9O}^uuvwv!=yWceLB18}%W_L-Djmk$ zF}zqFfXgTsG^w7be00+Y-iC?QLB?q<{Mb8Vg_DU%Gg#t86b@OjzG(aSiVi{fr&ff- z97~wLhCeN3n0S}@kMJJ!5!+^at#~ABLJLz5^e&X-nKCmyl`<)kGlPMJoj|4(&z0&J zCCxOYnU-P5@`N$dh5s%1-%6)ku8QK!e`r;|cu0MB)t^?fUbe@Iq)eRsgX(lu!$xhN zkIbwY$<Y)`{4EB)e&FY+6sL`PrK-8e)c31)h_mY5)$M9{b?e@(;Jxg1Y#7t)BI#K= ztw*WI%w@Nec-~w*la#h66|p7@!=b*tx=lO|X?N5=j5YKbhR-q-#7qrVx2tznd(}+! z%v=JK&}%VE=s0t`0k%7l&ISETj<bCeRyb`=+a<YhE|ZHS>0ajL>R=mD3)7KkE<^B~ zg&h6Z@hXo_jVGcw7CDo$pT{C=rOTx<YpQ%t;U(%;|JhKhzPe`i*fL$tv3#GiELgUr z4n2tpI?u4E2PuAK;q(QDLaWnjZx+v}3u{k|atb*)DTt-R{uw-MV}f-3v%Tr@MA`~@ z(>O89?Vu=F{}}%k$JHfDMY$+-c?3n(P|ZY>R^_Xhpl2EQ;1p@`W=*|#PQ73AS`%v# zd$9UX2H$-kw>Nh20{?xuH)zbW^JzrASzCK|za9c$IA@oRj_t`Mk|v+rv~6S)bOIB$ zk5Ga$Dci>-458ul!}+=kkB|q)Gfqc%eVgj7YbnaVSY3yB9tEdSy-;^?h_hNBF~>gv z?VqLWu-(y&MVG;3`y$veX4;-i+O%xXL^6@po7&z8QDl}*sHf}q7wz=Z`mN$cRnw3_ zY5U!V#+Gj0iydSesVUpAB>g#43S)Mqp*ip}i1U(XPUf#R{8Y>>hL8BP&>!+Vmz~Q- z!R=MS1VNw11HR)7qeeieEGNTOG?Fxl_As%DVLgNPFV_t^3n<F8$3U<va+H9@VRdL- zamjvgUB7r;U0HYHpw{fWV4g2!Ib8O%j;MWlvqK;RJq0QJ8VImHQe^bWrf%{5{GIC` zG#pbe70Fu2l<(}-%>iBEN#2q;bi>4U(_aCCD-j|qH;dBfok(U-k$CqdPZ0aHpwM`5 z5u_b8wG;Rh&Mib#M_RjuOcq^OO~1!t*g^Ey41dS)D#Pa(*g*Ol23=tbVhfrbpp2A> z7%n3#>xH+hc2Heq9^o<k?kZ8GUf<9n-cnZ^8uO(azv)DNUUv|SuGQgR@<Onje!#oE z!LS(hWwz4a^Q?9UAOEF9_*l?GJ`~;XI61a$S+Yx;sf}A^QhD5@G>)SA0BmYku}zKQ z?fltIgJSM)d8=jy4p-3aD5bUF<Oq606<OS2))bjx-7mH=FK-!g8j4gTR5G4*=9cYR zzqpd47)ge+l!-!CA{`rKzakx1xm+cs+O6~tEclX3{lb5PnrVG*FJ}%NV2A)vHTWpl z0dpn`L&7qVvgn&U%Z0`%2#@lJ&m@>%P{)H!H{FSsP{5iR4?8`PMKMrpH;O?~a(^XQ z*LaLos=~CZe5F`>&A4WG*19W&an0wZALV}-d|1qN^Ww5aUpm#upnn9Jcq_6P^m*eV zfUrN0hwV$5xEUqA#j|fS=u@q=*VoFAm^RIz)1R+coejZCn&9yuz%HMzTIlQIiHV$3 z!@r^4Z)*}66=<)k*V(?53k9)IeeLT-N`>2d#5?LjyVa{T{3i$<7(u`CIB!wUu$JKo zhB}5H1K>(NLAK}A6zcb&8tCY5ED+68<<Ynqoeb;C-#V|J?r8S?gqI&vFLzANu_HzX zP+zpA+VImn`v?Hp<#arVjy|n+N!FyOna)uN)2Qq5qDF#gE6BXxjOM}-%9(t%Kg(ge ze(BW<KVN!317lP-%P#dtn@9KSRN~kmzqJP1!{1?om+vjKW7>PCKyfOB&hi2~)xOT^ z)t45M7WZqds1#L#-pj{3uX<Lq{9!aDTDBH4refXAwWmS1z0*h-bb>K67s=%aNpY`w zw)<prfplhD8IAMJ6^5VFe3M#lsOIf$!j(U~eXnO+U~AixNJV1Ay5l-a$CBnqP}pv1 z#?UG-1GyQN4D{d?MV>Ezq<kz^nDR^uL^G*u(wqrnu3G)P=h&QHiv|dL(FBr=V`jUD zBg1H;_y*4*9ds|@V|~2Qj5#quAA*T^5tT62jdHPkrxg$u*4&9`0^$7?7_D+FajjWt zlpCe~_2^fqBLBVKZ;0WgL@8r=rPn=z;AvCDDQC-K2T~=NTM%?ED=t^S4~c$3mHEHi zz0!xf{f+%=#i;tX{Vki8t@{AnTNl^0@+#=^LB8X_zZm@-S#;9>jmKrfHa2(Z8{@Lu zE{n#e(vx9s)qjae$17TLCthv;?qtHsv48tg>c~LPjyIX<90U6pUu|Y0solF^l_RAA zHaNbk=TWD6WuR_Dn3eNkm8-JSsQrl6u~&RnPNG|w=i^0>=6^Nt1MjY4$7w$Lcb?aq zvkm0OTHGhvj~-4((p&ygxepJ0EF+N?_##T15enzX#J9A{9Ns2c^It#Q<5}u=h9Pey zYpauI6_0*_t!&>>lM+aUDyH=F=6v9Y-Hm*8k}`>W=Jc@zn#Sn1Wobk0SbB-AuLh5+ z*^eXV7d~!njZ%-;e9hyg_I&j2tk|a&;B6_0HTVLWG^f#M>0DhF?+!4$z35WE&XNwH z&Q;dr+eSLvr57OuAs@om`7Ip*R-Ogm9M!R8Uo?}Rz{lw_p*LXef7Gj^kF8@5Q|<L- z4j-okG0Xp|^zp%+TAt9+M|Y^j;wa)MIfsJ}b#`WE+0u^;b@h0E*V4ZDaS(di6=ce( zs41P}wQSKfL!BI3+sEFqJ)Ds=4^?b;3iWm=TlBrjXKmqh+ukhN=oH##GEY%JeQRuI z)q)SUfs48sI@PboYQ(#0O{h)m%I^#LyjA+JcyyNII{>-Wr^dpKb-&3OH>q=B8m!@c z7rw&1$xLiZ*Md(7#jmLEzV5^}e2VBVdY{X18cZ^Tij3B%@X40m1?TEq+p85_6tftH zjzS68931>@^U}$NHl4&eWnW<87{f~pe1`1OXoiSGh5PJYB_bVlw=PN7n=vcX{Aw_= zLEM_3jC^Pu`h?@5^VB&T`ujr1U<VdnuLlQN9G?v*q&n`M>Ur6?dOK^ci(IvHB4|MU zSVjZ?!OZJ<*(Eh(4vS`W(M%S7>#Uln5yL7t(KI*2I|<$R!Tx4m3CjPUq?rx6ofq<I z$g1FnK^K^$&yYc1=J8jU?xC;o`0EURfEM2QK*Fk5c-BDQhY1FZb$9ZwJQli-#^U$! zHb7UIiLC^azV8gE0e>~&;TX<d#HxNxAJD=b_3%ZQ$jWhh74EQXGNT4!%@w*zdGTWm zen0C|x!8;1Hq{>Q^4-tdbgA+9uDLEA?qKL)=w;Z=pg)}U^XL%6Q3n3)isB5D48-sd z!&wI1Wj(>8rx>1Lh%!9K@I1qd3@<aBV|X2Ia?$sBxYRl7E?M`B@9@M=7`WY`_Za2@ zcEE}zva_2lv>TRF|6S~$3I@I;bqRKEhgN>&Dob1yX%~ae=r>r}fS%!0Dab)55qY1| e-KmV6N}7FOa9K~Pw-U{wR{d9Er)s~y_x}JW#?4Ov diff --git a/modules/__pycache__/utils.cpython-38.pyc b/modules/__pycache__/utils.cpython-38.pyc index 29f9b792e78cd77fbddf620dbd8b764782742cce..0ae60611424d313f7060707ed9e97db8ad855b88 100644 GIT binary patch delta 2196 zcmb7_YiyHM7{|}^_TKgKu50`DvUP3QeQaagnYVFO5Wxh|fk+*k71oV1-@cKsoYI7X zei~VF5)=HOttLjJA!>~uOfbd>UyKlm7@b5h^2IL#F(!O5_?)vAI$ex5d7tz2{Qu|n zd!N&u8#z6a-0gNd34Tr)`^J7BCgd0KrT%04AbA>K1AiM5sg1Oo_MC#fkI_wZDec2- zE8R?&(-oL)qbupEQ)KVsbTwTwHcZWFGhIv9p|YJ0(Fag@f<8ze!gdGULLa7&pt6$= z)AiW%BsK3M-H%qflymO5-j2&3Y^d8q6mthY8}Z3a^4s3v<_2b_WWu5MW2=M-MpKw5 zB=5?*8KLG0n|`3`IAB0VVZdBzg(}oCqvS#MD9p{btBHb#LB>MuIEN=?Wp<1<>cD7c zR{cF{!pvbfGYuJ6$z2v6Pc~+F4JfHhDMJzJ^u~GJ@G%cFQ75yo2HG%V$$RBa?D`$% z%{1Xo6iTRzx@XKlvc3ZImSiV$F?U)in}9IOEac*bAMF9^(MS>S`#$PrYAT3*8go!} z2J(%Vhp3k}Vvb8@!jv$dn4X!@;hVAMn{e((nc$4V*(97IFGZ_;&6$==A(r=vfoWwI zL2~{HWmeI@HB2;6M6=OaBzgZmLViieFA4btp;^!`AtV0+Mt)?}zktzwT`)pj5{;J< zWl}f<*b&DCgUp|?;bToJMFp2OQB5o?b070Bh7`BD4bS!usEaaHa{2<MO749eTc{xn zAYlLB0Q)7>Kn*Huq@fwxf5IBe2asx@N;QmB&3X`z6cJQwJo7;#If8&y7Q}Zeicd}1 zFb2~KW=pUPRSDUouSUq~0?2C0gA1sscvfA2TDSyDl@B)s3hJWNO!}D`H7w0qDSGZv zGgqt7l>2Z2F;niAtfcvx(-x^nRRY8JHeZE>|3ys018G%aQAuo*(Z)=8i<<G)hOpYq z`Vehlp;UX;N@Jp(h3rRIwXM>95)U4iRohV=<(0$G*olKJxjTHyuN4w#w&)QwC#AWE zIg5}Fvv8@GK^gK9V`<%@7F_f@7Ap10I}^&pN?{hktB)0mz5Sv$yLQ@zc}V2XkZccN zE7e8E6y^u*Az5XFurR|Zt7l#!H#USDYm*xrxWA3;r+CVCsqaO-L+Z=w^w&`Dl=?L4 zT~Z&c(@&t@E%m~yXy}oK<#mSrXy}!OPf%Yf^%Zsc!>IR3eGc`0sjsZlW2g^E{SNBO zq`vCDj=yPEJt;U&mPuY?8e+sEkr*|6LHIVSFgHy5YA^+xyoN~o>yiH3=y|HOI_e&* zOa%v^a#>pst~uv{QRCp@RBn8FtP%*10G#F7$kPzz7a~tN=j>BQCy%G-*dZQ{J`Zp5 z>F8EC!M}+9=)P+=d6XcyzzQyf?`e+ltIfyZ0^iwUc%K9OM>31QDzJlov2vy5BEUrD zZ0oZCqx_HfI=IFM67OnXqodoRk6<%r8mBpZ4QBfH{C1)pid;+XfuH#c$)@GkMR$*I z`HzfFjHQl^XX(*nxl~#|dL%VYPm1O2C=+nm?BE3bM&;w=X8>pTv9>|DS$U@|1n?#Q zynU;BNlf*k2v<b-hDSSwn%=;)+d1c;qsC}zay)0~xA@_Xe#ceppMxp=H~xCZFucXD zcl0U*xV!T!xXgd(?1i7Wt!og@^L1TOu<+eoeT}vqTej@bUlMb_EJAzbOxLeU>jjZt u6ycf(H$=E80$vHCTlg!z9Wceu_wEl6@~5*vw%g5CQ0_Vtc9%Wm)cyhn;X9oG delta 2221 zcma);e{54#6vy9r@Aa+g+P-eD>+61ZV?WxBjcxo!R0fKG3JRhF4P|gQSSQ8YzCc{= zvww^s|1=r9N}@5Q^GAYW2&VpN3?^WV2}Gk&Ga{Oh_#;M)|M`b9&%OIWX9%H5&pq#a z&$;*X^UiBK-haNoX?GytQ}Fff`@08E^(o2?aZ&s-K5QBX=;LRhF}aZ>Nc#!cyM=5b zOGzhsTggMDhxDSijVvcCPAGdHB`e9Q!3{(o(#dMF29?LiTCxt6$H{$UJ+9ly!{mPQ z04h7kgJc6no*?=zC9$~>SI-A}56XV#TBycVTyMg6X4Fs=M`0xd@(NR!c1WFqG1c|) zK*;VabNx&q`bY^qz;vRFl+ptZq*Mm1Y)B<4@#It&WJG0t{$!}}NPt1gL%dZA3nUHZ zMO#8j(R!IdKOh=<rH(IEmh$HVQz9pru6Ila@*yV9g(AItR(C2`fN8|XJj_SRavnD* zJMq%LS&;WsSlM9S!hTd%#{5I-lm-g(Oh6{)gjq0ENrJXA26+7(F`1c+V9aKvB$NX; zgnpEmq#XTn7D!bQg_X;lbj+y88Eyp&=BuX^+)-p&B2)PJnz>kQsxDQZb<0KJkh%-; zk?@E*q0(<1jZ}_dvePi8xY*tt5=$bnBoa#?nE-kfp;(KcSO}$S5tQ4yfP%V27|S<K zDThqJKnxFzFe_EUEIw0i*I0KVhK+5z3o|uC7co;a7m5glk)QQHewJhwo@d5FB$_L^ z7i!s5y&Fbq;W=tmNKL2B_#vtVwRv7{rPGA`Oje1vP7IToEJ0g2q@uS3$I6rpHM$BJ znu{2k$sq1;CS&Q)B9bCYBiRTuQ%lBFyg?eV#c|TFVCEgUX><}v%ubn3{8;NHH8~F` zX)$ASaT%@6>Daz!Ld$%-h^3afRm3uqy~8k?5@uupCLV%7>vG<E0g5~$_<199JnJRT zolY~;@S$nIht$Sd)9DrxV|KD7oi#|Ka3&!A5e{vWOrJm&OXSeD7MEdO8x|WGqZp(U zPbRSX7*;o9wTH&Bx<yvE6KxFKD2wJh7)*g%<t#1Q&x39JoMm>tQ$9p?sy<)Es_@~* zn}N|T5lycd^`jpZ{xeFt9dMQEU266yX|-QrQHDFtQD~KoZNbLcWMj(~+Q{1JPq6d2 zOgM}Q37OEnkYK0ZM7>?=r%>;ZdQXx566#B(eirpksrMG?`%v$a`sb)Gllt;`J?q-( z2QZ;qCj5#CJu+d%d;;qAsQ037Py_YlQeSyn$00Ll_bf*@!Xii!zXT=42;Q>#1(q8} zE9O}WHf{uouI(yDdbjdftn;IlSW!3>Sq9w+m7d1mj%nY40q5Z1WM+7DFah)oh7vk` z1s6Igj0n(I(G<?uec+MTg&Wa+fY<r@>ZhTa^XjMgU)B5J9lo<>E1cwS)LbtE{gzkv zfj(nsJX;$vbWr~>4E|c}I9%clb&k0o@MlZMZ58kWy<B*!?lXXNVYuO00KzZF*1|PD z6FX;ri5UqG?Z;(W8zvdL8a?_w|Fp3MkNTx?5B$XYnylU*MYvsLg^vu33?`2cr^(Ud zndA^XdL%ha#>H`7lojxoc)<tsM&V@BMS#=%k>(z_S$MHI3h)J=Y}p#RBDOjv%vZvE z&3|uMXT6PX!Z%$?2AqN9^TQd3{>j(0c9njE@o9L0{=tv7Zh-grh1L#rM&UoSUV*Fp zTw4cR=ijyUz(rmfuK}I6#5?`Q_RX8O)01NFmkS5uzo`wEgnwC>Yr=?s8uX?xGs5V6 eq@xwme6nMo^{+3h?C|Oas58DsuiqQ>+5Z9V<U_y! diff --git a/modules/digitalsignalprocessing.py b/modules/digitalsignalprocessing.py index c3ffa19..c4f4f41 100644 --- a/modules/digitalsignalprocessing.py +++ b/modules/digitalsignalprocessing.py @@ -17,7 +17,7 @@ from skimage.feature import corner_harris, corner_shi_tomasi, peak_local_max from ssqueezepy import cwt as sqz_cwt from config import WIN_THOLD, WINDOW_SIZE, WINDOW_SHIFT, MQA_THOLD, ACC_THOLD -from config import ACC_FS, IMU_FS, FS_RESAMPLE, BR_FS +from config import ACC_FS, IMU_FS, FS_RESAMPLE, BR_FS, PPG_FS from config import MIN_RESP_RATE, MAX_RESP_RATE def butter_lowpass(lowcut, fs, order=5): @@ -223,7 +223,7 @@ def acc_signal_processing(data, fs:int=100): # ma = accel - movingaverage(accel, 3) - data_sd = td_scaler(accel, axis=0) + data_sd = std_scaler(accel, axis=0) # data_sd = attenuate_edges(data_sd, 20) # mask = np.abs(data_sd) > thold @@ -258,6 +258,14 @@ def imu_signal_processing(data, fs:int=IMU_FS): ma = movingaverage(bp, 8, axis=0) return ma +def bvp_signal_processing(data, fs:int=PPG_FS): + # TODO + bp = butter_bandpass_filter(data, + 45/60, + 150/60, fs=fs, order=2) + ma = movingaverage(bp, 4, axis=0) + return ma + def roddiger_sp(data=None, fs:int=IMU_FS): ''' Run data through the following steps: * Second order taylor estimation diff --git a/modules/utils.py b/modules/utils.py index 74d679c..295d2b4 100644 --- a/modules/utils.py +++ b/modules/utils.py @@ -146,7 +146,7 @@ def get_inter_feature_hist(df, lbl_str='br', ntop_features=5, nsbjs=30): def model_training(mdl_str, x_train, y_train, marker, validation_data=None, overwrite=False, is_regression=False, project_directory=None, - window_size=50, extra_train=20): + window_size=50, extra_train=20, poly_deg=1): directory = join(project_directory, '_'.join([mdl_str, marker])) if validation_data is not None: @@ -258,11 +258,9 @@ def model_training(mdl_str, x_train, y_train, marker, if validation_data is None: tuner.search(x_train, y_train, validation_data, - validation_split=0.2, - batch_size=hypermodel.batch_size) + validation_split=0.2) else: - tuner.search(x_train, y_train, validation_data, - batch_size=hypermodel.batch_size) + tuner.search(x_train, y_train, validation_data) if overwrite or not exists(tuner.best_model_path+'.index'): mdl = tuner.load_model(is_training=True) @@ -272,7 +270,6 @@ def model_training(mdl_str, x_train, y_train, marker, history = hypermodel.fit( None, mdl, x_train, y_train, validation_data=validation_data, epochs=extra_train, - batch_size=hypermodel.batch_size, callbacks=callbacks, ) tuner.save_weights_to_path() @@ -296,7 +293,7 @@ def model_training(mdl_str, x_train, y_train, marker, elif mdl_str == 'linreg': print("---LinearRegression---") # n to 2 if full set, 1 if M and R - poly = PolynomialFeatures(1) + poly = PolynomialFeatures(poly_deg) x_train_poly = poly.fit_transform(x_train) mdl_cls = LinearRegressionClass(marker=marker, directory=directory) if overwrite: diff --git a/regress_rr.py b/regress_rr.py index a5376e4..f600080 100644 --- a/regress_rr.py +++ b/regress_rr.py @@ -36,6 +36,7 @@ from modules.datapipeline import get_file_list, load_and_snip, load_data, \ load_split_data, load_harness_data from modules.digitalsignalprocessing import vectorized_slide_win as vsw from modules.digitalsignalprocessing import imu_signal_processing +from modules.digitalsignalprocessing import bvp_signal_processing from modules.digitalsignalprocessing import hernandez_sp, reject_artefact from modules.digitalsignalprocessing import do_pad_fft,\ pressure_signal_processing, infer_frequency @@ -67,9 +68,9 @@ from sktime.transformations.panel.rocket import ( ) from config import WINDOW_SIZE, WINDOW_SHIFT, IMU_FS, DATA_DIR, BR_FS\ - , FS_RESAMPLE + , FS_RESAMPLE, PPG_FS -IMU_COLS = ['acc_x', 'acc_y', 'acc_z', 'gyr_x', 'gyr_y', 'gyr_z'] +IMU_COLS = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z'] def utc_to_local(utc_dt, tz=None): return utc_dt.replace(tzinfo=timezone.utc).astimezone(tz=tz) @@ -315,8 +316,7 @@ def imu_df_win_task(w_inds, df, i, cols): # return if cols is None: - cols = ['acc_x', 'acc_y', 'acc_z', - 'gyr_x', 'gyr_y', 'gyr_z'] + cols = IMU_COLS data = w_df[cols].values @@ -325,10 +325,7 @@ def imu_df_win_task(w_inds, df, i, cols): # ys = cubic_interp(sd_data, BR_FS, FS_RESAMPLE) filt_data = imu_signal_processing(sd_data, IMU_FS) x_out = pd.DataFrame(filt_data, - columns=[ - 'acc_x', 'acc_y', 'acc_z', - 'gyro_x', 'gyro_y', 'gyro_z', - ]) + columns=IMU_COLS) sm_out = w_df['BR'].values ps_out = w_df['PSS'].values @@ -346,6 +343,103 @@ def imu_df_win_task(w_inds, df, i, cols): return x_out, y_out +def bvp_df_win_task(w_inds, df, i, cols): + time = df['sec'].values + + fs = PPG_FS + + if w_inds[-1] == 0: return + w_df = df.iloc[w_inds] + t0, t1 = time[w_inds][0], time[w_inds][-1] + diff = time[w_inds[1:]] - time[w_inds[0:-1]] + mask = np.abs(diff)>20 + diff_chk = np.any(mask) + if diff_chk: + return + + # sbj = w_df['subject'].values.astype(int) + # sbj_mask = np.any((sbj[1:] - sbj[:-1])>0) + # if sbj_mask: + # return + + if cols is None: + cols = ['bvp'] + + data = w_df[cols].values + + # DSP + sd_data = (data - np.mean(data, axis=0))/np.std(data, axis=0) + filt_data = bvp_signal_processing(sd_data.copy(), fs) + x_out = pd.DataFrame(filt_data, + columns=cols) + + sm_out = w_df['BR'].values + ps_out = w_df['PSS'].values + + x_vec_time = np.median(time[w_inds]) + + ps_freq = int(get_max_frequency(ps_out, fs=fs)) + + y_tmp = np.array([x_vec_time, np.nanmedian(sm_out), ps_freq]) + + x_out['sec'] = x_vec_time + x_out['id'] = i + y_out = pd.DataFrame([y_tmp], columns=['sec', 'br', 'pss']) + + return x_out, y_out + +def df_win_task(w_inds, df, i, cols): + time = df['sec'].values + if w_inds[-1] == 0: return + w_df = df.iloc[w_inds] + t0, t1 = time[w_inds][0], time[w_inds][-1] + diff = time[w_inds[1:]] - time[w_inds[0:-1]] + + fs_est = 1/np.mean(diff) + if fs_est > 70 and 'acc_x' in cols: fs = IMU_FS + elif fs_est < 70 and 'bvp' in cols: fs = PPG_FS + mask = np.abs(diff)>20 + diff_chk = np.any(mask) + if diff_chk: + return + + filt_out = [] + for col in cols: + data = w_df[col].values + # DSP + sd_data = (data - np.mean(data, axis=0))/np.std(data, axis=0) + # ys = cubic_interp(sd_data, BR_FS, FS_RESAMPLE) + if col != 'bvp': + filt_out.append(imu_signal_processing(sd_data, fs)) + else: + bvp_filt = bvp_signal_processing(sd_data, fs) + filt_out.append(bvp_filt) + + x_out = pd.DataFrame(np.array(filt_out).T, columns=cols) + + sm_out = w_df['BR'].values + ps_out = w_df['PSS'].values + + x_vec_time = np.median(time[w_inds]) + + fs = 1/np.mean(diff) + ps_freq = int(get_max_frequency(ps_out, fs=fs)) + y_tmp = np.array([x_vec_time, np.nanmedian(sm_out), ps_freq]) + + x_out['sec'] = x_vec_time + x_out['id'] = i + y_out = pd.DataFrame([y_tmp], columns=['sec', 'br', 'pss']) + + if 'cpm' in w_df.columns.tolist(): + cpm_out = int(np.median(w_df['cpm'].values)) + y_out['cpm'] = cpm_out + if 'bvp' in cols: + xf, yf = do_pad_fft(bvp_filt, fs=fs) + bv_freq = int(xf[yf.argmax()]*60) + y_out['bvp_est'] = bv_freq + + return x_out, y_out + def get_max_frequency(data, fs=IMU_FS): data = pressure_signal_processing(data, fs=fs) @@ -364,30 +458,41 @@ def convert_to_float(df): if 'subject' in cols: df['subject'] = df['subject'].astype(float) -def load_and_sync_xsens(subject): +def load_and_sync_xsens(subject, sens_list:list=['imu', 'bvp']): + assert 'imu' in sens_list or 'bvp' in sens_list, \ + f"{sens_list} is not supported, must contain"\ + "'imu', 'bvp' or 'imu, bvp'" + + pss_df, br_df, imu_df, bvp_df = None, None, None, None + acc_data, gyr_data, bvp_data = None, None, None # load imu - imu_list = get_file_list('imudata.gz', sbj=subject) - imu_df_all, imu_hdr_df_all = load_imu_files(imu_list) + if 'imu' in sens_list: + imu_list = get_file_list('imudata.gz', sbj=subject) + imu_df_all, imu_hdr_df_all = load_imu_files(imu_list) # load bioharness pss_list = get_file_list('*Breathing.csv', sbj=subject) if len(pss_list) == 0: pss_list = get_file_list('BR*.csv', sbj=subject) - br_list = get_file_list('*Summary*', sbj=subject) + br_list = get_file_list('*Summary*.csv', sbj=subject) # load e4 wristband - e4_list = get_file_list('*.zip', sbj=subject) - bvp_df_all, bvp_hdr = load_e4_files(e4_list) - bvp_fs = bvp_hdr[0]['fs'] + if 'bvp' in sens_list: + e4_list = get_file_list('*.zip', sbj=subject) + bvp_df_all, bvp_hdr = load_e4_files(e4_list) + bvp_fs = bvp_hdr[0]['fs'] xsens_list = [] # skip the first and last x minute(s) minutes_to_skip = .5 br_skiprows = br_skipfooter = int(minutes_to_skip*60) pss_skiprows = pss_skipfooter = int(minutes_to_skip*60*BR_FS) + # load each bioharness file and sync the imu to it for pss_file, br_file in zip(pss_list, br_list): + xsens_data = {} + pss_df = load_bioharness_file(pss_file, skiprows=pss_skiprows, skipfooter=pss_skipfooter, engine='python') @@ -403,49 +508,58 @@ def load_and_sync_xsens(subject): br_df['sec'] = br_time # sync - br_df, imu_df = sync_to_ref(br_df, imu_df_all.copy()) - pss_df, _ = sync_to_ref(pss_df, imu_df_all.copy()) - bvp_df, _ = sync_to_ref(bvp_df_all.copy(), pss_df.copy()) + if 'imu' in sens_list and 'bvp' in sens_list: + br_df, imu_df = sync_to_ref(br_df, imu_df_all.copy()) + pss_df, _ = sync_to_ref(pss_df, imu_df_all.copy()) + bvp_df, _ = sync_to_ref(bvp_df_all.copy(), pss_df.copy()) + elif 'imu' in sens_list and not 'bvp' in sens_list: + br_df, imu_df = sync_to_ref(br_df, imu_df_all.copy()) + pss_df, _ = sync_to_ref(pss_df, imu_df_all.copy()) + elif not 'imu' in sens_list and 'bvp' in sens_list: + br_df, bvp_df = sync_to_ref(br_df, bvp_df_all.copy()) + pss_df, _ = sync_to_ref(pss_df, bvp_df_all.copy()) # extract relevant data - acc_data = np.stack(imu_df['accelerometer'].values) - gyr_data = np.stack(imu_df['gyroscope'].values) - x_time = imu_df['sec'].values.reshape(-1, 1) + if 'imu' in sens_list: + axes = ['x', 'y', 'z'] + acc_data = np.stack(imu_df['accelerometer'].values) + gyr_data = np.stack(imu_df['gyroscope'].values) + x_time = imu_df['sec'].values.reshape(-1, 1) + + if 'bvp' in sens_list and 'imu' in sens_list: + bvp_data = bvp_df['bvp'].values + bvp_data = np.interp(x_time, bvp_df['sec'].values, bvp_data)\ + .reshape(-1, 1) + elif 'bvp' in sens_list and not 'imu' in sens_list: + bvp_data = bvp_df['bvp'].values + x_time = bvp_df['sec'].values + + xsens_data['sec'] = x_time.flatten() br_col = [col for col in pss_df.columns.values if\ 'breathing' in col.lower()][0] pss_data = pss_df[br_col].values pss_data = np.interp(x_time, pss_df['sec'].values, pss_data)\ .reshape(-1, 1) + xsens_data['PSS'] = pss_data.flatten() br_lbl = [col for col in br_df.columns.values if\ 'br' in col.lower()][0] br_data = br_df['BR'].values br_data = np.interp(x_time, br_df['sec'].values, br_data)\ .reshape(-1, 1) + xsens_data['BR'] = br_data.flatten() - bvp_data = bvp_df['bvp'].values - bvp_data = np.interp(x_time, bvp_df['sec'].values, bvp_data)\ - .reshape(-1, 1) + if 'imu' in sens_list: + for i, axis in enumerate(axes): + xsens_data['acc_'+axis] = acc_data.T[i].flatten() + xsens_data['gyro_'+axis] = gyr_data.T[i].flatten() + + if 'bvp' in sens_list: + xsens_data['bvp'] = bvp_data.flatten() + + xsens_df_tmp = pd.DataFrame(xsens_data) - xsens_data = np.concatenate( - (x_time, br_data, pss_data, bvp_data, acc_data, gyr_data), - axis=1) - - columns=['sec' , 'BR' , 'PSS' , 'BVP' , - 'acc_x' , 'acc_y' , 'acc_z' , - 'gyr_x' , 'gyr_y' , 'gyr_z' , ] - xsens_df_tmp = pd.DataFrame(xsens_data, columns=columns) - - ''' - print("{:.2f}\t{:.2f}\t{:.2f}".format(br_df.sec.iloc[0], - pss_df.sec.iloc[0], - imu_df.sec.iloc[0])) - print("{:.2f}\t{:.2f}\t{:.2f}".format(br_df.sec.iloc[-1], - pss_df.sec.iloc[-1], - imu_df.sec.iloc[-1])) - print(xsens_df_tmp.head()) - ''' xsens_list.append(xsens_df_tmp) if len(xsens_list) > 1: @@ -456,20 +570,23 @@ def load_and_sync_xsens(subject): return xsens_df -def load_tsfresh(subject, project_dir, +def load_tsfresh(xsens_df, project_dir, + sens_list:list=['imu', 'bvp'], window_size=12, window_shift=0.2, fs=IMU_FS, - overwrite=False): - cols = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z'] + overwrite=False, data_cols=None): + + assert data_cols is not None, "invalid selection for data columns" + pkl_file = join(project_dir, 'tsfresh.pkl') if exists(pkl_file) and not overwrite: return pd.read_pickle(pkl_file) - xsens_df = load_and_sync_xsens(subject) x_df, y_df = get_df_windows(xsens_df, - imu_df_win_task, + df_win_task, window_size=window_size, window_shift=window_shift, fs=fs, + cols=data_cols, ) x_features_df = extract_features( x_df, column_sort='sec', @@ -657,55 +774,37 @@ class EvalHandler(): def save_eval_history(self): self.eval_hist.to_csv(self.eval_history_file, index=False) -# Train IMU - RR models across subjects -def imu_rr_model(subject, - window_size=12, - window_shift=0.2, - lbl_str='pss', - mdl_str='knn', - overwrite=False, - feature_method='tsfresh', - train_len:int=3, - test_standing=False, - data_input:str='imu+bvp', - ): +def imu_rr_dsp(subject, + window_size=12, + window_shift=0.2, + lbl_str='pss', + overwrite=False, + train_len:int=3, + test_standing=False, + ): # window_size, window_shift, intra, inter cal_str = 'cpm' fs = IMU_FS tmp = [] imu_cols = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z'] - bvp_cols = ['bvp'] - - if 'imu' in data_input and 'bvp' in data_input: - data_cols = imu_cols + bvp_cols - elif 'imu' in data_input and not 'bvp' in data_input: - data_cols = imu_cols - elif not 'imu' in data_input and 'bvp' in data_input: - data_cols = bvp_cols + parent_directory_string = "imu_rr_dsp" do_minirocket = False use_tsfresh = False overwrite_tsfresh = True train_size = int(train_len) - if feature_method == 'tsfresh': - use_tsfresh = True - elif feature_method == 'minirocket': - do_minirocket = True - config = {'window_size' : window_size, 'window_shift' : window_shift, 'lbl_str' : lbl_str, - 'do_minirocket' : do_minirocket, - 'use_tsfresh' : use_tsfresh, 'train_len' : train_len, - 'data_cols' : data_cols, 'test_standing' : test_standing, + 'sens_list' : 'imu', } pfh = ProjectFileHandler(config) pfh.set_home_directory(join(DATA_DIR, 'subject_specific', subject)) - pfh.set_parent_directory('imu_rr') + pfh.set_parent_directory(parent_directory_string) id_check = pfh.get_id_from_config() if id_check is None: pfh.set_project_directory() @@ -716,16 +815,7 @@ def imu_rr_model(subject, print('Using pre-set data id: ', pfh.fset_id) project_dir = pfh.project_directory - if not use_tsfresh: - xsens_df = load_and_sync_xsens(subject) - else: - xsens_df = load_tsfresh(subject, - project_dir, - window_size=window_size, - window_shift=window_shift, - fs=IMU_FS, - overwrite=overwrite_tsfresh) - + xsens_df = load_and_sync_xsens(subject, sens_list=['imu']) activity_df = get_activity_log(subject) event_df = get_respiration_log(subject) @@ -759,22 +849,138 @@ def imu_rr_model(subject, plt.subplot(211) plt.plot(acc_y_dsp_df[lbl_str]); plt.plot(acc_dsp_df['pred']) plt.subplot(212) - plt.plot(acc_y_dsp_df[lbl_str]); plt.plot(acc_dsp_df['pred']) + plt.plot(gyr_y_dsp_df[lbl_str]); plt.plot(gyr_dsp_df['pred']) plt.show() - # TODO implement evals from dsp to results - ipdb.set_trace() + eval_handle = EvalHandler(y_test.flatten(), preds.flatten(), subject, + pfh, None, overwrite=overwrite) + eval_handle.update_eval_history() + eval_handle.save_eval_history() + + pp = PrettyPrinter() + pp.pprint(eval_handle.load_eval_history()) + + fig, ax = plt.subplots() + fig_title = '_'.join([mdl_str, subject]+[combi_str]) + ax.plot(y_test) + ax.plot(preds) + ax.set_title(fig_title) + ax.legend([lbl_str, 'pred']) + fig_dir = join(project_dir, 'figures') + if not exists(fig_dir): mkdir(fig_dir) + fig.savefig(join(fig_dir, fig_title+".png")) + plt.close() + +def sens_rr_model(subject, + window_size=12, + window_shift=0.2, + lbl_str='pss', + mdl_str='knn', + overwrite=False, + feature_method='tsfresh', + train_len:int=3, + test_standing=False, + data_input:str='imu+bvp', + ): + # window_size, window_shift, intra, inter + cal_str = 'cpm' + tmp = [] + imu_cols = IMU_COLS + bvp_cols = ['bvp'] + if 'imu' in data_input and 'bvp' in data_input: + data_cols = ['acc_x', 'acc_y', 'acc_z', + 'gyro_x', 'gyro_y', 'gyro_z', + 'bvp'] + parent_directory_string = "imu-bvp_rr" + data_input = 'imu+bvp' + sens_list = ['imu', 'bvp'] + fs = IMU_FS + elif 'imu' in data_input and not 'bvp' in data_input: + data_cols = ['acc_x', 'acc_y', 'acc_z', + 'gyro_x', 'gyro_y', 'gyro_z',] + parent_directory_string = "imu_rr" + sens_list = ['imu'] + fs = IMU_FS + elif not 'imu' in data_input and 'bvp' in data_input: + data_cols = ['bvp'] + parent_directory_string = "bvp_rr" + sens_list = ['bvp'] + fs = PPG_FS + + do_minirocket = False + use_tsfresh = False + overwrite_tsfresh = True + train_size = int(train_len) + + if feature_method == 'tsfresh': + use_tsfresh = True + elif feature_method == 'minirocket': + do_minirocket = True + + config = {'window_size' : window_size, + 'window_shift' : window_shift, + 'lbl_str' : lbl_str, + 'do_minirocket' : do_minirocket, + 'use_tsfresh' : use_tsfresh, + 'train_len' : train_len, + 'test_standing' : test_standing, + 'sens_list' : data_input + } + + pfh = ProjectFileHandler(config) + pfh.set_home_directory(join(DATA_DIR, 'subject_specific', subject)) + pfh.set_parent_directory(parent_directory_string) + id_check = pfh.get_id_from_config() + if id_check is None: + pfh.set_project_directory() + pfh.save_metafile() + else: + pfh.set_id(int(id_check)) + pfh.set_project_directory() + print('Using pre-set data id: ', pfh.fset_id) + project_dir = pfh.project_directory + + xsens_df = load_and_sync_xsens(subject, sens_list=sens_list) + activity_df = get_activity_log(subject) + event_df = get_respiration_log(subject) + + cal_df = get_cal_data(event_df, xsens_df) + + # TODO: needs to be fixed + if use_tsfresh: + xsens_df = load_tsfresh(xsens_df, + project_dir, + sens_list=sens_list, + window_size=window_size, + window_shift=window_shift, + fs=fs, + overwrite=overwrite_tsfresh, + data_cols=data_cols, + ) + + # include standing or not + test_df_tmp = get_test_data(cal_df, activity_df, xsens_df, test_standing) + test_df = pd.concat([df for df in test_df_tmp['data']], axis=0) + + x_test_df, y_test_df = get_df_windows( + test_df, df_win_task, window_size=window_size, + window_shift=window_shift, fs=fs, cols=data_cols) + for combi in combinations(cal_df[cal_str].values, train_len): combi_str = "-".join([str(x) for x in combi]) pfh.config[cal_str] = combi_str - marker = f'imu_rr_{subject}_id{pfh.fset_id}_combi{combi_str}' + marker = f'{parent_directory_string}_{subject}_id{pfh.fset_id}'\ + f'_combi{combi_str}' print(marker) - train_df = pd.concat( - [cal_df[cal_df[cal_str] == cpm]['data'].iloc[0] for cpm in combi], - axis=0 - ) + train_df_list = [] + for cpm in combi: + df = cal_df[cal_df[cal_str] == cpm] + data_df = df['data'].iloc[0] + data_df['cpm'] = cpm + train_df_list.append(data_df) + train_df = pd.concat(train_df_list) assert np.isin(train_df.index.values, test_df.index.values).any()==False,\ "overlapping test and train data" @@ -786,18 +992,20 @@ def imu_rr_model(subject, if do_minirocket: x_train_df, y_train_df = get_df_windows(train_df, - imu_df_win_task, + df_win_task, window_size=window_size, window_shift=window_shift, fs=fs, + cols=data_cols ) - x_train = make_windows_from_id(x_train_df, imu_cols) - y_train = y_train_df[lbl_str].values.reshape(-1, 1) - x_test = make_windows_from_id(x_test_df, imu_cols) + x_train = make_windows_from_id(x_train_df, data_cols) + y_train = y_train_df['cpm'].values.reshape(-1, 1) + x_test = make_windows_from_id(x_test_df, data_cols) y_test = y_test_df[lbl_str].values.reshape(-1, 1) - + # x_train = y_train_df['bvp_est'].values.reshape(-1, 1) + # x_test = y_test_df['bvp_est'].values.reshape(-1, 1) print("minirocket transforming...") x_train = np.swapaxes(x_train, 1, 2) x_test = np.swapaxes(x_test, 1, 2) @@ -806,13 +1014,20 @@ def imu_rr_model(subject, x_test = minirocket.transform(x_test) elif use_tsfresh: x_train = train_df.iloc[:, 3:].values - y_train = train_df[lbl_str].values.reshape(-1, 1) + y_train = train_df['cpm'].values.reshape(-1, 1) x_test = test_df.iloc[:, 3:].values y_test = test_df[lbl_str].values.reshape(-1, 1) else: - x_train = make_windows_from_id(x_train_df, imu_cols) - x_test = make_windows_from_id(x_test_df, imu_cols) - y_train = y_train_df[lbl_str].values.reshape(-1, 1) + x_train_df, y_train_df = get_df_windows(train_df, + df_win_task, + window_size=window_size, + window_shift=window_shift, + fs=fs, + cols=data_cols, + ) + x_train = make_windows_from_id(x_train_df, data_cols) + x_test = make_windows_from_id(x_test_df, data_cols) + y_train = y_train_df['cpm'].values.reshape(-1, 1) y_test = y_test_df[lbl_str].values.reshape(-1, 1) transforms, model = model_training(mdl_str, x_train, y_train, @@ -822,6 +1037,7 @@ def imu_rr_model(subject, project_directory=project_dir, window_size=int(window_size*fs), extra_train=200, + poly_deg=1 ) if transforms is not None: @@ -918,31 +1134,39 @@ if __name__ == '__main__': if subject > 0: subject = subject_pre_string+str(subject).zfill(2) - imu_rr_model(subject, window_size=window_size, window_shift=window_shift, - lbl_str=lbl_str, mdl_str=mdl_str, overwrite=overwrite, - feature_method=feature_method, train_len=train_len - ) + sens_rr_model(subject, + window_size=window_size, + window_shift=window_shift, + lbl_str=lbl_str, + mdl_str=mdl_str, + overwrite=overwrite, + feature_method=feature_method, + train_len=train_len, + test_standing=test_standing, + data_input=data_input, + ) else: subjects = [subject_pre_string+str(i).zfill(2) for i in \ range(1, n_subject_max+1) if i not in imu_issues] - imu_rr_func = partial(imu_rr_model, - window_size=window_size, - window_shift=window_shift, - lbl_str=lbl_str, - mdl_str=mdl_str, - overwrite=overwrite, - feature_method=feature_method, - train_len=train_len, - data_input=data_input, - test_standing=test_standing, - ) + + rr_func = partial(sens_rr_model, + window_size=window_size, + window_shift=window_shift, + lbl_str=lbl_str, + mdl_str=mdl_str, + overwrite=overwrite, + feature_method=feature_method, + train_len=train_len, + test_standing=test_standing, + data_input=data_input, + ) if mdl_str in ['fnn', 'lstm', 'cnn1d', 'elastic', 'ard', 'xgboost']: for subject in subjects: - imu_rr_func(subject) + rr_func(subject) else: ncpu = min(len(subjects), cpu_count()) with Pool(ncpu) as p: - p.map(imu_rr_func, subjects) + p.map(rr_func, subjects) print(args) -- GitLab