diff --git a/create_features.py b/create_features.py
index b180232de11f4004f9486225eb96c3ffa9be3d90..bcf8f4621f1b58dcaf517b07ecaafde007a9252f 100644
--- a/create_features.py
+++ b/create_features.py
@@ -32,7 +32,7 @@ from modules.animationplotter import AnimationPlotter, AnimationPlotter2D
 from modules.digitalsignalprocessing import imu_signal_processing
 from modules.digitalsignalprocessing import vectorized_slide_win
 from modules.digitalsignalprocessing import get_video_features
-from modules.datapipeline import SubjectData, datetime_to_sec\
+from modules.datapipeline import datetime_to_sec\
         ,sec_to_datetime, DataSynchronizer
 from modules.datapipeline import get_file_list, load_files_conditions
 from modules.evaluations import Evaluation
@@ -183,16 +183,15 @@ def plot_video_features(video_fname, nframes=1000):
     ipdb.set_trace()
 
 def map_imu_tsfresh_subject(subject,
-                            tlx_df=None,
                             conditions=['R', 'L0', 'L1', 'L2', 'L3'],
                             window_size=5, window_shift=0.2):
-    sbj_data = SubjectData(subject=subject)
-    sbj_data.set_imu_fname()
-    sbj_dir = sbj_data.subject_dir
+    pfh = ProjectFileHandler({})
+    pfh.set_home_directory(join(DATA_DIR, 'subject_specific', subject))
     for condition in conditions:
         tsfresh_pkl = path_join(
-            sbj_dir, "{0}__winsize_{1}__winshift_{2}_imu_tsfresh_df.pkl"\
-            .format(condition, window_size, window_shift))
+            pfh.home_directory,
+            "{0}__winsize_{1}__winshift_{2}_tsfresh_df.pkl"\
+            .format(condition, window_size, window_shift))
         if path_exists(tsfresh_pkl): continue
         print(f"trying {subject} for {condition}")
         data_df = load_df(subject, condition)
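Note on the hunk above: the cache filename is keyed on the condition as well as the window settings, so each of the five conditions resolves to its own pickle instead of the loop colliding on one file. A minimal sketch of the naming behaviour, assuming pfh.home_directory points at the subject folder (the helper name and example path are illustrative, not project code):

    from os.path import join as path_join

    def tsfresh_cache_name(home_dir, condition, window_size=5, window_shift=0.2):
        # one cache file per condition and window configuration
        return path_join(home_dir,
                         "{0}__winsize_{1}__winshift_{2}_tsfresh_df.pkl"
                         .format(condition, window_size, window_shift))

    # tsfresh_cache_name('/data/subject_specific/Pilot03', 'R')
    # -> '/data/subject_specific/Pilot03/R__winsize_5__winshift_0.2_tsfresh_df.pkl'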
diff --git a/logs/singularity_2776358.out b/logs/singularity_2776358.out
new file mode 100644
index 0000000000000000000000000000000000000000..23dd6d65ed57e8f0ccc7cd3de757bf36956a329d
--- /dev/null
+++ b/logs/singularity_2776358.out
@@ -0,0 +1,18 @@
+2023-11-26 00:44:07.923231: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
+To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
+Traceback (most recent call last):
+  File "regress_rr.py", line 23, in <module>
+    import tensorflow as tf
+  File "/home/rqchia/.local/lib/python3.8/site-packages/tensorflow/__init__.py", line 54, in <module>
+    from ._api.v2 import data
+  File "/home/rqchia/.local/lib/python3.8/site-packages/tensorflow/_api/v2/data/__init__.py", line 11, in <module>
+    from . import experimental
+  File "/home/rqchia/.local/lib/python3.8/site-packages/tensorflow/_api/v2/data/experimental/__init__.py", line 87, in <module>
+    from . import service
+  File "<frozen importlib._bootstrap>", line 991, in _find_and_load
+  File "<frozen importlib._bootstrap>", line 975, in _find_and_load_unlocked
+  File "<frozen importlib._bootstrap>", line 671, in _load_unlocked
+  File "<frozen importlib._bootstrap_external>", line 844, in exec_module
+  File "<frozen importlib._bootstrap_external>", line 939, in get_code
+  File "<frozen importlib._bootstrap_external>", line 1037, in get_data
+KeyboardInterrupt
diff --git a/logs/singularity_2776834.out b/logs/singularity_2776834.out
new file mode 100644
index 0000000000000000000000000000000000000000..4b9cae66160f895311be7a24fb9feb17a8b532aa
--- /dev/null
+++ b/logs/singularity_2776834.out
@@ -0,0 +1,42 @@
+2023-11-26 00:44:26.282311: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
+To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
+Using TensorFlow backend
+Namespace(data_input='imu-bvp', feature_method='tsfresh', lbl_str='pss', model='linreg', overwrite=0, subject=3, test_standing=1, train_len=5, win_shift=0.2, win_size=12)
+unable to find matching config id
+Data id not set, auto assigned to: 2
+Dependency not available for matrix_profile, this feature will be disabled!
+ Feature Extraction:   0%|          | 0/65 [00:00<?, ?it/s] Feature Extraction:   2%|▏| 1/65 [00:00<00:08,  7.47it/s] Feature Extraction:  12%|█▏| 8/65 [00:00<00:01, 38.61it/s] Feature Extraction:  25%|██▍| 16/65 [00:00<00:00, 53.36it/s] Feature Extraction:  37%|███▋      | 24/65 [00:00<00:00, 58.89it/s] Feature Extraction:  49%|████▉     | 32/65 [00:00<00:00, 63.82it/s] Feature Extraction:  62%|██████▏| 40/65 [00:00<00:00, 65.14it/s] Feature Extraction:  74%|███████▍| 48/65 [00:00<00:00, 67.87it/s] Feature Extraction:  86%|████████▌ | 56/65 [00:00<00:00, 69.75it/s] Feature Extraction:  98%|█████████▊| 64/65 [00:01<00:00, 72.31it/s] Feature Extraction: 100%|██████████| 65/65 [00:01<00:00, 63.53it/s]
+Dependency not available for matrix_profile, this feature will be disabled!
+ Feature Extraction:   0%|          | 0/47 [00:00<?, ?it/s] Feature Extraction: 100%|██████████| 47/47 [00:00<00:00, 997.74it/s]
+Dependency not available for matrix_profile, this feature will be disabled!
+ Feature Extraction:   0%|          | 0/47 [00:00<?, ?it/s] Feature Extraction: 100%|██████████| 47/47 [00:00<00:00, 975.44it/s]
+Dependency not available for matrix_profile, this feature will be disabled!
+ Feature Extraction:   0%|          | 0/47 [00:00<?, ?it/s] Feature Extraction: 100%|██████████| 47/47 [00:00<00:00, 1034.00it/s]
+Dependency not available for matrix_profile, this feature will be disabled!
+ Feature Extraction:   0%|          | 0/47 [00:00<?, ?it/s] Feature Extraction: 100%|██████████| 47/47 [00:00<00:00, 1018.94it/s]
+Dependency not available for matrix_profile, this feature will be disabled!
+ Feature Extraction:   0%|          | 0/49 [00:00<?, ?it/s] Feature Extraction: 100%|██████████| 49/49 [00:00<00:00, 1008.80it/s]
+Dependency not available for matrix_profile, this feature will be disabled!
+ Feature Extraction:   0%|          | 0/47 [00:00<?, ?it/s] Feature Extraction: 100%|██████████| 47/47 [00:00<00:00, 950.21it/s]
+Dependency not available for matrix_profile, this feature will be disabled!
+ Feature Extraction:   0%|          | 0/49 [00:00<?, ?it/s] Feature Extraction: 100%|██████████| 49/49 [00:00<00:00, 1035.00it/s]
+imu-bvp_rr_Pilot03_id2_combi5.0-7.0-10.0-12.0-15.0
+train
+(101, 74)
+test
+(978, 73)
+---LinearRegression---
+Traceback (most recent call last):
+  File "regress_rr.py", line 1374, in <module>
+    sens_rr_model(subject,
+  File "regress_rr.py", line 1268, in sens_rr_model
+    x_test = transforms.transform(x_test)
+  File "/usr/local/lib/python3.8/dist-packages/sklearn/utils/_set_output.py", line 140, in wrapped
+    data_to_wrap = f(self, X, *args, **kwargs)
+  File "/usr/local/lib/python3.8/dist-packages/sklearn/preprocessing/_polynomial.py", line 432, in transform
+    X = self._validate_data(
+  File "/usr/local/lib/python3.8/dist-packages/sklearn/base.py", line 625, in _validate_data
+    self._check_n_features(X, reset=reset)
+  File "/usr/local/lib/python3.8/dist-packages/sklearn/base.py", line 414, in _check_n_features
+    raise ValueError(
+ValueError: X has 70 features, but PolynomialFeatures is expecting 71 features as input.
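The ValueError that ends this log is scikit-learn's feature-count consistency check: transform() received a test matrix with one column fewer than the matrix the transformer was fitted on. A minimal reproduction, with shapes chosen to mirror the log (not project code):

    import numpy as np
    from sklearn.preprocessing import PolynomialFeatures

    poly = PolynomialFeatures(degree=2)
    poly.fit(np.zeros((10, 71)))            # fitted on 71 training columns
    try:
        poly.transform(np.zeros((10, 70)))  # test data arrives with 70 columns
    except ValueError as err:
        print(err)  # X has 70 features, but PolynomialFeatures is expecting 71 ...

The regress_rr.py changes further down appear to address exactly this: features are selected by column name and the tsfresh frame is reindexed into sorted column order, so the train and test matrices stay aligned.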
diff --git a/logs/singularity_2810990.out b/logs/singularity_2810990.out
new file mode 100644
index 0000000000000000000000000000000000000000..179501a86c42461c9a163c3864c1dc92576d8960
--- /dev/null
+++ b/logs/singularity_2810990.out
@@ -0,0 +1,7 @@
+2023-11-26 01:21:46.434356: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
+To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
+Using TensorFlow backend
+Namespace(data_input='imu-bvp', feature_method='tsfresh', lbl_str='pss', model='linreg', overwrite=1, subject=3, test_standing=1, train_len=5, win_shift=0.2, win_size=12)
+Using pre-set data id: 2
+Dependency not available for matrix_profile, this feature will be disabled!
+ Feature Extraction:   0%|          | 0/65 [00:00<?, ?it/s]
\ No newline at end of file
diff --git a/modules/__pycache__/datapipeline.cpython-38.pyc b/modules/__pycache__/datapipeline.cpython-38.pyc
index 450a764cdbfb582417b9f1631e429109e238e31f..112731661244257e314c326704b0de1742c5608c 100644
Binary files a/modules/__pycache__/datapipeline.cpython-38.pyc and b/modules/__pycache__/datapipeline.cpython-38.pyc differ
diff --git a/modules/datapipeline.py b/modules/datapipeline.py
index 7c17bc4175fa140b4653530f55da9e67ab697668..a05ec3487144a74ff4c451aeaf8053b76387a1a0 100644
--- a/modules/datapipeline.py
+++ b/modules/datapipeline.py
@@ -455,279 +455,6 @@ class DataSynchronizer():
         self.start_ind = start_ind
         self.end_ind = end_ind
 
-class SubjectData(DataImporter):
-    ''' Loads in data for the rigid body, breathing rate, summary files
-    and syncs them accordingly '''
-    def __init__(self, condition='M', subject='S01'):
-        super().__init__()
-        self.condition = condition
-        self.subject = subject
-        if subject[0] != 'S':
-            self.subject_id = subject
-        else:
-            self.subject_id = int(re.search(r'\d+', subject).group())
-        self.study_start = 0
-        self.study_end = 0
-
-        self.subject_dir = path_join(self.parent_dir, self.subject)
-
-        self.pressure_df = pd.DataFrame()
-        self.summary_df = pd.DataFrame()
-        self.accel_df = pd.DataFrame()
-        self.imu_df = pd.DataFrame()
-
-    def get_cond_file(self, files):
-        for f in files:
-            if self.condition in f.split(sep)[-1] and \
-               self.subject in f.split(sep)[-1]:
-                return f
-        return ''
-
-    def list_sub_dirs(self, parent_dir, endswith=None):
-        reg_str = r'[0-9]+$'
-        if endswith is not None:
-            reg_str = r'[0-9]+{0}$'.format(endswith)
-        regex = re.compile(reg_str)
-        sub_dirs = [
-            path_join(parent_dir, d) for d in listdir(parent_dir) if \
-            (
-                isdir(path_join(parent_dir,d)) and bool(regex.search(d))
-            )
-        ]
-        return sorted(sub_dirs)
-
-    def check_times(self, sub_dirs, is_utc=False):
-        ''' Parses sub directory names to datetime and checks against mat_start
-        and end '''
-        sep = self.sep
-
-        if is_utc:
-            imu_hdr_files = [path_join(sub_dir, 'recording.g3')\
-                             for sub_dir in sub_dirs]
-            hdrs = [pd.read_json(imu_hdr_file, orient='index')\
-                    for imu_hdr_file in imu_hdr_files]
-            times = [hdr.to_dict().pop(0)['created'] \
-                     for hdr in hdrs]
-            times = [datetime.fromisoformat(time[:-1]) for time in times]
-            times = [(time.timestamp()+ timedelta(hours=11).seconds) for time in times]
-        else:
-            times = [datetime_to_sec(sub_dir.split(sep)[-1])\
-                     for sub_dir in sub_dirs]
-
-        sel_dir = sub_dirs[-1]
-        for i, time in enumerate(times[:-1]):
-            if self.study_start > time and self.study_start < times[i+1]:
-                sel_dir = sub_dirs[i]
-        return sel_dir
-
-    def set_pressure_fname(self):
-        subject_dir = self.subject_dir
-        sub_dirs = self.list_sub_dirs(subject_dir)
-        sub_dir = sub_dirs[0]
-        if len(sub_dirs)> 1:
-            # Check directory times with timeline
-            sub_dir = self.check_times(sub_dirs)
-
-        pressure_glob = path_join(sub_dir, 'BR*.csv')
-        pressure_files = sorted(glob.glob(pressure_glob))
-        if not pressure_files:
-            dt_info = sub_dir.split(sep)[-1]
-            pressure_glob = path_join(sub_dir, '*_Breathing.csv')
-            pressure_files = sorted(glob.glob(pressure_glob))
-        self.pressure_fname = pressure_files[-1]
-
-    def set_summary_fname(self):
-        subject_dir = self.subject_dir
-        sub_dirs = self.list_sub_dirs(subject_dir)
-        sub_dir = sub_dirs[0]
-        if len(sub_dirs)> 1:
-            # Check directory times with timeline
-            sub_dir = self.check_times(sub_dirs)
-
-        summary_glob = path_join(sub_dir, 'Summary*.csv')
-        summary_files = sorted(glob.glob(summary_glob))
-        if not summary_files:
-            dt_info = sub_dir.split(sep)[-1]
-            summary_glob = path_join(sub_dir, dt_info+'_Summary*.csv')
-            summary_files = sorted(glob.glob(summary_glob))
-        self.summary_fname = summary_files[-1]
-
-    def set_imu_fname(self):
-        subject_dir = self.subject_dir
-        sub_dirs = self.list_sub_dirs(subject_dir, endswith='Z')
-        sub_dir = sub_dirs[0]
-        if len(sub_dirs)> 1:
-            sub_dir = self.check_times(sub_dirs, is_utc=True)
-
-        imu_glob = path_join(sub_dir, 'imu*')
-        imu_files = sorted(glob.glob(imu_glob))
-        self.imu_fname = imu_files[-1]
-
-        imu_hdr_glob = path_join(sub_dir, 'recording.g3')
-        imu_hdr_files = sorted(glob.glob(imu_hdr_glob))
-        self.imu_hdr_fname = imu_hdr_files[-1]
-        video_fname = path_join(sub_dir, 'scenevideo.mp4')
-        if path_exists(video_fname):
-            self.video_fname = video_fname
-
-    def set_accel_fname(self):
-        subject_dir = self.subject_dir
-        sub_dirs = self.list_sub_dirs(subject_dir)
-        sub_dir = sub_dirs[0]
-        if len(sub_dirs)> 1:
-            # Check directory times with timeline
-            sub_dir = self.check_times(sub_dirs)
-
-        accel_glob = path_join(sub_dir, 'Accel*.csv')
-        accel_files = sorted(glob.glob(accel_glob))
-        if not accel_files:
-            dt_info = sub_dir.split(sep)[-1]
-            accel_glob = path_join(sub_dir, '*_Accel.csv')
-            accel_files = sorted(glob.glob(accel_glob))
-            accel_files = [f for f in accel_files if 'g' not in \
-                           f.lower().split(sep)[-1]]
-        self.accel_fname = accel_files[-1]
-
-    def set_timeline(self):
-        times_glob = path_join(self.subject_dir,f'*.csv')
-        times_files = sorted(glob.glob(times_glob))
-        self.timeline_fname = self.get_cond_file(times_files)
-        self.timeline_df = self.import_time_data()
-
-        mat_time = self.timeline_df['Timestamps'].map(mat_to_sec)
-        mat_start_ind = self.timeline_df.index[
-            self.timeline_df['Event']=='Start Test'
-        ].tolist()[0]
-        mat_start = mat_time.values[mat_start_ind]
-        mat_end = mat_time.values[-1]
-
-        self.study_start = mat_start
-        self.study_end = mat_end
-
-    def set_fnames(self):
-        self.set_pressure_fname()
-        self.set_summary_fname()
-        self.set_imu_fname()
-        self.set_accel_fname()
-
-    def load_dataframes(self):
-        self.timeline_df = self.import_time_data()
-        self.pressure_df = self.import_labels(self.pressure_fname)
-        self.summary_df = self.import_labels(self.summary_fname)
-        self.accel_df = self.import_labels(self.accel_fname)
-        self.imu_df, self.imu_hdr = self.import_imu_data()
-
-    def sync_pressure_df(self):
-        data_sync = DataSynchronizer()
-
-        cols = self.pressure_df.columns
-        if 'Year' in cols:
-            year = int(self.pressure_df['Year'].values[0])
-            month = int(self.pressure_df['Month'].values[0])
-            day = int(self.pressure_df['Day'].values[0])
-            dt_fmt = "%Y/%m/%d"
-            dt_str = f"{year}/{month}/{day}"
-            dt_obj = datetime.strptime(dt_str, dt_fmt)
-            pressure_time = self.pressure_df['ms'].interpolate().values/1000
-            pressure_time = pressure_time + dt_obj.timestamp()
-        else:
-            pressure_time = self.pressure_df['Time'].map(datetime_to_sec).values
-
-        self.pressure_df['sec'] = pressure_time
-        data_sync.set_bounds(pressure_time, self.study_start, self.study_end)
-        self.pressure_df = data_sync.sync_df(self.pressure_df)
-
-    def sync_accel_df(self):
-        data_sync = DataSynchronizer()
-
-        cols = self.accel_df.columns
-        if 'Year' in cols:
-            year = int(self.accel_df['Year'].values[0])
-            month = int(self.accel_df['Month'].values[0])
-            day = int(self.accel_df['Day'].values[0])
-            dt_fmt = "%Y/%m/%d"
-            dt_str = f"{year}/{month}/{day}"
-            dt_obj = datetime.strptime(dt_str, dt_fmt)
-            accel_time = self.accel_df['ms'].interpolate().values/1000
-            accel_time = accel_time + dt_obj.timestamp()
-        else:
-            accel_time = self.accel_df['Time'].map(datetime_to_sec).values
-
-        self.accel_df['sec'] = accel_time
-        data_sync.set_bounds(accel_time, self.study_start, self.study_end)
-        self.accel_df = data_sync.sync_df(self.accel_df)
-
-    def sync_summary_df(self):
-        data_sync = DataSynchronizer()
-
-        cols = self.summary_df.columns
-        if 'Year' in cols:
-            year = int(self.summary_df['Year'].values[0])
-            month = int(self.summary_df['Month'].values[0])
-            day = int(self.summary_df['Day'].values[0])
-            dt_fmt = "%Y/%m/%d"
-            dt_str = f"{year}/{month}/{day}"
-            dt_obj = datetime.strptime(dt_str, dt_fmt)
-            summary_times = self.summary_df['ms'].values/1000 + dt_obj.timestamp()
-        else:
-            summary_times = self.summary_df['Time'].map(datetime_to_sec).values
-
-        self.summary_df['sec'] = summary_times
-        data_sync.set_bounds(summary_times, self.study_start, self.study_end)
-        self.summary_df = data_sync.sync_df(self.summary_df)
-
-    def sync_imu_df(self):
-        na_inds = self.imu_df\
-                .loc[pd.isna(self.imu_df['accelerometer']), :].index.values
-        self.imu_df.drop(index=na_inds, inplace=True)
-        imu_times = self.imu_df['timestamp'].values
-
-        ''' S21, S30 has strange time recordings '''
-        mask = imu_times > 3*60*60
-        if mask.any():
-            bad_args = np.arange(0, len(mask))[mask]
-            self.imu_df.drop(index=self.imu_df.iloc[bad_args].index,
-                             inplace=True)
-            # self.imu_df['timestamp'] = self.imu_df['timestamp'].values - \
-            #         self.imu_df['timestamp'].values[0]
-            imu_times = self.imu_df['timestamp'].values
-
-        print(np.mean(1/(imu_times[1:] - imu_times[:-1])))
-        self.imu_df['timestamp_interp'] = imu_times
-        self.imu_df['timestamp_interp'] = self.imu_df['timestamp_interp']\
-                .interpolate()
-
-        data_sync = DataSynchronizer()
-
-        iso_tz = self.imu_hdr['created']
-        tzinfo = pytz.timezone(self.imu_hdr['timezone'])
-        # adjust for UTC
-        start_time = datetime.fromisoformat(iso_tz[:-1]) + timedelta(hours=11)
-        imu_times = self.imu_df['timestamp_interp'].values
-
-        imu_datetimes = [start_time + timedelta(seconds=val) \
-                         for val in imu_times]
-        imu_sec = np.array([time.timestamp() for time in imu_datetimes])
-        self.imu_df['sec'] = imu_sec
-        data_sync.set_bounds(imu_sec, self.study_start, self.study_end)
-        self.imu_df = data_sync.sync_df(self.imu_df)
-
-    def sync_all_df(self):
-        if self.study_start == 0 or self.study_start is None:
-            self.set_timeline()
-        self.sync_pressure_df()
-        self.sync_summary_df()
-        self.sync_accel_df()
-        self.sync_imu_df()
-
-    def get_accel_data(self):
-        accel_cols = self.accel_df.columns
-        if 'Time' in accel_cols:
-            data_cols = ['Vertical', 'Lateral', 'Sagittal']
-        else:
-            data_cols = ['X Data', 'Y Data', 'Z Data']
-        return self.accel_df[data_cols].values
-
 class TFDataPipeline():
     def __init__(self, window_size=60, batch_size=32):
         self.window_size = window_size
diff --git a/regress_rr.py b/regress_rr.py
index 0255ce19f2615fc6663b4462f2bef7bf98d1f5c9..c420b0cae1fd0cee3f1c4fb7636666b6d64949db 100644
--- a/regress_rr.py
+++ b/regress_rr.py
@@ -426,7 +426,10 @@ def df_win_task(w_inds, df, i, cols):
     for col in cols:
         data = w_df[col].values
         # DSP
-        sd_data = (data - np.mean(data, axis=0))/np.std(data, axis=0)
+        if np.std(data, axis=0) > 0:
+            sd_data = (data - np.mean(data, axis=0))/np.std(data, axis=0)
+        else:
+            sd_data = data.copy()
         # ys = cubic_interp(sd_data, BR_FS, FS_RESAMPLE)
         if col != 'bvp':
             filt_out.append(imu_signal_processing(sd_data, fs))
@@ -455,7 +458,7 @@ def df_win_task(w_inds, df, i, cols):
     if 'bvp' in cols:
         xf, yf = do_pad_fft(bvp_filt, fs=fs)
         bv_freq = int(xf[yf.argmax()]*60)
-        y_out['bvp_est'] = bv_freq
+        # y_out['bvp_est'] = bv_freq
 
     return x_out, y_out
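The guard added to df_win_task exists because a flat window has zero standard deviation, and the plain z-score then fills the window with NaN or inf; checking the standard deviation directly covers constant windows as well as all-zero ones. A standalone sketch of the same idea, assuming 1-D windows (the helper name is illustrative, not project code):

    import numpy as np

    def safe_standardize(data: np.ndarray) -> np.ndarray:
        # z-score a window; constant (including all-zero) windows pass
        # through unchanged rather than dividing by a zero std
        sd = np.std(data, axis=0)
        if np.all(sd > 0):
            return (data - np.mean(data, axis=0)) / sd
        return data.copy()

    assert not np.isnan(safe_standardize(np.zeros(100))).any()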
@@ -590,9 +593,8 @@ def load_and_sync_xsens(subject, sens_list:list=['imu', 'bvp']):
     return xsens_df
 
 def load_tsfresh(xsens_df, home_dir,
-                 sens_list:list=['imu', 'bvp'],
                  window_size=12, window_shift=0.2, fs=IMU_FS,
-                 overwrite=False, data_cols=None):
+                 overwrite=False, data_cols=None, prefix=None):
     """
     Loads the tsfresh pickle file, or generates if it does not exist for the
     given configuration
@@ -607,16 +609,26 @@ def load_tsfresh(xsens_df, home_dir,
     pd.DataFrame
     """
-    raise NotImplementedError("To be implemented")
+    assert data_cols is not None, "invalid selection for data columns"
+    assert 'acc_x' in xsens_df.columns.tolist() and \
+            'gyro_x' in xsens_df.columns.tolist() and \
+            'bvp' in xsens_df.columns.tolist(), \
+            "Does not include the full required dataset. Must have both IMU and BVP"
-    # make home directory
+    # raise NotImplementedError("To be implemented")
-    assert data_cols is not None, "invalid selection for data columns"
-    pkl_file = join(project_dir, 'tsfresh.pkl')
+    if prefix is not None:
+        pkl_fname = f'{prefix}__winsize_{window_size}__winshift_{window_shift}__tsfresh.pkl'
+    else:
+        pkl_fname = f'winsize_{window_size}__winshift_{window_shift}__tsfresh.pkl'
+
+    pkl_dir = join(home_dir,
+                   f'tsfresh__winsize_{window_size}__winshift_{window_shift}')
+    pkl_file = join(pkl_dir, pkl_fname)
+    if not exists(pkl_dir): mkdir(pkl_dir)
 
     if exists(pkl_file) and not overwrite:
         return pd.read_pickle(pkl_file)
-    ipdb.set_trace()
 
     x_df, y_df = get_df_windows(xsens_df,
                                 df_win_task,
                                 window_size=window_size,
@@ -630,6 +642,9 @@ def load_tsfresh(xsens_df, home_dir,
         # default_fc_parameters=tsfresh_settings.MinimalFCParameters(),
     )
     x_features_df.fillna(0, inplace=True)
+    x_features_df.reset_index(drop=True, inplace=True)
+    x_features_df = x_features_df.reindex(sorted(x_features_df.columns.values),
+                                          axis=1)
 
     cols = x_features_df.columns.values
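The path logic added to load_tsfresh groups every cached pickle for one window configuration under a single directory, with the prefix distinguishing the test split from the per-cpm calibration splits. A sketch of the same layout as a pure function (the helper itself is illustrative; the strings match the f-strings in the hunk above):

    from os.path import join

    def tsfresh_pkl_path(home_dir, window_size=12, window_shift=0.2, prefix=None):
        # one directory per window configuration, one pickle per split
        stem = f'winsize_{window_size}__winshift_{window_shift}'
        fname = f'{prefix}__{stem}__tsfresh.pkl' if prefix else f'{stem}__tsfresh.pkl'
        return join(home_dir, f'tsfresh__{stem}', fname)

    # tsfresh_pkl_path('/data/Pilot03', prefix='calcpm_7.0')
    # -> '/data/Pilot03/tsfresh__winsize_12__winshift_0.2/calcpm_7.0__winsize_12__winshift_0.2__tsfresh.pkl'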
@@ -964,7 +979,7 @@ def imu_rr_dsp(subject,
     do_minirocket = False
     use_tsfresh = False
-    overwrite_tsfresh = True
+    overwrite_tsfresh = False
 
     train_size = int(train_len)
 
     config = {'window_size'   : window_size,
@@ -1050,8 +1065,6 @@ def sens_rr_model(subject,
                   test_standing=False,
                   data_input:str='imu+bvp',
                   ):
-    # TODO:
-    # implement tsfresh
     """Loads, preprocesses, and trains a select model using the configured settings.
 
     Attributes
@@ -1106,7 +1119,7 @@ def sens_rr_model(subject,
     do_minirocket = False
     use_tsfresh = False
-    overwrite_tsfresh = True
+    overwrite_tsfresh = overwrite
 
     train_size = int(train_len)
 
     if feature_method == 'tsfresh':
@@ -1143,25 +1156,38 @@
     cal_df = get_cal_data(event_df, xsens_df)
 
+    # include standing or not
+    test_df_tmp = get_test_data(cal_df, activity_df, xsens_df, test_standing)
+    test_df = pd.concat([df for df in test_df_tmp['data']], axis=0)
+
     if use_tsfresh:
-        xsens_df = load_tsfresh(xsens_df,
-                                project_dir,
-                                sens_list=sens_list,
+        cal_df_list = []
+        test_df = load_tsfresh(test_df,
+                               pfh.home_directory,
+                               window_size=window_size,
+                               window_shift=window_shift,
+                               fs=fs,
+                               overwrite=overwrite_tsfresh,
+                               data_cols=data_cols,
+                               prefix='test',
+                               )
+        for index, row in cal_df.iterrows():
+            data = load_tsfresh(row['data'],
+                                pfh.home_directory,
                                 window_size=window_size,
                                 window_shift=window_shift,
                                 fs=fs,
                                 overwrite=overwrite_tsfresh,
                                 data_cols=data_cols,
+                                prefix=f"calcpm_{row['cpm']}"
                                 )
+            cal_df_list.append({'cpm': row['cpm'], 'data': data})
-    # include standing or not
-    test_df_tmp = get_test_data(cal_df, activity_df, xsens_df, test_standing)
-    test_df = pd.concat([df for df in test_df_tmp['data']], axis=0)
-
-    x_test_df, y_test_df = get_df_windows(
-        test_df, df_win_task, window_size=window_size,
-        window_shift=window_shift, fs=fs, cols=data_cols)
-
+        cal_df = pd.DataFrame(cal_df_list)
+    else:
+        x_test_df, y_test_df = get_df_windows(
+            test_df, df_win_task, window_size=window_size,
+            window_shift=window_shift, fs=fs, cols=data_cols)
 
     for combi in combinations(cal_df[cal_str].values, train_len):
         combi_str = "-".join([str(x) for x in combi])
@@ -1178,7 +1204,7 @@
             train_df_list.append(data_df)
 
         train_df = pd.concat(train_df_list)
-        assert np.isin(train_df.index.values, test_df.index.values).any()==False,\
+        assert np.isin(train_df.sec.values, test_df.sec.values).any()==False,\
                 "overlapping test and train data"
 
         print("train")
@@ -1209,10 +1235,13 @@
             x_train = minirocket.fit_transform(x_train)
             x_test = minirocket.transform(x_test)
         elif use_tsfresh:
-            x_train = train_df.iloc[:, 3:].values
+            y_cols = ['sec', 'br', 'pss', 'cpm']
+            x_cols = [col for col in train_df.columns.values if col not in y_cols]
+            x_train = train_df[x_cols].values
             y_train = train_df['cpm'].values.reshape(-1, 1)
-            x_test = test_df.iloc[:, 3:].values
+            x_test = test_df[x_cols].values
             y_test = test_df[lbl_str].values.reshape(-1, 1)
+            y_test_df = test_df[y_cols[:-1]]
         else:
             x_train_df, y_train_df = get_df_windows(train_df,
                                                     df_win_task,
@@ -1303,7 +1332,7 @@ def arg_parser():
                         default='pss',
                         )
     parser.add_argument('-tl', '--train_len', type=int,
-                        default=3,
+                        default=5,
                         help='minutes of data to use for calibration'
                         )
     parser.add_argument('-d', '--data_input', type=str,
@@ -1311,7 +1340,7 @@ def arg_parser():
                         help='imu, bvp, imu+bvp: select data cols for input'
                         )
     parser.add_argument('-ts', '--test_standing', type=int,
-                        default=0,
+                        default=1,
                         help='1 or 0 input, choose if standing data will be '\
                         'recorded or not'
                         )
@@ -1320,7 +1349,7 @@
 
 if __name__ == '__main__':
     np.random.seed(100)
-    n_subject_max = 2
+    n_subject_max = 3
     args = arg_parser()
 
     # Load command line arguments
@@ -1356,7 +1385,7 @@ if __name__ == '__main__':
                     )
     else:
         subjects = [subject_pre_string+str(i).zfill(2) for i in \
-                    range(1, n_subject_max+1) if i not in imu_issues]
+                    range(2, n_subject_max+1)]
 
     rr_func = partial(sens_rr_model,
                       window_size=window_size,
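In the elif use_tsfresh branch above, features are now picked by name rather than by position (iloc[:, 3:]), which is what keeps the train and test matrices the same width (compare the PolynomialFeatures failure in logs/singularity_2776834.out). The pattern on a toy frame (column names illustrative):

    import pandas as pd

    y_cols = ['sec', 'br', 'pss', 'cpm']
    df = pd.DataFrame({'sec': [0.0], 'br': [12.0], 'pss': [14.0], 'cpm': [15.0],
                       'acc_x__mean': [0.1], 'bvp__variance': [0.9]})
    # name-based selection: any frame with these columns yields an
    # identically ordered feature matrix
    x_cols = sorted(c for c in df.columns if c not in y_cols)
    x = df[x_cols].values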
@@ -1370,12 +1399,6 @@ if __name__ == '__main__':
                       data_input=data_input,
                       )
 
-    if mdl_str in ['fnn', 'lstm', 'cnn1d', 'elastic', 'ard', 'xgboost']:
-        for subject in subjects:
-            rr_func(subject)
-    else:
-        ncpu = min(len(subjects), cpu_count())
-        with Pool(ncpu) as p:
-            p.map(rr_func, subjects)
-
+    for subject in subjects:
+        rr_func(subject)
     print(args)
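The main block now runs subjects serially for every model type. If the parallel path is ever needed again, the removed branch amounted to the following:

    from multiprocessing import Pool, cpu_count

    def run_parallel(rr_func, subjects):
        # one worker per subject, capped at the machine's core count
        ncpu = min(len(subjects), cpu_count())
        with Pool(ncpu) as p:
            p.map(rr_func, subjects)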