Commit 1b5a2fc2 authored by Raymond Chia

tsfresh tested

parent f75db665
@@ -32,7 +32,7 @@ from modules.animationplotter import AnimationPlotter, AnimationPlotter2D
from modules.digitalsignalprocessing import imu_signal_processing
from modules.digitalsignalprocessing import vectorized_slide_win
from modules.digitalsignalprocessing import get_video_features
from modules.datapipeline import SubjectData, datetime_to_sec\
from modules.datapipeline import datetime_to_sec\
        ,sec_to_datetime, DataSynchronizer
from modules.datapipeline import get_file_list, load_files_conditions
from modules.evaluations import Evaluation
@@ -183,16 +183,14 @@ def plot_video_features(video_fname, nframes=1000):
    ipdb.set_trace()

def map_imu_tsfresh_subject(subject,
                            tlx_df=None,
                            conditions=['R', 'L0', 'L1', 'L2', 'L3'],
                            window_size=5, window_shift=0.2):
    sbj_data = SubjectData(subject=subject)
    sbj_data.set_imu_fname()
    sbj_dir = sbj_data.subject_dir
    pfh = ProjectFileHandler({})
    pfh.set_home_directory(join(DATA_DIR, 'subject_specific', subject))
    for condition in conditions:
        tsfresh_pkl = path_join(
            sbj_dir, "{0}__winsize_{1}__winshift_{2}_imu_tsfresh_df.pkl"\
            .format(condition, window_size, window_shift))
            pfh.home_directory,
            "{0}__winsize_{1}__winshift_{2}_tsfresh_df.pkl"\
            .format(condition, window_size, window_shift))
        if path_exists(tsfresh_pkl): continue
        print(f"trying {subject} for {condition}")
        data_df = load_df(subject, condition)
...
2023-11-26 00:44:07.923231: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Traceback (most recent call last):
File "regress_rr.py", line 23, in <module>
import tensorflow as tf
File "/home/rqchia/.local/lib/python3.8/site-packages/tensorflow/__init__.py", line 54, in <module>
from ._api.v2 import data
File "/home/rqchia/.local/lib/python3.8/site-packages/tensorflow/_api/v2/data/__init__.py", line 11, in <module>
from . import experimental
File "/home/rqchia/.local/lib/python3.8/site-packages/tensorflow/_api/v2/data/experimental/__init__.py", line 87, in <module>
from . import service
File "<frozen importlib._bootstrap>", line 991, in _find_and_load
File "<frozen importlib._bootstrap>", line 975, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 671, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 844, in exec_module
File "<frozen importlib._bootstrap_external>", line 939, in get_code
File "<frozen importlib._bootstrap_external>", line 1037, in get_data
KeyboardInterrupt
2023-11-26 00:44:26.282311: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Using TensorFlow backend
Namespace(data_input='imu-bvp', feature_method='tsfresh', lbl_str='pss', model='linreg', overwrite=0, subject=3, test_standing=1, train_len=5, win_shift=0.2, win_size=12)
unable to find matching config id
Data id not set, auto assigned to: 2
Dependency not available for matrix_profile, this feature will be disabled!
Feature Extraction: 100%|██████████| 65/65 [00:01<00:00, 63.53it/s]
Dependency not available for matrix_profile, this feature will be disabled!
Feature Extraction: 100%|██████████| 47/47 [00:00<00:00, 997.74it/s]
Dependency not available for matrix_profile, this feature will be disabled!
Feature Extraction: 100%|██████████| 47/47 [00:00<00:00, 975.44it/s]
Dependency not available for matrix_profile, this feature will be disabled!
Feature Extraction: 100%|██████████| 47/47 [00:00<00:00, 1034.00it/s]
Dependency not available for matrix_profile, this feature will be disabled!
Feature Extraction: 100%|██████████| 47/47 [00:00<00:00, 1018.94it/s]
Dependency not available for matrix_profile, this feature will be disabled!
Feature Extraction: 100%|██████████| 49/49 [00:00<00:00, 1008.80it/s]
Dependency not available for matrix_profile, this feature will be disabled!
Feature Extraction: 100%|██████████| 47/47 [00:00<00:00, 950.21it/s]
Dependency not available for matrix_profile, this feature will be disabled!
Feature Extraction: 100%|██████████| 49/49 [00:00<00:00, 1035.00it/s]
imu-bvp_rr_Pilot03_id2_combi5.0-7.0-10.0-12.0-15.0
train
(101, 74)
test
(978, 73)
---LinearRegression---
Traceback (most recent call last):
File "regress_rr.py", line 1374, in <module>
sens_rr_model(subject,
File "regress_rr.py", line 1268, in sens_rr_model
x_test = transforms.transform(x_test)
File "/usr/local/lib/python3.8/dist-packages/sklearn/utils/_set_output.py", line 140, in wrapped
data_to_wrap = f(self, X, *args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/sklearn/preprocessing/_polynomial.py", line 432, in transform
X = self._validate_data(
File "/usr/local/lib/python3.8/dist-packages/sklearn/base.py", line 625, in _validate_data
self._check_n_features(X, reset=reset)
File "/usr/local/lib/python3.8/dist-packages/sklearn/base.py", line 414, in _check_n_features
raise ValueError(
ValueError: X has 70 features, but PolynomialFeatures is expecting 71 features as input.
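The ValueError above is a feature-width mismatch, not a modelling failure: scikit-learn transformers record the number of columns seen at fit time (n_features_in_) and refuse to transform a matrix of any other width, and here the tsfresh test frame came out one column narrower than the train frame (70 vs. 71). A minimal sketch of the failure mode and the column-alignment fix that later hunks in this commit apply via a sorted reindex (all names below are illustrative, not from the repo):

import numpy as np
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures

# Hypothetical tsfresh-style frames: the test frame is missing one column.
train_df = pd.DataFrame(np.random.rand(8, 3),
                        columns=['acc_x__mean', 'acc_x__std', 'bvp__mean'])
test_df = pd.DataFrame(np.random.rand(4, 2),
                       columns=['acc_x__std', 'acc_x__mean'])

poly = PolynomialFeatures(degree=2)
poly.fit(train_df.values)            # records n_features_in_ == 3
# poly.transform(test_df.values)     # ValueError: X has 2 features, but ...

# Fix: force both frames onto one sorted column set before transforming.
cols = sorted(train_df.columns)
train_aligned = train_df.reindex(columns=cols)
test_aligned = test_df.reindex(columns=cols, fill_value=0.0)
print(poly.transform(test_aligned.values).shape)   # (4, 10)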
2023-11-26 01:21:46.434356: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Using TensorFlow backend
Namespace(data_input='imu-bvp', feature_method='tsfresh', lbl_str='pss', model='linreg', overwrite=1, subject=3, test_standing=1, train_len=5, win_shift=0.2, win_size=12)
Using pre-set data id: 2
Dependency not available for matrix_profile, this feature will be disabled!
Feature Extraction: 0%| | 0/65 [00:00<?, ?it/s]
\ No newline at end of file
@@ -455,279 +455,6 @@ class DataSynchronizer():
        self.start_ind = start_ind
        self.end_ind = end_ind
class SubjectData(DataImporter):
    ''' Loads in data for the rigid body, breathing rate, summary files
    and syncs them accordingly '''
    def __init__(self, condition='M', subject='S01'):
        super().__init__()
        self.condition = condition
        self.subject = subject
        if subject[0] != 'S':
            self.subject_id = subject
        else:
            self.subject_id = int(re.search(r'\d+', subject).group())
        self.study_start = 0
        self.study_end = 0

        self.subject_dir = path_join(self.parent_dir, self.subject)

        self.pressure_df = pd.DataFrame()
        self.summary_df = pd.DataFrame()
        self.accel_df = pd.DataFrame()
        self.imu_df = pd.DataFrame()

    def get_cond_file(self, files):
        for f in files:
            if self.condition in f.split(sep)[-1] and \
               self.subject in f.split(sep)[-1]:
                return f
        return ''

    def list_sub_dirs(self, parent_dir, endswith=None):
        reg_str = r'[0-9]+$'
        if endswith is not None:
            reg_str = r'[0-9]+{0}$'.format(endswith)
        regex = re.compile(reg_str)
        sub_dirs = [
            path_join(parent_dir, d) for d in listdir(parent_dir) if \
            (
                isdir(path_join(parent_dir, d)) and bool(regex.search(d))
            )
        ]
        return sorted(sub_dirs)
    def check_times(self, sub_dirs, is_utc=False):
        ''' Parses sub directory names to datetime and checks against mat_start
        and end '''
        sep = self.sep
        if is_utc:
            imu_hdr_files = [path_join(sub_dir, 'recording.g3')\
                             for sub_dir in sub_dirs]
            hdrs = [pd.read_json(imu_hdr_file, orient='index')\
                    for imu_hdr_file in imu_hdr_files]
            times = [hdr.to_dict().pop(0)['created'] \
                     for hdr in hdrs]
            times = [datetime.fromisoformat(time[:-1]) for time in times]
            times = [(time.timestamp() + timedelta(hours=11).seconds) for time in times]
        else:
            times = [datetime_to_sec(sub_dir.split(sep)[-1])\
                     for sub_dir in sub_dirs]

        sel_dir = sub_dirs[-1]
        for i, time in enumerate(times[:-1]):
            if self.study_start > time and self.study_start < times[i+1]:
                sel_dir = sub_dirs[i]
        return sel_dir
    def set_pressure_fname(self):
        subject_dir = self.subject_dir
        sub_dirs = self.list_sub_dirs(subject_dir)
        sub_dir = sub_dirs[0]
        if len(sub_dirs) > 1:
            # Check directory times with timeline
            sub_dir = self.check_times(sub_dirs)
        pressure_glob = path_join(sub_dir, 'BR*.csv')
        pressure_files = sorted(glob.glob(pressure_glob))
        if not pressure_files:
            dt_info = sub_dir.split(sep)[-1]
            pressure_glob = path_join(sub_dir, '*_Breathing.csv')
            pressure_files = sorted(glob.glob(pressure_glob))
        self.pressure_fname = pressure_files[-1]

    def set_summary_fname(self):
        subject_dir = self.subject_dir
        sub_dirs = self.list_sub_dirs(subject_dir)
        sub_dir = sub_dirs[0]
        if len(sub_dirs) > 1:
            # Check directory times with timeline
            sub_dir = self.check_times(sub_dirs)
        summary_glob = path_join(sub_dir, 'Summary*.csv')
        summary_files = sorted(glob.glob(summary_glob))
        if not summary_files:
            dt_info = sub_dir.split(sep)[-1]
            summary_glob = path_join(sub_dir, dt_info+'_Summary*.csv')
            summary_files = sorted(glob.glob(summary_glob))
        self.summary_fname = summary_files[-1]
    def set_imu_fname(self):
        subject_dir = self.subject_dir
        sub_dirs = self.list_sub_dirs(subject_dir, endswith='Z')
        sub_dir = sub_dirs[0]
        if len(sub_dirs) > 1:
            sub_dir = self.check_times(sub_dirs, is_utc=True)
        imu_glob = path_join(sub_dir, 'imu*')
        imu_files = sorted(glob.glob(imu_glob))
        self.imu_fname = imu_files[-1]
        imu_hdr_glob = path_join(sub_dir, 'recording.g3')
        imu_hdr_files = sorted(glob.glob(imu_hdr_glob))
        self.imu_hdr_fname = imu_hdr_files[-1]
        video_fname = path_join(sub_dir, 'scenevideo.mp4')
        if path_exists(video_fname):
            self.video_fname = video_fname

    def set_accel_fname(self):
        subject_dir = self.subject_dir
        sub_dirs = self.list_sub_dirs(subject_dir)
        sub_dir = sub_dirs[0]
        if len(sub_dirs) > 1:
            # Check directory times with timeline
            sub_dir = self.check_times(sub_dirs)
        accel_glob = path_join(sub_dir, 'Accel*.csv')
        accel_files = sorted(glob.glob(accel_glob))
        if not accel_files:
            dt_info = sub_dir.split(sep)[-1]
            accel_glob = path_join(sub_dir, '*_Accel.csv')
            accel_files = sorted(glob.glob(accel_glob))
            accel_files = [f for f in accel_files if 'g' not in \
                           f.lower().split(sep)[-1]]
        self.accel_fname = accel_files[-1]
    def set_timeline(self):
        times_glob = path_join(self.subject_dir, f'*.csv')
        times_files = sorted(glob.glob(times_glob))
        self.timeline_fname = self.get_cond_file(times_files)
        self.timeline_df = self.import_time_data()

        mat_time = self.timeline_df['Timestamps'].map(mat_to_sec)
        mat_start_ind = self.timeline_df.index[
            self.timeline_df['Event']=='Start Test'
        ].tolist()[0]
        mat_start = mat_time.values[mat_start_ind]
        mat_end = mat_time.values[-1]

        self.study_start = mat_start
        self.study_end = mat_end

    def set_fnames(self):
        self.set_pressure_fname()
        self.set_summary_fname()
        self.set_imu_fname()
        self.set_accel_fname()

    def load_dataframes(self):
        self.timeline_df = self.import_time_data()
        self.pressure_df = self.import_labels(self.pressure_fname)
        self.summary_df = self.import_labels(self.summary_fname)
        self.accel_df = self.import_labels(self.accel_fname)
        self.imu_df, self.imu_hdr = self.import_imu_data()
    def sync_pressure_df(self):
        data_sync = DataSynchronizer()

        cols = self.pressure_df.columns
        if 'Year' in cols:
            year = int(self.pressure_df['Year'].values[0])
            month = int(self.pressure_df['Month'].values[0])
            day = int(self.pressure_df['Day'].values[0])
            dt_fmt = "%Y/%m/%d"
            dt_str = f"{year}/{month}/{day}"
            dt_obj = datetime.strptime(dt_str, dt_fmt)
            pressure_time = self.pressure_df['ms'].interpolate().values/1000
            pressure_time = pressure_time + dt_obj.timestamp()
        else:
            pressure_time = self.pressure_df['Time'].map(datetime_to_sec).values
        self.pressure_df['sec'] = pressure_time

        data_sync.set_bounds(pressure_time, self.study_start, self.study_end)
        self.pressure_df = data_sync.sync_df(self.pressure_df)

    def sync_accel_df(self):
        data_sync = DataSynchronizer()

        cols = self.accel_df.columns
        if 'Year' in cols:
            year = int(self.accel_df['Year'].values[0])
            month = int(self.accel_df['Month'].values[0])
            day = int(self.accel_df['Day'].values[0])
            dt_fmt = "%Y/%m/%d"
            dt_str = f"{year}/{month}/{day}"
            dt_obj = datetime.strptime(dt_str, dt_fmt)
            accel_time = self.accel_df['ms'].interpolate().values/1000
            accel_time = accel_time + dt_obj.timestamp()
        else:
            accel_time = self.accel_df['Time'].map(datetime_to_sec).values
        self.accel_df['sec'] = accel_time

        data_sync.set_bounds(accel_time, self.study_start, self.study_end)
        self.accel_df = data_sync.sync_df(self.accel_df)

    def sync_summary_df(self):
        data_sync = DataSynchronizer()

        cols = self.summary_df.columns
        if 'Year' in cols:
            year = int(self.summary_df['Year'].values[0])
            month = int(self.summary_df['Month'].values[0])
            day = int(self.summary_df['Day'].values[0])
            dt_fmt = "%Y/%m/%d"
            dt_str = f"{year}/{month}/{day}"
            dt_obj = datetime.strptime(dt_str, dt_fmt)
            summary_times = self.summary_df['ms'].values/1000 + dt_obj.timestamp()
        else:
            summary_times = self.summary_df['Time'].map(datetime_to_sec).values
        self.summary_df['sec'] = summary_times

        data_sync.set_bounds(summary_times, self.study_start, self.study_end)
        self.summary_df = data_sync.sync_df(self.summary_df)
    def sync_imu_df(self):
        na_inds = self.imu_df\
                .loc[pd.isna(self.imu_df['accelerometer']), :].index.values
        self.imu_df.drop(index=na_inds, inplace=True)
        imu_times = self.imu_df['timestamp'].values

        ''' S21, S30 has strange time recordings '''
        mask = imu_times > 3*60*60
        if mask.any():
            bad_args = np.arange(0, len(mask))[mask]
            self.imu_df.drop(index=self.imu_df.iloc[bad_args].index,
                             inplace=True)
            # self.imu_df['timestamp'] = self.imu_df['timestamp'].values - \
            #         self.imu_df['timestamp'].values[0]
            imu_times = self.imu_df['timestamp'].values

        print(np.mean(1/(imu_times[1:] - imu_times[:-1])))
        self.imu_df['timestamp_interp'] = imu_times
        self.imu_df['timestamp_interp'] = self.imu_df['timestamp_interp']\
                .interpolate()

        data_sync = DataSynchronizer()

        iso_tz = self.imu_hdr['created']
        tzinfo = pytz.timezone(self.imu_hdr['timezone'])
        # adjust for UTC
        start_time = datetime.fromisoformat(iso_tz[:-1]) + timedelta(hours=11)
        imu_times = self.imu_df['timestamp_interp'].values

        imu_datetimes = [start_time + timedelta(seconds=val) \
                         for val in imu_times]
        imu_sec = np.array([time.timestamp() for time in imu_datetimes])
        self.imu_df['sec'] = imu_sec

        data_sync.set_bounds(imu_sec, self.study_start, self.study_end)
        self.imu_df = data_sync.sync_df(self.imu_df)
    def sync_all_df(self):
        if self.study_start == 0 or self.study_start is None:
            self.set_timeline()
        self.sync_pressure_df()
        self.sync_summary_df()
        self.sync_accel_df()
        self.sync_imu_df()

    def get_accel_data(self):
        accel_cols = self.accel_df.columns
        if 'Time' in accel_cols:
            data_cols = ['Vertical', 'Lateral', 'Sagittal']
        else:
            data_cols = ['X Data', 'Y Data', 'Z Data']
        return self.accel_df[data_cols].values

class TFDataPipeline():
    def __init__(self, window_size=60, batch_size=32):
        self.window_size = window_size
...
@@ -426,7 +426,10 @@ def df_win_task(w_inds, df, i, cols):
    for col in cols:
        data = w_df[col].values
        # DSP
        sd_data = (data - np.mean(data, axis=0))/np.std(data, axis=0)
        if sum(np.abs(data)) > 0:
            sd_data = (data - np.mean(data, axis=0))/np.std(data, axis=0)
        else:
            sd_data = data.copy()
        # ys = cubic_interp(sd_data, BR_FS, FS_RESAMPLE)
        if col != 'bvp':
            filt_out.append(imu_signal_processing(sd_data, fs))
@@ -455,7 +458,7 @@ def df_win_task(w_inds, df, i, cols):
    if 'bvp' in cols:
        xf, yf = do_pad_fft(bvp_filt, fs=fs)
        bv_freq = int(xf[yf.argmax()]*60)
        y_out['bvp_est'] = bv_freq
        # y_out['bvp_est'] = bv_freq
    return x_out, y_out
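For context on the standardization guard added to df_win_task above: a window that is identically zero (for example, a dropped BVP channel) has zero standard deviation, so the z-score (data - mean)/std produces NaNs that then poison every feature computed from it. Note the check only covers the all-zero case; a constant non-zero window would still divide by zero. A standalone sketch of the guarded standardization, assuming plain 1-D numpy windows:

import numpy as np

def standardize_window(data: np.ndarray) -> np.ndarray:
    """Z-score a 1-D window, leaving silent (all-zero) windows untouched."""
    if np.sum(np.abs(data)) > 0:
        return (data - np.mean(data)) / np.std(data)
    return data.copy()  # avoid 0/0 -> NaN on dead channels

print(standardize_window(np.array([1.0, 2.0, 3.0])))  # [-1.2247  0.  1.2247]
print(standardize_window(np.zeros(3)))                # [0. 0. 0.], no NaNs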
@@ -590,9 +593,8 @@ def load_and_sync_xsens(subject, sens_list:list=['imu', 'bvp']):
    return xsens_df

def load_tsfresh(xsens_df, home_dir,
                 sens_list:list=['imu', 'bvp'],
                 window_size=12, window_shift=0.2, fs=IMU_FS,
                 overwrite=False, data_cols=None):
                 overwrite=False, data_cols=None, prefix=None):
    """
    Loads the tsfresh pickle file, or generates it if it does not exist for
    the given configuration
@@ -607,16 +609,26 @@ def load_tsfresh(xsens_df, home_dir,
    pd.DataFrame
    """
    raise NotImplementedError("To be implemented")
    assert data_cols is not None, "invalid selection for data columns"
    assert 'acc_x' in xsens_df.columns.tolist() and \
           'gyro_x' in xsens_df.columns.tolist() and \
           'bvp' in xsens_df.columns.tolist(), \
        "Does not include the full required dataset. Must have both IMU and BVP"

    # make home directory
    # raise NotImplementedError("To be implemented")
    assert data_cols is not None, "invalid selection for data columns"
    pkl_file = join(project_dir, 'tsfresh.pkl')
    if prefix is not None:
        pkl_fname = f'{prefix}__winsize_{window_size}__winshift_{window_shift}__tsfresh.pkl'
    else:
        pkl_fname = f'winsize_{window_size}__winshift_{window_shift}__tsfresh.pkl'
    pkl_dir = join(home_dir,
                   f'tsfresh__winsize_{window_size}__winshift_{window_shift}')
    pkl_file = join(pkl_dir, pkl_fname)
    if not exists(pkl_dir): mkdir(pkl_dir)
    if exists(pkl_file) and not overwrite:
        return pd.read_pickle(pkl_file)

    ipdb.set_trace()
    x_df, y_df = get_df_windows(xsens_df,
                                df_win_task,
                                window_size=window_size,
@@ -630,6 +642,9 @@ def load_tsfresh(xsens_df, home_dir,
        # default_fc_parameters=tsfresh_settings.MinimalFCParameters(),
    )
    x_features_df.fillna(0, inplace=True)
    x_features_df.reset_index(drop=True, inplace=True)
    x_features_df = x_features_df.reindex(sorted(x_features_df.columns.values),
                                          axis=1)
    cols = x_features_df.columns.values
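The rewritten load_tsfresh above is essentially a per-configuration on-disk cache: the pickle name is keyed by an optional prefix ('test', or 'calcpm_<cpm>' per calibration rate) plus the window size and shift, existing pickles are returned unless overwrite is set, and the extracted frame is column-sorted so repeated runs line up. A self-contained sketch of that caching pattern, with features_fn standing in (hypothetically) for the windowing and tsfresh extraction step:

from os import makedirs
from os.path import exists, join

import pandas as pd

def cached_tsfresh(features_fn, home_dir, window_size=12, window_shift=0.2,
                   prefix=None, overwrite=False):
    """Return cached tsfresh features, recomputing only when asked."""
    stem = f'winsize_{window_size}__winshift_{window_shift}'
    pkl_fname = f'{prefix}__{stem}__tsfresh.pkl' if prefix else f'{stem}__tsfresh.pkl'
    pkl_dir = join(home_dir, f'tsfresh__{stem}')
    makedirs(pkl_dir, exist_ok=True)

    pkl_file = join(pkl_dir, pkl_fname)
    if exists(pkl_file) and not overwrite:
        return pd.read_pickle(pkl_file)

    df = features_fn()
    df = df.fillna(0).reindex(sorted(df.columns), axis=1)  # stable column order
    df.to_pickle(pkl_file)
    return df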
@@ -964,7 +979,7 @@ def imu_rr_dsp(subject,
    do_minirocket = False
    use_tsfresh = False
    overwrite_tsfresh = True
    overwrite_tsfresh = False

    train_size = int(train_len)

    config = {'window_size' : window_size,
@@ -1050,8 +1065,6 @@ def sens_rr_model(subject,
                  test_standing=False,
                  data_input:str='imu+bvp',
                  ):
    # TODO:
    # implement tsfresh
    """Loads, preprocesses, and trains a select model using the configured
    settings.

    Attributes
@@ -1106,7 +1119,7 @@ def sens_rr_model(subject,
    do_minirocket = False
    use_tsfresh = False
    overwrite_tsfresh = True
    overwrite_tsfresh = overwrite
    train_size = int(train_len)

    if feature_method == 'tsfresh':
@@ -1143,25 +1156,38 @@ def sens_rr_model(subject,
    cal_df = get_cal_data(event_df, xsens_df)

    # include standing or not
    test_df_tmp = get_test_data(cal_df, activity_df, xsens_df, test_standing)
    test_df = pd.concat([df for df in test_df_tmp['data']], axis=0)

    if use_tsfresh:
        xsens_df = load_tsfresh(xsens_df,
                                project_dir,
                                sens_list=sens_list,
        cal_df_list = []
        test_df = load_tsfresh(test_df,
                               pfh.home_directory,
                               window_size=window_size,
                               window_shift=window_shift,
                               fs=fs,
                               overwrite=overwrite_tsfresh,
                               data_cols=data_cols,
                               prefix='test',
                               )
        for index, row in cal_df.iterrows():
            data = load_tsfresh(row['data'],
                                pfh.home_directory,
                                window_size=window_size,
                                window_shift=window_shift,
                                fs=fs,
                                overwrite=overwrite_tsfresh,
                                data_cols=data_cols,
                                prefix=f"calcpm_{row['cpm']}"
                                )
            cal_df_list.append({'cpm': row['cpm'], 'data': data})

    # include standing or not
    test_df_tmp = get_test_data(cal_df, activity_df, xsens_df, test_standing)
    test_df = pd.concat([df for df in test_df_tmp['data']], axis=0)
    x_test_df, y_test_df = get_df_windows(
        test_df, df_win_task, window_size=window_size,
        window_shift=window_shift, fs=fs, cols=data_cols)
        cal_df = pd.DataFrame(cal_df_list)
    else:
        x_test_df, y_test_df = get_df_windows(
            test_df, df_win_task, window_size=window_size,
            window_shift=window_shift, fs=fs, cols=data_cols)

    for combi in combinations(cal_df[cal_str].values, train_len):
        combi_str = "-".join([str(x) for x in combi])
@@ -1178,7 +1204,7 @@ def sens_rr_model(subject,
            train_df_list.append(data_df)
        train_df = pd.concat(train_df_list)

        assert np.isin(train_df.index.values, test_df.index.values).any()==False,\
        assert np.isin(train_df.sec.values, test_df.sec.values).any()==False,\
            "overlapping test and train data"

        print("train")
@@ -1209,10 +1235,13 @@ def sens_rr_model(subject,
            x_train = minirocket.fit_transform(x_train)
            x_test = minirocket.transform(x_test)
        elif use_tsfresh:
            x_train = train_df.iloc[:, 3:].values
            y_cols = ['sec', 'br', 'pss', 'cpm']
            x_cols = [col for col in train_df.columns.values if col not in y_cols]
            x_train = train_df[x_cols].values
            y_train = train_df['cpm'].values.reshape(-1, 1)
            x_test = test_df.iloc[:, 3:].values
            x_test = test_df[x_cols].values
            y_test = test_df[lbl_str].values.reshape(-1, 1)
            y_test_df = test_df[y_cols[:-1]]
        else:
            x_train_df, y_train_df = get_df_windows(train_df,
                                                    df_win_task,
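Replacing the positional slice train_df.iloc[:, 3:] with an explicit label-column exclusion makes the feature selection robust to column reordering, which the sorted reindex in load_tsfresh now introduces deliberately. A minimal sketch, assuming the frames carry sec/br/pss/cpm label columns alongside the extracted features:

import pandas as pd

df = pd.DataFrame({'sec': [0.0], 'br': [15.0], 'pss': [14.0], 'cpm': [12.0],
                   'acc_x__mean': [0.1], 'gyro_x__std': [0.2]})

y_cols = ['sec', 'br', 'pss', 'cpm']
x_cols = [c for c in df.columns if c not in y_cols]

x = df[x_cols].values                # features only, order-independent
y = df['cpm'].values.reshape(-1, 1)  # calibration label
print(x_cols)                        # ['acc_x__mean', 'gyro_x__std']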
@@ -1303,7 +1332,7 @@ def arg_parser():
                        default='pss',
                        )
    parser.add_argument('-tl', '--train_len', type=int,
                        default=3,
                        default=5,
                        help='minutes of data to use for calibration'
                        )
    parser.add_argument('-d', '--data_input', type=str,
@@ -1311,7 +1340,7 @@ def arg_parser():
                        help='imu, bvp, imu+bvp: select data cols for input'
                        )
    parser.add_argument('-ts', '--test_standing', type=int,
                        default=0,
                        default=1,
                        help='1 or 0 input, choose if standing data will be '\
                        'recorded or not'
                        )
@@ -1320,7 +1349,7 @@ def arg_parser():
if __name__ == '__main__':
    np.random.seed(100)
    n_subject_max = 2
    n_subject_max = 3
    args = arg_parser()

    # Load command line arguments
@@ -1356,7 +1385,7 @@ if __name__ == '__main__':
        )
    else:
        subjects = [subject_pre_string+str(i).zfill(2) for i in \
                    range(1, n_subject_max+1) if i not in imu_issues]
                    range(2, n_subject_max+1)]

    rr_func = partial(sens_rr_model,
                      window_size=window_size,
@@ -1370,12 +1399,6 @@ if __name__ == '__main__':
                      data_input=data_input,
                      )

    if mdl_str in ['fnn', 'lstm', 'cnn1d', 'elastic', 'ard', 'xgboost']:
        for subject in subjects:
            rr_func(subject)
    else:
        ncpu = min(len(subjects), cpu_count())
        with Pool(ncpu) as p:
            p.map(rr_func, subjects)
    for subject in subjects:
        rr_func(subject)

    print(args)
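The final hunk drops the conditional multiprocessing fan-out and runs every subject sequentially regardless of model type. For reference, both execution paths in one small sketch; rr_func is assumed to be a picklable callable, such as the functools.partial built above:

from multiprocessing import Pool, cpu_count

def run_all(rr_func, subjects, parallel=False):
    """Run the per-subject model fit sequentially or fanned out across CPUs."""
    if not parallel:
        for subject in subjects:      # the path the commit settles on
            rr_func(subject)
    else:
        ncpu = min(len(subjects), cpu_count())
        with Pool(ncpu) as p:         # the removed fan-out path
            p.map(rr_func, subjects)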