diff --git a/3 b/3
new file mode 100644
index 0000000000000000000000000000000000000000..c7387fd49acb768cb64d0c6ae97153bfb5d51767
--- /dev/null
+++ b/3
@@ -0,0 +1,855 @@
+import glob
+from os import makedirs, mkdir
+from os.path import join, exists
+import pandas as pd
+import numpy as np
+import json
+import ipdb
+import re
+import pickle
+import sys
+import time
+from zipfile import ZipFile
+
+import argparse
+from datetime import datetime, timedelta, timezone, timedelta
+import pytz
+
+import matplotlib.pyplot as plt
+from functools import partial
+from collections import Counter
+from itertools import repeat, chain, combinations
+from multiprocessing import Pool, cpu_count
+import tensorflow as tf
+
+from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
+from sklearn.preprocessing import PolynomialFeatures, LabelEncoder
+from sklearn.model_selection import KFold, train_test_split
+from sklearn.metrics import accuracy_score
+
+from tsfresh.feature_extraction import extract_features
+from tsfresh.feature_extraction import settings as tsfresh_settings
+from tsfresh.utilities.string_manipulation import get_config_from_string
+
+from modules.datapipeline import get_file_list, load_and_snip, load_data, \
+        load_split_data, load_harness_data
+from modules.digitalsignalprocessing import vectorized_slide_win as vsw
+from modules.digitalsignalprocessing import imu_signal_processing
+from modules.digitalsignalprocessing import do_pad_fft,\
+        pressure_signal_processing, infer_frequency
+from modules.utils import *
+
+from modules.evaluations import Evaluation
+from modules.datapipeline import get_windowed_data, DataSynchronizer,\
+        parallelize_dataframe
+from modules.datapipeline import ProjectFileHandler
+from models.ardregression import ARDRegressionClass
+from models.knn import KNNClass
+from models.svm import SVMClass
+from models.lda import LDAClass
+from models.svr import SVRClass
+from models.logisticregression import LogisticRegressionClass
+from models.linearregression import LinearRegressionClass
+from models.neuralnet import FNN_HyperModel, LSTM_HyperModel, TunerClass,\
+        CNN1D_HyperModel
+from models.ridgeclass import RidgeClass
+from models.resnet import Regressor_RESNET, Classifier_RESNET
+from models.xgboostclass import XGBoostClass
+
+from pprint import PrettyPrinter
+
+from sktime.transformations.panel.rocket import (
+        MiniRocket,
+        MiniRocketMultivariate,
+        MiniRocketMultivariateVariable,
+)
+
+from config import WINDOW_SIZE, WINDOW_SHIFT, IMU_FS, DATA_DIR, BR_FS
+
+IMU_COLS =  ['acc_x', 'acc_y', 'acc_z', 'gyr_x', 'gyr_y', 'gyr_z']
+
def utc_to_local(utc_dt, tz=None):
    """Interpret a naive datetime as UTC and convert it to `tz`
    (the system local timezone when `tz` is None)."""
    aware_utc = utc_dt.replace(tzinfo=timezone.utc)
    return aware_utc.astimezone(tz=tz)
+
def datetime_from_utc_to_local(utc_datetime):
    """Shift a naive UTC datetime into naive local time.

    The local-vs-UTC offset is sampled at the current moment, so results
    near a DST transition follow today's offset, not the input's.
    """
    ref = time.time()
    local_minus_utc = (datetime.fromtimestamp(ref)
                       - datetime.utcfromtimestamp(ref))
    return utc_datetime + local_minus_utc
+
+# Load data
def load_bioharness_file(f:str, skiprows=0, skipfooter=0, **kwargs):
    """Load one Bioharness CSV file into a DataFrame.

    Parameters
    ----------
    f : str
        Path to the CSV file.
    skiprows, skipfooter : int
        Number of data rows to drop from the start / end (the header
        row is always kept).
    **kwargs
        Forwarded to ``pd.read_csv`` (e.g. ``engine='python'``, which
        is required for a non-zero ``skipfooter``).

    Returns
    -------
    pd.DataFrame
        The file contents. When no 'Time' column exists one is
        synthesised from the Day/Month/Year columns plus the 'ms'
        offset, and formatted as ``%d/%m/%Y %H:%M:%S.%f`` strings.
    """
    method = partial(pd.read_csv, skipinitialspace=True,
                     skiprows=list(range(1, skiprows+1)),  # keep header row 0
                     skipfooter=skipfooter,
                     header=0,
                     **kwargs
                    )
    df = method(f)
    if 'Time' not in df.columns.values:
        # Build a datetime from the date-component columns + ms offset.
        # Some files label the day column 'Date' instead of 'Day'.
        df['Time'] = pd.to_datetime(
            df.rename(columns={'Date':'Day'})[
                ['Day','Month','Year']]) \
                + pd.to_timedelta(df['ms'], unit='ms')
        if pd.isna(df['Time']).any():
            df['Time'].interpolate(inplace=True)
        df['Time'] = pd.to_datetime(df['Time'], format="%d/%m/%Y %H:%M:%S.%f")
        df['Time'] = df['Time'].dt.strftime("%d/%m/%Y %H:%M:%S.%f")
    return df
+
def load_bioharness_files(f_list:list, skiprows=0, skipfooter=0, **kwargs):
    """Load and concatenate several Bioharness CSV files.

    Bug fix: ``skiprows``, ``skipfooter`` and ``**kwargs`` were
    previously packed into an unused ``partial`` and silently ignored —
    each file was loaded with the defaults. They are now forwarded to
    ``load_bioharness_file`` for every file.

    Returns a single DataFrame with a fresh RangeIndex.
    """
    df_list = [load_bioharness_file(f, skiprows=skiprows,
                                    skipfooter=skipfooter, **kwargs)
               for f in f_list]
    return pd.concat(df_list, ignore_index=True)
+
def bioharness_datetime_to_seconds(val):
    """Parse a Bioharness time string (``%d/%m/%Y %H:%M:%S.%f``) into
    POSIX seconds (interpreted in the local timezone)."""
    parsed = datetime.strptime(val, "%d/%m/%Y %H:%M:%S.%f")
    return parsed.timestamp()
+
def load_imu_file(imu_file:str):
    """Load one gzipped IMU JSON-lines file plus its recording header.

    Parameters
    ----------
    imu_file : str
        Path to an ``imudata.gz`` file; the matching ``recording.g3``
        header is expected alongside it (same path, different name).

    Returns
    -------
    (pd.DataFrame, dict)
        Sensor samples (with added 'timestamp_interp' and absolute
        'sec' columns, rows without accelerometer data dropped) and the
        parsed header dict. The frame may be empty.
    """
    hdr_file = imu_file.replace('imudata.gz', 'recording.g3')

    df = pd.read_json(imu_file, lines=True, compression='gzip')
    hdr = pd.read_json(hdr_file, orient='index')
    hdr = hdr.to_dict().pop(0)  # orient='index' yields {0: {...}}; unwrap

    if df.empty: return df, hdr

    # Flatten the per-row 'data' dicts into their own columns.
    data_df = pd.DataFrame(df['data'].tolist())
    df = pd.concat([df.drop('data', axis=1), data_df], axis=1)

    iso_tz = hdr['created']
    tzinfo = pytz.timezone(hdr['timezone'])
    # adjust for UTC: 'created' is ISO-8601 with a trailing 'Z' stripped
    start_time = datetime.fromisoformat(iso_tz[:-1])
    start_time = utc_to_local(start_time, tz=tzinfo).astimezone(tzinfo)

    # Drop rows with no accelerometer reading.
    na_inds = df.loc[pd.isna(df['accelerometer']), :].index.values
    df.drop(index=na_inds, inplace=True)

    # Device timestamps are seconds relative to the recording start
    # (presumably — TODO confirm); interpolate gaps, then convert to
    # absolute POSIX seconds in the 'sec' column.
    imu_times = df['timestamp'].values
    df['timestamp_interp'] = imu_times
    df['timestamp_interp'] = df['timestamp_interp'].interpolate()
    imu_times = df['timestamp_interp'].values
    imu_datetimes = [start_time + timedelta(seconds=val) \
                     for val in imu_times]
    imu_s = np.array([time.timestamp() for time in imu_datetimes])
    df['sec'] = imu_s

    # Discard samples more than 3 h after the first one — looks like a
    # guard against runaway/duplicated sessions (NOTE(review): confirm).
    time_check_thold = df['sec'].min() + 3*3600
    mask = df['sec'] > time_check_thold
    if np.any(mask):
        df = df[np.logical_not(mask)]

    return df, hdr
+
def load_imu_files(f_list:list):
    """Load several IMU files.

    Returns the row-concatenated data frame and the list of header
    dicts, in the same order as `f_list`.
    """
    loaded = [load_imu_file(f) for f in f_list]
    frames = [d for d, _ in loaded]
    headers = [h for _, h in loaded]
    return pd.concat(frames, axis=0), headers
+
def load_e4_file(e4_file:str):
    ''' Load BVP data from an Empatica E4 session zip.

    First row of BVP.csv is the initial time of the session as unix
    time; second row is the sample rate in Hz.

    Returns
    -------
    (pd.DataFrame, dict)
        The BVP frame (columns 'bvp' and 'sec', the two metadata rows
        removed) and a header dict with 'start_time' and 'fs'.

    Bug fix: the header previously read ``head.iloc[0,1]`` for 'fs',
    which is the NaN padding in the 'sec' column; the sample rate lives
    in the second row of the data column (``head.iloc[1,0]``).
    '''
    zip_file = ZipFile(e4_file)
    dfs = {csv_file.filename: pd.read_csv(zip_file.open(csv_file.filename)
                                          ,header=None)
           for csv_file in zip_file.infolist()
           if csv_file.filename.endswith('.csv')}
    bvp = dfs["BVP.csv"]
    t0 = bvp.iloc[0].values[0]  # session start, unix time (UTC)
    fs = bvp.iloc[1].values[0]  # sample rate in Hz
    nsamples = len(bvp) - 2

    # Per-sample local-time POSIX timestamps from t0 and the rate.
    t0_datetime = datetime.utcfromtimestamp(t0)
    t0_local = datetime_from_utc_to_local(t0_datetime)
    time = [t0_local.timestamp() + ind*(1/fs) for ind in
            range(nsamples)]
    # NaN-pad so 'sec' aligns with the two metadata rows still present.
    tmp = [np.nan, np.nan]
    time = tmp + time
    bvp.rename(columns={0: "bvp"}, inplace=True)
    bvp['sec'] = np.array(time)

    # Keep the metadata rows for the header, then drop them from data.
    head = bvp.iloc[[0, 1]]
    bvp.drop(inplace=True, index=[0, 1])

    hdr = {'start_time': head.iloc[0,0],
           'fs': head.iloc[1,0]}

    return bvp, hdr
+
def load_e4_files(f_list:list):
    """Load several E4 session zips.

    Returns the row-concatenated BVP frame and the list of header
    dicts, in the same order as `f_list`.
    """
    loaded = [load_e4_file(f) for f in f_list]
    frames = [d for d, _ in loaded]
    headers = [h for _, h in loaded]
    return pd.concat(frames, axis=0), headers
+
+# Synchronising data
def sync_to_ref(df0, df1):
    """Trim both frames to their overlapping 'sec' time span.

    The shared window runs from the later of the two start times to the
    earlier of the two end times. Returns the two synchronised frames
    in the same order as the arguments.
    """
    sec0 = df0['sec'].values
    sec1 = df1['sec'].values
    start = max(sec0[0], sec1[0])
    stop = min(sec0[-1], sec1[-1])

    sync0 = DataSynchronizer()
    sync0.set_bounds(sec0, start, stop)
    sync1 = DataSynchronizer()
    sync1.set_bounds(sec1, start, stop)

    return sync0.sync_df(df0), sync1.sync_df(df1)
+
def pss_br_calculations(win, pss_df=None, br_df=None):
    """Estimate breathing rate from one pressure window and pair it
    with the synchronised Bioharness summary BR.

    Parameters
    ----------
    win : index array for this window; a zero in the last slot marks a
        padded/invalid window.
    pss_df : frame with the breathing-pressure column, 'ms', 'sec',
        'subject' (and, presumably, 'condition' — TODO confirm).
    br_df : Bioharness summary frame with 'ms' and 'BR'.

    Returns
    -------
    (time_out, pss_est, br_out, sbj_out, cond_out), or [None]*5 when
    the window is padded or contains a gap longer than 60 s.

    Bug fix: ``cond_out`` was previously never assigned, so every valid
    window raised NameError; it is now taken from the window's
    'condition' column.
    """
    n_out = 5
    if win[-1] == 0: return [None]*n_out

    dsync = DataSynchronizer()
    pss_fs = BR_FS
    pss_col = [col for col in pss_df.columns.values if\
               'breathing' in col.lower()][0]
    pss_ms = pss_df['ms'].values
    br_ms  = br_df['ms'].values
    t0, t1 = pss_ms[win][0], pss_ms[win][-1]

    # Reject windows spanning a recording gap (> 60 s between samples).
    diff = pss_ms[win][1:] - pss_ms[win][:-1]
    mask = np.abs(diff/1e3) > 60
    diff_chk = np.any(mask)
    if diff_chk: return [None]*n_out

    # Pressure-derived rate estimate: dominant FFT frequency in
    # cycles per minute.
    pss_win = pss_df.iloc[win]
    pss_data = pss_win[pss_col]
    pss_filt = pressure_signal_processing(pss_data, fs=pss_fs)
    xf, yf = do_pad_fft(pss_filt, fs=pss_fs)
    pss_est = xf[yf.argmax()]*60

    # Sync the summary BR onto this window and take its median.
    dsync.set_bounds(br_ms, t0, t1)
    br_win = dsync.sync_df(br_df)

    br_out = np.median(br_win['BR'].values)

    # Get subject and condition
    sbj_out = pss_win['subject'].values[0]
    cond_out = pss_win['condition'].values[0]
    time_out = np.median(pss_win['sec'].values)
    return time_out, pss_est, br_out, sbj_out, cond_out
+
def get_pss_br_estimates(pss_df, br_df, window_size=12, window_shift=1):
    """Windowed comparison of pressure-derived breathing-rate estimates
    against the Bioharness summary BR.

    Sliding windows of `window_size` seconds (stride `window_shift`
    seconds, both scaled by BR_FS into sample counts) are generated
    over `pss_df` and processed by `pss_br_calculations` in a process
    pool.

    Returns a DataFrame with columns
    ['ms', 'subject', 'condition', 'pss', 'br'], rows with any NaN
    (invalid windows) dropped.
    """
    pss_fs = BR_FS
    # pss_col = [col for col in pss_df.columns.values if\
    #            'breathing' in col.lower()][0]
    pss_ms = pss_df['sec'].values
    br_ms  = br_df['sec'].values

    # Sliding-window index matrix over the pressure samples.
    inds = np.arange(0, len(pss_ms))
    vsw_out = vsw(inds, len(inds), sub_window_size=int(window_size*pss_fs),
                  stride_size=int(window_shift*pss_fs))

    # dsync = DataSynchronizer()
    pss_est, br_out = [], []
    cond_out, sbj_out = [], []
    func = partial(pss_br_calculations, pss_df=pss_df, br_df=br_df)
    # for i, win in enumerate(vsw_out):
    #     tmp = func(win)

    # One worker call per window; each returns a 5-tuple (or 5 Nones).
    with Pool(cpu_count()) as p:
        tmp = p.map(func, vsw_out)

    time_out, pss_est, br_out, sbj_out, cond_out = zip(*tmp)

    time_array = np.array(time_out)
    pss_est_array = np.array(pss_est)
    br_out_array = np.array(br_out)
    sbj_out_array = np.array(sbj_out)
    cond_out_array = np.array(cond_out)

    df = pd.DataFrame(
        np.array(
            [time_array, sbj_out_array, cond_out_array,
             pss_est_array, br_out_array]
        ).T,
        columns=['ms', 'subject', 'condition', 'pss', 'br'])
    df.dropna(inplace=True)

    return df
+
+# Multiprocessing task for windowing dataframe
def imu_df_win_task(w_inds, df, i, cols):
    """Process one window of the synced xsens frame (multiprocessing task).

    Parameters
    ----------
    w_inds : index array for this window; a zero in the last slot marks
        a padded/invalid window.
    df : synced frame with 'sec', the IMU columns, 'BR' and 'PSS'.
    i : integer window id, written to the output 'id' column.
    cols : IMU columns to process; defaults to acc_x..gyr_z when None.

    Returns
    -------
    (x_out, y_out), or None when the window is padded or contains a
    time gap > 20 s. x_out holds the per-window standardised+filtered
    IMU samples plus 'sec' and 'id'; y_out is one row with the window's
    median time, median summary BR, and dominant PSS frequency
    (cycles/min).
    """
    time = df['sec'].values
    if w_inds[-1] == 0: return
    w_df = df.iloc[w_inds]
    t0, t1 = time[w_inds][0], time[w_inds][-1]
    diff = time[w_inds[1:]] - time[w_inds[0:-1]]
    # Reject windows spanning a recording gap (> 20 s between samples).
    mask = np.abs(diff)>20
    diff_chk = np.any(mask)
    if diff_chk:
        return

    # sbj = w_df['subject'].values.astype(int)
    # sbj_mask = np.any((sbj[1:] - sbj[:-1])>0)
    # if sbj_mask:
    #     return

    if cols is None:
        cols = ['acc_x', 'acc_y', 'acc_z',
                'gyr_x', 'gyr_y', 'gyr_z']

    data = w_df[cols].values

    # DSP: z-score each channel over the window, then filter.
    sd_data = (data - np.mean(data, axis=0))/np.std(data, axis=0)
    # ys = cubic_interp(sd_data, BR_FS, FS_RESAMPLE)
    filt_data = imu_signal_processing(sd_data, IMU_FS)
    # NOTE(review): output renames gyr_* to gyro_* — downstream code
    # (imu_cols in imu_rr_model) expects the gyro_* spelling.
    x_out = pd.DataFrame(filt_data,
                         columns=[
                             'acc_x', 'acc_y', 'acc_z',
                             'gyro_x', 'gyro_y', 'gyro_z',
                         ])

    sm_out = w_df['BR'].values
    ps_out = w_df['PSS'].values

    x_vec_time = np.median(time[w_inds])

    # Label: dominant pressure frequency at the window's actual rate.
    fs = 1/np.mean(diff)
    ps_freq = int(get_max_frequency(ps_out, fs=fs))

    y_tmp = np.array([x_vec_time, np.nanmedian(sm_out), ps_freq])

    x_out['sec'] = x_vec_time
    x_out['id'] = i
    y_out = pd.DataFrame([y_tmp], columns=['sec', 'br', 'pss'])

    return x_out, y_out
+
def get_max_frequency(data, fs=IMU_FS):
    """Return the dominant frequency of `data` in cycles per minute.

    The signal is band-filtered, FFT'd (zero-padded), and the frequency
    of the spectral peak is scaled from Hz to cycles/min.
    """
    filtered = pressure_signal_processing(data, fs=fs)
    freqs, spectrum = do_pad_fft(filtered, fs=fs)
    return freqs[spectrum.argmax()] * 60
+
def convert_to_float(df):
    """Cast the known numeric columns of `df` to float, in place.

    Only columns that are present are touched; others are left alone.
    Returns None (mutates the argument).
    """
    for col in ('sec', 'pss', 'br', 'subject'):
        if col in df.columns.values:
            df[col] = df[col].astype(float)
+
def load_and_sync_xsens(subject):
    """Load IMU, Bioharness and E4 recordings for `subject` and return
    one time-synchronised DataFrame.

    Output columns: sec, BR (summary breathing rate), PSS (breathing
    pressure waveform), BVP, acc_x..z, gyr_x..z. Bioharness and E4
    signals are linearly interpolated onto the IMU timestamps.
    """
    # load imu
    imu_list = get_file_list('imudata.gz', sbj=subject)
    imu_df_all, imu_hdr_df_all = load_imu_files(imu_list)

    # load bioharness
    pss_list = get_file_list('*Breathing.csv', sbj=subject)
    if len(pss_list) == 0:
        pss_list = get_file_list('BR*.csv', sbj=subject)

    br_list = get_file_list('*Summary*', sbj=subject)

    # load e4 wristband
    e4_list = get_file_list('*.zip', sbj=subject)
    bvp_df_all, bvp_hdr = load_e4_files(e4_list)
    bvp_fs = bvp_hdr[0]['fs']  # NOTE(review): currently unused

    xsens_list = []
    # skip the first and last x minute(s)
    minutes_to_skip = .5
    br_skiprows = br_skipfooter = int(minutes_to_skip*60)
    pss_skiprows = pss_skipfooter = int(minutes_to_skip*60*BR_FS)
    # load each bioharness file and sync the imu to it
    for pss_file, br_file in zip(pss_list, br_list):
        # engine='python' is required by pandas for skipfooter != 0
        pss_df = load_bioharness_file(pss_file, skiprows=pss_skiprows,
                                      skipfooter=pss_skipfooter,
                                      engine='python')
        pss_time = pss_df['Time'].map(bioharness_datetime_to_seconds).values\
                .reshape(-1, 1)
        pss_df['sec'] = pss_time

        br_df = load_bioharness_file(br_file, skiprows=br_skiprows,
                                     skipfooter=br_skipfooter,
                                     engine='python')
        br_time = br_df['Time'].map(bioharness_datetime_to_seconds).values\
                .reshape(-1, 1)
        br_df['sec'] = br_time

        # sync each stream to the overlapping 'sec' range
        br_df, imu_df = sync_to_ref(br_df, imu_df_all.copy())
        pss_df, _ = sync_to_ref(pss_df, imu_df_all.copy())
        bvp_df, _ = sync_to_ref(bvp_df_all.copy(), pss_df.copy())

        # extract relevant data
        acc_data = np.stack(imu_df['accelerometer'].values)
        gyr_data = np.stack(imu_df['gyroscope'].values)
        x_time = imu_df['sec'].values.reshape(-1, 1)

        # interpolate pressure onto the IMU timestamps
        br_col = [col for col in pss_df.columns.values if\
                  'breathing' in col.lower()][0]
        pss_data = pss_df[br_col].values
        pss_data = np.interp(x_time, pss_df['sec'].values, pss_data)\
                .reshape(-1, 1)

        br_lbl = [col for col in br_df.columns.values if\
                  'br' in col.lower()][0]  # NOTE(review): unused
        br_data = br_df['BR'].values
        br_data = np.interp(x_time, br_df['sec'].values, br_data)\
                .reshape(-1, 1)

        bvp_data = bvp_df['bvp'].values
        bvp_data = np.interp(x_time, bvp_df['sec'].values, bvp_data)\
                .reshape(-1, 1)

        xsens_data = np.concatenate(
            (x_time, br_data, pss_data, bvp_data, acc_data, gyr_data),
            axis=1)

        columns=['sec'   , 'BR'    , 'PSS'   , 'BVP' ,
                 'acc_x' , 'acc_y' , 'acc_z' ,
                 'gyr_x' , 'gyr_y' , 'gyr_z' , ]
        xsens_df_tmp = pd.DataFrame(xsens_data, columns=columns)

        '''
        print("{:.2f}\t{:.2f}\t{:.2f}".format(br_df.sec.iloc[0],
                                              pss_df.sec.iloc[0],
                                              imu_df.sec.iloc[0]))
        print("{:.2f}\t{:.2f}\t{:.2f}".format(br_df.sec.iloc[-1],
                                              pss_df.sec.iloc[-1],
                                              imu_df.sec.iloc[-1]))
        print(xsens_df_tmp.head())
        '''
        xsens_list.append(xsens_df_tmp)

    if len(xsens_list) > 1:
        xsens_df = pd.concat(xsens_list, axis=0, ignore_index=True)
        xsens_df.reset_index(drop=True, inplace=True)
    else:
        xsens_df = xsens_list[0]

    return xsens_df
+
+
def load_tsfresh(subject, project_dir,
                 window_size=12, window_shift=0.2, fs=IMU_FS,
                 overwrite=False):
    """Load (or build and cache) tsfresh features for one subject.

    Windows the synced xsens data, extracts tsfresh features per window
    id and concatenates them column-wise with the per-window labels.
    The result is pickled to ``<project_dir>/tsfresh.pkl`` and reused
    on subsequent calls unless `overwrite` is True.
    """
    cols = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']
    pkl_file = join(project_dir, 'tsfresh.pkl')
    if exists(pkl_file) and not overwrite:
        return pd.read_pickle(pkl_file)

    xsens_df = load_and_sync_xsens(subject)
    x_df, y_df = get_df_windows(xsens_df,
                                imu_df_win_task,
                                window_size=window_size,
                                window_shift=window_shift,
                                fs=fs,
                               )
    x_features_df = extract_features(
        x_df, column_sort='sec',
        column_id='id',
        # default_fc_parameters=tsfresh_settings.MinimalFCParameters(),
    )
    # tsfresh emits NaN for features it cannot compute on a window.
    x_features_df.fillna(0, inplace=True)

    # NOTE(review): `cols` is reassigned here but never used afterwards.
    cols = x_features_df.columns.values

    df_out = pd.concat([y_df, x_features_df], axis=1)
    df_out.to_pickle(pkl_file)
    return df_out
+
def get_activity_log(subject):
    """Concatenate every ``activity*.csv`` log found for `subject`."""
    frames = [pd.read_csv(fname)
              for fname in get_file_list('activity*.csv', sbj=subject)]
    return pd.concat(frames, axis=0)
+
def get_respiration_log(subject):
    """Concatenate every JSON respiration log found for `subject`."""
    frames = [pd.read_json(fname)
              for fname in get_file_list('*.json', sbj=subject)]
    return pd.concat(frames, axis=0)
+
def get_cal_data(event_df, xsens_df):
    """Extract calibration segments from the respiration-log events.

    Walks Start/Stop event pairs in `event_df`; for each pair the xsens
    rows between the two timestamps are extracted and stored alongside
    the commanded breathing rate (cpm = 60 / (inhale + exhale)).

    Returns a DataFrame with columns 'cpm' and 'data'.

    Cleanup: removed unused locals `fmt` and `cpms`.
    """
    cal_list = []
    start_sec = 0
    stop_sec = 0
    for index, row in event_df.iterrows():
        event = row['eventTag']
        timestamp = row['timestamp']
        inhalePeriod = row['inhalePeriod']
        exhalePeriod = row['exhalePeriod']

        # Commanded cycles-per-minute for this calibration block.
        cpm = np.round( 60/(inhalePeriod + exhalePeriod) )

        sec = timestamp.to_pydatetime().timestamp()

        if event == 'Start':
            start_sec = sec
            continue
        elif event == 'Stop':
            stop_sec = sec

            dsync = DataSynchronizer()
            dsync.set_bounds(xsens_df['sec'].values, start_sec, stop_sec)

            sync_df = dsync.sync_df(xsens_df.copy())
            cal_data = {'cpm': cpm, 'data': sync_df}
            cal_list.append(cal_data)

            # Sanity check: extracted segment must match the event bounds.
            assert np.round(sync_df.sec.iloc[0])==np.round(start_sec), \
            "error with start sync"
            assert np.round(sync_df.sec.iloc[-1])==np.round(stop_sec), \
            "error with stop sync"

    return pd.DataFrame(cal_list)
+
def get_test_data(cal_df, activity_df, xsens_df):
    """Extract per-activity test segments that follow the calibration.

    Only xsens rows after the end of the last calibration segment are
    considered. For each start/stop pair in `activity_df` the matching
    rows are extracted and stored with the activity name.

    Returns a DataFrame with columns 'activity' and 'data'.

    Bug fixes: activity timestamps are now converted to epoch seconds
    before being compared against the 'sec' column (previously raw
    datetime objects were handed to set_bounds, mirroring get_cal_data
    which passes seconds); the stop marker is initialised under the
    name actually assigned in the loop (`activity_stop` — formerly the
    unused `activity_end`, which made a leading 'stop' event raise
    NameError).
    """
    fmt = "%d/%m/%Y %H:%M:%S"
    start_time = cal_df.iloc[-1]['data'].sec.values[-1]
    data_df = xsens_df[xsens_df.sec > start_time]
    activity_start = 0
    activity_stop = 0

    activity_list = []

    for index, row in activity_df.iterrows():
        # Wall-clock string -> epoch seconds, to match the 'sec' column.
        sec = datetime.strptime(row['Timestamps'], fmt).timestamp()
        if row['Event'] == 'start':
            activity_start = sec
        elif row['Event'] == 'stop':
            activity_stop = sec

            dsync = DataSynchronizer()
            dsync.set_bounds(data_df['sec'].values, activity_start,
                             activity_stop)

            sync_df = dsync.sync_df(data_df.copy())
            activity_data = {'activity': row['Activity'], 'data': sync_df}
            activity_list.append(activity_data)

    return pd.DataFrame(activity_list)
+
+# save evaluation metrics in single file that handles the models for the
+# subject and config
class EvalHandler():
    """Accumulate model-evaluation metrics into a shared CSV history.

    One history row per (subject, config_id, mdl_str) triple; an
    existing row is only replaced when `overwrite` is True.

    Fix: new rows are appended via the public ``pd.concat`` instead of
    the private ``DataFrame._append``.
    """

    def __init__(self, y_true, y_pred, subject, pfh, mdl_str, overwrite=False):
        self.subject = subject
        self.config = pfh.config
        self.parent_directory = join(DATA_DIR, 'subject_specific')
        self.fset_id = pfh.fset_id
        self.mdl_str = mdl_str
        self.overwrite = overwrite

        # Compute the evaluation metrics for these predictions.
        self.evals = Evaluation(y_true, y_pred)

        entry = {'subject': self.subject,
                 'config_id': self.fset_id,
                 'mdl_str': self.mdl_str,
                }
        # One flat record: identity + configuration + metrics.
        self.entry = {**entry, **self.config, **self.evals.get_evals()}

        self.eval_history_file = join(self.parent_directory,
                                      'eval_history.csv')
        self.eval_hist = self.load_eval_history()

    def load_eval_history(self):
        """Return the history DataFrame, or None if no file exists yet."""
        if not exists(self.eval_history_file):
            return None
        else:
            return pd.read_csv(self.eval_history_file)

    def update_eval_history(self):
        """Insert (or, with `overwrite`, replace) this run's entry."""
        eval_hist = self.eval_hist
        if eval_hist is None:
            eval_hist = pd.DataFrame([self.entry])
        else:
            index_list = eval_hist[
                (eval_hist['subject'] == self.entry['subject']) &\
                (eval_hist['config_id'] == self.entry['config_id']) &\
                (eval_hist['mdl_str'] == self.entry['mdl_str'])\
            ].index.tolist()
            if len(index_list) == 0:
                print("adding new entry")
                eval_hist = pd.concat(
                    [eval_hist, pd.DataFrame([self.entry])],
                    ignore_index=True)
            elif index_list is not None and self.overwrite:
                eval_hist.loc[index_list[0]] = self.entry
        self.eval_hist = eval_hist

    def save_eval_history(self):
        """Write the in-memory history back to eval_history.csv."""
        self.eval_hist.to_csv(self.eval_history_file, index=False)
+
+# Train IMU - RR models across subjects
def imu_rr_model(subject,
                 window_size=12,
                 window_shift=0.2,
                 lbl_str='pss',
                 mdl_str='knn',
                 overwrite=False,
                 feature_method='tsfresh',
                 train_len:int=3,
                 test_standing=False,
                ):
    """Train and evaluate subject-specific IMU -> respiration models.

    For every combination of `train_len` calibration rates the model is
    trained on those calibration segments and evaluated on the activity
    (test) data; metrics are appended to the shared evaluation history
    and a prediction plot is saved per combination.

    Parameters
    ----------
    subject : str — subject identifier, e.g. 'Pilot02'.
    window_size : window length in seconds.
    window_shift : window shift (fraction of the window).
    lbl_str : label column to regress ('pss' or 'br').
    mdl_str : model key understood by model_training.
    overwrite : overwrite stored models/metrics.
    feature_method : 'tsfresh', 'minirocket', or raw windows otherwise.
    train_len : number of calibration rates per training combination.
    test_standing : unused here; kept for interface compatibility.

    Bug fixes: the undefined name ``cal_cpm`` (NameError in the
    training loop) is now ``cal_str``; a leftover ``ipdb.set_trace()``
    was removed; the figure title stringifies the tuple ``combi``
    before joining. Unused locals ``tmp`` and ``train_size`` removed.
    """
    # window_size, window_shift, intra, inter
    cal_str = 'cpm'
    fs = IMU_FS
    imu_cols = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']

    do_minirocket = False
    use_tsfresh   = False
    overwrite_tsfresh = True

    if feature_method == 'tsfresh':
        use_tsfresh = True
    elif feature_method == 'minirocket':
        do_minirocket = True

    config = {'window_size'   : window_size,
              'window_shift'  : window_shift,
              'lbl_str'       : lbl_str,
              'do_minirocket' : do_minirocket,
              'use_tsfresh'   : use_tsfresh,
              'train_len'     : train_len,
             }

    # Resolve (or create) the project directory for this configuration.
    pfh = ProjectFileHandler(config)
    pfh.set_home_directory(join(DATA_DIR, 'subject_specific', subject))
    pfh.set_parent_directory('imu_rr')
    id_check = pfh.get_id_from_config()
    if id_check is None:
        pfh.set_project_directory()
        pfh.save_metafile()
    else:
        pfh.set_id(int(id_check))
        pfh.set_project_directory()
        print('Using pre-set data id: ', pfh.fset_id)
    project_dir = pfh.project_directory

    marker = f'imu_rr_{subject}_id{pfh.fset_id}'

    if not use_tsfresh:
        xsens_df = load_and_sync_xsens(subject)
    else:
        xsens_df = load_tsfresh(subject,
                                project_dir,
                                window_size=window_size,
                                window_shift=window_shift,
                                fs=IMU_FS,
                                overwrite=overwrite_tsfresh)

    activity_df = get_activity_log(subject)
    event_df = get_respiration_log(subject)

    cal_df = get_cal_data(event_df, xsens_df)

    # include standing or not
    test_df = get_test_data(cal_df, activity_df, xsens_df)

    # Train once per combination of `train_len` calibration rates.
    for combi in combinations(cal_df[cal_str].values, train_len):
        config[cal_str] = combi
        train_df = pd.concat(
            [cal_df[cal_df[cal_str] == cpm]['data'] for cpm in combi],
            axis=0
        )

        assert np.isin(train_df.index.values, test_df.index.values).any()==False,\
                "overlapping test and train data"

        print("train")
        print(train_df.shape)
        print("test")
        print(test_df.shape)

        if do_minirocket:
            x_train_df, y_train_df = get_df_windows(train_df,
                                                    imu_df_win_task,
                                                    window_size=window_size,
                                                    window_shift=window_shift,
                                                    fs=fs,
                                                   )
            x_test_df, y_test_df = get_df_windows(test_df,
                                                  imu_df_win_task,
                                                  window_size=window_size,
                                                  window_shift=window_shift,
                                                  fs=fs,
                                                 )

            x_train = make_windows_from_id(x_train_df, imu_cols)
            x_test  = make_windows_from_id(x_test_df, imu_cols)
            y_train = y_train_df[lbl_str].values.reshape(-1, 1)
            y_test  = y_test_df[lbl_str].values.reshape(-1, 1)

            print("minirocket transforming...")
            # sktime expects (n_instances, n_channels, n_timepoints)
            x_train = np.swapaxes(x_train, 1, 2)
            x_test = np.swapaxes(x_test, 1, 2)
            minirocket = MiniRocketMultivariate()
            x_train    = minirocket.fit_transform(x_train)
            x_test     = minirocket.transform(x_test)
        elif use_tsfresh:
            # First three columns are sec/br/pss labels; rest features.
            x_train = train_df.iloc[:, 3:].values
            y_train = train_df[lbl_str].values.reshape(-1, 1)
            x_test  = test_df.iloc[:, 3:].values
            y_test  = test_df[lbl_str].values.reshape(-1, 1)
        else:
            x_train_df, y_train_df = get_df_windows(train_df,
                                                    imu_df_win_task,
                                                    window_size=window_size,
                                                    window_shift=window_shift,
                                                    fs=fs,
                                                   )
            x_test_df, y_test_df = get_df_windows(test_df,
                                                  imu_df_win_task,
                                                  window_size=window_size,
                                                  window_shift=window_shift,
                                                  fs=fs,
                                                 )

            x_train = make_windows_from_id(x_train_df, imu_cols)
            x_test  = make_windows_from_id(x_test_df, imu_cols)
            y_train = y_train_df[lbl_str].values.reshape(-1, 1)
            y_test  = y_test_df[lbl_str].values.reshape(-1, 1)

        transforms, model = model_training(mdl_str, x_train, y_train,
                                           marker, validation_data=None,
                                           overwrite=overwrite,
                                           is_regression=True,
                                           project_directory=project_dir,
                                           window_size=int(window_size*fs),
                                           extra_train=200,
                                          )

        if transforms is not None:
            x_test = transforms.transform(x_test)

        preds = model.predict(x_test)

        eval_handle = EvalHandler(y_test.flatten(), preds.flatten(), subject,
                                  pfh, mdl_str, overwrite=overwrite)
        eval_handle.update_eval_history()
        eval_handle.save_eval_history()

        pp = PrettyPrinter()
        pp.pprint(eval_handle.load_eval_history())

        # Plot predictions vs. ground truth for this combination.
        fig, ax = plt.subplots()
        fig_title = ' '.join([mdl_str, subject, str(combi)])
        ax.plot(y_test)
        ax.plot(preds)
        ax.set_title(fig_title)
        ax.legend([lbl_str, 'pred'])
        fig_dir = join(project_dir, 'figures',)
        if not exists(fig_dir): mkdir(fig_dir)
        fig.savefig(join(fig_dir, mdl_str))
+
def arg_parser():
    """Build the command-line interface and parse sys.argv."""
    p = argparse.ArgumentParser()
    p.add_argument('-m', '--model', type=str, default='linreg',
                   choices=['linreg', 'ard', 'xgboost', 'knn',
                            'svr', 'cnn1d', 'fnn', 'lstm', 'ridge',
                            'elastic'])
    p.add_argument('-s', '--subject', type=int, default=2,
                   choices=list(range(1, 3)) + [-1])
    p.add_argument('-f', '--feature_method', type=str, default='minirocket',
                   choices=['tsfresh', 'minirocket', 'None'])
    p.add_argument('-o', '--overwrite', type=int, default=0)
    p.add_argument('--win_size', type=int, default=12)
    p.add_argument('--win_shift', type=float, default=0.2)
    p.add_argument('-l', '--lbl_str', type=str, default='pss')
    p.add_argument('-tl', '--train_len', type=int, default=3,
                   help='minutes of data to use for calibration')
    return p.parse_args()
+
if __name__ == '__main__':
    # choose either intra or inter subject features to use for model training
    # '[!M]*'
    np.random.seed(100)
    n_subject_max = 2
    args = arg_parser()

    mdl_str        = args.model
    subject        = args.subject
    feature_method = args.feature_method
    window_size    = args.win_size
    window_shift   = args.win_shift
    lbl_str        = args.lbl_str
    train_len      = args.train_len
    overwrite      = args.overwrite

    print(args)
    assert train_len>0,"--train_len must be an integer greater than 0"

    subject_pre_string = 'Pilot'

    # Subject numbers with unusable IMU recordings, excluded from the
    # all-subjects run. Bug fix: this name was previously undefined and
    # raised NameError whenever `--subject -1` was used.
    imu_issues = []

    if subject > 0:
        # Single-subject run, e.g. 2 -> 'Pilot02'.
        subject = subject_pre_string+str(subject).zfill(2)

        imu_rr_model(subject, window_size=window_size, window_shift=window_shift,
                     lbl_str=lbl_str, mdl_str=mdl_str, overwrite=overwrite,
                     feature_method=feature_method, train_len=train_len
                    )
    else:
        # subject == -1: run every subject, skipping known-bad IMU ids.
        subjects = [subject_pre_string+str(i).zfill(2) for i in \
                    range(1, n_subject_max+1) if i not in imu_issues]
        imu_rr_func = partial(imu_rr_model,
                              window_size=window_size,
                              window_shift=window_shift,
                              lbl_str=lbl_str,
                              mdl_str=mdl_str,
                              overwrite=overwrite,
                              feature_method=feature_method,
                              train_len=train_len
                             )

        # These models run sequentially; the rest fan out across a
        # process pool (presumably the NN/boosted models don't tolerate
        # fork-based multiprocessing — TODO confirm).
        if mdl_str in ['fnn', 'lstm', 'cnn1d', 'elastic', 'ard', 'xgboost']:
            for subject in subjects:
                imu_rr_func(subject)
        else:
            ncpu = min(len(subjects), cpu_count())
            with Pool(ncpu) as p:
                p.map(imu_rr_func, subjects)

    print(args)
diff --git a/modules/.datapipeline.py.swo b/modules/.datapipeline.py.swo
deleted file mode 100644
index e1d1686c91104d8ab1be7c0477fcbb672876d299..0000000000000000000000000000000000000000
Binary files a/modules/.datapipeline.py.swo and /dev/null differ
diff --git a/modules/.digitalsignalprocessing.py.swp b/modules/.digitalsignalprocessing.py.swp
deleted file mode 100644
index b0450c45748586ac7d937004bbbaa7146ae127fe..0000000000000000000000000000000000000000
Binary files a/modules/.digitalsignalprocessing.py.swp and /dev/null differ
diff --git a/modules/.utils.py.swp b/modules/.utils.py.swp
deleted file mode 100644
index 3c3989aba08152442eb3211073976dc6e7063f06..0000000000000000000000000000000000000000
Binary files a/modules/.utils.py.swp and /dev/null differ
diff --git a/regress_rr.py b/regress_rr.py
index 111a9f99c06c74385051ed0635053f110637face..38d987f233c251975852fc812933cf267aa6ec6d 100644
--- a/regress_rr.py
+++ b/regress_rr.py
@@ -18,7 +18,7 @@ import pytz
 import matplotlib.pyplot as plt
 from functools import partial
 from collections import Counter
-from itertools import repeat, chain
+from itertools import repeat, chain, combinations
 from multiprocessing import Pool, cpu_count
 import tensorflow as tf
 
@@ -380,9 +380,9 @@ def load_and_sync_xsens(subject):
 
     xsens_list = []
     # skip the first and last x minute(s)
-    minutes_to_skip = 2
-    br_skiprows = br_skipfooter = minutes_to_skip*60
-    pss_skiprows = pss_skipfooter = minutes_to_skip*60*BR_FS
+    minutes_to_skip = .5
+    br_skiprows = br_skipfooter = int(minutes_to_skip*60)
+    pss_skiprows = pss_skipfooter = int(minutes_to_skip*60*BR_FS)
     # load each bioharness file and sync the imu to it
     for pss_file, br_file in zip(pss_list, br_list):
         pss_df = load_bioharness_file(pss_file, skiprows=pss_skiprows,
@@ -492,6 +492,70 @@ def get_respiration_log(subject):
     log_dfs = [pd.read_json(f) for f in log_list]
     return pd.concat(log_dfs, axis=0)
 
+def get_cal_data(event_df, xsens_df):
+    fmt ="%Y-%m-%d %H.%M.%S" 
+    cal_list = []
+    cpms = []
+    start_sec = 0
+    stop_sec = 0
+    for index, row in event_df.iterrows():
+        event = row['eventTag']
+        timestamp = row['timestamp']
+        inhalePeriod = row['inhalePeriod']
+        exhalePeriod = row['exhalePeriod']
+
+        cpm = np.round( 60/(inhalePeriod + exhalePeriod) )
+
+        sec = timestamp.to_pydatetime().timestamp()
+
+        if event == 'Start':
+            start_sec = sec
+            continue
+        elif event == 'Stop':
+            stop_sec = sec
+
+            dsync = DataSynchronizer()
+            dsync.set_bounds(xsens_df['sec'].values, start_sec, stop_sec)
+
+            sync_df = dsync.sync_df(xsens_df.copy())
+            cal_data = {'cpm': cpm, 'data': sync_df}
+            cal_list.append(cal_data)
+
+            assert np.round(sync_df.sec.iloc[0])==np.round(start_sec), \
+            "error with start sync"
+            assert np.round(sync_df.sec.iloc[-1])==np.round(stop_sec), \
+            "error with stop sync"
+
+    return pd.DataFrame(cal_list)
+
+def get_test_data(cal_df, activity_df, xsens_df, test_standing):
+    fmt = "%d/%m/%Y %H:%M:%S"
+    start_time = cal_df.iloc[-1]['data'].sec.values[-1]
+    data_df = xsens_df[xsens_df.sec > start_time]
+    activity_start = 0
+    activity_end = 0
+
+    activity_list = []
+
+    for index, row in activity_df.iterrows():
+        sec = datetime.strptime(row['Timestamps'], fmt).timestamp()
+        if not test_standing and row['Activity'] == 'standing':
+            continue
+        if row['Event'] == 'start':
+            activity_start = sec
+        elif row['Event'] == 'end':
+            activity_stop = sec
+
+            dsync = DataSynchronizer()
+            dsync.set_bounds(data_df['sec'].values, activity_start,
+                             activity_stop)
+
+            sync_df = dsync.sync_df(data_df.copy())
+            activity_data = {'activity': row['Activity'], 'data': sync_df}
+            activity_list.append(activity_data)
+
+    return pd.DataFrame(activity_list)
+
 # save evaluation metrics in single file that handles the models for the
 # subject and config
 class EvalHandler():
@@ -529,7 +593,8 @@ class EvalHandler():
             index_list = eval_hist[
                 (eval_hist['subject'] == self.entry['subject']) &\
                 (eval_hist['config_id'] == self.entry['config_id']) &\
-                (eval_hist['mdl_str'] == self.entry['mdl_str'])\
+                (eval_hist['mdl_str'] == self.entry['mdl_str']) &\
+                (eval_hist['cpm'] == self.entry['cpm'])\
             ].index.tolist()
             if len(index_list) == 0:
                 print("adding new entry")
@@ -549,11 +614,20 @@ def imu_rr_model(subject,
                  mdl_str='knn',
                  overwrite=False,
                  feature_method='tsfresh',
-                 train_len=1
+                 train_len:int=3,
+                 test_standing=False,
                 ):
     # window_size, window_shift, intra, inter
+    cal_str = 'cpm'
     fs = IMU_FS
     tmp = []
+    imu_cols = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']
+    bvp_cols= ['bvp']
+
+    # TODO: 
+        # implement and input args config by data cols
+        # implement and input args config with test_standing
+    data_cols = imu_cols + bvp_cols
 
     do_minirocket = False
     use_tsfresh   = False
@@ -571,6 +645,8 @@ def imu_rr_model(subject,
               'do_minirocket' : do_minirocket,
               'use_tsfresh'   : use_tsfresh,
               'train_len'     : train_len,
+              'data_cols'     : data_cols,
+              'test_standing' : test_standing,
              }
 
     pfh = ProjectFileHandler(config)
@@ -586,8 +662,6 @@ def imu_rr_model(subject,
         print('Using pre-set data id: ', pfh.fset_id)
     project_dir = pfh.project_directory
 
-    marker = f'imu_rr_{subject}_id{pfh.fset_id}'
-
     if not use_tsfresh:
         xsens_df = load_and_sync_xsens(subject)
     else:
@@ -598,115 +672,115 @@ def imu_rr_model(subject,
                                 fs=IMU_FS,
                                 overwrite=overwrite_tsfresh)
 
-    large_win_size = 60*fs
-    xsens_inds = np.arange(len(xsens_df))
-    large_windows = vsw(xsens_inds, len(xsens_inds),
-                        sub_window_size=large_win_size,
-                        stride_size=large_win_size)
-    keep_inds = []
-    for i, win in enumerate(large_windows):
-        if win[-1] != 0:
-            keep_inds.append(i)
-
-    large_windows = large_windows[keep_inds]
+    activity_df = get_activity_log(subject)
+    event_df = get_respiration_log(subject)
+
+    cal_df = get_cal_data(event_df, xsens_df)
+
+    # include standing or not
+    test_df_tmp = get_test_data(cal_df, activity_df, xsens_df, test_standing)
+    test_df = pd.concat([df for df in test_df_tmp['data']], axis=0)
     
-    train_inds, test_inds = train_test_split(large_windows,
-                                             train_size=train_size,
-                                             shuffle=True,
-                                             random_state=123)
-
-    train_df = pd.concat([xsens_df.iloc[win] for win in train_inds if
-                          win[-1]!=0])
-    test_df = pd.concat([xsens_df.iloc[win] for win in test_inds if win[-1]!=0])
-
-    assert np.isin(train_df.index.values, test_df.index.values).any()==False,\
-            "overlapping test and train data"
-
-    print("train")
-    print(train_df.shape)
-    print("test")
-    print(test_df.shape)
-
-    if do_minirocket:
-        cols = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']
-        x_train_df, y_train_df = get_df_windows(train_df,
-                                                imu_df_win_task,
-                                                window_size=window_size,
-                                                window_shift=window_shift,
-                                                fs=fs,
-                                               )
-        x_test_df, y_test_df = get_df_windows(test_df, 
-                                              imu_df_win_task,
-                                              window_size=window_size,
-                                              window_shift=window_shift,
-                                              fs=fs,
-                                             )
-
-        x_train = make_windows_from_id(x_train_df, cols)
-        x_test  = make_windows_from_id(x_test_df, cols)
-        y_train = y_train_df[lbl_str].values.reshape(-1, 1)
-        y_test  = y_test_df[lbl_str].values.reshape(-1, 1)
-
-        print("minirocket transforming...")
-        x_train = np.swapaxes(x_train, 1, 2)
-        x_test = np.swapaxes(x_test, 1, 2)
-        minirocket = MiniRocketMultivariate()
-        x_train    = minirocket.fit_transform(x_train)
-        x_test     = minirocket.transform(x_test)
-    elif use_tsfresh:
-        x_train = train_df.iloc[:, 3:].values
-        y_train = train_df[lbl_str].values.reshape(-1, 1)
-        x_test  = test_df.iloc[:, 3:].values
-        y_test  = test_df[lbl_str].values.reshape(-1, 1)
-    else:
-        cols = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']
-        x_train_df, y_train_df = get_df_windows(train_df,
-                                                imu_df_win_task,
-                                                window_size=window_size,
-                                                window_shift=window_shift,
-                                                fs=fs,
-                                               )
-        x_test_df, y_test_df = get_df_windows(test_df, 
-                                              imu_df_win_task,
-                                              window_size=window_size,
-                                              window_shift=window_shift,
-                                              fs=fs,
-                                             )
-
-        x_train = make_windows_from_id(x_train_df, cols)
-        x_test  = make_windows_from_id(x_test_df, cols)
-        y_train = y_train_df[lbl_str].values.reshape(-1, 1)
-        y_test  = y_test_df[lbl_str].values.reshape(-1, 1)
-
-    transforms, model = model_training(mdl_str, x_train, y_train,
-                                       marker, validation_data=None,
-                                       overwrite=overwrite,
-                                       is_regression=True,
-                                       project_directory=project_dir,
-                                       window_size=int(window_size*fs),
-                                       extra_train=200,
-                                      )
-
-    if transforms is not None:
-        x_test = transforms.transform(x_test)
-
-    preds = model.predict(x_test)
-
-    eval_handle = EvalHandler(y_test.flatten(), preds.flatten(), subject,
-                              pfh, mdl_str, overwrite=overwrite)
-    eval_handle.update_eval_history()
-    eval_handle.save_eval_history()
-    pp = PrettyPrinter()
-    pp.pprint(eval_handle.load_eval_history())
-
-    fig, ax = plt.subplots()
-    ax.plot(y_test)
-    ax.plot(preds)
-    ax.set_title(' '.join([mdl_str, subject]))
-    ax.legend([lbl_str, 'pred'])
-    fig_dir = join(project_dir, 'figures',)
-    if not exists(fig_dir): mkdir(fig_dir)
-    fig.savefig(join(fig_dir, mdl_str))
+    for combi in combinations(cal_df[cal_str].values, train_len):
+        combi_str = "-".join([str(x) for x in combi])
+        pfh.config[cal_str] = combi_str
+        marker = f'imu_rr_{subject}_id{pfh.fset_id}_combi{combi_str}'
+        print(marker)
+
+        train_df = pd.concat(
+            [cal_df[cal_df[cal_str] == cpm]['data'].iloc[0] for cpm in combi],
+            axis=0
+        )
+
+        assert np.isin(train_df.index.values, test_df.index.values).any()==False,\
+                "overlapping test and train data"
+
+        print("train")
+        print(train_df.shape)
+        print("test")
+        print(test_df.shape)
+
+        if do_minirocket:
+            x_train_df, y_train_df = get_df_windows(train_df,
+                                                    imu_df_win_task,
+                                                    window_size=window_size,
+                                                    window_shift=window_shift,
+                                                    fs=fs,
+                                                   )
+            x_test_df, y_test_df = get_df_windows(test_df, 
+                                                  imu_df_win_task,
+                                                  window_size=window_size,
+                                                  window_shift=window_shift,
+                                                  fs=fs,
+                                                 )
+
+            x_train = make_windows_from_id(x_train_df, imu_cols)
+            x_test  = make_windows_from_id(x_test_df, imu_cols)
+            y_train = y_train_df[lbl_str].values.reshape(-1, 1)
+            y_test  = y_test_df[lbl_str].values.reshape(-1, 1)
+
+            print("minirocket transforming...")
+            x_train = np.swapaxes(x_train, 1, 2)
+            x_test = np.swapaxes(x_test, 1, 2)
+            minirocket = MiniRocketMultivariate()
+            x_train    = minirocket.fit_transform(x_train)
+            x_test     = minirocket.transform(x_test)
+        elif use_tsfresh:
+            x_train = train_df.iloc[:, 3:].values
+            y_train = train_df[lbl_str].values.reshape(-1, 1)
+            x_test  = test_df.iloc[:, 3:].values
+            y_test  = test_df[lbl_str].values.reshape(-1, 1)
+        else:
+            x_train_df, y_train_df = get_df_windows(train_df,
+                                                    imu_df_win_task,
+                                                    window_size=window_size,
+                                                    window_shift=window_shift,
+                                                    fs=fs,
+                                                   )
+            x_test_df, y_test_df = get_df_windows(test_df, 
+                                                  imu_df_win_task,
+                                                  window_size=window_size,
+                                                  window_shift=window_shift,
+                                                  fs=fs,
+                                                 )
+
+            x_train = make_windows_from_id(x_train_df, imu_cols)
+            x_test  = make_windows_from_id(x_test_df, imu_cols)
+            y_train = y_train_df[lbl_str].values.reshape(-1, 1)
+            y_test  = y_test_df[lbl_str].values.reshape(-1, 1)
+
+        transforms, model = model_training(mdl_str, x_train, y_train,
+                                           marker, validation_data=None,
+                                           overwrite=overwrite,
+                                           is_regression=True,
+                                           project_directory=project_dir,
+                                           window_size=int(window_size*fs),
+                                           extra_train=200,
+                                          )
+
+        if transforms is not None:
+            x_test = transforms.transform(x_test)
+
+        preds = model.predict(x_test)
+
+        eval_handle = EvalHandler(y_test.flatten(), preds.flatten(), subject,
+                                  pfh, mdl_str, overwrite=overwrite)
+        eval_handle.update_eval_history()
+        eval_handle.save_eval_history()
+
+        pp = PrettyPrinter()
+        pp.pprint(eval_handle.load_eval_history())
+
+        fig, ax = plt.subplots()
+        fig_title = '_'.join([mdl_str, subject]+[combi_str])
+        ax.plot(y_test)
+        ax.plot(preds)
+        ax.set_title(fig_title)
+        ax.legend([lbl_str, 'pred'])
+        fig_dir = join(project_dir, 'figures')
+        if not exists(fig_dir): mkdir(fig_dir)
+        fig.savefig(join(fig_dir, fig_title+".png"))
+        plt.close()
 
 def arg_parser():
     parser = argparse.ArgumentParser()
@@ -717,7 +791,7 @@ def arg_parser():
                                  'elastic'],
                        )
     parser.add_argument("-s", '--subject', type=int,
-                        default=1,
+                        default=2,
                         choices=list(range(1,3))+[-1],
                        )
     parser.add_argument("-f", '--feature_method', type=str,
@@ -734,10 +808,10 @@ def arg_parser():
                         default=0.2,
                        )
     parser.add_argument('-l', '--lbl_str', type=str,
-                        default='br',
+                        default='pss',
                        )
     parser.add_argument('-tl', '--train_len', type=int,
-                        default=1,
+                        default=3,
                         help='minutes of data to use for calibration'
                        )
     args = parser.parse_args()
@@ -748,13 +822,6 @@ if __name__ == '__main__':
     # '[!M]*'
     np.random.seed(100)
     n_subject_max = 2
-    xsens_df = load_and_sync_xsens('Pilot02')
-
-    activity_df = get_activity_log('Pilot02')
-    event_df = get_respiration_log('Pilot02')
-
-    ipdb.set_trace()
-    
     args = arg_parser()
 
     mdl_str        = args.model