Skip to content
Snippets Groups Projects
Commit c995cbc5 authored by Raymond Chia's avatar Raymond Chia
Browse files

initial commit

parents
Branches
No related merge requests found
Showing
with 1102 additions and 0 deletions
# aria-respiration-cal
Development of subject-specific models using minimal respiration data
collection from https://code.research.uts.edu.au/rqchia/respirationcalibrator
File added
File added
File added
File added
File added
File added
File added
File added
File added
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import ipdb
import glob
from datetime import datetime, timedelta
import pytz
from multiprocessing import Pool
from os.path import join, splitext
from modules.datapipeline import datetime_to_ms
def get_flist(f_glob):
    """Return the sorted list of paths matching the (recursive) glob pattern."""
    return sorted(glob.glob(f_glob, recursive=True))
def load_only_imu(f):
    """Load an IMU json-lines file and return only rows with accelerometer data.

    f: path to a gzipped json-lines file. On EOFError (truncated gzip) the
    uncompressed file with the same stem is read instead.

    Returns a DataFrame with the nested 'data' column flattened into
    top-level columns, keeping only rows where 'accelerometer' is present,
    or None when the file holds no rows.
    """
    try:
        df = pd.read_json(f, lines=True, compression='gzip')
    except EOFError:
        # truncated/corrupt gzip: retry on the non-gz file with the same stem
        df = pd.read_json(splitext(f)[0], lines=True)
    if df.empty:
        return None
    # flatten the nested 'data' dicts into top-level columns
    data_df = pd.DataFrame(df['data'].tolist())
    df = pd.concat([df.drop('data', axis=1), data_df], axis=1)
    # keep only rows that actually carry accelerometer samples
    # (previously computed-but-unused na/not-na index arrays were removed)
    mask = pd.isna(df['accelerometer'])
    return df.loc[~mask]
def get_mean_fs(df):
    """Return the mean sampling frequency (Hz) from a dataframe's
    'timestamp' column (values assumed to be in seconds).

    The leftover debug prints and blocking ``plt.show()`` call were
    removed; they halted every caller in non-interactive runs.
    """
    time = df['timestamp'].values
    diff = time[1:] - time[:-1]
    return 1/np.mean(diff)
def fname_fs(f):
    """Return ``(filename, mean sampling rate)`` for an IMU file, or None
    when the file contains no IMU rows.
    """
    imu_df = load_only_imu(f)
    if imu_df is None:
        return None
    return f, get_mean_fs(imu_df)
def imu_start_end_time(hdr_fname, data_fname):
    """Return (start, end) datetimes of an IMU recording.

    hdr_fname: recording.g3 json header with 'created' and 'timezone' keys.
    data_fname: gzipped json-lines IMU data file.

    Also prints the recorded end time next to an estimate derived from the
    sample count at an assumed 120 Hz, as a sanity check.
    """
    imu_hdr = pd.read_json(hdr_fname, orient='index')
    imu_hdr = imu_hdr.to_dict().pop(0)
    imu_df = load_only_imu(data_fname)
    iso_tz = imu_hdr['created']
    tzinfo = pytz.timezone(imu_hdr['timezone'])
    # adjust for UTC
    # NOTE(review): hard-coded +11 h offset is applied instead of using
    # the tzinfo built above — presumably AEDT; confirm this holds
    # outside daylight-saving periods.
    start_time = datetime.fromisoformat(iso_tz[:-1]) + timedelta(hours=11)
    imu_times = imu_df['timestamp'].values
    # per-sample timestamps are seconds relative to the header start time
    imu_datetimes = [start_time + timedelta(seconds=val)
                     for val in imu_times]
    nbins = len(imu_times)
    # estimated end assuming a nominal 120 Hz sample rate
    est_end = datetime.fromtimestamp(imu_datetimes[0].timestamp() + nbins*(1/120))
    print("endtime: {0}\testimate: {1}".format(imu_datetimes[-1],
                                               est_end))
    return imu_datetimes[0], imu_datetimes[-1]
def str_to_datetime(time_in):
    """Parse a 'dd/mm/YYYY HH:MM:SS.ffffff' string into a datetime."""
    return datetime.strptime(time_in, "%d/%m/%Y %H:%M:%S.%f")
def harness_start_end_time(fname, fs=100):
    """Return (start, end) datetimes from a harness CSV's 'Time' column.

    fs: nominal sampling rate (Hz); used only to print an estimated end
    time from the row count as a sanity check against the recorded end.
    """
    df = pd.read_csv(fname)
    start = str_to_datetime(df['Time'].iloc[0])
    end = str_to_datetime(df['Time'].iloc[-1])
    nbins = len(df)
    estimate = datetime.fromtimestamp(start.timestamp() + nbins*(1/fs))
    print("endtime: {0}\testimate: {1}".format(end, estimate))
    return start, end
if __name__ == '__main__':
    # Sanity-check recording durations: compare the IMU start/end times
    # against each harness CSV's start/end and its sample-count estimate.
    data_dir = '/data/rqchia/aria_walk/Data/test-rest'
    f_glob = join(data_dir, '**', 'imudata.gz')
    h_glob = join(data_dir, '**', 'recording.g3')
    data_fname = glob.glob(f_glob)[0]
    hdr_fname = glob.glob(h_glob)[0]
    t0, t1 = imu_start_end_time(hdr_fname, data_fname)
    a_glob = join(data_dir, '**', '*_Accel.csv')
    b_glob = join(data_dir, '**', '*_Breathing.csv')
    s_glob = join(data_dir, '**', '*_SummaryEnhanced.csv')
    a_fname = glob.glob(a_glob)[0]
    a0, a1 = harness_start_end_time(a_fname, fs=100)
    b_fname = glob.glob(b_glob)[0]
    b0, b1 = harness_start_end_time(b_fname, fs=25)
    s_fname = glob.glob(s_glob)[0]
    s0, s1 = harness_start_end_time(s_fname, fs=1)
    print(f"imu {t0}\t{t1}\n"
          f"acc {a0}\t{a1}\n"
          f"bre {b0}\t{b1}\n"
          f"sum {s0}\t{s1}\n")
    # flist = get_flist(f_glob)
    # tmp = []
    # tmp = fname_fs(flist[0])
    # # with Pool(10) as p:
    # #     tmp = p.map(fname_fs, flist)
    # df = pd.DataFrame(tmp, columns=['fname', 'fs'])
    # df.dropna(inplace=True)
    # mask = df['fs'] > 10
    # df = df[mask]
    # print(df)
# --- experiment configuration constants ---
DEBUG = False
NROWS = 1008
MARKER_FS = 120    # motion-capture marker sampling rate (Hz)
BR_FS = 18         # breathing/pressure sampling rate (Hz)
ACC_FS = 100       # harness accelerometer sampling rate (Hz)
IMU_FS = 120       # head-worn IMU sampling rate (Hz)
N_MARKERS = 7      # number of mocap markers per subject
ACC_THOLD = 10
WIN_THOLD = 0.03
MQA_THOLD = 0.7
FS_RESAMPLE = 256  # resampling rate used by the DSP pipelines (Hz)
WINDOW_SIZE = 20 # seconds
WINDOW_SHIFT = 1 # seconds
MIN_RESP_RATE = 3 # BPM
MAX_RESP_RATE = 45 # BPM
TIME_COLS = ['Timestamps', 'Event', 'Text', 'Color']
MOCAP_ACCEL_SD = 0.00352
# dataset split ratios
TRAIN_VAL_TEST_SPLIT = [0.6, 0.2, 0.2]
TRAIN_TEST_SPLIT = [0.8, 0.2]
# Global matplotlib defaults for figure text sizing.
import matplotlib as mpl
mpl.rcParams['figure.titlesize'] = 6
mpl.rcParams['axes.titlesize'] = 6
mpl.rcParams['axes.titleweight'] = 'bold'
mpl.rcParams['axes.labelsize'] = 6
mpl.rcParams['xtick.labelsize'] = 6
mpl.rcParams['ytick.labelsize'] = 6
# Per-subject data-quality lists (subject numbers).
# presumably subjects whose harness accelerometer ran at the lower
# sampling rate — confirm against acquisition logs
LOW_HACC_FS_ID = [1, 2, 9, 11, 12, 13, 14, 15, 16, 17, 18, 20, 21, 22, 23, 24,
                  25, 26, 27]
# presumably subjects with no harness accelerometer data — confirm
NO_HACC_ID = [3, 4, 5, 6]
# issues with marker data on MR conditions:
MARKER_ISSUES_MR = [12, 14, 17, 18, 26, 30]
MARKER_ISSUES_R = [12, 14, 18]
MARKER_ISSUES_M = [12, 14]
# issues with imu data on MR and L0-3 conditions:
IMU_ISSUES = [17, 21, 23, 26, 28, 30]
IMU_ISSUES_L = [15, 17, 21, 23, 26, 28]
# issues with imu data on MR:
IMU_ISSUES_MR = [17, 26, 30]
# Figure export settings.
DPI = 300
FIG_FMT = 'pdf'
from sys import platform

# Root data directory, selected by host OS.
if 'linux' in platform:
    DATA_DIR = '/projects/CIBCIGroup/00DataUploading/rqchia/aria_seated/Data'
elif platform.startswith('win'):
    # BUG FIX: the previous check `'win' in platform` also matched
    # 'darwin' (macOS) and 'cygwin'; startswith('win') matches only
    # Windows ('win32').
    DATA_DIR = 'D:/Raymond Chia/UTS/Howe Zhu - Data/1stExperiment_sitting'
import argparse
import ipdb
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import seaborn as sns
sns.set_theme()
from tqdm import tqdm
import glob
import re
import pandas as pd
import json
import cv2
import tsfresh
from tsfresh import extract_features
from tsfresh.feature_selection import relevance as tsfresh_relevance
from tsfresh.feature_extraction import settings as tsfresh_settings
from datetime import datetime, timedelta
from os import makedirs, listdir
from os.path import isdir, getsize
from os.path import join as path_join
from os.path import exists as path_exists
from pprint import PrettyPrinter
from multiprocessing import Pool, cpu_count
from functools import partial
from ast import literal_eval
from modules.animationplotter import AnimationPlotter, AnimationPlotter2D
from modules.digitalsignalprocessing import imu_signal_processing
from modules.digitalsignalprocessing import vectorized_slide_win
from modules.digitalsignalprocessing import get_video_features
from modules.datapipeline import SubjectData, datetime_to_sec\
,sec_to_datetime, DataSynchronizer
from modules.datapipeline import get_file_list, load_files_conditions
from modules.evaluations import Evaluation
from modules.utils import map_condition_to_tlx
# from skimage.util import view_as_blocks
from config import DEBUG, NROWS, MARKER_FS, BR_FS, N_MARKERS , WINDOW_SIZE \
,WINDOW_SHIFT, ACC_THOLD, FS_RESAMPLE, MIN_RESP_RATE, MAX_RESP_RATE \
,TRAIN_VAL_TEST_SPLIT, IMU_FS, DATA_DIR
# Output directory for evaluation artefacts.
DIR = './evaluations/'
def load_df(subject, condition):
    """Load the xsens dataframe(s) for one subject/condition.

    Returns the concatenated dataframe, or None when no matching files
    exist.
    """
    # glob keyed on condition; the subject is handled by get_file_list
    pattern = f"*{condition}_xsens_df"
    files = get_file_list(pattern, sbj=subject)
    if not files:
        return None
    # load every matched file without skipping rows
    return load_files_conditions(files, skip_ratio=0.0,
                                 do_multiprocess=False)
def get_subject_tsfresh(fs, data_df, df_path, tlx_df,
                        window_size=WINDOW_SIZE,
                        window_shift=WINDOW_SHIFT):
    """Window the IMU data, filter each window, extract tsfresh features
    and pickle the combined feature/label dataframe to df_path.

    fs: sampling frequency (Hz) used for window sizing and filtering.
    data_df: dataframe with 'sec', acc_*/gyr_* channels and 'BR', 'PSS',
        'condition', 'subject' label columns.
    df_path: destination pickle path.
    tlx_df: NASA-TLX dataframe; read from seated_nasa_tlx.csv when None.
    Returns the saved dataframe, or None when no valid windows remain.
    """
    if tlx_df is None:
        tlx_df = pd.read_csv('seated_nasa_tlx.csv', index_col=[0]).T
    x_time = data_df['sec'].values
    x_times = []
    x, y = [], []
    c_out = []
    x_df_out = pd.DataFrame()
    inds = np.arange(len(data_df))
    vsw = vectorized_slide_win
    wins = vsw(inds, len(inds), sub_window_size=int(window_size*fs),
               stride_size=int(window_shift*fs))
    for i, w_inds in enumerate(wins):
        # presumably a zero-padded trailing window marks the end of the
        # valid windows — confirm against vectorized_slide_win
        if w_inds[-1] == 0: break
        x_df = data_df.iloc[w_inds]
        t0, t1 = x_time[w_inds][0], x_time[w_inds][-1]
        diff = x_time[w_inds[1:]] - x_time[w_inds[0:-1]]
        # skip windows spanning a recording gap (> 20 s between samples)
        mask = diff>20
        diff_chk = np.any(mask)
        if diff_chk:
            continue
        data = x_df[['acc_x', 'acc_y', 'acc_z',
                     'gyr_x', 'gyr_y', 'gyr_z']].values
        # DSP: standardise each channel, then filter
        sd_data = (data - np.mean(data, axis=0))/np.std(data, axis=0)
        # ys = cubic_interp(sd_data, BR_FS, FS_RESAMPLE)
        filt_data = imu_signal_processing(sd_data, fs)
        df_sp = pd.DataFrame(filt_data,
                             columns=['acc_x', 'acc_y', 'acc_z',
                                      'gyro_x', 'gyro_y', 'gyro_z'])
        # labels: window-median BR/PSS plus condition and subject ids
        sm_out = data_df['BR'].values
        cd_out = data_df['condition'].values
        ps_out = data_df['PSS'].values
        sbj = data_df['subject'].values
        x_vec_time = np.median(x_time[w_inds])
        y_arr = np.array([x_vec_time, np.nanmedian(sm_out),
                          np.nanmedian(ps_out), cd_out[0], sbj[0]])
        y_out = pd.DataFrame([y_arr], columns=['sec', 'br', 'pss', 'condition',
                                               'subject'])
        map_condition_to_tlx(y_out, tlx_df['S'+str(sbj[0])])
        x_times.append(x_vec_time)
        # tag each window so tsfresh can group ('id') and sort ('time')
        df_sp['id'] = i
        df_sp['time'] = x_vec_time
        if x_df_out.empty: x_df_out = df_sp
        else: x_df_out = pd.concat([x_df_out, df_sp])
        x.append(df_sp)
        y.append(y_out)
    if len(x) == 0 or len(y) == 0:
        return None
    y_df = pd.concat(y)
    y_df.reset_index(drop=True, inplace=True)
    # Create tsfresh features; check for NaN values
    x_features_df = extract_features(x_df_out, column_sort='time',
                                     column_id='id', n_jobs=1,
                                     # default_fc_parameters=tsfresh_settings.MinimalFCParameters(),
                                     )
    x_features_df.fillna(0, inplace=True)
    cols = x_features_df.columns.values
    df_out = pd.concat([y_df, x_features_df], axis=1)
    df_out.to_pickle(df_path)
    return df_out
def plot_video_features(video_fname, nframes=1000):
    """Visualise optical-flow tracking of video feature points.

    Reads up to ``nframes`` frames, tracks corner features (detected via
    ``get_video_features(..., use_shi=True)``) between consecutive frames
    with Lucas-Kanade optical flow, and overlays surviving points on each
    frame. Feature points are re-detected every 20 s of video.

    Fixes: removed a leftover ``ipdb.set_trace()`` breakpoint that halted
    every call, and the capture handle is now released on exit.
    """
    cap = cv2.VideoCapture(video_fname)
    xmin, ymin = 0, 0
    boxw = 1080
    boxh = 1920
    fps = 25  # assumed video frame rate — TODO confirm from the capture
    box = np.array([[xmin,ymin], [xmin+boxw,ymin],
                    [xmin,ymin+boxh], [xmin+boxw,ymin+boxh]])
    fig, ax = plt.subplots()
    for f_id in range(nframes):
        _, frame = cap.read()
        if f_id == 0:
            oldframe_gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
            oldPts = get_video_features(oldframe_gray, use_shi=True)
        else:
            frame_gray = cv2.cvtColor(frame,cv2.COLOR_RGB2GRAY)
            nextPts, pt_status, err = cv2.calcOpticalFlowPyrLK(
                oldframe_gray, frame_gray, oldPts, None, winSize=(15,15))
            inds = np.arange(len(pt_status))
            mask = pt_status == 1
            inds = inds[mask.squeeze()]
            npts = nextPts.squeeze()
            ax.imshow(frame_gray,cmap='gray')
            # plot and track only if the status has been found
            if len(inds) > 0:
                ax.scatter(npts[inds,0], npts[inds,1], color=(1,0,0), marker='o')
            # update
            oldframe_gray = frame_gray
            if not f_id%(20*fps):
                # extract harmonic data from here. Pixel displacement / dt
                oldPts = get_video_features(oldframe_gray, use_shi=True)
            plt.pause(0.01)
            plt.cla()
    cap.release()
def map_imu_tsfresh_subject(subject,
                            tlx_df=None,
                            conditions=['R', 'L0', 'L1', 'L2', 'L3'],
                            window_size=5, window_shift=0.2):
    """Extract and pickle tsfresh features for each condition of a subject.

    Skips conditions whose output pickle already exists or whose data
    cannot be loaded.

    subject: subject id, e.g. 'S12'.
    tlx_df: NASA-TLX dataframe forwarded to get_subject_tsfresh.
    conditions: condition tags to process (never mutated).
    window_size, window_shift: window parameters in seconds.
    """
    # BUG FIX: `fs` was previously an undefined free variable — it only
    # worked when a global `fs` happened to be set in __main__.
    fs = IMU_FS
    sbj_data = SubjectData(subject=subject)
    sbj_data.set_imu_fname()
    sbj_dir = sbj_data.subject_dir
    for condition in conditions:
        tsfresh_pkl = path_join(
            sbj_dir, "{0}__winsize_{1}__winshift_{2}_imu_tsfresh_df.pkl"
            .format(condition, window_size, window_shift))
        # already computed: skip
        if path_exists(tsfresh_pkl):
            continue
        print(f"trying {subject} for {condition}")
        data_df = load_df(subject, condition)
        if data_df is None:
            continue
        get_subject_tsfresh(fs, data_df, tsfresh_pkl, tlx_df,
                            window_size=window_size,
                            window_shift=window_shift)
def arg_parser():
    """Parse ``-w/--win_size`` as a comma-separated list of ints.

    Returns a list of window sizes, e.g. ``-w 5,7`` -> [5, 7]; defaults
    to [50] when the flag is omitted.
    """
    parser = argparse.ArgumentParser()
    # BUG FIX: the default must be a string — the old default of int 50
    # raised TypeError on len()/.split() when no flag was given.
    parser.add_argument('-w', '--win_size', type=str,
                        default='50',
                        )
    args = parser.parse_args()
    # splitting handles both the single-value and comma-separated cases
    return [int(item) for item in args.win_size.split(',')]
if __name__ == '__main__':
    # Sweep window sizes/shifts and extract tsfresh features for every
    # subject, fanning out across CPU cores.
    # window_sizes = [5, 7, 10, 12]
    # window_sizes = [15, 17, 20]
    window_sizes = arg_parser()
    print(window_sizes)
    window_shifts = [0.2, 0.5, 1.0]
    fs = IMU_FS
    subjects = ['S'+str(i).zfill(2) for i in range(12,31)] # mars13
    # subjects = ['S'+str(i).zfill(2) for i in range(21,31)] # mars13
    conditions = ['M', 'R'] + ['L'+str(i) for i in range(0,4)]
    # conditions = ['R'] + ['L'+str(i) for i in range(0,4)]
    window_size = WINDOW_SIZE
    window_shift = WINDOW_SHIFT
    tlx_df = pd.read_csv('seated_nasa_tlx.csv', index_col=[0]).T
    for window_shift in window_shifts:
        for window_size in window_sizes:
            func = partial(map_imu_tsfresh_subject,
                           tlx_df=tlx_df,
                           conditions=conditions,
                           window_size=window_size,
                           window_shift=window_shift)
            # gen = map(func, subjects)
            # for i, x in enumerate(gen):
            #     print(i/len(subjects)*100)
            with Pool(cpu_count()) as p:
                p.map(func, subjects)
    print("completed for window sizes: ", window_sizes)
import ipdb
from os.path import join,exists
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from config import DATA_DIR
def load_eval_file():
    """Read eval_history.csv from the subject_specific results folder.

    Raises AssertionError when the file is missing.
    """
    eval_csv = join(DATA_DIR, 'subject_specific', 'eval_history.csv')
    assert exists(eval_csv), "Evaluation file does not exist"
    return pd.read_csv(eval_csv)
if __name__ == '__main__':
    # Plot Pearson correlation and MAE against training length, one
    # colour per subject, filtered to a single model/feature config.
    mdl_str = 'linreg'
    do_minirocket = True
    use_tsfresh = False
    df = load_eval_file()
    mask = ((df['mdl_str'] == mdl_str) &\
            (df['do_minirocket'] == do_minirocket) &\
            (df['use_tsfresh'] == use_tsfresh))
    data = df[mask]
    subjects = data['subject'].unique()
    fig, axs = plt.subplots(2, 1)
    for subject in subjects:
        # map subject number (S12..S30) onto a 0-1 colour value
        c = (int(subject[1:])-12)/(30-12)
        print(int(subject[1:]))
        print(c)
        mask = data['subject'] == subject
        x = data[mask]['train_len'].values
        y0 = data[mask]['pearsonr_coeff'].values
        y1 = data[mask]['mae'].values
        axs[0].scatter(x, y0, c=[c]*len(x), vmin=0, vmax=1, cmap='hsv')
        axs[1].scatter(x, y1, c=[c]*len(x), vmin=0, vmax=1, cmap='hsv')
    axs[0].axhline(color='k', alpha=0.3)
    axs[0].set_ylabel('pearson coeff', fontsize=12)
    axs[1].set_ylabel('mae', fontsize=12)
    axs[1].set_xlabel("training minutes", fontsize=12)
    axs[0].set_xticks(range(1, 6))
    axs[1].set_xticks(range(1, 6))
    axs[0].tick_params(axis='x', labelsize=12)
    axs[1].tick_params(axis='x', labelsize=12)
    axs[0].tick_params(axis='y', labelsize=12)
    axs[1].tick_params(axis='y', labelsize=12)
    fig.suptitle(f"{mdl_str} with minirocket features", fontsize=12)
    plt.show()
'''
Author: Raymond Chia
Date: 7/Sept/2023
Description: Code to explore classification using tsfresh features
'''
from os import makedirs
from os.path import join, exists
import pandas as pd
import numpy as np
import json
import ipdb
import re
from datetime import datetime
import pickle
import matplotlib.pyplot as plt
from collections import Counter
from itertools import repeat
from multiprocessing import Pool, cpu_count
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.preprocessing import PolynomialFeatures, LabelEncoder
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from tsfresh.feature_extraction import extract_features
from tsfresh.feature_selection import relevance as tsfresh_relevance
from tsfresh.utilities.string_manipulation import get_config_from_string
from modules.datapipeline import get_file_list
from modules.datapipeline import DataSynchronizer
from modules.digitalsignalprocessing import vectorized_slide_win as vsw
from modules.digitalsignalprocessing import imu_signal_processing
from modules.digitalsignalprocessing import do_pad_fft,\
pressure_signal_processing
from timeseriesai import get_train_test_df, reshape_array, get_condition_name
from modules.evaluations import Evaluation
from modules.datapipeline import get_windowed_data, load_files_conditions
from pprint import PrettyPrinter
# TODO change these parameters to fit your environment
from config import WINDOW_SIZE, WINDOW_SHIFT, IMU_FS, DATA_DIR, IMU_ISSUES, \
BR_FS, PROJ_DIR
def load_data(subject:str, data_str:str, condition:str=None):
    '''Load matching sensor dataframes for one subject.

    subject: subject identifier, e.g. 'S12' or 'S13'.
    data_str: sensor tag used in the filename glob, e.g. 'pressure'.
    condition: optional condition filter; when None, files from every
    condition are matched.
    '''
    # build the glob pattern, wildcarding the condition when unspecified
    if condition is None:
        pattern = f'*{data_str}_df'
    else:
        pattern = f'{condition}_{data_str}_df'
    files = get_file_list(pattern, sbj=subject)
    return load_files_conditions(files, skip_ratio=None)
def generate_data(subject, condition):
    """Estimate respiration rate from the pressure sensor for one
    subject/condition.

    Slides a window over the pressure trace, band-filters each window and
    takes the FFT peak frequency (converted to breaths-per-minute) as the
    estimate. Returns the per-window estimates as a numpy array.

    NOTE(review): relies on module-level globals ``window_size`` and
    ``window_shift`` being set by the caller's __main__ block — confirm.

    Fixes: removed a leftover ``ipdb.set_trace()`` breakpoint that halted
    every call in non-interactive runs, plus unused post-loop locals.
    """
    pss_df = load_data(subject, 'pressure', condition=condition)
    br_df = load_data(subject, 'summary', condition=condition)
    pss_fs = BR_FS
    pss_col = [col for col in pss_df.columns.values if
               'breathing' in col.lower()][0]
    pss_ms = pss_df['ms'].values
    br_ms = br_df['ms'].values
    inds = np.arange(0, len(pss_ms))
    vsw_out = vsw(inds, len(inds), sub_window_size=int(window_size*pss_fs),
                  stride_size=int(window_shift*pss_fs))
    dsync = DataSynchronizer()
    pss_est, br_out = [], []
    for i, win in enumerate(vsw_out):
        if win[-1] == 0: break
        t0, t1 = pss_ms[win][0], pss_ms[win][-1]
        # Pressure-based respiration estimate for this window
        pss_win = pss_df[pss_col].iloc[win]
        pss_filt = pressure_signal_processing(pss_win, fs=pss_fs)
        xf, yf = do_pad_fft(pss_filt, fs=pss_fs)
        est = xf[yf.argmax()]*60  # dominant frequency (Hz) -> BPM
        pss_est.append(est)
        # Reference: median summary BR over the same time span
        dsync.set_bounds(br_ms, t0, t1)
        br_win = dsync.sync_df(br_df)
        br_out.append(np.median(br_win['BR'].values))
    return np.array(pss_est)
if __name__ == "__main__":
window_sizes = [3, 5, 7, 10, 12, 15]
window_shift = 1
subjects = ['S'+str(i).zfill(2) for i in range(1,31)]
conditions = ['L'+str(i) for i in range(0, 4)]
parent_dir = join(PROJ_DIR, 'pressure-estimate')
makedirs(parent_dir, exist_ok=True)
for window_size in window_sizes:
pkl_fname = join(
parent_dir,
f"size_{window_size}__shift_{window_shift}__pss.pkl"
)
dataset = {"subjectid": [],
"data": [],
"data_norm": [],
"rest_avg": [],
"label": [],
}
for subject in subjects:
print(f"calculating {subject} rest norm...", end=' ')
sbj_rest_norm = generate_data(subject, 'R').mean()
print('done')
for condition in conditions:
print(f"working on {subject}-{condition}")
pss_est = generate_data(subject, condition)
# except:
# print(f"failed on {subject}-{condition}")
# continue
dataset['subjectid'].append(subject)
dataset['label'].append(int(condition[-1]))
dataset['data'].append(pss_est)
dataset['data_norm'].append(pss_est.mean())
dataset['rest_avg'].append(sbj_rest_norm)
with open(pkl_fname, 'wb') as fp:
pickle.dump(dataset, fp)
print("dataset dict saved as", pkl_fname)
import ipdb
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
from joblib import dump, load
import time
from tqdm import tqdm
from sys import platform
import glob
import re
import pandas as pd
import json
from tsfresh import extract_features
from tsfresh.feature_selection import relevance as tsfresh_relevance
from tsfresh.feature_extraction import settings as tsfresh_settings
from sklearn.decomposition import PCA, FastICA
from sklearn.preprocessing import StandardScaler
from datetime import datetime, timedelta
from os import makedirs, listdir
from os.path import isdir, getsize
from os.path import join as path_join
from os.path import exists as path_exists
from pprint import PrettyPrinter
from multiprocessing import Pool, cpu_count
from functools import partial
from ast import literal_eval
from modules.animationplotter import AnimationPlotter, AnimationPlotter2D
from modules.digitalsignalprocessing import *
from modules.datapipeline import SubjectData, datetime_to_ms\
,ms_to_datetime, DataSynchronizer
from modules.datapipeline import get_file_list, load_files_conditions
from modules.datapipeline import get_windowed_data
from modules.evaluations import Evaluation
from config import DEBUG, NROWS, MARKER_FS, BR_FS, N_MARKERS , WINDOW_SIZE \
,WINDOW_SHIFT, ACC_THOLD, FS_RESAMPLE, MIN_RESP_RATE, MAX_RESP_RATE \
,TRAIN_VAL_TEST_SPLIT, IMU_FS, ACC_FS, DPI, FIG_FMT, DATA_DIR
''' We're looking only at meditation and rest '''
# Override the window parameters imported from config for this script.
WINDOW_SIZE = 15 # seconds
WINDOW_SHIFT = 1 # seconds
# mpl.rc('font', serif='Times New Roman')
sns.set_theme(style='ticks')
# sns.axes_style('white')
def marker_main(subject='S13', condition='R', sens='imu_gyr', method='roddiger'):
    """Render the 'methods' figure: a raw sensor window and a raw marker
    window, both processed signals against the pressure reference, and
    their spectra; the figure is saved to ./graphs/methods/.

    subject, condition: which recording to plot.
    sens: one of 'marker', 'imu_acc', 'imu_gyr', 'h_acc'.
    method: 'roddiger' or 'hernandez' signal-processing pipeline.
    """
    window_size = WINDOW_SIZE
    window_shift = WINDOW_SHIFT
    fs = MARKER_FS
    marker_sel = 0  # which mocap marker to plot
    # sensor-specific sampling rate and plot title
    if sens == 'marker':
        fs = MARKER_FS
        title = 'Marker Raw'
    elif 'imu' in sens:
        fs = IMU_FS
        if 'acc' in sens: title = 'Head Worn Accelerometer Raw'
        if 'gyr' in sens: title = 'Head Worn Gyroscope Raw'
    elif sens=='h_acc':
        # subjects before S27 recorded harness accel at BR_FS
        if int(subject[-2:]) < 27: fs=BR_FS
        else: fs=ACC_FS
        title = 'Harness Accelerometer Raw'
    # set glob based on sensor type and subject and condition
    if 'imu' in sens: data_glob = f"*{condition}_imu_df"
    elif sens == 'h_acc': data_glob = f"*{condition}_accel_df"
    else: data_glob = f"*{condition}_{sens}_df"
    marker_glob = f"*{condition}_marker_df"
    data_list = get_file_list(data_glob, sbj=subject)
    lbl_list = get_file_list(f'*{condition}_summary_df', sbj=subject)
    pss_list = get_file_list(f'*{condition}_pressure_df', sbj=subject)
    marker_flist = get_file_list(marker_glob, sbj=subject)
    # load the files in the glob
    marker_df = load_files_conditions(marker_flist, skip_ratio=0.0)
    data_df = load_files_conditions(data_list, skip_ratio=0.0)
    lbl_df = load_files_conditions(lbl_list, skip_ratio=0.0)
    pss_df = load_files_conditions(pss_list, skip_ratio=0.0)
    # bin to windows
    x_time = data_df['ms'].values
    marker_time = marker_df['ms'].values
    vsw = vectorized_slide_win
    data_inds = np.arange(0, len(x_time))
    wins = vsw(data_inds, len(data_inds),
               sub_window_size=window_size*fs,
               stride_size=window_shift*fs)
    is_marker = False
    # if sens == 'marker':
    #     is_marker = True
    # pick the sensor channel array to window
    if sens == 'imu_acc':
        data = np.array(data_df['accelerometer'].map(literal_eval).tolist())
    elif sens == 'imu_gyr':
        data = np.array(data_df['gyroscope'].map(literal_eval).tolist())
    elif sens == 'h_acc':
        sbj_data = SubjectData(condition=condition, subject=subject)
        sbj_data.accel_df = data_df
        data = sbj_data.get_accel_data()
    data_wins = get_windowed_data(x_time, data, wins)
    data_list = [win for win in data_wins]
    # pressure reference: filter, resample onto the sensor time base, window
    br_col = [col for col in pss_df.columns.values if
              'breathing' in col.lower()][0]
    pss_dsp = pressure_signal_processing
    pss_data = pss_df[br_col].values
    pss_data = pss_dsp(pss_data)
    pss_data = np.interp(x_time, pss_df['ms'].values, pss_data)
    pss_data_wins = get_windowed_data(x_time, pss_data, wins)
    pss_list = [win for win in pss_data_wins]
    # Sync marker data
    sbj_data = SubjectData(condition=condition, subject=subject)
    sbj_data.marker_df = marker_df
    marker_data = sbj_data.get_marker_data()
    marker_data = marker_data[:, marker_sel, :]
    tmp = []
    for i in range(3):
        tmp.append(np.interp(x_time, marker_df['ms'].values,
                             marker_data[:,i]))
    marker_data = np.array(tmp).T
    # differentiate marker position twice -> acceleration
    marker_data = second_order_diff(marker_data, MARKER_FS)
    marker_wins = get_windowed_data(x_time, marker_data, wins)
    marker_list = [win for win in marker_wins]
    # Perform rejection
    std_scaler = StandardScaler()
    rejects = [i for i, win in enumerate(data_list) if
               reject_artefact((win-np.mean(win,axis=0))/np.std(win,axis=0))]
    inds_to_keep = np.delete(np.arange(0, len(data_list)), rejects)
    marker_list = [marker_list[i] for i in inds_to_keep]
    data_list = [data_list[i] for i in inds_to_keep]
    pss_list = [pss_list[i] for i in inds_to_keep]
    # plot the raw signal of a select window
    fig, axs = plt.subplots(3,2, figsize=(12, 6.5), dpi=DPI)
    mpl.rcParams['axes.titleweight'] = 'bold'
    axs = axs.flatten()
    win_ind = 22
    # NOTE(review): when this middle-window fallback is taken,
    # pss_list[win_ind] further below may still raise IndexError — confirm.
    if len(data_list) < win_ind:
        data_win = data_list[int(len(data_list)//2)]
        marker_win = marker_list[int(len(data_list)//2)]
    else:
        data_win = data_list[win_ind]
        marker_win = marker_list[win_ind]
    fontsize = 10
    piter = 0.03  # vertical shift applied to axes positions below
    x_win = np.linspace(0, window_size, len(data_win))
    axs[0].plot(x_win, StandardScaler().fit_transform(data_win), linewidth=1)
    axs[0].set_title(title, fontsize=fontsize)
    axs[0].set_xticklabels([])
    # axs[0].set_xlabel('Time (s)')
    if sens == 'marker' or 'acc' in sens:
        axs[0].set_ylabel('Acceleration (m/s${^2}$)', fontsize=fontsize)
    else:
        axs[0].set_ylabel('Angular Velocity (deg/s)', fontsize=fontsize)
    axs[0].legend(['X','Y', 'Z'], ncol=3, loc='lower left', fontsize=fontsize)
    x_win = np.linspace(0, window_size, len(marker_win))
    axs[1].plot(x_win, StandardScaler().fit_transform(marker_win), linewidth=1)
    axs[1].set_title(f'Marker {marker_sel+1} Acceleration Raw', fontsize=fontsize)
    axs[1].set_xticklabels([])
    # axs[1].set_xlabel('Time (s)')
    axs[1].set_ylabel('Acceleration (m/s${^2}$)', fontsize=fontsize)
    axs[1].legend(['X','Y', 'Z'], ncol=3, loc='lower left', fontsize=fontsize)
    # plot the processed signal with the pressure signal
    if method == 'roddiger': sig_dsp = roddiger_sp
    elif method == 'hernandez': sig_dsp = hernandez_sp
    interp, smth = sig_dsp(data=data_win, fs=fs, is_marker=False)
    marker_interp, marker_smth = sig_dsp(data=marker_win, fs=MARKER_FS,
                                         is_marker=False)
    # reduce each multi-channel window to one component for comparison
    pca = PCA(n_components=1, random_state=3)
    pca_out = StandardScaler().fit_transform(pca.fit_transform(smth))
    marker_out = StandardScaler().fit_transform(pca.fit_transform(marker_smth))
    pss_out = StandardScaler().fit_transform(pss_list[win_ind].reshape(-1, 1))\
            .squeeze()
    x_smth = np.linspace(0, window_size, len(pca_out))
    x_pss = np.linspace(0, window_size, len(pss_list[win_ind]))
    axs[2].plot(x_pss, pss_out)
    axs[2].plot(x_smth, pca_out, color='tab:red')
    axs[2].set_ylabel("Normalised Units", fontsize=fontsize)
    axs[2].set_xticks(np.arange(0, WINDOW_SIZE, 2))
    axs[2].set_yticks(np.arange(-4, 4+2, 2))
    axs[2].set_xlabel('Time (s)', fontsize=fontsize)
    if 'acc' in sens:
        axs[2].set_title('Accelerometer Signal vs Pressure', fontsize=fontsize)
        axs[2].legend(['PSS', 'ACC'], ncol=3, loc='lower left',
                      fontsize=fontsize)
    else:
        axs[2].set_title('Gyroscope Signal vs Pressure', fontsize=fontsize)
        axs[2].legend(['PSS', 'GYR'], ncol=3, loc='lower left',
                      fontsize=fontsize)
    x_marker = np.linspace(0, window_size, len(marker_out))
    axs[3].plot(x_pss, pss_out)
    axs[3].plot(x_marker, marker_out, color='tab:green')
    axs[3].set_xticks(np.arange(0, WINDOW_SIZE, 2))
    axs[3].set_yticks(np.arange(-4, 4+2, 2))
    axs[3].set_xlabel('Time (s)', fontsize=fontsize)
    axs[3].set_title(f'Marker {marker_sel+1} Signal vs Pressure', fontsize=fontsize)
    axs[3].legend(['PSS', 'MKR'], ncol=3, loc='lower left', fontsize=fontsize)
    # nudge the top four axes upward to make room for the spectra row
    for a in axs[:4]:
        a.tick_params(labelsize=fontsize)
        box = a.get_position()
        box.y0 += piter
        box.y1 += piter
        a.set_position(box)
    # plot the FFT w/o padding
    pad_len = npads_frequency_resolution(len(pca_out), fs=FS_RESAMPLE)
    data_pad = np.pad(pca_out.squeeze(), (0, pad_len), 'constant',
                      constant_values=0)
    data_xf, data_yf = run_fft(data_pad, FS_RESAMPLE)
    data_ind = data_yf.argmax(axis=0)
    pad_len = npads_frequency_resolution(len(pss_out), fs=fs)
    pss_pad = np.pad(pss_out, (0, pad_len), 'constant', constant_values=0)
    pss_xf, pss_yf = run_fft(pss_pad, fs)
    pss_ind = pss_yf.argmax(axis=0)
    pad_len = npads_frequency_resolution(len(marker_out), fs=FS_RESAMPLE)
    marker_pad = np.pad(marker_out.squeeze(), (0, pad_len), 'constant',
                        constant_values=0)
    marker_xf, marker_yf = run_fft(marker_pad, FS_RESAMPLE)
    marker_ind = marker_yf.argmax(axis=0)
    # replace the bottom row with three spectrum axes
    fig.delaxes(axs[-1])
    fig.delaxes(axs[-2])
    axs = np.delete(axs, [-2,-1])
    axs = np.insert(axs, len(axs), fig.add_subplot(3, 3, 7))
    axs = np.insert(axs, len(axs), fig.add_subplot(3, 3, 8))
    axs = np.insert(axs, len(axs), fig.add_subplot(3, 3, 9))
    ymax = 0.5
    axs[-3].plot(data_xf, data_yf, color='tab:red')
    axs[-3].plot(data_xf[data_ind], data_yf[data_ind], 'rx')
    axs[-3].set_xticks(np.arange(0, 1, 0.2))
    axs[-3].set_title('GYR Spectrum', fontsize=fontsize)
    axs[-3].set_xlim([0, 1])
    axs[-3].set_ylim([0, ymax])
    axs[-3].set_xlabel('Frequency (Hz)', fontsize=fontsize)
    axs[-3].set_ylabel('$|Y(f)|$', fontsize=fontsize)
    axs[-2].plot(pss_xf, pss_yf)
    axs[-2].plot(pss_xf[pss_ind], pss_yf[pss_ind], 'rx')
    axs[-2].set_xticks(np.arange(0, 1, 0.2))
    axs[-2].set_title('PSS Spectrum', fontsize=fontsize)
    axs[-2].set_xlim([0, 1])
    axs[-2].set_ylim([0, ymax])
    axs[-2].set_xlabel('Frequency (Hz)', fontsize=fontsize)
    axs[-1].plot(marker_xf, marker_yf, color='tab:green')
    axs[-1].plot(marker_xf[marker_ind], marker_yf[marker_ind], 'rx')
    axs[-1].set_xticks(np.arange(0, 1, 0.2))
    axs[-1].set_title(f'MKR{marker_sel+1} Spectrum', fontsize=fontsize)
    axs[-1].set_xlabel('Frequency (Hz)', fontsize=fontsize)
    axs[-1].set_xlim([0, 1])
    axs[-1].set_ylim([0, ymax])
    # nudge the spectra row downward, mirroring the shift above
    tmp = axs[-3:]
    for a in tmp:
        box = a.get_position()
        box.y0 -= piter
        box.y1 -= piter
        a.set_position(box)
    fmt = FIG_FMT
    fig.savefig(f"./graphs/methods/{method}_methods.{fmt}")
def main(subject='S14', condition='R', sens='imu_gyr', method='roddiger'):
    """Plot one selected window of a sensor against the pressure
    reference, plus both FFT spectra, and show the figure.

    subject, condition: which recording to plot.
    sens: one of 'marker', 'imu_acc', 'imu_gyr', 'h_acc'.
    method: 'roddiger' or 'hernandez' signal-processing pipeline.
    """
    window_size = WINDOW_SIZE
    window_shift = WINDOW_SHIFT
    fs = MARKER_FS
    marker_sel = 6  # which mocap marker to plot when sens == 'marker'
    # sensor-specific sampling rate and plot title
    if sens == 'marker':
        fs = MARKER_FS
        title = f'Marker {marker_sel+1} Raw'
    elif 'imu' in sens:
        fs = IMU_FS
        if 'acc' in sens: title = 'Head Worn Accelerometer Raw'
        if 'gyr' in sens: title = 'Head Worn Gyroscope Raw'
    elif sens=='h_acc':
        # subjects before S27 recorded harness accel at BR_FS
        if int(subject[-2:]) < 27: fs=BR_FS
        else: fs=ACC_FS
        title = 'Harness Accelerometer Raw'
    # set glob based on sensor type and subject and condition
    if 'imu' in sens: data_glob = f"*{condition}_imu_df"
    elif sens == 'h_acc': data_glob = f"*{condition}_accel_df"
    else: data_glob = f"*{condition}_{sens}_df"
    data_list = get_file_list(data_glob, sbj=subject)
    lbl_list = get_file_list(f'*{condition}_summary_df', sbj=subject)
    pss_list = get_file_list(f'*{condition}_pressure_df', sbj=subject)
    # load the files in the glob
    data_df = load_files_conditions(data_list, skip_ratio=0.0)
    lbl_df = load_files_conditions(lbl_list, skip_ratio=0.0)
    pss_df = load_files_conditions(pss_list, skip_ratio=0.0)
    # bin to windows
    x_time = data_df['ms'].values
    vsw = vectorized_slide_win
    data_inds = np.arange(0, len(x_time))
    wins = vsw(data_inds, len(data_inds),
               sub_window_size=window_size*fs,
               stride_size=window_shift*fs)
    is_marker = False
    # pick the sensor channel array to window
    if sens == 'marker':
        sbj_data = SubjectData(condition=condition, subject=subject)
        sbj_data.marker_df = data_df
        data = sbj_data.get_marker_data()
        # differentiate marker position twice -> acceleration
        data = second_order_diff(data, fs)
        is_marker = True
    elif sens == 'imu_acc':
        data = np.array(data_df['accelerometer'].map(literal_eval).tolist())
    elif sens == 'imu_gyr':
        data = np.array(data_df['gyroscope'].map(literal_eval).tolist())
    elif sens == 'h_acc':
        sbj_data = SubjectData(condition=condition, subject=subject)
        sbj_data.accel_df = data_df
        data = sbj_data.get_accel_data()
    data_wins = get_windowed_data(x_time, data, wins)
    data_list = [win for win in data_wins]
    # pressure reference: filter, resample onto the sensor time base, window
    br_col = [col for col in pss_df.columns.values if
              'breathing' in col.lower()][0]
    pss_dsp = pressure_signal_processing
    pss_data = pss_df[br_col].values
    pss_data = pss_dsp(pss_data)
    pss_data = np.interp(x_time, pss_df['ms'].values, pss_data)
    pss_data_wins = get_windowed_data(x_time, pss_data, wins)
    pss_list = [win for win in pss_data_wins]
    # Perform rejection
    std_scaler = StandardScaler()
    rejects = [i for i, win in enumerate(data_list) if
               reject_artefact((win-np.mean(win,axis=0))/np.std(win,axis=0))]
    inds_to_keep = np.delete(np.arange(0, len(data_list)), rejects)
    data_list = [data_list[i] for i in inds_to_keep]
    pss_list = [pss_list[i] for i in inds_to_keep]
    # plot the raw signal of a select window
    fig, axs = plt.subplots(3,1)
    win_ind = 60
    # NOTE(review): when this middle-window fallback is taken,
    # pss_list[win_ind] further below may still raise IndexError — confirm.
    if len(data_list) < win_ind:
        data_win = data_list[int(len(data_list)//2)]
    else:
        data_win = data_list[win_ind]
    if is_marker:
        data_win = data_win[:, marker_sel, :]
    x_win = np.linspace(0, window_size, len(data_win))
    axs[0].plot(x_win, StandardScaler().fit_transform(data_win), linewidth=1)
    axs[0].set_title(title)
    axs[0].set_xticklabels([])
    axs[0].set_xlabel('Time (s)')
    if sens == 'marker' or 'acc' in sens:
        axs[0].set_ylabel('Acceleration (m/s${^2}$)')
    else:
        axs[0].set_ylabel('Angular Velocity (deg/s)')
    axs[0].legend(['X','Y', 'Z'], ncol=3, loc='lower left')
    # plot the processed signal with the pressure signal
    if method == 'roddiger': sig_dsp = roddiger_sp
    elif method == 'hernandez': sig_dsp = hernandez_sp
    interp, smth = sig_dsp(data=data_win, fs=fs, is_marker=False)
    # reduce the multi-channel window to one component for comparison
    pca = PCA(n_components=1, random_state=3)
    pca_out = StandardScaler().fit_transform(pca.fit_transform(smth))
    pss_out = StandardScaler().fit_transform(pss_list[win_ind].reshape(-1, 1))\
            .squeeze()
    x_smth = np.linspace(0, window_size, len(pca_out))
    x_pss = np.linspace(0, window_size, len(pss_list[win_ind]))
    axs[1].plot(x_pss, pss_out)
    axs[1].plot(x_smth, pca_out, color='tab:red')
    axs[1].set_ylabel("Normalised Units")
    axs[1].set_xticks(np.arange(0, WINDOW_SIZE, 2))
    axs[1].set_xlabel('Time (s)')
    axs[1].set_title('Sensor Signal vs Pressure')
    if sens == 'marker' or 'acc' in sens:
        axs[1].legend(['PSS', 'ACC'], ncol=3, loc='lower left')
    else:
        axs[1].legend(['PSS', 'GYR'], ncol=3, loc='lower left')
    # plot the FFT w/o padding
    pad_len = npads_frequency_resolution(len(pca_out), fs=FS_RESAMPLE)
    data_pad = np.pad(pca_out.squeeze(), (0, pad_len), 'constant', constant_values=0)
    data_xf, data_yf = run_fft(data_pad, FS_RESAMPLE)
    data_ind = data_yf.argmax(axis=0)
    pad_len = npads_frequency_resolution(len(pss_out), fs=fs)
    pss_pad = np.pad(pss_out, (0, pad_len), 'constant', constant_values=0)
    pss_xf, pss_yf = run_fft(pss_pad, fs)
    pss_ind = pss_yf.argmax(axis=0)
    # replace the bottom axis with two side-by-side spectrum axes
    fig.delaxes(axs[-1])
    axs = np.insert(axs, -1, fig.add_subplot(3, 2, 5))
    axs = np.insert(axs, -1, fig.add_subplot(3, 2, 6))
    axs[2].plot(data_xf, data_yf)
    axs[2].plot(data_xf[data_ind], data_yf[data_ind], 'rx')
    axs[2].set_xticks(np.arange(0, 1, 0.2))
    axs[2].set_xlim([0, 1])
    axs[2].set_ylim([0, 0.4])
    axs[2].set_xlabel('Frequency (Hz)')
    axs[2].set_ylabel('$|Y(f)|$')
    axs[3].plot(pss_xf, pss_yf)
    axs[3].plot(pss_xf[pss_ind], pss_yf[pss_ind], 'rx')
    axs[3].set_xticks(np.arange(0, 1, 0.2))
    axs[3].set_xlim([0, 1])
    axs[3].set_ylim([0, 0.4])
    axs[3].set_xlabel('Frequency (Hz)')
    axs[3].set_ylabel('$|Y(f)|$')
    fig.set_size_inches((5.5, 5))
    plt.tight_layout()
    plt.show()
if __name__ == '__main__':
    # Make sure to run sync_data before any of these operations
    # Render the methods figure for both processing pipelines.
    marker_main(method='hernandez')
    marker_main(method='roddiger')
    plt.show()
File added
File added
File added
File added
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment