regress_rr.py

        self.subject = subject
        self.config = pfh.config
        self.parent_directory = join(DATA_DIR, 'subject_specific')
        self.fset_id = pfh.fset_id
        self.sens_str = sens_str
        self.overwrite = overwrite

        self.evals = Evaluation(y_true, y_pred)

        entry = {'subject': self.subject,
                 'config_id': self.fset_id,
                 'sens_str': self.sens_str,
                }
        self.entry = {**entry, **self.config, **self.evals.get_evals()}

        self.eval_history_file = join(self.parent_directory,
                                      'dsp_eval_history.csv')
        self.eval_hist = self.load_eval_history()

    def load_eval_history(self):
        """Read the evaluation-history CSV, if one has been written.

        Returns
        -------
        pandas.DataFrame or None
            The contents of ``self.eval_history_file`` as parsed by
            ``pandas.read_csv``, or ``None`` when that file does not exist.
        """
        history_path = self.eval_history_file
        return pd.read_csv(history_path) if exists(history_path) else None

    def update_eval_history(self):
        """Merge ``self.entry`` into the in-memory evaluation history.

        Rows are identified by the ('subject', 'config_id', 'sens_str')
        triple:

        * no history loaded yet -> the history becomes a one-row frame;
        * no row matches        -> the entry is appended as a new row;
        * a row matches         -> the first match is replaced, but only when
          ``self.overwrite`` is truthy; otherwise the history is unchanged.

        The result is stored back on ``self.eval_hist``; nothing is written
        to disk by this method.
        """
        eval_hist = self.eval_hist
        if eval_hist is None:
            eval_hist = pd.DataFrame([self.entry])
        else:
            is_same_run = (
                (eval_hist['subject'] == self.entry['subject'])
                & (eval_hist['config_id'] == self.entry['config_id'])
                & (eval_hist['sens_str'] == self.entry['sens_str'])
            )
            index_list = eval_hist.index[is_same_run].tolist()
            if not index_list:
                print("adding new entry")
                # pd.concat is the public replacement for DataFrame.append,
                # which was removed in pandas 2.0 (``_append`` is private).
                eval_hist = pd.concat(
                    [eval_hist, pd.DataFrame([self.entry])],
                    ignore_index=True)
            elif self.overwrite:
                # Wrap the dict in a Series so the values are aligned to the
                # columns by label instead of relying on dict handling in
                # ``.loc``.
                eval_hist.loc[index_list[0]] = pd.Series(self.entry)
        self.eval_hist = eval_hist

    def save_eval_history(self):
        """Write ``self.eval_hist`` to ``self.eval_history_file`` as CSV.

        The frame's index is not written (``index=False``).
        """
        history = self.eval_hist
        destination = self.eval_history_file
        history.to_csv(destination, index=False)

def imu_rr_dsp(subject,
               window_size=12,
               window_shift=0.2,
               lbl_str='pss',
               overwrite=False,
               train_len:int=3,
               test_standing=False,
              ):
    """Loads, preprocesses, and performs Hernandez digital signal processing
    pipeline on the selected subject. Uses the specified parameters. Runs on
    both accelerometer and gyroscope.

    The evaluation of each sensor is merged into the DSP evaluation history
    through ``DSPEvalHandler`` and a two-panel figure (ACC / GYRO) comparing
    the label with the DSP estimate is saved under the project directory.

    Parameters
    ----------
    subject: str
        specify the subject code (i.e. 'Pilot02', 'S02')
    window_size : float
        window length forwarded to ``get_df_windows`` (presumably seconds,
        to be confirmed against ``get_df_windows``)
    window_shift : float
        a portion of the window size between 0 and 1
    lbl_str : str
        column of the windowed label frame used as the ground truth
    overwrite : bool
        forwarded to ``DSPEvalHandler``; when truthy an existing evaluation
        entry for the same subject/config/sensor is replaced (default False)
    train_len : int
        not used by the DSP pipeline itself; only recorded in the
        configuration that identifies the project directory
    test_standing : bool
        boolean to use standing data

    Returns
    -------
    None
    """
    fs = IMU_FS
    parent_directory_string = "imu_rr_dsp"

    config = {'window_size'   : window_size,
              'window_shift'  : window_shift,
              'lbl_str'       : lbl_str,
              'train_len'     : train_len,
              'test_standing' : test_standing,
              'sens_list'     : 'imu',
             }

    pfh = ProjectFileHandler(config)
    pfh.set_home_directory(join(DATA_DIR, 'subject_specific', subject))
    pfh.set_parent_directory(parent_directory_string)
    id_check = pfh.get_id_from_config()
    if id_check is None:
        pfh.set_project_directory()
        pfh.save_metafile()
    else:
        pfh.set_id(int(id_check))
        pfh.set_project_directory()
        print('Using pre-set data id: ', pfh.fset_id)
    project_dir = pfh.project_directory

    xsens_df = load_and_sync_xsens(subject, sens_list=['imu'])
    activity_df = get_activity_log(subject)
    event_df = get_respiration_log(subject)

    cal_df = get_cal_data(event_df, xsens_df)

    # include standing or not
    test_df_tmp = get_test_data(cal_df, activity_df, xsens_df, test_standing)
    test_df = pd.concat(list(test_df_tmp['data']), axis=0)

    acc_dsp_df, acc_y_dsp_df = get_df_windows(
        test_df, dsp_win_func,
        window_size=window_size,
        window_shift=window_shift,
        fs=fs,
        cols=['acc_x', 'acc_y', 'acc_z'])
    gyr_dsp_df, gyr_y_dsp_df = get_df_windows(
        test_df, dsp_win_func,
        window_size=window_size,
        window_shift=window_shift,
        fs=fs,
        cols=['gyro_x', 'gyro_y', 'gyro_z'])

    acc_evals = Evaluation(acc_y_dsp_df[lbl_str], acc_dsp_df['pred'])
    gyr_evals = Evaluation(gyr_y_dsp_df[lbl_str], gyr_dsp_df['pred'])
    print("acc evals: \n", acc_evals.get_evals())
    print("gyr evals: \n", gyr_evals.get_evals())

    acc_eval = DSPEvalHandler(acc_evals.y_true.values.flatten(),
                              acc_evals.y_pred.values.flatten(),
                              subject,
                              pfh, 'acc', overwrite=overwrite)
    acc_eval.update_eval_history()
    acc_eval.save_eval_history()

    gyr_eval = DSPEvalHandler(gyr_evals.y_true.values.flatten(),
                              gyr_evals.y_pred.values.flatten(),
                              subject,
                              pfh, 'gyro', overwrite=overwrite)
    gyr_eval.update_eval_history()
    gyr_eval.save_eval_history()

    pp = PrettyPrinter()
    pp.pprint(acc_eval.load_eval_history())

    fig, ax = plt.subplots(2, 1)
    # Draw through the Axes objects: plt.plot() targets the *current* axes,
    # which after plt.subplots(2, 1) is the last one created (ax[1]), so the
    # accelerometer estimate would otherwise end up on the gyroscope panel.
    ax[0].plot(acc_y_dsp_df[lbl_str])
    ax[0].plot(acc_dsp_df['pred'])
    ax[0].set_title("ACC")
    ax[0].legend([lbl_str, 'estimate'])
    ax[1].plot(gyr_y_dsp_df[lbl_str])
    ax[1].plot(gyr_dsp_df['pred'])
    ax[1].set_title("GYRO")
    ax[1].legend([lbl_str, 'estimate'])
    fig_dir = join(project_dir, 'figures')
    if not exists(fig_dir):
        mkdir(fig_dir)
    fig_title = '_'.join([subject, 'dsp'])
    fig.savefig(join(fig_dir, fig_title+".png"))
    # Close this specific figure rather than whichever one is current.
    plt.close(fig)

def sens_rr_model(subject,
                  window_size=12,
                  window_shift=0.2,
                  lbl_str='pss',
                  mdl_str='knn',
                  overwrite=False,
                  feature_method='tsfresh',
                  train_len:int=3,
                  test_standing=False,
                  data_input:str='imu+bvp',
                 ):
    """Loads, preprocesses, and trains a select model using the configured
    settings.

    One model is trained and evaluated for every combination of
    ``train_len`` calibration recordings; each evaluation is merged into the
    evaluation history through ``EvalHandler`` and a figure comparing the
    label with the prediction is saved under the project directory.

    Parameters
    ----------
    subject: str
        specify the subject code (i.e. 'Pilot02', 'S02')
    window_size : float
        window length forwarded to the windowing / feature helpers; it is
        multiplied by the sampling rate before being handed to
        ``model_training``
    window_shift : float
        a portion of the window size between 0 and 1
    lbl_str : str
        column of the test labels used as the ground truth
    mdl_str : str
        a string to inform what model was used
    overwrite : bool
        overwrites the evaluations, models, and graphs (default False); also
        forwarded as the ``overwrite`` flag of ``load_tsfresh``
    feature_method : str
        choose between 'minirocket', 'tsfresh', or 'None'
    train_len : int
        number of calibration recordings combined for training (original
        note: number of minutes to sample from, choose between 1 to 7)
    test_standing : bool
        boolean to use standing data
    data_input : str
        sensors to use, choose from 'imu', 'bvp', 'imu+bvp'

    Returns
    -------
    None

    Raises
    ------
    ValueError
        if ``data_input`` names neither 'imu' nor 'bvp'
    """
    cal_str = 'cpm'

    # Resolve the sensor selection first so that an unsupported value fails
    # with a clear message before the file handler is set up or data loaded.
    has_imu = 'imu' in data_input
    has_bvp = 'bvp' in data_input
    if has_imu and has_bvp:
        data_cols = ['acc_x', 'acc_y', 'acc_z',
                     'gyro_x', 'gyro_y', 'gyro_z',
                     'bvp']
        parent_directory_string = "imu-bvp_rr"
        data_input = 'imu+bvp'
        sens_list = ['imu', 'bvp']
        fs = IMU_FS
    elif has_imu:
        data_cols = ['acc_x', 'acc_y', 'acc_z',
                     'gyro_x', 'gyro_y', 'gyro_z',]
        parent_directory_string = "imu_rr"
        sens_list = ['imu']
        fs = IMU_FS
    elif has_bvp:
        data_cols = ['bvp']
        parent_directory_string = "bvp_rr"
        sens_list = ['bvp']
        fs = PPG_FS
    else:
        raise ValueError(
            f"data_input must contain 'imu' and/or 'bvp', got {data_input!r}")

    use_tsfresh = feature_method == 'tsfresh'
    do_minirocket = feature_method == 'minirocket'
    overwrite_tsfresh = overwrite

    config = {'window_size'   : window_size,
              'window_shift'  : window_shift,
              'lbl_str'       : lbl_str,
              'do_minirocket' : do_minirocket,
              'use_tsfresh'   : use_tsfresh,
              'train_len'     : train_len,
              'test_standing' : test_standing,
              'sens_list'     : data_input
             }

    pfh = ProjectFileHandler(config)
    pfh.set_home_directory(join(DATA_DIR, 'subject_specific', subject))
    pfh.set_parent_directory(parent_directory_string)
    id_check = pfh.get_id_from_config()
    if id_check is None:
        pfh.set_project_directory()
        pfh.save_metafile()
    else:
        pfh.set_id(int(id_check))
        pfh.set_project_directory()
        print('Using pre-set data id: ', pfh.fset_id)
    project_dir = pfh.project_directory

    xsens_df = load_and_sync_xsens(subject, sens_list=sens_list)
    activity_df = get_activity_log(subject)
    event_df = get_respiration_log(subject)

    cal_df = get_cal_data(event_df, xsens_df)

    # include standing or not
    test_df_tmp = get_test_data(cal_df, activity_df, xsens_df, test_standing)
    test_df = pd.concat(list(test_df_tmp['data']), axis=0)

    if use_tsfresh:
        # Replace the raw test and calibration data with their tsfresh
        # feature frames.
        cal_df_list = []
        test_df = load_tsfresh(test_df,
                               pfh.home_directory,
                               window_size=window_size,
                               window_shift=window_shift,
                               fs=fs,
                               overwrite=overwrite_tsfresh,
                               data_cols=data_cols,
                               prefix='test',
                              )
        for index, row in cal_df.iterrows():
            data = load_tsfresh(row['data'],
                                pfh.home_directory,
                                window_size=window_size,
                                window_shift=window_shift,
                                fs=fs,
                                overwrite=overwrite_tsfresh,
                                data_cols=data_cols,
                                prefix=f"calcpm_{row['cpm']}"
                               )
            cal_df_list.append({'cpm': row['cpm'], 'data': data})

        cal_df = pd.DataFrame(cal_df_list)
    else:
        # The test windows do not depend on the calibration combination, so
        # they are built once here and reused inside the loop below.
        x_test_df, y_test_df = get_df_windows(
            test_df, df_win_task, window_size=window_size,
            window_shift=window_shift, fs=fs, cols=data_cols)

    for combi in combinations(cal_df[cal_str].values, train_len):
        combi_str = "-".join([str(x) for x in combi])
        pfh.config[cal_str] = combi_str
        marker = f'{parent_directory_string}_{subject}_id{pfh.fset_id}'\
                f'_combi{combi_str}'
        print(marker)

        train_df_list = []
        for cpm in combi:
            df = cal_df[cal_df[cal_str] == cpm]
            data_df = df['data'].iloc[0]
            data_df['cpm'] = cpm
            train_df_list.append(data_df)
        train_df = pd.concat(train_df_list)

        # Guard against leakage: no 'sec' value may be shared by the
        # training and the test data.
        assert not np.isin(train_df.sec.values, test_df.sec.values).any(),\
                "overlapping test and train data"

        print("train")
        print(train_df.shape)
        print("test")
        print(test_df.shape)

        if use_tsfresh:
            y_cols = ['sec', 'br', 'pss', 'cpm']
            x_cols = [col for col in train_df.columns.values
                      if col not in y_cols]
            x_train = train_df[x_cols].values
            y_train = train_df['cpm'].values.reshape(-1, 1)
            x_test  = test_df[x_cols].values
            y_test  = test_df[lbl_str].values.reshape(-1, 1)
            y_test_df = test_df[y_cols[:-1]]
        else:
            # Raw windows feed both the minirocket transform and the models
            # that consume the windows directly.
            x_train_df, y_train_df = get_df_windows(train_df,
                                                    df_win_task,
                                                    window_size=window_size,
                                                    window_shift=window_shift,
                                                    fs=fs,
                                                    cols=data_cols,
                                                   )
            x_train = make_windows_from_id(x_train_df, data_cols)
            y_train = y_train_df['cpm'].values.reshape(-1, 1)
            x_test  = make_windows_from_id(x_test_df, data_cols)
            y_test  = y_test_df[lbl_str].values.reshape(-1, 1)

            if do_minirocket:
                print("minirocket transforming...")
                # Swap axes 1 and 2 of the window arrays before they are
                # handed to the minirocket transform.
                x_train = np.swapaxes(x_train, 1, 2)
                x_test = np.swapaxes(x_test, 1, 2)
                minirocket = MiniRocketMultivariate()
                x_train    = minirocket.fit_transform(x_train)
                x_test     = minirocket.transform(x_test)

        transforms, model = model_training(mdl_str, x_train, y_train,
                                           marker, validation_data=None,
                                           overwrite=overwrite,
                                           is_regression=True,
                                           project_directory=project_dir,
                                           window_size=int(window_size*fs),
                                           extra_train=200,
                                           poly_deg=1
                                          )

        if transforms is not None:
            x_test = transforms.transform(x_test)

        preds = model.predict(x_test)

        eval_handle = EvalHandler(y_test.flatten(), preds.flatten(), subject,
                                  pfh, mdl_str, overwrite=overwrite)
        eval_handle.update_eval_history()
        eval_handle.save_eval_history()

        pp = PrettyPrinter()
        pp.pprint(eval_handle.load_eval_history())

        fig, ax = plt.subplots(2, 1, figsize=(7.3, 4.5))
        fig_title = '_'.join([mdl_str, data_input, subject, combi_str])
        fig.suptitle(fig_title)
        ax[0].plot(y_test)
        ax[0].plot(preds)
        ax[0].set_title('raw')

        if lbl_str == 'pss':
            br  = y_test_df['br'].values
            ax[1].plot(movingaverage(y_test, 12), color='tab:blue')
            ax[1].plot(br, 'k')
            ax[1].plot(movingaverage(preds, 12), color='tab:orange')
            ax[1].legend([lbl_str, 'br', 'pred'])
        else:
            ax[1].plot(y_test, 'k')
            ax[1].plot(movingaverage(preds, 12), color='tab:orange')
            ax[1].legend([lbl_str, 'pred'])
        ax[1].set_title('smoothened')
        fig_dir = join(project_dir, 'figures')
        if not exists(fig_dir):
            mkdir(fig_dir)
        fig.savefig(join(fig_dir, fig_title+".png"))
        # Close this specific figure so one is not leaked per combination.
        plt.close(fig)

def arg_parser():
    """Parse the command line into a Namespace that configures the subject
    specific model.

    The options are declared in a table of ``(flags, keyword arguments)``
    pairs and registered in that order, then ``sys.argv`` is parsed.

    Returns
    -------
    argparse.Namespace
        with attributes ``model``, ``subject``, ``feature_method``,
        ``overwrite``, ``win_size``, ``win_shift``, ``lbl_str``,
        ``train_len``, ``data_input``, ``test_standing`` and ``method``
    """
    option_table = (
        (("-m", '--model'),
         dict(type=str, default='linreg',
              choices=['linreg', 'ard', 'xgboost', 'knn',
                       'svr', 'cnn1d', 'fnn', 'lstm', 'ridge',
                       'elastic'])),
        (("-s", '--subject'),
         dict(type=int, default=2,
              choices=list(range(1,5))+[-1])),
        (("-f", '--feature_method'),
         dict(type=str, default='minirocket',
              choices=['tsfresh', 'minirocket', 'None'])),
        (("-o", '--overwrite'),
         dict(type=int, default=0)),
        (('--win_size',),
         dict(type=int, default=12)),
        (('--win_shift',),
         dict(type=float, default=0.2)),
        (('-l', '--lbl_str'),
         dict(type=str, default='pss')),
        (('-tl', '--train_len'),
         dict(type=int, default=5,
              help='minutes of data to use for calibration')),
        (('-d', '--data_input'),
         dict(type=str, default='imu',
              help='imu, bvp, imu+bvp: select data cols for input')),
        (('-ts', '--test_standing'),
         dict(type=int, default=1,
              help='1 or 0 input, choose if standing data will be '
                   'recorded or not')),
        (('--method',),
         dict(type=str, default='ml',
              help="choose between 'ml' or 'dsp' methods for"
                   " regression",
              choices=['ml', 'dsp'])),
    )

    parser = argparse.ArgumentParser()
    for flags, spec in option_table:
        parser.add_argument(*flags, **spec)
    return parser.parse_args()

if __name__ == '__main__':
    # Script entry point: run the selected regression method ('ml' or 'dsp')
    # for one subject, or for every pilot subject when --subject is <= 0.
    np.random.seed(100)
    n_subject_max = 4
    args = arg_parser()

    # Load command line arguments
    mdl_str        = args.model
    subject        = args.subject
    feature_method = args.feature_method
    window_size    = args.win_size
    window_shift   = args.win_shift
    lbl_str        = args.lbl_str
    train_len      = args.train_len
    overwrite      = args.overwrite
    data_input     = args.data_input
    test_standing  = args.test_standing
    method         = args.method

    print(args)
    # Explicit check instead of ``assert`` so it still runs under ``-O``.
    if train_len <= 0:
        raise ValueError("--train_len must be an integer greater than 0")

    subject_pre_string = 'Pilot'

    # Build the subject codes without rebinding the integer ``subject``:
    # reusing that name for the string code made the later ``subject > 0``
    # comparison raise TypeError after every 'ml' run.
    if subject > 0:
        subject_codes = [subject_pre_string+str(subject).zfill(2)]
    else:
        subject_codes = [subject_pre_string+str(i).zfill(2)
                         for i in range(2, n_subject_max+1)]

    if method == 'ml':
        rr_func = partial(sens_rr_model,
                          window_size=window_size,
                          window_shift=window_shift,
                          lbl_str=lbl_str,
                          mdl_str=mdl_str,
                          overwrite=overwrite,
                          feature_method=feature_method,
                          train_len=train_len,
                          test_standing=test_standing,
                          data_input=data_input,
                         )
    elif method == 'dsp':
        # Also covers "all subjects": that branch used to be a copy of the
        # 'ml' one, so the DSP method did nothing when --subject was <= 0.
        rr_func = partial(imu_rr_dsp,
                          window_size=window_size,
                          window_shift=window_shift,
                          lbl_str=lbl_str,
                          overwrite=overwrite,
                          train_len=train_len,
                          test_standing=test_standing,
                         )

    for subject_code in subject_codes:
        rr_func(subject_code)
    print(args)