Commit cf1ea305 authored by Raymond Chia

minor edit

parent ad1cf087
@@ -254,7 +254,7 @@ def acc_signal_processing(data, fs:int=100):
 def imu_signal_processing(data, fs:int=IMU_FS):
     bp = butter_bandpass_filter(data,
                                 3/60,
-                                70/60, fs=fs, order=2)
+                                45/60, fs=fs, order=2)
     ma = movingaverage(bp, 8, axis=0)
     return ma
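
Note on the hunk above: the upper band-pass cut-off drops from 70/60 Hz to 45/60 Hz, so the IMU signal is now restricted to roughly 3–45 cycles per minute, a plausible respiration band. A minimal sketch of such a band-pass stage, assuming butter_bandpass_filter wraps a zero-phase SciPy Butterworth filter (the repository's actual helper may differ):

    import numpy as np
    from scipy.signal import butter, filtfilt

    def butter_bandpass_filter(data, lowcut, highcut, fs=100, order=2):
        # Design a Butterworth band-pass and apply it forwards and backwards
        # (zero phase) along the time axis.
        nyq = 0.5 * fs
        b, a = butter(order, [lowcut / nyq, highcut / nyq], btype='band')
        return filtfilt(b, a, data, axis=0)

    sig = np.random.randn(3000, 3)   # 30 s of 3-axis IMU samples at 100 Hz
    bp = butter_bandpass_filter(sig, 3/60, 45/60, fs=100, order=2)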
@@ -332,7 +332,7 @@ def hernandez_sp(data=None, fs:int=IMU_FS):
     return accel, bp
 
-def pressure_signal_processing(pressure_data, fs=BR_FS):
+def pressure_signal_processing(pressure_data, fs=BR_FS, movmean_win=32):
     ''' Run pressure signal through the following steps:
     * Moving average with 8 samples
     * Standard scaler
@@ -341,10 +341,10 @@ def pressure_signal_processing(pressure_data, fs=BR_FS):
     # Normalize
     data_sd = std_scaler(pressure_data)
-    data_ma = movingaverage(data_sd, 16)
+    data_ma = movingaverage(data_sd, movmean_win)
 
     # bandpass filter the lbls
-    bp_data = butter_bandpass_filter(data_ma, 4/60, 70/60, fs=fs, order=5)
+    bp_data = butter_bandpass_filter(data_ma, 4/60, 45/60, fs=fs, order=5)
     return bp_data
 
 def vectorized_slide_win(array, max_time, sub_window_size=3,
@@ -362,13 +362,17 @@ def vectorized_slide_win(array, max_time, sub_window_size=3,
         np.expand_dims(np.arange(sub_window_size), 0) +
         np.expand_dims(np.arange(max_time, step=stride_size), 0).T
     )
+    i = [i for i, win in enumerate(sub_windows) if max_time in win][0]
+    """
     pad_len = int(np.max(sub_windows) - max_time + 1)
     # pad right side data array with zero
     arr = np.pad(array,(0, pad_len), 'constant',
                  constant_values=0)
     return arr[sub_windows]
+    """
+    return array[sub_windows[:i]]
 
 def generate_noisy_sine_windows(sig=None,
                                 window_size=WINDOW_SIZE*FS_RESAMPLE,
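
Note on the hunk above: instead of zero-padding the signal so the last windows fit, the new code finds the first window that would reach max_time and keeps only the windows before it, so every returned window holds real samples. A self-contained sketch of the same NumPy broadcasting idea (function and variable names here are illustrative, not the project's):

    import numpy as np

    def sliding_windows(array, sub_window_size=3, stride_size=1):
        # Broadcast window offsets (row) against window start indices (column)
        # to build a (n_windows, sub_window_size) index matrix in one shot.
        max_time = len(array)
        starts = np.arange(0, max_time, stride_size)[:, None]
        idx = starts + np.arange(sub_window_size)[None, :]
        # Keep only the windows that fit entirely inside the signal.
        idx = idx[idx[:, -1] < max_time]
        return array[idx]

    print(sliding_windows(np.arange(10), sub_window_size=4, stride_size=2))
    # [[0 1 2 3]
    #  [2 3 4 5]
    #  [4 5 6 7]
    #  [6 7 8 9]]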
@@ -376,10 +376,10 @@ def get_df_windows(df, func, window_size=15, window_shift=0.2, fs=IMU_FS,
     args = zip(wins.tolist(), repeat(df, N), i_list, [cols]*N)
     out_data = []
-    # with Pool(cpu_count()) as p:
-    #     out_data = p.starmap(func, args)
-    for i, win in enumerate(wins):
-        out_data.append(func(win, df, i, cols))
+    with Pool(cpu_count()) as p:
+        out_data = p.starmap(func, args)
+    # for i, win in enumerate(wins):
+    #     out_data.append(func(win, df, i, cols))
     x, y = [], []
     for out in out_data:
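
Note on the hunk above: the per-window loop is replaced by a process pool, with each worker receiving one (window, dataframe, index, cols) tuple via starmap. A minimal sketch of that pattern with a toy worker standing in for the project's window function (names here are illustrative):

    from itertools import repeat
    from multiprocessing import Pool, cpu_count

    import numpy as np

    def window_stats(win, data, i, cols):
        # Toy per-window task: return the window index and the window mean.
        return i, float(np.mean(data[win]))

    if __name__ == '__main__':
        data = np.random.randn(1000)
        wins = [np.arange(s, s + 100) for s in range(0, 900, 50)]
        args = zip(wins, repeat(data), range(len(wins)), repeat(None))
        with Pool(cpu_count()) as p:
            out = p.starmap(window_stats, args)
        print(out[:3])

One caveat with this kind of change: each task's arguments are pickled to the worker processes, so the speed-up comes at the cost of extra memory and serialization overhead when the dataframe is large.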
@@ -10,6 +10,7 @@ import pickle
 import sys
 import time
 from zipfile import ZipFile
+from joblib import dump, load
 import argparse
 from datetime import datetime, timedelta, timezone, timedelta
@@ -265,12 +266,16 @@ def load_imu_files(f_list:list):
     """
     data, hdr = [], []
     tmp = []
-    for f in f_list:
-        tmp.append(load_imu_file(f))
-    for l in tmp:
-        data.append(l[0])
-        hdr.append(l[1])
+    with Pool(int(cpu_count()//1.5)) as p:
+        tmp = p.map(load_imu_file, f_list)
+    data, hdr = zip(*tmp)
+    # for f in f_list:
+    #     tmp.append(load_imu_file(f))
+    # for l in tmp:
+    #     data.append(l[0])
+    #     hdr.append(l[1])
     data_df = pd.concat(data, axis=0)
+    data_df.sort_values(by='sec', inplace=True)
     return data_df, hdr
 
 def load_e4_file(e4_file:str):
@@ -392,7 +397,7 @@ def df_win_task(w_inds, df, i, cols):
     pandas.DataFrame, pandas.DataFrame
     """
     time = df['sec'].values
-    if w_inds[-1] == 0: return
+    # if w_inds[-1] == 0: return
     w_df = df.iloc[w_inds]
     t0, t1 = time[w_inds][0], time[w_inds][-1]
     diff = time[w_inds[1:]] - time[w_inds[0:-1]]
@@ -429,9 +434,9 @@ def df_win_task(w_inds, df, i, cols):
     x_vec_time = np.median(time[w_inds])
-    fs = 1/np.mean(diff)
+    # fs = 1/np.mean(diff)
     ps_out = pressure_signal_processing(ps_out, fs=fs)
-    ps_freq = int(get_max_frequency(ps_out, fs=fs))
+    ps_freq = get_max_frequency(ps_out, fs=fs)
     y_tmp = np.array([x_vec_time, np.nanmedian(sm_out), ps_freq])
 
     x_out['sec'] = x_vec_time
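
Note on the hunk above: get_max_frequency now returns the dominant frequency as a float instead of truncating it to an int, which preserves sub-cycle-per-minute resolution in the breathing-rate label. One common way to compute a dominant frequency, sketched with a plain FFT magnitude spectrum (the project's own implementation may differ, e.g. it could use Welch's method):

    import numpy as np

    def dominant_frequency(x, fs):
        # Remove the DC offset, take the one-sided magnitude spectrum and
        # return the frequency (Hz) of its largest peak.
        x = np.asarray(x, dtype=float) - np.mean(x)
        spectrum = np.abs(np.fft.rfft(x))
        freqs = np.fft.rfftfreq(len(x), d=1.0/fs)
        return freqs[np.argmax(spectrum)]

    fs = 100
    t = np.arange(0, 30, 1/fs)
    breath = np.sin(2*np.pi*0.3*t)            # 0.3 Hz = 18 breaths per minute
    print(dominant_frequency(breath, fs))     # ~0.3; times 60 gives ~18 cpm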
@@ -711,7 +716,7 @@ def get_respiration_log(subject):
     """
     log_list = get_file_list('*.json', sbj=subject)
-    log_dfs = [pd.read_json(f) for f in log_list]
+    log_dfs = [pd.read_json(f, convert_dates=False) for f in log_list]
     return pd.concat(log_dfs, axis=0)
 
 def get_cal_data(event_df, xsens_df):
@@ -731,7 +736,7 @@ def get_cal_data(event_df, xsens_df):
     pd.DataFrame
     """
-    fmt ="%Y-%m-%d %H.%M.%S"
+    fmt ="%d/%m/%Y %I:%M:%S %p"
     cal_list = []
     cpms = []
     start_sec = 0
@@ -744,7 +749,7 @@ def get_cal_data(event_df, xsens_df):
         cpm = np.round( 60/(inhalePeriod + exhalePeriod) )
-        sec = timestamp.to_pydatetime().timestamp()
+        sec = datetime.strptime(str(timestamp), fmt).timestamp()
         if event == 'Start':
             start_sec = sec
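
Note on the two hunks above: the calibration timestamps are now parsed with datetime.strptime against a day-first, 12-hour format string instead of relying on pandas' automatic conversion (likely why pd.read_json now passes convert_dates=False, leaving the logged timestamps as plain strings). A quick, self-contained check of that format string (the sample timestamp below is made up for illustration):

    from datetime import datetime

    fmt = "%d/%m/%Y %I:%M:%S %p"   # day-first date, 12-hour clock with AM/PM
    ts = datetime.strptime("05/07/2023 02:15:30 PM", fmt)
    print(ts)                      # 2023-07-05 14:15:30
    print(ts.timestamp())          # POSIX seconds, as stored in 'sec'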
@@ -849,6 +854,10 @@ def dsp_win_func(w_inds, df, i, cols):
     data = w_df[cols].values
+    fs_est = 1/np.median(diff)
+    if fs_est > 70 and ('acc_x' in cols or 'gyro_x' in cols): fs = IMU_FS
+    elif fs_est < 70 and 'bvp' in cols: fs = PPG_FS
     if reject_artefact((data-np.mean(data,axis=0))/np.std(data,axis=0)):
         return
@@ -872,10 +881,10 @@ def dsp_win_func(w_inds, df, i, cols):
     x_vec_time = np.median(time[w_inds])
-    fs = 1/np.mean(diff)
+    # fs = 1/np.mean(diff)
     ps_out = pressure_signal_processing(ps_out, fs=fs)
-    ps_freq = int(get_max_frequency(ps_out, fs=IMU_FS))
+    ps_freq = get_max_frequency(ps_out, fs=IMU_FS)
     y_tmp = np.array([x_vec_time, np.nanmedian(sm_out), ps_freq])
@@ -1052,8 +1061,6 @@ def imu_rr_dsp(subject,
                train_len:int=3,
                test_standing=False,
                ):
-    # TODO:
-    # implement evaluation saving
     """Loads, preprocesses, and performs Hernandez digital signal processing
     pipeline on the selected subject. Uses the specified parameters. Runs on
     both accelerometer and gyroscope.
@@ -1154,9 +1161,11 @@ def imu_rr_dsp(subject,
     pp.pprint(acc_eval.load_eval_history())
     fig, ax = plt.subplots(2, 1)
-    ax[0].plot(acc_y_dsp_df[lbl_str]); plt.plot(acc_dsp_df['pred'])
+    ax[0].plot(acc_y_dsp_df[lbl_str].values);
+    ax[0].plot(acc_dsp_df['pred'].values)
     ax[0].set_title("ACC")
-    ax[1].plot(gyr_y_dsp_df[lbl_str]); plt.plot(gyr_dsp_df['pred'])
+    ax[1].plot(gyr_y_dsp_df[lbl_str].values);
+    ax[1].plot(gyr_dsp_df['pred'].values)
     ax[1].set_title("GYRO")
     ax[1].legend([lbl_str, 'estimate'])
     fig_dir = join(project_dir, 'figures')
@@ -1232,6 +1241,7 @@ def sens_rr_model(subject,
     use_tsfresh = False
     overwrite_tsfresh = overwrite
     train_size = int(train_len)
+    minirocket = None
     if feature_method == 'tsfresh':
         use_tsfresh = True
@@ -1262,7 +1272,7 @@ def sens_rr_model(subject,
     project_dir = pfh.project_directory
     xsens_df = load_and_sync_xsens(subject, sens_list=sens_list)
-    activity_df = get_activity_log(subject)
+    activity_df = get_activity_log(subject).reset_index(drop=True)
     event_df = get_respiration_log(subject)
     cal_df = get_cal_data(event_df, xsens_df)
@@ -1339,12 +1349,29 @@ def sens_rr_model(subject,
         # x_train = y_train_df['hr_est'].values.reshape(-1, 1)
         # x_test = y_test_df['hr_est'].values.reshape(-1, 1)
+        print("minirocket transforming...")
         x_train = np.swapaxes(x_train, 1, 2)
         x_test = np.swapaxes(x_test, 1, 2)
-        minirocket = MiniRocketMultivariate()
-        x_train = minirocket.fit_transform(x_train)
-        x_test = minirocket.transform(x_test)
+        directory = join(project_dir, '_'.join([mdl_str, marker]))
+        minirocket_fname = join(directory, "minirocket.joblib")
+        if not overwrite and exists(minirocket_fname):
+            with open(minirocket_fname, 'rb') as mfile:
+                minirocket = load(mfile)
+            x_train = minirocket.transform(x_train)
+            print("loaded minirocket...")
+        else:
+            minirocket = MiniRocketMultivariate()
+            x_train = minirocket.fit_transform(x_train)
+            x_test = minirocket.transform(x_test)
+        if overwrite or not exists(minirocket_fname):
+            if not exists(directory): makedirs(directory)
+            with open(minirocket_fname, 'wb') as mfile:
+                dump(minirocket, mfile)
+            print("saved new minirocket...")
     elif use_tsfresh:
         y_cols = ['sec', 'br', 'pss', 'cpm']
         x_cols = [col for col in train_df.columns.values if col not in y_cols]
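
Note on the hunk above: the fitted MiniRocket transform is now persisted with joblib so later runs can reload it instead of refitting. A condensed sketch of that fit-or-load pattern, assuming the sktime MiniRocketMultivariate transformer and input windows shaped (n_windows, n_channels, n_timepoints); the directory and file names here are illustrative:

    from os import makedirs
    from os.path import exists, join

    import numpy as np
    from joblib import dump, load
    from sktime.transformations.panel.rocket import MiniRocketMultivariate

    def fit_or_load_minirocket(x_train, x_test, directory, overwrite=False):
        fname = join(directory, "minirocket.joblib")
        if not overwrite and exists(fname):
            minirocket = load(fname)      # reuse previously fitted kernels
        else:
            minirocket = MiniRocketMultivariate()
            minirocket.fit(x_train)
            if not exists(directory):
                makedirs(directory)
            dump(minirocket, fname)       # cache the fitted transform
        # Apply the same fitted kernels to both splits.
        return minirocket.transform(x_train), minirocket.transform(x_test)

    x_train = np.random.randn(32, 6, 1200)   # e.g. 12 s windows, 6 IMU channels, 100 Hz
    x_test = np.random.randn(8, 6, 1200)
    xt_train, xt_test = fit_or_load_minirocket(x_train, x_test, "cache_dir")

Transforming the test split with the same fitted object keeps the train and test feature spaces consistent.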
@@ -1398,9 +1425,11 @@ def sens_rr_model(subject,
     if lbl_str == 'pss':
         br = y_test_df['br'].values
-        ax[1].plot(movingaverage(y_test, 12), color='tab:blue')
+        ax[1].plot(movingaverage(y_test, 12),
+                   color='tab:blue')
         ax[1].plot(br, 'k')
-        ax[1].plot(movingaverage(preds, 12), color='tab:orange')
+        ax[1].plot(movingaverage(preds, 12),
+                   color='tab:orange')
         ax[1].legend([lbl_str, 'br', 'pred'])
     else:
         ax[1].plot(y_test, 'k')
@@ -1409,8 +1438,100 @@ def sens_rr_model(subject,
     ax[1].set_title('smoothened')
     fig_dir = join(project_dir, 'figures')
     if not exists(fig_dir): mkdir(fig_dir)
+    if subject == 'S02':
+        sec = y_test_df['sec'].values
+        activity_df2 = get_activity_log(subject+'-repeat')
+        event_df2 = get_respiration_log(subject+'-repeat')
+        second_cal_df = get_cal_data(event_df2, xsens_df)\
+                .reset_index(drop=True)
+
+        fmt = "%d/%m/%Y %H:%M:%S"
+        activity_sec0 = datetime.strptime(
+            activity_df2['Timestamps'].iloc[0], fmt).timestamp()
+        activity_sec1 = datetime.strptime(
+            activity_df2['Timestamps'].iloc[-1], fmt).timestamp()
+        ind0 = np.argmin(np.abs(sec - activity_sec0))
+        ind1 = np.argmin(np.abs(sec - activity_sec1))
+        ax[0].axvline(ind0, c='r', linestyle='--')
+        ax[0].axvline(ind1, c='r', linestyle='--')
+        ax[1].axvline(ind0, c='r', linestyle='--')
+        ax[1].axvline(ind1, c='r', linestyle='--')
+
+        fig2, ax2 = plt.subplots(2, 1, sharex=True)
+        cal_sec0 = second_cal_df.iloc[-7]['data']['sec'].iloc[0]
+        cal_sec1 = second_cal_df.iloc[-1]['data']['sec'].iloc[-1]
+        calind0 = np.argmin(np.abs(sec - cal_sec0))
+        calind1 = np.argmin(np.abs(sec - cal_sec1))
+        ax2[0].plot(sec[calind0:], y_test[calind0:])
+        ax2[0].plot(sec[calind0:], preds[calind0:])
+        ax2[0].set_title('raw')
+        ax2[1].plot(sec[calind0:],
+                    movingaverage(y_test, 12)[calind0:],
+                    color='tab:blue')
+        ax2[1].plot(sec[calind0:], br[calind0:], 'k')
+        ax2[1].plot(sec[calind0:],
+                    movingaverage(preds, 12)[calind0:],
+                    color='tab:orange')
+        ax2[1].legend([lbl_str, 'br', 'pred'])
+
+        ll = len(second_cal_df)
+        for i in range(ll-7, ll):
+            cal_sec0 = second_cal_df.iloc[i]['data']['sec'].iloc[0]
+            cal_sec1 = second_cal_df.iloc[i]['data']['sec'].iloc[-1]
+            calind0 = np.argmin(np.abs(sec - cal_sec0))
+            calind1 = np.argmin(np.abs(sec - cal_sec1))
+            ax2[0].axvline(sec[calind0], c='g', linestyle='--')
+            ax2[0].axvline(sec[calind1], c='g', linestyle='--')
+            ax2[1].axvline(sec[calind0], c='g', linestyle='--')
+            ax2[1].axvline(sec[calind1], c='g', linestyle='--')
+
+        def pressure_ax_plot(func, win_size=12, win_shift=0.2, fs=120):
+            dt = test_df.sec
+            t = dt.map(lambda x: datetime.fromtimestamp(x))
+            pss_sig = test_df.PSS.values
+            processed_wins = vsw(pss_sig, len(pss_sig),
+                                 sub_window_size=int(win_size*fs),
+                                 stride_size=int(win_size*win_shift*fs))
+            processed_wins = map(func, processed_wins)
+            ps_freq = [get_max_frequency(ps_out, fs=fs) for ps_out in
                       processed_wins]
+            ft = vsw(dt.values, len(dt),
+                     sub_window_size=int(win_size*fs),
+                     stride_size=int(win_size*win_shift*fs))
+            fig, axs = plt.subplots(3, 1, sharex=True)
+            # axs[0].get_shared_x_axes().join(axs[0], axs[1])
+            axs[0].plot(t, pss_sig)
+            axs[1].plot(t, func(pss_sig))
+            mtime = map(lambda x: datetime.fromtimestamp(x),
+                        ft.mean(axis=1))
+            freqdt = np.array([t for t in mtime])
+            tt = y_test_df.sec.map(lambda x:
+                                   datetime.fromtimestamp(x)).values
+            axs[2].plot(freqdt, ps_freq)
+            axs[2].plot(tt, y_test_df.pss.values)
+            # fig.savefig(join(fig_dir, fig_title+"cal_check.png"))
+
+        fig2.savefig(join(fig_dir, fig_title+"cal_check.png"))
+        # func = partial(pressure_signal_processing, fs=fs)
+        # pressure_ax_plot(func)
     fig.savefig(join(fig_dir, fig_title+".png"))
-    plt.close()
+    plt.close('all')
 
 def arg_parser():
     """Returns arguments in a Namespace to configure the subject specific model
@@ -1459,7 +1580,7 @@ def arg_parser():
     parser.add_argument('--method', type=str,
                         default='ml',
                         help="choose between 'ml' or 'dsp' methods for"\
-                        " regression",
+                        "regression",
                         choices=['ml', 'dsp']
                        )
     args = parser.parse_args()
@@ -1484,6 +1605,7 @@ if __name__ == '__main__':
     print(args)
     assert train_len>0,"--train_len must be an integer greater than 0"
+    assert train_len <= 7,"--train_len must be an integer less than 8"
 
     subject_pre_string = 'S' # Pilot / S
@@ -1528,9 +1650,20 @@ if __name__ == '__main__':
                          overwrite=overwrite,
                          train_len=train_len,
                          test_standing=test_standing)
+    elif subject <= 0 and method == 'dsp':
+        subjects = [subject_pre_string+str(i).zfill(2) for i in \
+                    range(1, N_SUBJECT_MAX+1)]
+        func = partial(imu_rr_dsp, window_size=window_size,
+                       window_shift=window_shift,
+                       lbl_str=lbl_str,
+                       overwrite=overwrite,
+                       train_len=train_len,
+                       test_standing=test_standing)
+        for subject in subjects:
+            func(subject)
     elif subject <= 0 and method == 'ml':
         subjects = [subject_pre_string+str(i).zfill(2) for i in \
-                    range(1, n_subject_max+1)]
+                    range(1, N_SUBJECT_MAX+1)]
         rr_func = partial(sens_rr_model,
                           window_size=window_size,