diff --git a/modules/.datapipeline.py.swp b/modules/.datapipeline.py.swp deleted file mode 100644 index 4213d68c350303ec5029eb85195c3fa423f0fba4..0000000000000000000000000000000000000000 Binary files a/modules/.datapipeline.py.swp and /dev/null differ diff --git a/regress_rr.py b/regress_rr.py index 38d987f233c251975852fc812933cf267aa6ec6d..a5376e4ba236a48c1af7bc0c8a32e08d9b820ed9 100644 --- a/regress_rr.py +++ b/regress_rr.py @@ -22,6 +22,7 @@ from itertools import repeat, chain, combinations from multiprocessing import Pool, cpu_count import tensorflow as tf +from sklearn.decomposition import PCA from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder from sklearn.preprocessing import PolynomialFeatures, LabelEncoder from sklearn.model_selection import KFold, train_test_split @@ -35,6 +36,7 @@ from modules.datapipeline import get_file_list, load_and_snip, load_data, \ load_split_data, load_harness_data from modules.digitalsignalprocessing import vectorized_slide_win as vsw from modules.digitalsignalprocessing import imu_signal_processing +from modules.digitalsignalprocessing import hernandez_sp, reject_artefact from modules.digitalsignalprocessing import do_pad_fft,\ pressure_signal_processing, infer_frequency from modules.utils import * @@ -64,7 +66,8 @@ from sktime.transformations.panel.rocket import ( MiniRocketMultivariateVariable, ) -from config import WINDOW_SIZE, WINDOW_SHIFT, IMU_FS, DATA_DIR, BR_FS +from config import WINDOW_SIZE, WINDOW_SHIFT, IMU_FS, DATA_DIR, BR_FS\ + , FS_RESAMPLE IMU_COLS = ['acc_x', 'acc_y', 'acc_z', 'gyr_x', 'gyr_y', 'gyr_z'] @@ -453,7 +456,6 @@ def load_and_sync_xsens(subject): return xsens_df - def load_tsfresh(subject, project_dir, window_size=12, window_shift=0.2, fs=IMU_FS, overwrite=False): @@ -556,6 +558,55 @@ def get_test_data(cal_df, activity_df, xsens_df, test_standing): return pd.DataFrame(activity_list) +def dsp_win_func(w_inds, df, i, cols): + time = df['sec'].values + if w_inds[-1] == 0: return + w_df = df.iloc[w_inds] + t0, t1 = time[w_inds][0], time[w_inds][-1] + diff = time[w_inds[1:]] - time[w_inds[0:-1]] + mask = np.abs(diff)>20 + diff_chk = np.any(mask) + if diff_chk: + return + + # cols = ['acc_x', 'acc_y', 'acc_z', + # 'gyr_x', 'gyr_y', 'gyr_z'] + + data = w_df[cols].values + + if reject_artefact((data-np.mean(data,axis=0))/np.std(data,axis=0)): + return + + # DSP + pca = PCA(n_components=1, random_state=3) + + # do hernandez sp on datacols for df + filt = hernandez_sp(data=data, fs=IMU_FS)[1] + + # pca + pca_out = pca.fit_transform(filt) + + std = StandardScaler().fit_transform(pca_out) + + pred = get_max_frequency(std, fs=FS_RESAMPLE) + + # get pss / br estimates + # x_time median, pss max_freq, br median + sm_out = w_df['BR'].values + ps_out = w_df['PSS'].values + + x_vec_time = np.median(time[w_inds]) + + fs = 1/np.mean(diff) + ps_freq = int(get_max_frequency(ps_out, fs=IMU_FS)) + + y_tmp = np.array([x_vec_time, np.nanmedian(sm_out), ps_freq]) + + y_hat = pd.DataFrame([ {'sec': x_vec_time, 'pred': pred} ]) + y_out = pd.DataFrame([y_tmp], columns=['sec', 'br', 'pss']) + + return y_hat, y_out + # save evaluation metrics in single file that handles the models for the # subject and config class EvalHandler(): @@ -616,18 +667,21 @@ def imu_rr_model(subject, feature_method='tsfresh', train_len:int=3, test_standing=False, + data_input:str='imu+bvp', ): # window_size, window_shift, intra, inter cal_str = 'cpm' fs = IMU_FS tmp = [] imu_cols = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z'] - bvp_cols= ['bvp'] + bvp_cols = ['bvp'] - # TODO: - # implement and input args config by data cols - # implement and input args config with test_standing - data_cols = imu_cols + bvp_cols + if 'imu' in data_input and 'bvp' in data_input: + data_cols = imu_cols + bvp_cols + elif 'imu' in data_input and not 'bvp' in data_input: + data_cols = imu_cols + elif not 'imu' in data_input and 'bvp' in data_input: + data_cols = bvp_cols do_minirocket = False use_tsfresh = False @@ -680,7 +734,37 @@ def imu_rr_model(subject, # include standing or not test_df_tmp = get_test_data(cal_df, activity_df, xsens_df, test_standing) test_df = pd.concat([df for df in test_df_tmp['data']], axis=0) - + x_test_df, y_test_df = get_df_windows(test_df, + imu_df_win_task, + window_size=window_size, + window_shift=window_shift, + fs=fs, + ) + + acc_dsp_df, acc_y_dsp_df = get_df_windows(test_df, dsp_win_func, + window_size=window_size, + window_shift=window_shift, + fs=fs, + cols=['acc_x', 'acc_y', 'acc_z']) + gyr_dsp_df, gyr_y_dsp_df = get_df_windows(test_df, dsp_win_func, + window_size=window_size, + window_shift=window_shift, + fs=fs, + cols=['gyr_x', 'gyr_y', 'gyr_z']) + + acc_evals = Evaluation(acc_y_dsp_df[lbl_str], acc_dsp_df['pred']) + gyr_evals = Evaluation(gyr_y_dsp_df[lbl_str], gyr_dsp_df['pred']) + print("acc evals: \n", acc_evals.get_evals()) + print("gyr evals: \n", gyr_evals.get_evals()) + plt.subplot(211) + plt.plot(acc_y_dsp_df[lbl_str]); plt.plot(acc_dsp_df['pred']) + plt.subplot(212) + plt.plot(acc_y_dsp_df[lbl_str]); plt.plot(acc_dsp_df['pred']) + plt.show() + + # TODO implement evals from dsp to results + ipdb.set_trace() + for combi in combinations(cal_df[cal_str].values, train_len): combi_str = "-".join([str(x) for x in combi]) pfh.config[cal_str] = combi_str @@ -707,17 +791,12 @@ def imu_rr_model(subject, window_shift=window_shift, fs=fs, ) - x_test_df, y_test_df = get_df_windows(test_df, - imu_df_win_task, - window_size=window_size, - window_shift=window_shift, - fs=fs, - ) x_train = make_windows_from_id(x_train_df, imu_cols) - x_test = make_windows_from_id(x_test_df, imu_cols) y_train = y_train_df[lbl_str].values.reshape(-1, 1) + x_test = make_windows_from_id(x_test_df, imu_cols) y_test = y_test_df[lbl_str].values.reshape(-1, 1) + print("minirocket transforming...") x_train = np.swapaxes(x_train, 1, 2) @@ -731,19 +810,6 @@ def imu_rr_model(subject, x_test = test_df.iloc[:, 3:].values y_test = test_df[lbl_str].values.reshape(-1, 1) else: - x_train_df, y_train_df = get_df_windows(train_df, - imu_df_win_task, - window_size=window_size, - window_shift=window_shift, - fs=fs, - ) - x_test_df, y_test_df = get_df_windows(test_df, - imu_df_win_task, - window_size=window_size, - window_shift=window_shift, - fs=fs, - ) - x_train = make_windows_from_id(x_train_df, imu_cols) x_test = make_windows_from_id(x_test_df, imu_cols) y_train = y_train_df[lbl_str].values.reshape(-1, 1) @@ -814,6 +880,15 @@ def arg_parser(): default=3, help='minutes of data to use for calibration' ) + parser.add_argument('-d', '--data_input', type=str, + default='imu', + help='imu, bvp, imu+bvp: select data cols for input' + ) + parser.add_argument('-ts', '--test_standing', type=int, + default=0, + help='1 or 0 input, choose if standing data will be '\ + 'recorded or not' + ) args = parser.parse_args() return args @@ -832,6 +907,8 @@ if __name__ == '__main__': lbl_str = args.lbl_str train_len = args.train_len overwrite = args.overwrite + data_input = args.data_input + test_standing = args.test_standing print(args) assert train_len>0,"--train_len must be an integer greater than 0" @@ -855,7 +932,9 @@ if __name__ == '__main__': mdl_str=mdl_str, overwrite=overwrite, feature_method=feature_method, - train_len=train_len + train_len=train_len, + data_input=data_input, + test_standing=test_standing, ) if mdl_str in ['fnn', 'lstm', 'cnn1d', 'elastic', 'ard', 'xgboost']: