Newer
Older
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
self.subject = subject
self.config = pfh.config
self.parent_directory = join(DATA_DIR, 'subject_specific')
self.fset_id = pfh.fset_id
self.sens_str = sens_str
self.overwrite = overwrite
self.evals = Evaluation(y_true, y_pred)
entry = {'subject': self.subject,
'config_id': self.fset_id,
'sens_str': self.sens_str,
}
self.entry = {**entry, **self.config, **self.evals.get_evals()}
self.eval_history_file = join(self.parent_directory,
'dsp_eval_history.csv')
self.eval_hist = self.load_eval_history()
def load_eval_history(self):
if not exists(self.eval_history_file):
return None
else:
return pd.read_csv(self.eval_history_file)
def update_eval_history(self):
eval_hist = self.eval_hist
if eval_hist is None:
eval_hist = pd.DataFrame([self.entry])
else:
index_list = eval_hist[
(eval_hist['subject'] == self.entry['subject']) &\
(eval_hist['config_id'] == self.entry['config_id']) &\
(eval_hist['sens_str'] == self.entry['sens_str'])
].index.tolist()
if len(index_list) == 0:
print("adding new entry")
eval_hist = eval_hist._append(self.entry, ignore_index=True)
elif index_list is not None and self.overwrite:
eval_hist.loc[index_list[0]] = self.entry
self.eval_hist = eval_hist
def save_eval_history(self):
self.eval_hist.to_csv(self.eval_history_file, index=False)
def imu_rr_dsp(subject,
window_size=12,
window_shift=0.2,
lbl_str='pss',
overwrite=False,
train_len:int=3,
test_standing=False,
):
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
# TODO:
# implement evaluation saving
"""Loads, preprocesses, and performs Hernandez digital signal processing
pipeline on the selected subject. Uses the specified parameters. Runs on
both accelerometer and gyroscope.
Attributes
----------
subject: str
specify the subject code (i.e. 'Pilot02', 'S02')
window_size : float
a numpy array of the respiration rate ground truth values from the
bioharness
window_shift : float
a portion of the window size between 0 and 1
mdl_str : str
a string to infoa portion of the window size between 0 and 1rm what model was used
overwrite : bool
overwrites the evaluations, models, and graphs (default False)
test_standing : bool
boolean to use standing data
Returns
-------
None
"""
imu_cols = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']
train_size = int(train_len)
config = {'window_size' : window_size,
'window_shift' : window_shift,
'lbl_str' : lbl_str,
'train_len' : train_len,
}
pfh = ProjectFileHandler(config)
pfh.set_home_directory(join(DATA_DIR, 'subject_specific', subject))
id_check = pfh.get_id_from_config()
if id_check is None:
pfh.set_project_directory()
pfh.save_metafile()
else:
pfh.set_id(int(id_check))
pfh.set_project_directory()
print('Using pre-set data id: ', pfh.fset_id)
project_dir = pfh.project_directory
xsens_df = load_and_sync_xsens(subject, sens_list=['imu'])
activity_df = get_activity_log(subject)
event_df = get_respiration_log(subject)
cal_df = get_cal_data(event_df, xsens_df)
# include standing or not
test_df_tmp = get_test_data(cal_df, activity_df, xsens_df, test_standing)
test_df = pd.concat([df for df in test_df_tmp['data']], axis=0)
acc_dsp_df, acc_y_dsp_df = get_df_windows(test_df, dsp_win_func,
window_size=window_size,
window_shift=window_shift,
fs=fs,
cols=['acc_x', 'acc_y', 'acc_z'])
gyr_dsp_df, gyr_y_dsp_df = get_df_windows(test_df, dsp_win_func,
window_size=window_size,
window_shift=window_shift,
fs=fs,
acc_evals = Evaluation(acc_y_dsp_df[lbl_str], acc_dsp_df['pred'])
gyr_evals = Evaluation(gyr_y_dsp_df[lbl_str], gyr_dsp_df['pred'])
print("acc evals: \n", acc_evals.get_evals())
print("gyr evals: \n", gyr_evals.get_evals())
acc_eval = DSPEvalHandler(acc_evals.y_true.values.flatten(),
acc_evals.y_pred.values.flatten(),
subject,
pfh, 'acc', overwrite=overwrite)
acc_eval.update_eval_history()
acc_eval.save_eval_history()
gyr_eval = DSPEvalHandler(gyr_evals.y_true.values.flatten(),
gyr_evals.y_pred.values.flatten(),
subject,
pfh, 'gyro', overwrite=overwrite)
gyr_eval.update_eval_history()
gyr_eval.save_eval_history()
pp.pprint(acc_eval.load_eval_history())
fig, ax = plt.subplots(2, 1)
ax[0].plot(acc_y_dsp_df[lbl_str]); plt.plot(acc_dsp_df['pred'])
ax[0].set_title("ACC")
ax[1].plot(gyr_y_dsp_df[lbl_str]); plt.plot(gyr_dsp_df['pred'])
ax[1].set_title("GYRO")
ax[1].legend([lbl_str, 'estimate'])
fig_dir = join(project_dir, 'figures')
if not exists(fig_dir): mkdir(fig_dir)
fig.savefig(join(fig_dir, fig_title+".png"))
plt.close()
def sens_rr_model(subject,
window_size=12,
window_shift=0.2,
lbl_str='pss',
mdl_str='knn',
overwrite=False,
feature_method='tsfresh',
train_len:int=3,
test_standing=False,
data_input:str='imu+bvp',
):
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
"""Loads, preprocesses, and trains a select model using the configured
settings.
Attributes
----------
subject: str
specify the subject code (i.e. 'Pilot02', 'S02')
window_size : float
a numpy array of the respiration rate ground truth values from the
bioharness
window_shift : float
a portion of the window size between 0 and 1
mdl_str : str
a string to infoa portion of the window size between 0 and 1rm what model was used
overwrite : bool
overwrites the evaluations, models, and graphs (default False)
feature_method : str
choose between 'minirocket', 'tsfresh', or 'None'
train_len : int
number of minutes to sample from, choose between 1 to 7
test_standing : bool
boolean to use standing data
data_input : str
sensors to use, choose from 'imu', 'bvp', 'imu+bvp'
Returns
------
None
"""
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
cal_str = 'cpm'
tmp = []
imu_cols = IMU_COLS
bvp_cols = ['bvp']
if 'imu' in data_input and 'bvp' in data_input:
data_cols = ['acc_x', 'acc_y', 'acc_z',
'gyro_x', 'gyro_y', 'gyro_z',
'bvp']
parent_directory_string = "imu-bvp_rr"
data_input = 'imu+bvp'
sens_list = ['imu', 'bvp']
fs = IMU_FS
elif 'imu' in data_input and not 'bvp' in data_input:
data_cols = ['acc_x', 'acc_y', 'acc_z',
'gyro_x', 'gyro_y', 'gyro_z',]
parent_directory_string = "imu_rr"
sens_list = ['imu']
fs = IMU_FS
elif not 'imu' in data_input and 'bvp' in data_input:
data_cols = ['bvp']
parent_directory_string = "bvp_rr"
sens_list = ['bvp']
fs = PPG_FS
do_minirocket = False
use_tsfresh = False
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
train_size = int(train_len)
if feature_method == 'tsfresh':
use_tsfresh = True
elif feature_method == 'minirocket':
do_minirocket = True
config = {'window_size' : window_size,
'window_shift' : window_shift,
'lbl_str' : lbl_str,
'do_minirocket' : do_minirocket,
'use_tsfresh' : use_tsfresh,
'train_len' : train_len,
'test_standing' : test_standing,
'sens_list' : data_input
}
pfh = ProjectFileHandler(config)
pfh.set_home_directory(join(DATA_DIR, 'subject_specific', subject))
pfh.set_parent_directory(parent_directory_string)
id_check = pfh.get_id_from_config()
if id_check is None:
pfh.set_project_directory()
pfh.save_metafile()
else:
pfh.set_id(int(id_check))
pfh.set_project_directory()
print('Using pre-set data id: ', pfh.fset_id)
project_dir = pfh.project_directory
xsens_df = load_and_sync_xsens(subject, sens_list=sens_list)
activity_df = get_activity_log(subject)
event_df = get_respiration_log(subject)
cal_df = get_cal_data(event_df, xsens_df)
# include standing or not
test_df_tmp = get_test_data(cal_df, activity_df, xsens_df, test_standing)
test_df = pd.concat([df for df in test_df_tmp['data']], axis=0)
cal_df_list = []
test_df = load_tsfresh(test_df,
pfh.home_directory,
window_size=window_size,
window_shift=window_shift,
fs=fs,
overwrite=overwrite_tsfresh,
data_cols=data_cols,
prefix='test',
)
for index, row in cal_df.iterrows():
data = load_tsfresh(row['data'],
pfh.home_directory,
window_size=window_size,
window_shift=window_shift,
fs=fs,
overwrite=overwrite_tsfresh,
data_cols=data_cols,
cal_df = pd.DataFrame(cal_df_list)
else:
x_test_df, y_test_df = get_df_windows(
test_df, df_win_task, window_size=window_size,
window_shift=window_shift, fs=fs, cols=data_cols)
for combi in combinations(cal_df[cal_str].values, train_len):
combi_str = "-".join([str(x) for x in combi])
pfh.config[cal_str] = combi_str
marker = f'{parent_directory_string}_{subject}_id{pfh.fset_id}'\
f'_combi{combi_str}'
train_df_list = []
for cpm in combi:
df = cal_df[cal_df[cal_str] == cpm]
data_df = df['data'].iloc[0]
data_df['cpm'] = cpm
train_df_list.append(data_df)
train_df = pd.concat(train_df_list)
assert np.isin(train_df.sec.values, test_df.sec.values).any()==False,\
"overlapping test and train data"
print("train")
print(train_df.shape)
print("test")
print(test_df.shape)
if do_minirocket:
x_train_df, y_train_df = get_df_windows(train_df,
window_size=window_size,
window_shift=window_shift,
fs=fs,
x_train = make_windows_from_id(x_train_df, data_cols)
y_train = y_train_df['cpm'].values.reshape(-1, 1)
x_test = make_windows_from_id(x_test_df, data_cols)
# x_train = y_train_df['hr_est'].values.reshape(-1, 1)
# x_test = y_test_df['hr_est'].values.reshape(-1, 1)
print("minirocket transforming...")
x_train = np.swapaxes(x_train, 1, 2)
x_test = np.swapaxes(x_test, 1, 2)
minirocket = MiniRocketMultivariate()
x_train = minirocket.fit_transform(x_train)
x_test = minirocket.transform(x_test)
elif use_tsfresh:
y_cols = ['sec', 'br', 'pss', 'cpm']
x_cols = [col for col in train_df.columns.values if col not in y_cols]
x_train = train_df[x_cols].values
x_train_df, y_train_df = get_df_windows(train_df,
df_win_task,
window_size=window_size,
window_shift=window_shift,
fs=fs,
cols=data_cols,
)
x_train = make_windows_from_id(x_train_df, data_cols)
x_test = make_windows_from_id(x_test_df, data_cols)
y_train = y_train_df['cpm'].values.reshape(-1, 1)
y_test = y_test_df[lbl_str].values.reshape(-1, 1)
transforms, model = model_training(mdl_str, x_train, y_train,
marker, validation_data=None,
overwrite=overwrite,
is_regression=True,
project_directory=project_dir,
window_size=int(window_size*fs),
extra_train=200,
)
if transforms is not None:
x_test = transforms.transform(x_test)
preds = model.predict(x_test)
eval_handle = EvalHandler(y_test.flatten(), preds.flatten(), subject,
pfh, mdl_str, overwrite=overwrite)
eval_handle.update_eval_history()
eval_handle.save_eval_history()
pp = PrettyPrinter()
pp.pprint(eval_handle.load_eval_history())
fig, ax = plt.subplots(2, 1, figsize=(7.3, 4.5))
fig_title = '_'.join([mdl_str, data_input, subject]+[combi_str])
fig.suptitle(fig_title)
ax[0].plot(y_test)
ax[0].plot(preds)
ax[0].set_title('raw')
if lbl_str == 'pss':
br = y_test_df['br'].values
ax[1].plot(movingaverage(y_test, 12), color='tab:blue')
ax[1].plot(br, 'k')
ax[1].plot(movingaverage(preds, 12), color='tab:orange')
ax[1].legend([lbl_str, 'br', 'pred'])
else:
ax[1].plot(y_test, 'k')
ax[1].plot(movingaverage(preds, 12), color='tab:orange')
ax[1].legend([lbl_str, 'pred'])
ax[1].set_title('smoothened')
fig_dir = join(project_dir, 'figures')
if not exists(fig_dir): mkdir(fig_dir)
fig.savefig(join(fig_dir, fig_title+".png"))
plt.close()
"""Returns arguments in a Namespace to configure the subject specific model
"""
parser = argparse.ArgumentParser()
parser.add_argument("-m", '--model', type=str,
default='linreg',
choices=['linreg', 'ard', 'xgboost', 'knn',
'svr', 'cnn1d', 'fnn', 'lstm', 'ridge',
'elastic'],
)
parser.add_argument("-s", '--subject', type=int,
)
parser.add_argument("-f", '--feature_method', type=str,
default='minirocket',
choices=['tsfresh', 'minirocket', 'None']
)
parser.add_argument("-o", '--overwrite', type=int,
default=0,
)
parser.add_argument('--win_size', type=int,
default=12,
)
parser.add_argument('--win_shift', type=float,
default=0.2,
)
parser.add_argument('-l', '--lbl_str', type=str,
)
parser.add_argument('-tl', '--train_len', type=int,
parser.add_argument('-d', '--data_input', type=str,
default='imu',
help='imu, bvp, imu+bvp: select data cols for input'
)
parser.add_argument('-ts', '--test_standing', type=int,
help='1 or 0 input, choose if standing data will be '\
'recorded or not'
)
parser.add_argument('--method', type=str,
default='ml',
help="choose between 'ml' or 'dsp' methods for"\
" regression",
choices=['ml', 'dsp']
)
args = parser.parse_args()
return args
if __name__ == '__main__':
np.random.seed(100)
mdl_str = args.model
subject = args.subject
feature_method = args.feature_method
window_size = args.win_size
window_shift = args.win_shift
lbl_str = args.lbl_str
train_len = args.train_len
overwrite = args.overwrite
data_input = args.data_input
test_standing = args.test_standing
print(args)
assert train_len>0,"--train_len must be an integer greater than 0"
sens_rr_model(subject,
window_size=window_size,
window_shift=window_shift,
lbl_str=lbl_str,
mdl_str=mdl_str,
overwrite=overwrite,
feature_method=feature_method,
train_len=train_len,
test_standing=test_standing,
data_input=data_input,
)
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
elif subject <= 0 and method == 'ml':
subjects = [subject_pre_string+str(i).zfill(2) for i in \
range(2, n_subject_max+1)]
rr_func = partial(sens_rr_model,
window_size=window_size,
window_shift=window_shift,
lbl_str=lbl_str,
mdl_str=mdl_str,
overwrite=overwrite,
feature_method=feature_method,
train_len=train_len,
test_standing=test_standing,
data_input=data_input,
)
for subject in subjects:
rr_func(subject)
if subject > 0 and method == 'dsp':
subject = subject_pre_string+str(subject).zfill(2)
imu_rr_dsp(subject,
window_size=window_size,
window_shift=window_shift,
lbl_str=lbl_str,
overwrite=overwrite,
train_len=train_len,
test_standing=test_standing)
elif subject <= 0 and method == 'ml':
rr_func = partial(sens_rr_model,
window_size=window_size,
window_shift=window_shift,
lbl_str=lbl_str,
mdl_str=mdl_str,
overwrite=overwrite,
feature_method=feature_method,
train_len=train_len,
test_standing=test_standing,
data_input=data_input,
)