Newer
Older
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
overwrite : bool
overwrites the evaluations (default False)
Methods
-------
load_eval_history()
loads the evaluation file
save_eval_history()
saves the evaluation file
update_eval_history()
updates the evaluation file using the new entry if there is no matching
model or configuration for the given subject
"""
def __init__(self, y_true, y_pred, subject, pfh, sens_str, overwrite=False):
    """Build a DSP evaluation record for one subject/configuration.

    Parameters
    ----------
    y_true, y_pred : array-like
        ground-truth and predicted values to score
    subject : str
        subject code (e.g. 'Pilot02', 'S02')
    pfh : ProjectFileHandler
        handler supplying the run configuration and feature-set id
    sens_str : str
        sensor tag for this evaluation (e.g. 'acc', 'gyro')
    overwrite : bool
        overwrite a matching history entry if one exists (default False)
    """
    self.subject = subject
    self.sens_str = sens_str
    self.overwrite = overwrite
    self.config = pfh.config
    self.fset_id = pfh.fset_id
    self.parent_directory = join(DATA_DIR, 'subject_specific')
    self.eval_history_file = join(self.parent_directory,
                                  'dsp_eval_history.csv')
    self.evals = Evaluation(y_true, y_pred)
    key_fields = {
        'subject': self.subject,
        'config_id': self.fset_id,
        'sens_str': self.sens_str,
    }
    # one flat record: identifying keys + run config + metric values
    self.entry = {**key_fields, **self.config, **self.evals.get_evals()}
    self.eval_hist = self.load_eval_history()
def load_eval_history(self):
    """Read the evaluation-history CSV, or return None if it does not exist."""
    history_path = self.eval_history_file
    if exists(history_path):
        return pd.read_csv(history_path)
    return None
def update_eval_history(self):
    """Insert or update this run's entry in the evaluation history.

    A history row is keyed by (subject, config_id, sens_str).  A new row
    is appended when no matching row exists; an existing row is replaced
    (first match only) when ``self.overwrite`` is True, and left alone
    otherwise.  Updates ``self.eval_hist`` in place; nothing is written
    to disk (see ``save_eval_history``).
    """
    eval_hist = self.eval_hist
    if eval_hist is None:
        # no history file was found on load: start a fresh one
        eval_hist = pd.DataFrame([self.entry])
    else:
        mask = (
            (eval_hist['subject'] == self.entry['subject']) &
            (eval_hist['config_id'] == self.entry['config_id']) &
            (eval_hist['sens_str'] == self.entry['sens_str'])
        )
        index_list = eval_hist[mask].index.tolist()
        if len(index_list) == 0:
            print("adding new entry")
            # DataFrame._append is a private API removed in pandas 2.x;
            # pd.concat is the supported way to append a row.
            eval_hist = pd.concat(
                [eval_hist, pd.DataFrame([self.entry])],
                ignore_index=True,
            )
        elif self.overwrite:
            # index_list is a (non-empty) list here, so the old
            # `index_list is not None` guard was always True; replace
            # only the first matching row, aligned by column name.
            eval_hist.loc[index_list[0]] = pd.Series(self.entry)
    self.eval_hist = eval_hist
def save_eval_history(self):
    """Write the in-memory evaluation history to its CSV file (no index)."""
    self.eval_hist.to_csv(self.eval_history_file, index=False)
def imu_rr_dsp(subject,
               window_size=12,
               window_shift=0.2,
               lbl_str='pss',
               overwrite=False,
               train_len:int=3,
               test_standing=False,
               ):
    # NOTE(review): the bare-number lines below look like corrupted /
    # lost source from an extraction artifact; the original statements
    # are missing here -- restore from version control.
    1068
    1069
    1070
    1071
    1072
    1073
    1074
    1075
    1076
    1077
    1078
    1079
    1080
    1081
    1082
    1083
    1084
    1085
    1086
    1087
    1088
    1089
    1090
    1091
    """Loads, preprocesses, and performs Hernandez digital signal processing
    pipeline on the selected subject. Uses the specified parameters. Runs on
    both accelerometer and gyroscope.

    Attributes
    ----------
    subject: str
        specify the subject code (i.e. 'Pilot02', 'S02')
    window_size : float
        length of each sliding window (in seconds, presumably -- TODO confirm)
    window_shift : float
        a portion of the window size between 0 and 1
    lbl_str : str
        label column used as ground truth (default 'pss')
    overwrite : bool
        overwrites the evaluations, models, and graphs (default False)
    train_len : int
        number of calibration segments (stored in config; see note below)
    test_standing : bool
        boolean to use standing data

    Returns
    -------
    None
    """
    imu_cols = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']
    # NOTE(review): train_size is assigned but never used below -- confirm
    # whether it belonged to the lost lines above.
    train_size = int(train_len)
    config = {'window_size' : window_size,
              'window_shift' : window_shift,
              'lbl_str' : lbl_str,
              'train_len' : train_len,
              }
    pfh = ProjectFileHandler(config)
    pfh.set_home_directory(join(DATA_DIR, 'subject_specific', subject))
    id_check = pfh.get_id_from_config()
    if id_check is None:
        # first run with this config: create a new project id/directory
        pfh.set_project_directory()
        pfh.save_metafile()
    else:
        # reuse the existing project directory for this config
        pfh.set_id(int(id_check))
        pfh.set_project_directory()
        print('Using pre-set data id: ', pfh.fset_id)
    project_dir = pfh.project_directory
    xsens_df = load_and_sync_xsens(subject, sens_list=['imu'])
    activity_df = get_activity_log(subject)
    event_df = get_respiration_log(subject)
    cal_df = get_cal_data(event_df, xsens_df)
    # include standing or not
    test_df_tmp = get_test_data(cal_df, activity_df, xsens_df, test_standing)
    test_df = pd.concat([df for df in test_df_tmp['data']], axis=0)
    # windowed DSP pipeline on the accelerometer channels
    # NOTE(review): `fs` is not defined in this function -- presumably a
    # module-level sampling-rate constant; confirm.
    acc_dsp_df, acc_y_dsp_df = get_df_windows(test_df, dsp_win_func,
                                              window_size=window_size,
                                              window_shift=window_shift,
                                              fs=fs,
                                              cols=['acc_x', 'acc_y', 'acc_z'])
    # NOTE(review): this gyroscope call is truncated (missing `cols=` and
    # the closing parenthesis) -- source lost; restore from version control.
    gyr_dsp_df, gyr_y_dsp_df = get_df_windows(test_df, dsp_win_func,
                                              window_size=window_size,
                                              window_shift=window_shift,
                                              fs=fs,
    acc_evals = Evaluation(acc_y_dsp_df[lbl_str], acc_dsp_df['pred'])
    gyr_evals = Evaluation(gyr_y_dsp_df[lbl_str], gyr_dsp_df['pred'])
    print("acc evals: \n", acc_evals.get_evals())
    print("gyr evals: \n", gyr_evals.get_evals())
    # persist per-sensor evaluation history
    acc_eval = DSPEvalHandler(acc_evals.y_true.values.flatten(),
                              acc_evals.y_pred.values.flatten(),
                              subject,
                              pfh, 'acc', overwrite=overwrite)
    acc_eval.update_eval_history()
    acc_eval.save_eval_history()
    gyr_eval = DSPEvalHandler(gyr_evals.y_true.values.flatten(),
                              gyr_evals.y_pred.values.flatten(),
                              subject,
                              pfh, 'gyro', overwrite=overwrite)
    gyr_eval.update_eval_history()
    gyr_eval.save_eval_history()
    # NOTE(review): `pp` (PrettyPrinter) is not defined in this function;
    # presumably created in the lost lines -- confirm.
    pp.pprint(acc_eval.load_eval_history())
    fig, ax = plt.subplots(2, 1)
    ax[0].plot(acc_y_dsp_df[lbl_str].values);
    ax[0].plot(acc_dsp_df['pred'].values)
    ax[1].plot(gyr_y_dsp_df[lbl_str].values);
    ax[1].plot(gyr_dsp_df['pred'].values)
    ax[1].set_title("GYRO")
    ax[1].legend([lbl_str, 'estimate'])
    fig_dir = join(project_dir, 'figures')
    if not exists(fig_dir): mkdir(fig_dir)
    # NOTE(review): `fig_title` is never assigned in the visible code --
    # NameError at runtime unless defined in the lost lines; confirm.
    fig.savefig(join(fig_dir, fig_title+".png"))
    plt.close()
def sens_rr_model(subject,
window_size=12,
window_shift=0.2,
lbl_str='pss',
mdl_str='knn',
overwrite=False,
feature_method='tsfresh',
train_len:int=3,
test_standing=False,
data_input:str='imu+bvp',
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
"""Loads, preprocesses, and trains a select model using the configured
settings.
Attributes
----------
subject: str
specify the subject code (i.e. 'Pilot02', 'S02')
window_size : float
a numpy array of the respiration rate ground truth values from the
bioharness
window_shift : float
a portion of the window size between 0 and 1
mdl_str : str
a string to infoa portion of the window size between 0 and 1rm what model was used
overwrite : bool
overwrites the evaluations, models, and graphs (default False)
feature_method : str
choose between 'minirocket', 'tsfresh', or 'None'
train_len : int
number of minutes to sample from, choose between 1 to 7
test_standing : bool
boolean to use standing data
data_input : str
sensors to use, choose from 'imu', 'bvp', 'imu+bvp'
Returns
------
None
"""
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
# Select data columns, output directory name, sensor list and sampling
# rate from the requested input modality ('imu', 'bvp', or 'imu+bvp').
cal_str = 'cpm'
tmp = []
imu_cols = IMU_COLS
bvp_cols = ['bvp']
if 'imu' in data_input and 'bvp' in data_input:
    data_cols = ['acc_x', 'acc_y', 'acc_z',
                 'gyro_x', 'gyro_y', 'gyro_z',
                 'bvp']
    parent_directory_string = "imu-bvp_rr"
    # normalise the user string so later comparisons are exact
    data_input = 'imu+bvp'
    sens_list = ['imu', 'bvp']
    fs = IMU_FS
elif 'imu' in data_input and not 'bvp' in data_input:
    data_cols = ['acc_x', 'acc_y', 'acc_z',
                 'gyro_x', 'gyro_y', 'gyro_z',]
    parent_directory_string = "imu_rr"
    sens_list = ['imu']
    fs = IMU_FS
elif not 'imu' in data_input and 'bvp' in data_input:
    data_cols = ['bvp']
    parent_directory_string = "bvp_rr"
    sens_list = ['bvp']
    fs = PPG_FS
# NOTE(review): no else branch -- an unrecognised data_input leaves
# data_cols/fs undefined and fails later with NameError; confirm intent.
do_minirocket = False
use_tsfresh = False
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
if feature_method == 'tsfresh':
use_tsfresh = True
elif feature_method == 'minirocket':
do_minirocket = True
config = {'window_size' : window_size,
'window_shift' : window_shift,
'lbl_str' : lbl_str,
'do_minirocket' : do_minirocket,
'use_tsfresh' : use_tsfresh,
'train_len' : train_len,
'test_standing' : test_standing,
'sens_list' : data_input
}
pfh = ProjectFileHandler(config)
pfh.set_home_directory(join(DATA_DIR, 'subject_specific', subject))
pfh.set_parent_directory(parent_directory_string)
id_check = pfh.get_id_from_config()
if id_check is None:
pfh.set_project_directory()
pfh.save_metafile()
else:
pfh.set_id(int(id_check))
pfh.set_project_directory()
print('Using pre-set data id: ', pfh.fset_id)
project_dir = pfh.project_directory
xsens_df = load_and_sync_xsens(subject, sens_list=sens_list)
activity_df = get_activity_log(subject).reset_index(drop=True)
event_df = get_respiration_log(subject)
cal_df = get_cal_data(event_df, xsens_df)
# include standing or not
test_df_tmp = get_test_data(cal_df, activity_df, xsens_df, test_standing)
test_df = pd.concat([df for df in test_df_tmp['data']], axis=0)
for index, row in cal_df.iterrows():
data = load_tsfresh(row['data'],
pfh.home_directory,
window_size=window_size,
window_shift=window_shift,
fs=fs,
overwrite=overwrite_tsfresh,
data_cols=data_cols,
m_data_cols = data.columns.values[3:]
rel_df = relevance.calculate_relevance_table(
data[m_data_cols], data['pss'])
params = []
if 'imu' in sens_list:
tmp = [
val for val in rel_df['feature'] if 'acc' in val or
'gyr' in val
]
params += tmp[:ntop_features]
if 'bvp' in sens_list:
tmp = [val for val in rel_df['feature'] if 'bvp' in val]
params += tmp[:ntop_features]
assert params is not None, "Invalid selection of parameters"
top_cpm_features[row['cpm']] = params
param_df = pd.DataFrame.from_dict(top_cpm_features, orient='index')
top_cpm_params = param_df.columns.values
arr = param_df[top_cpm_params].values.flatten()
hist_df = pd.DataFrame.from_dict(Counter(arr), orient='index')
feature_strings = get_parameters_from_feature_string(
hist_df.index.values.tolist())
# Get the relevance for tables cal and extract top 20
test_df = load_tsfresh(test_df,
pfh.home_directory,
window_size=window_size,
window_shift=window_shift,
fs=fs,
overwrite=overwrite_tsfresh,
data_cols=data_cols,
prefix='test',
kind_to_fc_parameters=feature_strings
)
chosen_cols = [col for col in test_df.columns if col in
hist_df.index.tolist()+y_cols]
test_df = test_df[chosen_cols]
else:
x_test_df, y_test_df = get_df_windows(
test_df, df_win_task, window_size=window_size,
window_shift=window_shift, fs=fs, cols=data_cols)
for combi in combinations(cal_df[cal_str].values, train_len):
combi_str = "-".join([str(x) for x in combi])
pfh.config[cal_str] = combi_str
marker = f'{parent_directory_string}_{subject}_id{pfh.fset_id}'\
f'_combi{combi_str}'
train_df_list = []
for cpm in combi:
df = cal_df[cal_df[cal_str] == cpm]
data_df = df['data'].iloc[0]
data_df['cpm'] = cpm
train_df_list.append(data_df)
train_df = pd.concat(train_df_list)
assert np.isin(train_df.sec.values, test_df.sec.values).any()==False,\
"overlapping test and train data"
print("train")
print(train_df.shape)
print("test")
print(test_df.shape)
if do_minirocket:
x_train_df, y_train_df = get_df_windows(train_df,
window_size=window_size,
window_shift=window_shift,
fs=fs,
x_train = make_windows_from_id(x_train_df, data_cols)
y_train = y_train_df['cpm'].values.reshape(-1, 1)
x_test = make_windows_from_id(x_test_df, data_cols)
# x_train = y_train_df['hr_est'].values.reshape(-1, 1)
# x_test = y_test_df['hr_est'].values.reshape(-1, 1)
x_train = np.swapaxes(x_train, 1, 2)
x_test = np.swapaxes(x_test, 1, 2)
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
directory = join(project_dir, '_'.join([mdl_str, marker]))
minirocket_fname = join(directory, "minirocket.joblib")
if not overwrite and exists(minirocket_fname):
with open(minirocket_fname, 'rb') as mfile:
minirocket = load(mfile)
x_train = minirocket.transform(x_train)
print("loaded minirocket...")
else:
minirocket = MiniRocketMultivariate()
x_train = minirocket.fit_transform(x_train)
x_test = minirocket.transform(x_test)
if overwrite or not exists(minirocket_fname):
if not exists(directory): makedirs(directory)
with open(minirocket_fname, 'wb') as mfile:
dump(minirocket, mfile)
print("saved new minirocket...")
x_cols = [col for col in test_df.columns.values if col not in y_cols]
x_train = train_df[x_cols].values
y_train = train_df['cpm'].values.reshape(-1, 1)
x_train_df, y_train_df = get_df_windows(train_df,
df_win_task,
window_size=window_size,
window_shift=window_shift,
fs=fs,
cols=data_cols,
)
x_train = make_windows_from_id(x_train_df, data_cols)
x_test = make_windows_from_id(x_test_df, data_cols)
y_train = y_train_df['cpm'].values.reshape(-1, 1)
y_test = y_test_df[lbl_str].values.reshape(-1, 1)
transforms, model = model_training(mdl_str, x_train, y_train,
marker, validation_data=None,
overwrite=overwrite,
is_regression=True,
project_directory=project_dir,
window_size=int(window_size*fs),
extra_train=200,
)
if transforms is not None:
x_test = transforms.transform(x_test)
preds = model.predict(x_test)
eval_handle = EvalHandler(y_test.flatten(), preds.flatten(), subject,
pfh, mdl_str, overwrite=overwrite)
eval_handle.update_eval_history()
eval_handle.save_eval_history()
pp = PrettyPrinter()
pp.pprint(eval_handle.load_eval_history())
fig, ax = plt.subplots(2, 1, figsize=(7.3, 4.5))
fig_title = '_'.join([mdl_str, data_input, subject]+[combi_str])
fig.suptitle(fig_title)
ax[0].plot(y_test)
ax[0].plot(preds)
ax[0].set_title('raw')
if lbl_str == 'pss':
br = y_test_df['br'].values
ax[1].plot(movingaverage(preds, 12),
color='tab:orange')
ax[1].legend([lbl_str, 'br', 'pred'])
else:
ax[1].plot(y_test, 'k')
ax[1].plot(movingaverage(preds, 12), color='tab:orange')
ax[1].legend([lbl_str, 'pred'])
ax[1].set_title('smoothened')
fig_dir = join(project_dir, 'figures')
if not exists(fig_dir): mkdir(fig_dir)
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
# Overlay the repeat-session activity/calibration markers on the plots
# for subject S02 only.
if subject == 'S02':
    sec = y_test_df['sec'].values
    activity_df2 = get_activity_log(subject+'-repeat')
    event_df2 = get_respiration_log(subject+'-repeat')
    second_cal_df = get_cal_data(event_df2, xsens_df)\
            .reset_index(drop=True)
    # NOTE(review): the four statements above are repeated verbatim
    # below -- looks like a copy/paste slip; redundant but harmless.
    sec = y_test_df['sec'].values
    activity_df2 = get_activity_log(subject+'-repeat')
    event_df2 = get_respiration_log(subject+'-repeat')
    second_cal_df = get_cal_data(event_df2, xsens_df)\
            .reset_index(drop=True)
    fmt = "%d/%m/%Y %H:%M:%S"
    # sample indices closest to the repeat session's start/end timestamps
    activity_sec0 = datetime.strptime(
        activity_df2['Timestamps'].iloc[0], fmt).timestamp()
    activity_sec1 = datetime.strptime(
        activity_df2['Timestamps'].iloc[-1], fmt).timestamp()
    ind0 = np.argmin(np.abs(sec - activity_sec0))
    ind1 = np.argmin(np.abs(sec - activity_sec1))
    ax[0].axvline(ind0, c='r', linestyle='--')
    ax[0].axvline(ind1, c='r', linestyle='--')
    ax[1].axvline(ind0, c='r', linestyle='--')
    ax[1].axvline(ind1, c='r', linestyle='--')
    fig2, ax2 = plt.subplots(2, 1, sharex=True)
    # window covering the last seven calibration segments of the repeat
    cal_sec0 = second_cal_df.iloc[-7]['data']['sec'].iloc[0]
    cal_sec1 = second_cal_df.iloc[-1]['data']['sec'].iloc[-1]
    calind0 = np.argmin(np.abs(sec - cal_sec0))
    calind1 = np.argmin(np.abs(sec - cal_sec1))
    ax2[0].plot(sec[calind0:], y_test[calind0:])
    ax2[0].plot(sec[calind0:],preds[calind0:])
    ax2[0].set_title('raw')
    ax2[1].plot(sec[calind0:],
                movingaverage(y_test, 12)[calind0:],
                color='tab:blue')
    # NOTE(review): `br` is only assigned in the lbl_str == 'pss' branch
    # earlier -- this raises NameError for other labels; confirm.
    ax2[1].plot(sec[calind0:], br[calind0:], 'k')
    ax2[1].plot(sec[calind0:],
                movingaverage(preds, 12)[calind0:],
                color='tab:orange')
    ax2[1].legend([lbl_str, 'br', 'pred'])
    ll = len(second_cal_df)
    # mark each of the last seven calibration segments
    for i in range(ll-7, ll):
        cal_sec0 = second_cal_df.iloc[i]['data']['sec'].iloc[0]
        cal_sec1 = second_cal_df.iloc[i]['data']['sec'].iloc[-1]
        calind0 = np.argmin(np.abs(sec - cal_sec0))
        calind1 = np.argmin(np.abs(sec - cal_sec1))
        ax2[0].axvline(sec[calind0], c='g', linestyle='--')
        ax2[0].axvline(sec[calind1], c='g', linestyle='--')
        ax2[1].axvline(sec[calind0], c='g', linestyle='--')
        ax2[1].axvline(sec[calind1], c='g', linestyle='--')
def pressure_ax_plot(func, win_size=12, win_shift=0.2, fs=120):
    """Debug plot: apply `func` to the pressure signal and compare the
    per-window dominant frequency against the ground-truth label.

    Closure over the enclosing scope's test_df / y_test_df.  `func` is a
    1-D signal-processing callable applied per sliding window.
    """
    dt = test_df.sec
    t = dt.map(lambda x: datetime.fromtimestamp(x))
    pss_sig = test_df.PSS.values
    # sliding windows over the raw pressure signal
    processed_wins = vsw(pss_sig, len(pss_sig),
                         sub_window_size=int(win_size*fs),
                         stride_size=int(win_size*win_shift*fs))
    processed_wins = map(func, processed_wins)
    # dominant frequency per processed window
    ps_freq = [get_max_frequency(ps_out, fs=fs) for ps_out in
               processed_wins]
    # matching time windows, averaged to one timestamp per window
    ft = vsw(dt.values, len(dt),
             sub_window_size=int(win_size*fs),
             stride_size=int(win_size*win_shift*fs))
    fig, axs = plt.subplots(3, 1, sharex=True)
    # axs[0].get_shared_x_axes().join(axs[0], axs[1])
    axs[0].plot(t, pss_sig)
    axs[1].plot(t, func(pss_sig))
    mtime = map(lambda x: datetime.fromtimestamp(x),
                ft.mean(axis=1))
    freqdt = np.array([t for t in mtime])
    tt = y_test_df.sec.map(lambda x:
                           datetime.fromtimestamp(x)).values
    axs[2].plot(freqdt, ps_freq)
    axs[2].plot(tt, y_test_df.pss.values)
    # fig.savefig(join(fig_dir, fig_title+"cal_check.png"))
fig2.savefig(join(fig_dir, fig_title+"cal_check.png"))
# NOTE(review): fig2 only exists inside the subject == 'S02' branch above;
# for other subjects this save raises NameError -- confirm.
# func = partial(pressure_signal_processing, fs=fs)
# pressure_ax_plot(func)
"""Returns arguments in a Namespace to configure the subject specific model
"""
parser = argparse.ArgumentParser()
parser.add_argument("-m", '--model', type=str,
default='linreg',
choices=['linreg', 'ard', 'xgboost', 'knn',
'svr', 'cnn1d', 'fnn', 'lstm', 'ridge',
'elastic'],
)
parser.add_argument("-s", '--subject', type=int,
choices=list(range(1,N_SUBJECT_MAX+1))+[-1],
)
parser.add_argument("-f", '--feature_method', type=str,
default='minirocket',
choices=['tsfresh', 'minirocket', 'None']
)
parser.add_argument("-o", '--overwrite', type=int,
default=0,
)
parser.add_argument('--win_size', type=int,
)
parser.add_argument('-tl', '--train_len', type=int,
parser.add_argument('-d', '--data_input', type=str,
default='imu',
help='imu, bvp, imu+bvp: select data cols for input'
)
parser.add_argument('-ts', '--test_standing', type=int,
help='1 or 0 input, choose if standing data will be '\
'recorded or not'
)
parser.add_argument('--method', type=str,
default='ml',
help="choose between 'ml' or 'dsp' methods for"\
args = parser.parse_args()
return args
if __name__ == '__main__':
    # fixed seed so repeated runs select the same calibration combinations
    np.random.seed(100)
    args = arg_parser()
    mdl_str = args.model
    subject = args.subject
    feature_method = args.feature_method
    window_size = args.win_size
    # NOTE(review): win_shift / lbl_str / method arguments are read here
    # but not defined by the visible parser -- presumably lost with the
    # corrupted parser lines; confirm against version control.
    window_shift = args.win_shift
    lbl_str = args.lbl_str
    train_len = args.train_len
    overwrite = args.overwrite
    data_input = args.data_input
    test_standing = args.test_standing
    print(args)
    assert train_len>0,"--train_len must be an integer greater than 0"
    assert train_len <= 7,"--train_len must be an integer less than 8"
    # single-subject ML run
    sens_rr_model(subject,
                  window_size=window_size,
                  window_shift=window_shift,
                  lbl_str=lbl_str,
                  mdl_str=mdl_str,
                  overwrite=overwrite,
                  feature_method=feature_method,
                  train_len=train_len,
                  test_standing=test_standing,
                  data_input=data_input,
                  )
    # NOTE(review): orphaned elif -- the opening `if subject > 0 ...`
    # branch header appears to have been lost to corruption; the code
    # from here down is syntactically broken until restored.
    elif subject <= 0 and method == 'ml':
        # NOTE(review): this comprehension is unterminated (the range(...)
        # continuation line is missing).
        subjects = [subject_pre_string+str(i).zfill(2) for i in \
        rr_func = partial(sens_rr_model,
                          window_size=window_size,
                          window_shift=window_shift,
                          lbl_str=lbl_str,
                          mdl_str=mdl_str,
                          overwrite=overwrite,
                          feature_method=feature_method,
                          train_len=train_len,
                          test_standing=test_standing,
                          data_input=data_input,
                          )
        for subject in subjects:
            rr_func(subject)
        # NOTE(review): the lines below look like a lost single-subject
        # 'dsp' branch spliced in here -- confirm original structure.
        subject = subject_pre_string+str(subject).zfill(2)
        imu_rr_dsp(subject,
                   window_size=window_size,
                   window_shift=window_shift,
                   lbl_str=lbl_str,
                   overwrite=overwrite,
                   train_len=train_len,
                   test_standing=test_standing)
    elif subject <= 0 and method == 'dsp':
        subjects = [subject_pre_string+str(i).zfill(2) for i in \
                    range(1, N_SUBJECT_MAX+1)]
        func = partial(imu_rr_dsp, window_size=window_size,
                       window_shift=window_shift,
                       lbl_str=lbl_str,
                       overwrite=overwrite,
                       train_len=train_len,
                       test_standing=test_standing)
        for subject in subjects:
            func(subject)
        if 'bvp' in data_input:
            # Missing bvp for S03, S04
            # NOTE(review): BUG -- `sbf` is undefined; the comprehension
            # almost certainly means `sbj for sbj in subjects`.
            subjects = [sbf for sbj in subjects if sbj in ['S03']]
            rr_func = partial(sens_rr_model,
                              window_size=window_size,
                              window_shift=window_shift,
                              lbl_str=lbl_str,
                              mdl_str=mdl_str,
                              overwrite=overwrite,
                              feature_method=feature_method,
                              train_len=train_len,
                              test_standing=test_standing,
                              data_input=data_input,
                              )