Commit 8f7611df authored by Raymond Chia

mvp

parent 40e01903
(Three deleted files and one collapsed diff omitted.)
@@ -18,7 +18,7 @@ import pytz
 import matplotlib.pyplot as plt
 from functools import partial
 from collections import Counter
-from itertools import repeat, chain
+from itertools import repeat, chain, combinations
 from multiprocessing import Pool, cpu_count
 import tensorflow as tf
@@ -380,9 +380,9 @@ def load_and_sync_xsens(subject):
     xsens_list = []
     # skip the first and last x minute(s)
-    minutes_to_skip = 2
-    br_skiprows = br_skipfooter = minutes_to_skip*60
-    pss_skiprows = pss_skipfooter = minutes_to_skip*60*BR_FS
+    minutes_to_skip = .5
+    br_skiprows = br_skipfooter = int(minutes_to_skip*60)
+    pss_skiprows = pss_skipfooter = int(minutes_to_skip*60*BR_FS)
     # load each bioharness file and sync the imu to it
     for pss_file, br_file in zip(pss_list, br_list):
         pss_df = load_bioharness_file(pss_file, skiprows=pss_skiprows,
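The int() casts are load-bearing here: with minutes_to_skip now fractional, the row counts become floats, and pandas expects integer skiprows/skipfooter. A minimal sketch of the failure mode, assuming load_bioharness_file forwards these arguments to pandas.read_csv (the BR_FS value below is illustrative only):

    import pandas as pd

    minutes_to_skip = .5
    BR_FS = 18  # illustrative; the real constant is defined elsewhere in the module
    trim = int(minutes_to_skip * 60 * BR_FS)  # 540.0 would be rejected, 540 is fine
    # note: skipfooter also requires the slower python parsing engine
    df = pd.read_csv('pss.csv', skiprows=trim, skipfooter=trim, engine='python')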
@@ -492,6 +492,70 @@ def get_respiration_log(subject):
     log_dfs = [pd.read_json(f) for f in log_list]
     return pd.concat(log_dfs, axis=0)
 
+def get_cal_data(event_df, xsens_df):
+    fmt = "%Y-%m-%d %H.%M.%S"
+    cal_list = []
+    cpms = []
+    start_sec = 0
+    stop_sec = 0
+    for index, row in event_df.iterrows():
+        event = row['eventTag']
+        timestamp = row['timestamp']
+        inhalePeriod = row['inhalePeriod']
+        exhalePeriod = row['exhalePeriod']
+        cpm = np.round(60/(inhalePeriod + exhalePeriod))
+        sec = timestamp.to_pydatetime().timestamp()
+        if event == 'Start':
+            start_sec = sec
+            continue
+        elif event == 'Stop':
+            stop_sec = sec
+
+            dsync = DataSynchronizer()
+            dsync.set_bounds(xsens_df['sec'].values, start_sec, stop_sec)
+            sync_df = dsync.sync_df(xsens_df.copy())
+            cal_data = {'cpm': cpm, 'data': sync_df}
+            cal_list.append(cal_data)
+
+            assert np.round(sync_df.sec.iloc[0]) == np.round(start_sec), \
+                "error with start sync"
+            assert np.round(sync_df.sec.iloc[-1]) == np.round(stop_sec), \
+                "error with stop sync"
+
+    return pd.DataFrame(cal_list)
+
+def get_test_data(cal_df, activity_df, xsens_df, test_standing):
+    fmt = "%d/%m/%Y %H:%M:%S"
+    start_time = cal_df.iloc[-1]['data'].sec.values[-1]
+    data_df = xsens_df[xsens_df.sec > start_time]
+    activity_start = 0
+    activity_stop = 0
+    activity_list = []
+    for index, row in activity_df.iterrows():
+        sec = datetime.strptime(row['Timestamps'], fmt).timestamp()
+        if not test_standing and row['Activity'] == 'standing':
+            continue
+        if row['Event'] == 'start':
+            activity_start = sec
+        elif row['Event'] == 'end':
+            activity_stop = sec
+
+            dsync = DataSynchronizer()
+            dsync.set_bounds(data_df['sec'].values, activity_start,
+                             activity_stop)
+            sync_df = dsync.sync_df(data_df.copy())
+            activity_data = {'activity': row['Activity'], 'data': sync_df}
+            activity_list.append(activity_data)
+
+    return pd.DataFrame(activity_list)
+
 # save evaluation metrics in single file that handles the models for the
 # subject and config
 class EvalHandler():
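Both new helpers return a small DataFrame with one row per segment and the synced sensor frame nested in a 'data' column. A hedged usage sketch (names come from the functions above; DataSynchronizer's exact behaviour is assumed from context, and the input frames are produced elsewhere in this module):

    cal_df = get_cal_data(event_df, xsens_df)   # columns: 'cpm', 'data'
    test_df = get_test_data(cal_df, activity_df, xsens_df, test_standing=False)
    for _, row in cal_df.iterrows():
        # each 'data' entry is the xsens slice between one Start/Stop event pair
        print(row['cpm'], row['data'].shape)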
@@ -529,7 +593,8 @@ class EvalHandler():
         index_list = eval_hist[
             (eval_hist['subject'] == self.entry['subject']) &\
             (eval_hist['config_id'] == self.entry['config_id']) &\
-            (eval_hist['mdl_str'] == self.entry['mdl_str'])\
+            (eval_hist['mdl_str'] == self.entry['mdl_str']) &\
+            (eval_hist['cpm'] == self.entry['cpm'])\
         ].index.tolist()
         if len(index_list) == 0:
             print("adding new entry")
@@ -549,11 +614,20 @@ def imu_rr_model(subject,
                  mdl_str='knn',
                  overwrite=False,
                  feature_method='tsfresh',
-                 train_len=1
+                 train_len:int=3,
+                 test_standing=False,
                  ):
     # window_size, window_shift, intra, inter
+    cal_str = 'cpm'
     fs = IMU_FS
     tmp = []
+    imu_cols = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']
+    bvp_cols = ['bvp']
+    # TODO:
+    #   implement and input args config by data cols
+    #   implement and input args config with test_standing
+    data_cols = imu_cols + bvp_cols
 
     do_minirocket = False
     use_tsfresh = False
@@ -571,6 +645,8 @@ def imu_rr_model(subject,
         'do_minirocket' : do_minirocket,
         'use_tsfresh' : use_tsfresh,
         'train_len' : train_len,
+        'data_cols' : data_cols,
+        'test_standing' : test_standing,
     }
     pfh = ProjectFileHandler(config)
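Persisting data_cols and test_standing in the config means the cached feature-set id (matched by ProjectFileHandler, whose internals are assumed from its usage here) now distinguishes runs that differ only in channel selection or in whether standing bouts are tested. Illustrative shape of the dict after this change:

    # illustrative config; ProjectFileHandler matches cached feature sets on it
    config = {
        'do_minirocket': False,
        'use_tsfresh': False,
        'train_len': 3,
        'data_cols': ['acc_x', 'acc_y', 'acc_z',
                      'gyro_x', 'gyro_y', 'gyro_z', 'bvp'],
        'test_standing': False,
    }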
@@ -586,8 +662,6 @@ def imu_rr_model(subject,
     print('Using pre-set data id: ', pfh.fset_id)
     project_dir = pfh.project_directory
-    marker = f'imu_rr_{subject}_id{pfh.fset_id}'
-
     if not use_tsfresh:
         xsens_df = load_and_sync_xsens(subject)
     else:
@@ -598,115 +672,115 @@ def imu_rr_model(subject,
                                fs=IMU_FS,
                                overwrite=overwrite_tsfresh)
 
-    large_win_size = 60*fs
-    xsens_inds = np.arange(len(xsens_df))
-    large_windows = vsw(xsens_inds, len(xsens_inds),
-                        sub_window_size=large_win_size,
-                        stride_size=large_win_size)
-    keep_inds = []
-    for i, win in enumerate(large_windows):
-        if win[-1] != 0:
-            keep_inds.append(i)
-    large_windows = large_windows[keep_inds]
-    train_inds, test_inds = train_test_split(large_windows,
-                                             train_size=train_size,
-                                             shuffle=True,
-                                             random_state=123)
-
-    train_df = pd.concat([xsens_df.iloc[win] for win in train_inds if
-                          win[-1]!=0])
-    test_df = pd.concat([xsens_df.iloc[win] for win in test_inds if win[-1]!=0])
-
-    assert np.isin(train_df.index.values, test_df.index.values).any()==False,\
-            "overlapping test and train data"
-
-    print("train")
-    print(train_df.shape)
-    print("test")
-    print(test_df.shape)
-
-    if do_minirocket:
-        cols = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']
-        x_train_df, y_train_df = get_df_windows(train_df,
-                                                imu_df_win_task,
-                                                window_size=window_size,
-                                                window_shift=window_shift,
-                                                fs=fs,
-                                                )
-        x_test_df, y_test_df = get_df_windows(test_df,
-                                              imu_df_win_task,
-                                              window_size=window_size,
-                                              window_shift=window_shift,
-                                              fs=fs,
-                                              )
-
-        x_train = make_windows_from_id(x_train_df, cols)
-        x_test = make_windows_from_id(x_test_df, cols)
-        y_train = y_train_df[lbl_str].values.reshape(-1, 1)
-        y_test = y_test_df[lbl_str].values.reshape(-1, 1)
-
-        print("minirocket transforming...")
-        x_train = np.swapaxes(x_train, 1, 2)
-        x_test = np.swapaxes(x_test, 1, 2)
-        minirocket = MiniRocketMultivariate()
-        x_train = minirocket.fit_transform(x_train)
-        x_test = minirocket.transform(x_test)
-    elif use_tsfresh:
-        x_train = train_df.iloc[:, 3:].values
-        y_train = train_df[lbl_str].values.reshape(-1, 1)
-        x_test = test_df.iloc[:, 3:].values
-        y_test = test_df[lbl_str].values.reshape(-1, 1)
-    else:
-        cols = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']
-        x_train_df, y_train_df = get_df_windows(train_df,
-                                                imu_df_win_task,
-                                                window_size=window_size,
-                                                window_shift=window_shift,
-                                                fs=fs,
-                                                )
-        x_test_df, y_test_df = get_df_windows(test_df,
-                                              imu_df_win_task,
-                                              window_size=window_size,
-                                              window_shift=window_shift,
-                                              fs=fs,
-                                              )
-        x_train = make_windows_from_id(x_train_df, cols)
-        x_test = make_windows_from_id(x_test_df, cols)
-        y_train = y_train_df[lbl_str].values.reshape(-1, 1)
-        y_test = y_test_df[lbl_str].values.reshape(-1, 1)
-
-    transforms, model = model_training(mdl_str, x_train, y_train,
-                                       marker, validation_data=None,
-                                       overwrite=overwrite,
-                                       is_regression=True,
-                                       project_directory=project_dir,
-                                       window_size=int(window_size*fs),
-                                       extra_train=200,
-                                       )
-
-    if transforms is not None:
-        x_test = transforms.transform(x_test)
-    preds = model.predict(x_test)
-
-    eval_handle = EvalHandler(y_test.flatten(), preds.flatten(), subject,
-                              pfh, mdl_str, overwrite=overwrite)
-    eval_handle.update_eval_history()
-    eval_handle.save_eval_history()
-
-    pp = PrettyPrinter()
-    pp.pprint(eval_handle.load_eval_history())
-
-    fig, ax = plt.subplots()
-    ax.plot(y_test)
-    ax.plot(preds)
-    ax.set_title(' '.join([mdl_str, subject]))
-    ax.legend([lbl_str, 'pred'])
-    fig_dir = join(project_dir, 'figures',)
-    if not exists(fig_dir): mkdir(fig_dir)
-    fig.savefig(join(fig_dir, mdl_str))
+    activity_df = get_activity_log(subject)
+    event_df = get_respiration_log(subject)
+
+    cal_df = get_cal_data(event_df, xsens_df)
+
+    # include standing or not
+    test_df_tmp = get_test_data(cal_df, activity_df, xsens_df, test_standing)
+    test_df = pd.concat([df for df in test_df_tmp['data']], axis=0)
+
+    for combi in combinations(cal_df[cal_str].values, train_len):
+        combi_str = "-".join([str(x) for x in combi])
+        pfh.config[cal_str] = combi_str
+        marker = f'imu_rr_{subject}_id{pfh.fset_id}_combi{combi_str}'
+        print(marker)
+
+        train_df = pd.concat(
+            [cal_df[cal_df[cal_str] == cpm]['data'].iloc[0] for cpm in combi],
+            axis=0
+        )
+
+        assert np.isin(train_df.index.values, test_df.index.values).any()==False,\
+                "overlapping test and train data"
+
+        print("train")
+        print(train_df.shape)
+        print("test")
+        print(test_df.shape)
+
+        if do_minirocket:
+            x_train_df, y_train_df = get_df_windows(train_df,
+                                                    imu_df_win_task,
+                                                    window_size=window_size,
+                                                    window_shift=window_shift,
+                                                    fs=fs,
+                                                    )
+            x_test_df, y_test_df = get_df_windows(test_df,
+                                                  imu_df_win_task,
+                                                  window_size=window_size,
+                                                  window_shift=window_shift,
+                                                  fs=fs,
+                                                  )
+
+            x_train = make_windows_from_id(x_train_df, imu_cols)
+            x_test = make_windows_from_id(x_test_df, imu_cols)
+            y_train = y_train_df[lbl_str].values.reshape(-1, 1)
+            y_test = y_test_df[lbl_str].values.reshape(-1, 1)
+
+            print("minirocket transforming...")
+            x_train = np.swapaxes(x_train, 1, 2)
+            x_test = np.swapaxes(x_test, 1, 2)
+            minirocket = MiniRocketMultivariate()
+            x_train = minirocket.fit_transform(x_train)
+            x_test = minirocket.transform(x_test)
+        elif use_tsfresh:
+            x_train = train_df.iloc[:, 3:].values
+            y_train = train_df[lbl_str].values.reshape(-1, 1)
+            x_test = test_df.iloc[:, 3:].values
+            y_test = test_df[lbl_str].values.reshape(-1, 1)
+        else:
+            x_train_df, y_train_df = get_df_windows(train_df,
+                                                    imu_df_win_task,
+                                                    window_size=window_size,
+                                                    window_shift=window_shift,
+                                                    fs=fs,
+                                                    )
+            x_test_df, y_test_df = get_df_windows(test_df,
+                                                  imu_df_win_task,
+                                                  window_size=window_size,
+                                                  window_shift=window_shift,
+                                                  fs=fs,
+                                                  )
+            x_train = make_windows_from_id(x_train_df, imu_cols)
+            x_test = make_windows_from_id(x_test_df, imu_cols)
+            y_train = y_train_df[lbl_str].values.reshape(-1, 1)
+            y_test = y_test_df[lbl_str].values.reshape(-1, 1)
+
+        transforms, model = model_training(mdl_str, x_train, y_train,
+                                           marker, validation_data=None,
+                                           overwrite=overwrite,
+                                           is_regression=True,
+                                           project_directory=project_dir,
+                                           window_size=int(window_size*fs),
+                                           extra_train=200,
+                                           )
+
+        if transforms is not None:
+            x_test = transforms.transform(x_test)
+        preds = model.predict(x_test)
+
+        eval_handle = EvalHandler(y_test.flatten(), preds.flatten(), subject,
+                                  pfh, mdl_str, overwrite=overwrite)
+        eval_handle.update_eval_history()
+        eval_handle.save_eval_history()
+
+        pp = PrettyPrinter()
+        pp.pprint(eval_handle.load_eval_history())
+
+        fig, ax = plt.subplots()
+        fig_title = '_'.join([mdl_str, subject]+[combi_str])
+        ax.plot(y_test)
+        ax.plot(preds)
+        ax.set_title(fig_title)
+        ax.legend([lbl_str, 'pred'])
+        fig_dir = join(project_dir, 'figures')
+        if not exists(fig_dir): mkdir(fig_dir)
+        fig.savefig(join(fig_dir, fig_title+".png"))
+        plt.close()
 
 def arg_parser():
     parser = argparse.ArgumentParser()
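The rewritten training loop above replaces the old random 60 s window split with an exhaustive sweep: one model per train_len-sized combination of calibration breathing rates, always evaluated on the post-calibration activity data. A self-contained sketch of the enumeration (rates are illustrative; the real values come from cal_df['cpm']):

    from itertools import combinations

    rates = [5, 10, 15, 20]  # hypothetical calibrated breaths-per-minute values
    for combi in combinations(rates, 3):
        print("-".join(str(x) for x in combi))
    # 5-10-15, 5-10-20, 5-15-20, 10-15-20 -> one trained model per line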
@@ -717,7 +791,7 @@ def arg_parser():
                                  'elastic'],
                         )
     parser.add_argument("-s", '--subject', type=int,
-                        default=1,
+                        default=2,
                         choices=list(range(1,3))+[-1],
                         )
     parser.add_argument("-f", '--feature_method', type=str,
@@ -734,10 +808,10 @@ def arg_parser():
                         default=0.2,
                         )
     parser.add_argument('-l', '--lbl_str', type=str,
-                        default='br',
+                        default='pss',
                         )
     parser.add_argument('-tl', '--train_len', type=int,
-                        default=1,
+                        default=3,
                         help='minutes of data to use for calibration'
                         )
     args = parser.parse_args()
@@ -748,13 +822,6 @@ if __name__ == '__main__':
     # '[!M]*'
     np.random.seed(100)
     n_subject_max = 2
-    xsens_df = load_and_sync_xsens('Pilot02')
-    activity_df = get_activity_log('Pilot02')
-    event_df = get_respiration_log('Pilot02')
-    ipdb.set_trace()
-
     args = arg_parser()
     mdl_str = args.model
...