Commit cf1ea305 authored by Raymond Chia

minor edit

parent ad1cf087
@@ -254,7 +254,7 @@ def acc_signal_processing(data, fs:int=100):
 def imu_signal_processing(data, fs:int=IMU_FS):
     bp = butter_bandpass_filter(data,
                                 3/60,
-                                70/60, fs=fs, order=2)
+                                45/60, fs=fs, order=2)
     ma = movingaverage(bp, 8, axis=0)
     return ma
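
Note on the hunk above: the upper band-pass cut-off drops from 70/60 Hz to 45/60 Hz, so the IMU signal is now restricted to roughly 3–45 cycles per minute, a plausible respiration band. A minimal sketch of such a band-pass stage, assuming butter_bandpass_filter wraps a zero-phase SciPy Butterworth filter (the repository's actual helper may differ):

    import numpy as np
    from scipy.signal import butter, filtfilt

    def butter_bandpass_filter(data, lowcut, highcut, fs=100, order=2):
        # Design a Butterworth band-pass and apply it forwards and backwards
        # (zero phase) along the time axis.
        nyq = 0.5 * fs
        b, a = butter(order, [lowcut / nyq, highcut / nyq], btype='band')
        return filtfilt(b, a, data, axis=0)

    sig = np.random.randn(3000, 3)   # 30 s of 3-axis IMU samples at 100 Hz
    bp = butter_bandpass_filter(sig, 3/60, 45/60, fs=100, order=2)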
@@ -332,7 +332,7 @@ def hernandez_sp(data=None, fs:int=IMU_FS):
     return accel, bp
 
-def pressure_signal_processing(pressure_data, fs=BR_FS):
+def pressure_signal_processing(pressure_data, fs=BR_FS, movmean_win=32):
     ''' Run pressure signal through the following steps:
     * Moving average with 8 samples
     * Standard scaler
@@ -341,10 +341,10 @@ def pressure_signal_processing(pressure_data, fs=BR_FS):
     # Normalize
     data_sd = std_scaler(pressure_data)
-    data_ma = movingaverage(data_sd, 16)
+    data_ma = movingaverage(data_sd, movmean_win)
 
     # bandpass filter the lbls
-    bp_data = butter_bandpass_filter(data_ma, 4/60, 70/60, fs=fs, order=5)
+    bp_data = butter_bandpass_filter(data_ma, 4/60, 45/60, fs=fs, order=5)
     return bp_data
 
 def vectorized_slide_win(array, max_time, sub_window_size=3,
@@ -362,13 +362,17 @@ def vectorized_slide_win(array, max_time, sub_window_size=3,
         np.expand_dims(np.arange(sub_window_size), 0) +
         np.expand_dims(np.arange(max_time, step=stride_size), 0).T
     )
+    i = [i for i, win in enumerate(sub_windows) if max_time in win][0]
+    """
     pad_len = int(np.max(sub_windows) - max_time + 1)
     # pad right side data array with zero
     arr = np.pad(array,(0, pad_len), 'constant',
                  constant_values=0)
     return arr[sub_windows]
+    """
+    return array[sub_windows[:i]]
 
 def generate_noisy_sine_windows(sig=None,
                                 window_size=WINDOW_SIZE*FS_RESAMPLE,
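
Note on the hunk above: instead of zero-padding the signal so the last windows fit, the new code finds the first window that would reach max_time and keeps only the windows before it, so every returned window holds real samples. A self-contained sketch of the same NumPy broadcasting idea (function and variable names here are illustrative, not the project's):

    import numpy as np

    def sliding_windows(array, sub_window_size=3, stride_size=1):
        # Broadcast window offsets (row) against window start indices (column)
        # to build a (n_windows, sub_window_size) index matrix in one shot.
        max_time = len(array)
        starts = np.arange(0, max_time, stride_size)[:, None]
        idx = starts + np.arange(sub_window_size)[None, :]
        # Keep only the windows that fit entirely inside the signal.
        idx = idx[idx[:, -1] < max_time]
        return array[idx]

    print(sliding_windows(np.arange(10), sub_window_size=4, stride_size=2))
    # [[0 1 2 3]
    #  [2 3 4 5]
    #  [4 5 6 7]
    #  [6 7 8 9]]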
@@ -376,10 +376,10 @@ def get_df_windows(df, func, window_size=15, window_shift=0.2, fs=IMU_FS,
     args = zip(wins.tolist(), repeat(df, N), i_list, [cols]*N)
     out_data = []
-    # with Pool(cpu_count()) as p:
-    #     out_data = p.starmap(func, args)
-    for i, win in enumerate(wins):
-        out_data.append(func(win, df, i, cols))
+    with Pool(cpu_count()) as p:
+        out_data = p.starmap(func, args)
+    # for i, win in enumerate(wins):
+    #     out_data.append(func(win, df, i, cols))
     x, y = [], []
     for out in out_data:
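
Note on the hunk above: the per-window loop is replaced by a process pool, with each worker receiving one (window, dataframe, index, cols) tuple via starmap. A minimal sketch of that pattern with a toy worker standing in for the project's window function (names here are illustrative):

    from itertools import repeat
    from multiprocessing import Pool, cpu_count

    import numpy as np

    def window_stats(win, data, i, cols):
        # Toy per-window task: return the window index and the window mean.
        return i, float(np.mean(data[win]))

    if __name__ == '__main__':
        data = np.random.randn(1000)
        wins = [np.arange(s, s + 100) for s in range(0, 900, 50)]
        args = zip(wins, repeat(data), range(len(wins)), repeat(None))
        with Pool(cpu_count()) as p:
            out = p.starmap(window_stats, args)
        print(out[:3])

One caveat with this kind of change: each task's arguments are pickled to the worker processes, so the speed-up comes at the cost of extra memory and serialization overhead when the dataframe is large.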
@@ -10,6 +10,7 @@ import pickle
 import sys
 import time
 from zipfile import ZipFile
+from joblib import dump, load
 import argparse
 from datetime import datetime, timedelta, timezone, timedelta
@@ -265,12 +266,16 @@ def load_imu_files(f_list:list):
     """
     data, hdr = [], []
     tmp = []
-    for f in f_list:
-        tmp.append(load_imu_file(f))
-    for l in tmp:
-        data.append(l[0])
-        hdr.append(l[1])
+    with Pool(int(cpu_count()//1.5)) as p:
+        tmp = p.map(load_imu_file, f_list)
+    data, hdr = zip(*tmp)
+    # for f in f_list:
+    #     tmp.append(load_imu_file(f))
+    # for l in tmp:
+    #     data.append(l[0])
+    #     hdr.append(l[1])
     data_df = pd.concat(data, axis=0)
+    data_df.sort_values(by='sec', inplace=True)
     return data_df, hdr
 
 def load_e4_file(e4_file:str):
@@ -392,7 +397,7 @@ def df_win_task(w_inds, df, i, cols):
     pandas.DataFrame, pandas.DataFrame
     """
     time = df['sec'].values
-    if w_inds[-1] == 0: return
+    # if w_inds[-1] == 0: return
     w_df = df.iloc[w_inds]
     t0, t1 = time[w_inds][0], time[w_inds][-1]
     diff = time[w_inds[1:]] - time[w_inds[0:-1]]
@@ -429,9 +434,9 @@ def df_win_task(w_inds, df, i, cols):
     x_vec_time = np.median(time[w_inds])
-    fs = 1/np.mean(diff)
+    # fs = 1/np.mean(diff)
     ps_out = pressure_signal_processing(ps_out, fs=fs)
-    ps_freq = int(get_max_frequency(ps_out, fs=fs))
+    ps_freq = get_max_frequency(ps_out, fs=fs)
     y_tmp = np.array([x_vec_time, np.nanmedian(sm_out), ps_freq])
 
     x_out['sec'] = x_vec_time
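
Note on the hunk above: get_max_frequency now returns the dominant frequency as a float instead of truncating it to an int, which preserves sub-cycle-per-minute resolution in the breathing-rate label. One common way to compute a dominant frequency, sketched with a plain FFT magnitude spectrum (the project's own implementation may differ, e.g. it could use Welch's method):

    import numpy as np

    def dominant_frequency(x, fs):
        # Remove the DC offset, take the one-sided magnitude spectrum and
        # return the frequency (Hz) of its largest peak.
        x = np.asarray(x, dtype=float) - np.mean(x)
        spectrum = np.abs(np.fft.rfft(x))
        freqs = np.fft.rfftfreq(len(x), d=1.0/fs)
        return freqs[np.argmax(spectrum)]

    fs = 100
    t = np.arange(0, 30, 1/fs)
    breath = np.sin(2*np.pi*0.3*t)            # 0.3 Hz = 18 breaths per minute
    print(dominant_frequency(breath, fs))     # ~0.3; times 60 gives ~18 cpm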
@@ -711,7 +716,7 @@ def get_respiration_log(subject):
     """
     log_list = get_file_list('*.json', sbj=subject)
-    log_dfs = [pd.read_json(f) for f in log_list]
+    log_dfs = [pd.read_json(f, convert_dates=False) for f in log_list]
     return pd.concat(log_dfs, axis=0)
 
 def get_cal_data(event_df, xsens_df):
@@ -731,7 +736,7 @@ def get_cal_data(event_df, xsens_df):
     pd.DataFrame
     """
-    fmt ="%Y-%m-%d %H.%M.%S"
+    fmt ="%d/%m/%Y %I:%M:%S %p"
     cal_list = []
     cpms = []
     start_sec = 0
@@ -744,7 +749,7 @@ def get_cal_data(event_df, xsens_df):
         cpm = np.round( 60/(inhalePeriod + exhalePeriod) )
-        sec = timestamp.to_pydatetime().timestamp()
+        sec = datetime.strptime(str(timestamp), fmt).timestamp()
         if event == 'Start':
             start_sec = sec
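
Note on the two hunks above: the calibration timestamps are now parsed with datetime.strptime against a day-first, 12-hour format string instead of relying on pandas' automatic conversion (likely why pd.read_json now passes convert_dates=False, leaving the logged timestamps as plain strings). A quick, self-contained check of that format string (the sample timestamp below is made up for illustration):

    from datetime import datetime

    fmt = "%d/%m/%Y %I:%M:%S %p"   # day-first date, 12-hour clock with AM/PM
    ts = datetime.strptime("05/07/2023 02:15:30 PM", fmt)
    print(ts)                      # 2023-07-05 14:15:30
    print(ts.timestamp())          # POSIX seconds, as stored in 'sec'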
@@ -849,6 +854,10 @@ def dsp_win_func(w_inds, df, i, cols):
     data = w_df[cols].values
+    fs_est = 1/np.median(diff)
+    if fs_est > 70 and ('acc_x' in cols or 'gyro_x' in cols): fs = IMU_FS
+    elif fs_est < 70 and 'bvp' in cols: fs = PPG_FS
     if reject_artefact((data-np.mean(data,axis=0))/np.std(data,axis=0)):
         return
@@ -872,10 +881,10 @@ def dsp_win_func(w_inds, df, i, cols):
     x_vec_time = np.median(time[w_inds])
-    fs = 1/np.mean(diff)
+    # fs = 1/np.mean(diff)
     ps_out = pressure_signal_processing(ps_out, fs=fs)
-    ps_freq = int(get_max_frequency(ps_out, fs=IMU_FS))
+    ps_freq = get_max_frequency(ps_out, fs=IMU_FS)
     y_tmp = np.array([x_vec_time, np.nanmedian(sm_out), ps_freq])
@@ -1052,8 +1061,6 @@ def imu_rr_dsp(subject,
                train_len:int=3,
                test_standing=False,
                ):
-    # TODO:
-    # implement evaluation saving
     """Loads, preprocesses, and performs Hernandez digital signal processing
     pipeline on the selected subject. Uses the specified parameters. Runs on
     both accelerometer and gyroscope.
@@ -1154,9 +1161,11 @@ def imu_rr_dsp(subject,
     pp.pprint(acc_eval.load_eval_history())
     fig, ax = plt.subplots(2, 1)
-    ax[0].plot(acc_y_dsp_df[lbl_str]); plt.plot(acc_dsp_df['pred'])
+    ax[0].plot(acc_y_dsp_df[lbl_str].values);
+    ax[0].plot(acc_dsp_df['pred'].values)
     ax[0].set_title("ACC")
-    ax[1].plot(gyr_y_dsp_df[lbl_str]); plt.plot(gyr_dsp_df['pred'])
+    ax[1].plot(gyr_y_dsp_df[lbl_str].values);
+    ax[1].plot(gyr_dsp_df['pred'].values)
     ax[1].set_title("GYRO")
     ax[1].legend([lbl_str, 'estimate'])
     fig_dir = join(project_dir, 'figures')
@@ -1232,6 +1241,7 @@ def sens_rr_model(subject,
     use_tsfresh = False
     overwrite_tsfresh = overwrite
     train_size = int(train_len)
+    minirocket = None
     if feature_method == 'tsfresh':
         use_tsfresh = True
@@ -1262,7 +1272,7 @@ def sens_rr_model(subject,
     project_dir = pfh.project_directory
     xsens_df = load_and_sync_xsens(subject, sens_list=sens_list)
-    activity_df = get_activity_log(subject)
+    activity_df = get_activity_log(subject).reset_index(drop=True)
     event_df = get_respiration_log(subject)
     cal_df = get_cal_data(event_df, xsens_df)
@@ -1339,12 +1349,29 @@ def sens_rr_model(subject,
         # x_train = y_train_df['hr_est'].values.reshape(-1, 1)
         # x_test = y_test_df['hr_est'].values.reshape(-1, 1)
+        print("minirocket transforming...")
         x_train = np.swapaxes(x_train, 1, 2)
         x_test = np.swapaxes(x_test, 1, 2)
-        minirocket = MiniRocketMultivariate()
-        x_train = minirocket.fit_transform(x_train)
-        x_test = minirocket.transform(x_test)
+        directory = join(project_dir, '_'.join([mdl_str, marker]))
+        minirocket_fname = join(directory, "minirocket.joblib")
+        if not overwrite and exists(minirocket_fname):
+            with open(minirocket_fname, 'rb') as mfile:
+                minirocket = load(mfile)
+            x_train = minirocket.transform(x_train)
+            print("loaded minirocket...")
+        else:
+            minirocket = MiniRocketMultivariate()
+            x_train = minirocket.fit_transform(x_train)
+            x_test = minirocket.transform(x_test)
+        if overwrite or not exists(minirocket_fname):
+            if not exists(directory): makedirs(directory)
+            with open(minirocket_fname, 'wb') as mfile:
+                dump(minirocket, mfile)
+            print("saved new minirocket...")
     elif use_tsfresh:
         y_cols = ['sec', 'br', 'pss', 'cpm']
         x_cols = [col for col in train_df.columns.values if col not in y_cols]
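
Note on the hunk above: the fitted MiniRocket transform is now persisted with joblib so later runs can reload it instead of refitting. A condensed sketch of that fit-or-load pattern, assuming the sktime MiniRocketMultivariate transformer and input windows shaped (n_windows, n_channels, n_timepoints); the directory and file names here are illustrative:

    from os import makedirs
    from os.path import exists, join

    import numpy as np
    from joblib import dump, load
    from sktime.transformations.panel.rocket import MiniRocketMultivariate

    def fit_or_load_minirocket(x_train, x_test, directory, overwrite=False):
        fname = join(directory, "minirocket.joblib")
        if not overwrite and exists(fname):
            minirocket = load(fname)      # reuse previously fitted kernels
        else:
            minirocket = MiniRocketMultivariate()
            minirocket.fit(x_train)
            if not exists(directory):
                makedirs(directory)
            dump(minirocket, fname)       # cache the fitted transform
        # Apply the same fitted kernels to both splits.
        return minirocket.transform(x_train), minirocket.transform(x_test)

    x_train = np.random.randn(32, 6, 1200)   # e.g. 12 s windows, 6 IMU channels, 100 Hz
    x_test = np.random.randn(8, 6, 1200)
    xt_train, xt_test = fit_or_load_minirocket(x_train, x_test, "cache_dir")

Transforming the test split with the same fitted object keeps the train and test feature spaces consistent.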
@@ -1398,9 +1425,11 @@ def sens_rr_model(subject,
     if lbl_str == 'pss':
         br = y_test_df['br'].values
-        ax[1].plot(movingaverage(y_test, 12), color='tab:blue')
+        ax[1].plot(movingaverage(y_test, 12),
+                   color='tab:blue')
         ax[1].plot(br, 'k')
-        ax[1].plot(movingaverage(preds, 12), color='tab:orange')
+        ax[1].plot(movingaverage(preds, 12),
+                   color='tab:orange')
         ax[1].legend([lbl_str, 'br', 'pred'])
     else:
         ax[1].plot(y_test, 'k')
@@ -1409,8 +1438,100 @@ def sens_rr_model(subject,
     ax[1].set_title('smoothened')
     fig_dir = join(project_dir, 'figures')
     if not exists(fig_dir): mkdir(fig_dir)
+    if subject == 'S02':
+        sec = y_test_df['sec'].values
+        activity_df2 = get_activity_log(subject+'-repeat')
+        event_df2 = get_respiration_log(subject+'-repeat')
+        second_cal_df = get_cal_data(event_df2, xsens_df)\
+                .reset_index(drop=True)
+
+        fmt = "%d/%m/%Y %H:%M:%S"
+        activity_sec0 = datetime.strptime(
+            activity_df2['Timestamps'].iloc[0], fmt).timestamp()
+        activity_sec1 = datetime.strptime(
+            activity_df2['Timestamps'].iloc[-1], fmt).timestamp()
+        ind0 = np.argmin(np.abs(sec - activity_sec0))
+        ind1 = np.argmin(np.abs(sec - activity_sec1))
+        ax[0].axvline(ind0, c='r', linestyle='--')
+        ax[0].axvline(ind1, c='r', linestyle='--')
+        ax[1].axvline(ind0, c='r', linestyle='--')
+        ax[1].axvline(ind1, c='r', linestyle='--')
+
+        fig2, ax2 = plt.subplots(2, 1, sharex=True)
+        cal_sec0 = second_cal_df.iloc[-7]['data']['sec'].iloc[0]
+        cal_sec1 = second_cal_df.iloc[-1]['data']['sec'].iloc[-1]
+        calind0 = np.argmin(np.abs(sec - cal_sec0))
+        calind1 = np.argmin(np.abs(sec - cal_sec1))
+        ax2[0].plot(sec[calind0:], y_test[calind0:])
+        ax2[0].plot(sec[calind0:], preds[calind0:])
+        ax2[0].set_title('raw')
+        ax2[1].plot(sec[calind0:],
+                    movingaverage(y_test, 12)[calind0:],
+                    color='tab:blue')
+        ax2[1].plot(sec[calind0:], br[calind0:], 'k')
+        ax2[1].plot(sec[calind0:],
+                    movingaverage(preds, 12)[calind0:],
+                    color='tab:orange')
+        ax2[1].legend([lbl_str, 'br', 'pred'])
+
+        ll = len(second_cal_df)
+        for i in range(ll-7, ll):
+            cal_sec0 = second_cal_df.iloc[i]['data']['sec'].iloc[0]
+            cal_sec1 = second_cal_df.iloc[i]['data']['sec'].iloc[-1]
+            calind0 = np.argmin(np.abs(sec - cal_sec0))
+            calind1 = np.argmin(np.abs(sec - cal_sec1))
+            ax2[0].axvline(sec[calind0], c='g', linestyle='--')
+            ax2[0].axvline(sec[calind1], c='g', linestyle='--')
+            ax2[1].axvline(sec[calind0], c='g', linestyle='--')
+            ax2[1].axvline(sec[calind1], c='g', linestyle='--')
+
+        def pressure_ax_plot(func, win_size=12, win_shift=0.2, fs=120):
+            dt = test_df.sec
+            t = dt.map(lambda x: datetime.fromtimestamp(x))
+            pss_sig = test_df.PSS.values
+            processed_wins = vsw(pss_sig, len(pss_sig),
+                                 sub_window_size=int(win_size*fs),
+                                 stride_size=int(win_size*win_shift*fs))
+            processed_wins = map(func, processed_wins)
+            ps_freq = [get_max_frequency(ps_out, fs=fs) for ps_out in
                       processed_wins]
+            ft = vsw(dt.values, len(dt),
+                     sub_window_size=int(win_size*fs),
+                     stride_size=int(win_size*win_shift*fs))
+            fig, axs = plt.subplots(3, 1, sharex=True)
+            # axs[0].get_shared_x_axes().join(axs[0], axs[1])
+            axs[0].plot(t, pss_sig)
+            axs[1].plot(t, func(pss_sig))
+            mtime = map(lambda x: datetime.fromtimestamp(x),
+                        ft.mean(axis=1))
+            freqdt = np.array([t for t in mtime])
+            tt = y_test_df.sec.map(lambda x:
+                                   datetime.fromtimestamp(x)).values
+            axs[2].plot(freqdt, ps_freq)
+            axs[2].plot(tt, y_test_df.pss.values)
+            # fig.savefig(join(fig_dir, fig_title+"cal_check.png"))
+
+        fig2.savefig(join(fig_dir, fig_title+"cal_check.png"))
+        # func = partial(pressure_signal_processing, fs=fs)
+        # pressure_ax_plot(func)
     fig.savefig(join(fig_dir, fig_title+".png"))
-    plt.close()
+    plt.close('all')
 
 def arg_parser():
     """Returns arguments in a Namespace to configure the subject specific model
@@ -1459,7 +1580,7 @@ def arg_parser():
     parser.add_argument('--method', type=str,
                         default='ml',
                         help="choose between 'ml' or 'dsp' methods for"\
-                        " regression",
+                        "regression",
                         choices=['ml', 'dsp']
                        )
     args = parser.parse_args()
@@ -1484,6 +1605,7 @@ if __name__ == '__main__':
     print(args)
     assert train_len>0,"--train_len must be an integer greater than 0"
+    assert train_len <= 7,"--train_len must be an integer less than 8"
 
     subject_pre_string = 'S' # Pilot / S
@@ -1528,9 +1650,20 @@ if __name__ == '__main__':
                          overwrite=overwrite,
                          train_len=train_len,
                          test_standing=test_standing)
+    elif subject <= 0 and method == 'dsp':
+        subjects = [subject_pre_string+str(i).zfill(2) for i in \
+                    range(1, N_SUBJECT_MAX+1)]
+        func = partial(imu_rr_dsp, window_size=window_size,
+                       window_shift=window_shift,
+                       lbl_str=lbl_str,
+                       overwrite=overwrite,
+                       train_len=train_len,
+                       test_standing=test_standing)
+        for subject in subjects:
+            func(subject)
     elif subject <= 0 and method == 'ml':
         subjects = [subject_pre_string+str(i).zfill(2) for i in \
-                    range(1, n_subject_max+1)]
+                    range(1, N_SUBJECT_MAX+1)]
         rr_func = partial(sens_rr_model,
                           window_size=window_size,