"""Compare start/end times of an IMU recording with harness CSV exports.

Run as a script, this locates one IMU recording (``imudata.gz`` plus its
``recording.g3`` header) and the ``*_Accel.csv``, ``*_Breathing.csv`` and
``*_SummaryEnhanced.csv`` harness files under a data directory, then prints
the first and last timestamp of each stream side by side.
"""
import glob
from datetime import datetime, timedelta
from multiprocessing import Pool
from os.path import join, splitext

import ipdb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytz

from modules.datapipeline import datetime_to_ms

DEFAULT_DATA_DIR = '/data/rqchia/aria_walk/Data/test-rest'

# Layout of the ``Time`` column in the harness CSV files.
HARNESS_TIME_FMT = "%d/%m/%Y %H:%M:%S.%f"


def get_flist(f_glob):
    """Return the sorted paths matching ``f_glob`` (``**`` recurses)."""
    return sorted(glob.glob(f_glob, recursive=True))


def load_only_imu(f):
    """Load a JSON-lines IMU file and keep only the accelerometer rows.

    Each record's ``data`` dict is expanded into columns; rows whose
    ``accelerometer`` entry is missing are dropped.

    Args:
        f: path to the gzip-compressed JSON-lines file.

    Returns:
        A DataFrame of the rows carrying accelerometer data, or ``None`` when
        the file holds no records.
    """
    try:
        df = pd.read_json(f, lines=True, compression='gzip')
    except EOFError:
        # The gzip stream ended early: retry with the uncompressed file of the
        # same name minus its extension.
        df = pd.read_json(splitext(f)[0], lines=True)
    if df.empty:
        return None

    data_df = pd.DataFrame(df['data'].tolist())
    df = pd.concat([df.drop('data', axis=1), data_df], axis=1)
    return df.loc[df['accelerometer'].notna()]


def get_mean_fs(df, show=True):
    """Estimate the mean sampling rate from ``df['timestamp']``.

    Args:
        df: DataFrame with a numeric ``timestamp`` column.
        show: when true (the default), print the largest and smallest
            interval and plot all intervals; ``plt.show()`` blocks until the
            window is closed, so pass ``False`` for batch use.

    Returns:
        ``1 / mean(interval)`` -- Hz when the timestamps are in seconds.

    Raises:
        ValueError: if there are fewer than two timestamps.
    """
    time = df['timestamp'].values
    if len(time) < 2:
        raise ValueError(
            "need at least two timestamps to estimate a sampling rate")

    diff = np.diff(time)
    fs = 1 / np.mean(diff)
    if show:
        print(diff.max())
        print(diff.min())
        plt.plot(diff)
        plt.show()
    return fs


def fname_fs(f):
    """Return ``(f, mean sampling rate)``, or ``None`` if ``f`` has no data."""
    imu_df = load_only_imu(f)
    if imu_df is None:
        return None
    return f, get_mean_fs(imu_df)


def _created_to_local(created, tz_name):
    """Convert the header's ``created`` stamp to naive local wall-clock time.

    ``created`` is treated as UTC unless it carries its own offset.  The
    result is naive so it can be compared with the naive datetimes parsed
    from the harness CSV files.
    """
    # datetime.fromisoformat() rejects a trailing 'Z' before Python 3.11.
    iso = created[:-1] if created.endswith('Z') else created
    stamp = datetime.fromisoformat(iso)
    if stamp.tzinfo is None:
        stamp = pytz.utc.localize(stamp)
    return stamp.astimezone(pytz.timezone(tz_name)).replace(tzinfo=None)


def imu_start_end_time(hdr_fname, data_fname, fs=120):
    """Return the local datetimes of the first and last IMU sample.

    The recording start is read from the header's ``created`` field and moved
    into the header's ``timezone`` (previously a fixed +11 h was applied and
    the header timezone was ignored).  Sample timestamps are added to that
    start as seconds.  Also prints the last sample time next to an estimate
    derived from the sample count and ``fs``.

    Args:
        hdr_fname: path to the JSON header file.
        data_fname: path to the gzip-compressed JSON-lines IMU file.
        fs: nominal IMU sampling rate in Hz used for the printed estimate.

    Returns:
        ``(first_sample_time, last_sample_time)`` as naive datetimes.

    Raises:
        ValueError: if the data file contains no accelerometer samples.
    """
    imu_hdr = pd.read_json(hdr_fname, orient='index')
    imu_hdr = imu_hdr.to_dict().pop(0)

    imu_df = load_only_imu(data_fname)
    if imu_df is None or imu_df.empty:
        raise ValueError(f"no IMU samples found in {data_fname}")

    start_time = _created_to_local(imu_hdr['created'], imu_hdr['timezone'])

    # Only the two end points are needed; float() keeps timedelta happy with
    # numpy scalar types.
    imu_times = imu_df['timestamp'].values
    first = start_time + timedelta(seconds=float(imu_times[0]))
    last = start_time + timedelta(seconds=float(imu_times[-1]))

    est_end = first + timedelta(seconds=len(imu_times) / fs)
    print("endtime: {0}\testimate: {1}".format(last, est_end))
    return first, last


def str_to_datetime(time_in):
    """Parse a ``DD/MM/YYYY HH:MM:SS.ffffff`` string into a naive datetime."""
    return datetime.strptime(time_in, HARNESS_TIME_FMT)


def harness_start_end_time(fname, fs=100):
    """Return the first and last ``Time`` value of a harness CSV file.

    Also prints the last time next to an estimate derived from the row count
    and ``fs``.

    Args:
        fname: path to a CSV file with a ``Time`` column.
        fs: nominal sampling rate in Hz used for the printed estimate.

    Returns:
        ``(t0, t1)`` as naive datetimes.
    """
    df = pd.read_csv(fname)
    t0 = str_to_datetime(df['Time'].iloc[0])
    t1 = str_to_datetime(df['Time'].iloc[-1])

    est_end = t0 + timedelta(seconds=len(df) / fs)
    print("endtime: {0}\testimate: {1}".format(t1, est_end))
    return t0, t1


def _first_match(pattern):
    """Return the first (sorted, recursive) match for ``pattern``.

    Raises:
        FileNotFoundError: if nothing matches.
    """
    matches = get_flist(pattern)
    if not matches:
        raise FileNotFoundError(f"no file matches {pattern}")
    return matches[0]


def main(data_dir=DEFAULT_DATA_DIR):
    """Print the start/end times of the IMU and harness streams."""
    data_fname = _first_match(join(data_dir, '**', 'imudata.gz'))
    hdr_fname = _first_match(join(data_dir, '**', 'recording.g3'))
    t0, t1 = imu_start_end_time(hdr_fname, data_fname)

    a_fname = _first_match(join(data_dir, '**', '*_Accel.csv'))
    a0, a1 = harness_start_end_time(a_fname, fs=100)

    b_fname = _first_match(join(data_dir, '**', '*_Breathing.csv'))
    b0, b1 = harness_start_end_time(b_fname, fs=25)

    s_fname = _first_match(join(data_dir, '**', '*_SummaryEnhanced.csv'))
    s0, s1 = harness_start_end_time(s_fname, fs=1)

    print(f"imu {t0}\t{t1}\n"
          f"acc {a0}\t{a1}\n"
          f"bre {b0}\t{b1}\n"
          f"sum {s0}\t{s1}\n")

    # Disabled experiment: mean sampling rate of every IMU file.
    # flist = get_flist(f_glob)
    # tmp = []
    # tmp = fname_fs(flist[0])
    # # with Pool(10) as p:
    # #     tmp = p.map(fname_fs, flist)
    # df = pd.DataFrame(tmp, columns=['fname', 'fs'])
    # df.dropna(inplace=True)
    # mask = df['fs'] > 10
    # df = df[mask]
    # print(df)


if __name__ == '__main__':
    main()