From 1b5a2fc24786720b21f9604df2c4de178bdaa4a1 Mon Sep 17 00:00:00 2001 From: Raymond Chia <rqchia@janus0.ihpc.uts.edu.au> Date: Sun, 26 Nov 2023 01:24:41 +1100 Subject: [PATCH] tsfresh tested --- create_features.py | 14 +- logs/singularity_2776358.out | 18 ++ logs/singularity_2776834.out | 42 +++ logs/singularity_2810990.out | 7 + .../__pycache__/datapipeline.cpython-38.pyc | Bin 25878 -> 16921 bytes modules/datapipeline.py | 273 ------------------ regress_rr.py | 101 ++++--- 7 files changed, 135 insertions(+), 320 deletions(-) create mode 100644 logs/singularity_2776358.out create mode 100644 logs/singularity_2776834.out create mode 100644 logs/singularity_2810990.out diff --git a/create_features.py b/create_features.py index b180232..bcf8f46 100644 --- a/create_features.py +++ b/create_features.py @@ -32,7 +32,7 @@ from modules.animationplotter import AnimationPlotter, AnimationPlotter2D from modules.digitalsignalprocessing import imu_signal_processing from modules.digitalsignalprocessing import vectorized_slide_win from modules.digitalsignalprocessing import get_video_features -from modules.datapipeline import SubjectData, datetime_to_sec\ +from modules.datapipeline import datetime_to_sec\ ,sec_to_datetime, DataSynchronizer from modules.datapipeline import get_file_list, load_files_conditions from modules.evaluations import Evaluation @@ -183,16 +183,14 @@ def plot_video_features(video_fname, nframes=1000): ipdb.set_trace() def map_imu_tsfresh_subject(subject, - tlx_df=None, - conditions=['R', 'L0', 'L1', 'L2', 'L3'], window_size=5, window_shift=0.2): - sbj_data = SubjectData(subject=subject) - sbj_data.set_imu_fname() - sbj_dir = sbj_data.subject_dir + pfh = ProjectFileHandler({}) + pfh.set_home_directory(join(DATA_DIR, 'subject_specific', subject)) for condition in conditions: tsfresh_pkl = path_join( - sbj_dir, "{0}__winsize_{1}__winshift_{2}_imu_tsfresh_df.pkl"\ - .format(condition, window_size, window_shift)) + pfh.home_directory, + "{0}__winsize_{1}__winshift_{2}_tsfresh_df.pkl"\ + .format(window_size, window_shift)) if path_exists(tsfresh_pkl): continue print(f"trying {subject} for {condition}") data_df = load_df(subject, condition) diff --git a/logs/singularity_2776358.out b/logs/singularity_2776358.out new file mode 100644 index 0000000..23dd6d6 --- /dev/null +++ b/logs/singularity_2776358.out @@ -0,0 +1,18 @@ +2023-11-26 00:44:07.923231: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. +To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. +Traceback (most recent call last): + File "regress_rr.py", line 23, in <module> + import tensorflow as tf + File "/home/rqchia/.local/lib/python3.8/site-packages/tensorflow/__init__.py", line 54, in <module> + from ._api.v2 import data + File "/home/rqchia/.local/lib/python3.8/site-packages/tensorflow/_api/v2/data/__init__.py", line 11, in <module> + from . import experimental + File "/home/rqchia/.local/lib/python3.8/site-packages/tensorflow/_api/v2/data/experimental/__init__.py", line 87, in <module> + from . import service + File "<frozen importlib._bootstrap>", line 991, in _find_and_load + File "<frozen importlib._bootstrap>", line 975, in _find_and_load_unlocked + File "<frozen importlib._bootstrap>", line 671, in _load_unlocked + File "<frozen importlib._bootstrap_external>", line 844, in exec_module + File "<frozen importlib._bootstrap_external>", line 939, in get_code + File "<frozen importlib._bootstrap_external>", line 1037, in get_data +KeyboardInterrupt diff --git a/logs/singularity_2776834.out b/logs/singularity_2776834.out new file mode 100644 index 0000000..4b9cae6 --- /dev/null +++ b/logs/singularity_2776834.out @@ -0,0 +1,42 @@ +2023-11-26 00:44:26.282311: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. +To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. +Using TensorFlow backend +Namespace(data_input='imu-bvp', feature_method='tsfresh', lbl_str='pss', model='linreg', overwrite=0, subject=3, test_standing=1, train_len=5, win_shift=0.2, win_size=12) +unable to find matching config id +Data id not set, auto assigned to: 2 +Dependency not available for matrix_profile, this feature will be disabled! + Feature Extraction: 0%| | 0/65 [00:00<?, ?it/s] Feature Extraction: 2%|■| 1/65 [00:00<00:08, 7.47it/s] Feature Extraction: 12%|█■| 8/65 [00:00<00:01, 38.61it/s] Feature Extraction: 25%|██■| 16/65 [00:00<00:00, 53.36it/s] Feature Extraction: 37%|███▋ | 24/65 [00:00<00:00, 58.89it/s] Feature Extraction: 49%|████▉ | 32/65 [00:00<00:00, 63.82it/s] Feature Extraction: 62%|██████■| 40/65 [00:00<00:00, 65.14it/s] Feature Extraction: 74%|███████■| 48/65 [00:00<00:00, 67.87it/s] Feature Extraction: 86%|████████▌ | 56/65 [00:00<00:00, 69.75it/s] Feature Extraction: 98%|█████████▊| 64/65 [00:01<00:00, 72.31it/s] Feature Extraction: 100%|██████████| 65/65 [00:01<00:00, 63.53it/s] +Dependency not available for matrix_profile, this feature will be disabled! + Feature Extraction: 0%| | 0/47 [00:00<?, ?it/s] Feature Extraction: 100%|██████████| 47/47 [00:00<00:00, 997.74it/s] +Dependency not available for matrix_profile, this feature will be disabled! + Feature Extraction: 0%| | 0/47 [00:00<?, ?it/s] Feature Extraction: 100%|██████████| 47/47 [00:00<00:00, 975.44it/s] +Dependency not available for matrix_profile, this feature will be disabled! + Feature Extraction: 0%| | 0/47 [00:00<?, ?it/s] Feature Extraction: 100%|██████████| 47/47 [00:00<00:00, 1034.00it/s] +Dependency not available for matrix_profile, this feature will be disabled! + Feature Extraction: 0%| | 0/47 [00:00<?, ?it/s] Feature Extraction: 100%|██████████| 47/47 [00:00<00:00, 1018.94it/s] +Dependency not available for matrix_profile, this feature will be disabled! + Feature Extraction: 0%| | 0/49 [00:00<?, ?it/s] Feature Extraction: 100%|██████████| 49/49 [00:00<00:00, 1008.80it/s] +Dependency not available for matrix_profile, this feature will be disabled! + Feature Extraction: 0%| | 0/47 [00:00<?, ?it/s] Feature Extraction: 100%|██████████| 47/47 [00:00<00:00, 950.21it/s] +Dependency not available for matrix_profile, this feature will be disabled! + Feature Extraction: 0%| | 0/49 [00:00<?, ?it/s] Feature Extraction: 100%|██████████| 49/49 [00:00<00:00, 1035.00it/s] +imu-bvp_rr_Pilot03_id2_combi5.0-7.0-10.0-12.0-15.0 +train +(101, 74) +test +(978, 73) +---LinearRegression--- +Traceback (most recent call last): + File "regress_rr.py", line 1374, in <module> + sens_rr_model(subject, + File "regress_rr.py", line 1268, in sens_rr_model + x_test = transforms.transform(x_test) + File "/usr/local/lib/python3.8/dist-packages/sklearn/utils/_set_output.py", line 140, in wrapped + data_to_wrap = f(self, X, *args, **kwargs) + File "/usr/local/lib/python3.8/dist-packages/sklearn/preprocessing/_polynomial.py", line 432, in transform + X = self._validate_data( + File "/usr/local/lib/python3.8/dist-packages/sklearn/base.py", line 625, in _validate_data + self._check_n_features(X, reset=reset) + File "/usr/local/lib/python3.8/dist-packages/sklearn/base.py", line 414, in _check_n_features + raise ValueError( +ValueError: X has 70 features, but PolynomialFeatures is expecting 71 features as input. diff --git a/logs/singularity_2810990.out b/logs/singularity_2810990.out new file mode 100644 index 0000000..179501a --- /dev/null +++ b/logs/singularity_2810990.out @@ -0,0 +1,7 @@ +2023-11-26 01:21:46.434356: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. +To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. +Using TensorFlow backend +Namespace(data_input='imu-bvp', feature_method='tsfresh', lbl_str='pss', model='linreg', overwrite=1, subject=3, test_standing=1, train_len=5, win_shift=0.2, win_size=12) +Using pre-set data id: 2 +Dependency not available for matrix_profile, this feature will be disabled! + Feature Extraction: 0%| | 0/65 [00:00<?, ?it/s] \ No newline at end of file diff --git a/modules/__pycache__/datapipeline.cpython-38.pyc b/modules/__pycache__/datapipeline.cpython-38.pyc index 450a764cdbfb582417b9f1631e429109e238e31f..112731661244257e314c326704b0de1742c5608c 100644 GIT binary patch delta 1473 zcmZuwZA@EL818%MN-1k;VDe!ZWl&0Q3xz@y!6A!IaInq6EU;T_X*ng7tk4}wHguY( zjmG$6F>kU&w?A7T#znI%jY~}Y;m6{~T-1^{wMOF{#yFkCInhltp7)l;XlT;sKIc8p z`+47c^gO(f0*l{lHtFPV!#m;VWY6=KN_g^&XFEVICg2WxUvw+K{Wx$tuTE`HyVg!y zcuh*LZsR-g9MpMsNvm6GJiI|_w)5TG%bQZ5-p`x4FQrqvc?)lq&mR6DZemZDKgHBN zJv#mX-;vVs9rD!AVXscvlYX1sRb^GpGqdq{Sj>gSr^4~5nl^2*0%Xuuld*mz>lDKT z`v@-L0dq;ZsI~$O89IN4UdP3<Z~r4wm{_|bt+&nw@E!I#+u<urIoodez2#h}S0HPc z7P075Qpw9D`Cdj+&9V4wXo8DeIpPyM<0^qGc%uFZ$nk5~=IXCoq^zG4d_kZUahcf9 z1Xl>I>Chf2!*j0p*E|>Tgz`-7GTj{|h)IySe;}?|8zp-=Ph_yqefpl?$y_aAw*ZOV z;tz?6UU=n0kn&$gY1NtzlJy9|O&S&UdFJ7UHnhBk>tNC}m*jOe4c;Pf{7TB}1iukz zE&WMsmEbRee5{I;!wT+dDq}0U^uRy^IJ7VuWlh3D&`dfFe8ecqhmbzGw-=xYuRc^I z-{nTX!WIGB{0EKXCwtD|S!w$Q@REPtP)1=&@wvePD8a7>E7^~LtAm|j!;T?umzK#a zC0XP6{%|rB4<|$gd6W}qB}~ekSz|&($D_%yiCfcnlgg>YmxgND65xfQ(=9ggc$Um| zVhhBq#O5Vib5r$)LVQNm&q)*xe08{*Edzc$+}v6~N|OWyx)q~XZ+f?pL@Tb2SUpXz z!YJu^`JbWh&IqP_qs6E<6$&Hv4{XAN`!0im0ky@h%~c`4bw+v<@D+6`r)gYA(;#g+ zhV6kP(1;%dHs?0ohd%{eERSL7$To1}(8%FRH#rwj=RRTwsB=SZecZS_;$=pL_J=Q2 zHpyG1KxpziNqLHV9@88#Kj@M>O)GX^u#^=u3<M7sx0A|4ZfoCQuclZH7Bd?|YiNo6 zt;3a2Gx)H6)M3^-rJ!<_85|xBFgwH7Mvra2o8tH>QKw{CJ)eyy1gW*XiHAL`k>QKs z)_yP9Ky*+H%m3bobc>B8b31LLSTrEfLNG{2t)&h+YU#R&-Ak{_;F9zP;8wZ!F7!w0 zU;rPDxD8o-VlMdzo{F@~y<CcPn$^NYI5`oD9*WH*XV5t|S@<Odt|#ckV`DoFzYtaM z>R1i5p_P}zr>JnJC##Q4Pwy9J>GpMkHwfM(c#mL(fR;wM@Nw>h30&YYW@-8dYNmgF delta 10206 zcmd^FdyrezdB5lCO1dkp<n`|2H%9m^uURj)F*tsMc^Ve*8U_T8D(}&*wceH1=UxL= zw5VZCY!X}^m!_dWy4!)YZabmWoz5iFnf8xN+DSu0Vx~=R(xDwfTPEp)l0f4mxWC`I zl2%$9hM7$IPj}~j_ndRj<2&bje@D+A7oY!zNZyr5#0>m>;k~=_Up?|va*25Qm7YC9 z-0F>st1i1OzfCTBUdYR2>UcP_vsSxYE<0{zcFBJ4Yhq>3?Py(#K0R_B`s|T+$X>br zxX4^D`{b3!jm!;lgWQPsjq*;f+x*O$eE(!9bH|N_yh+}C+>kd<nwdLMyG8CrZ7*v3 zq!=*L`_BA{dCk%eKj9o58_TI<*`Z=?EbpH=m+BDWtQTMQ?T*)gx4MVmUV`s>xkT5Q z^UD^B$XV9^!CSiG@8<uYrBm8ewQuaZbmd|p{?+@$sx9IT&s}wOecUUnURr%x_>qz- z<cn@P6fE*KX7Uq-v7^~TQU&O!?|9Q|y2OvXr&fPj1m*X<)obot@j6%a3BE_5k$8)x z|04Ji!F%5GYyPP==ymU@^oix~u=(=@1pqAjLzZ>x6)2q!sk7eJwJ-1Z8Jkl8{Yn7d zpneXRwp1@(ialorqE+wrlWcv0-~*nLx3%X*@e6%k@Amvy#B`rG@i}wmoh#;umG86X z{}B9?K=0-}O9mEHf*>Zm7uL)d=e+If=U30++aR8nKdV-AzJx;F$Ovg<Oer!Ux#om1 zB%~$7$J;WNjL0b1BP`RhO~$}D5xG{j%LGbMnUr(z&6XW<F5YdjQ+DAUlil(XyyJ47 zoR4?4T`rIdQAo%|axvaXxkO%y_Z+!YUWRvvOv%gfo-3Eh<#>0>6>=rsU2>INjd%A6 z!!k0LfbDwx_`uPFBl$shPtMJie=T)uDJPv&p_r1Wr-n)@<sQnXRAIOvQwK}(*t*m~ zRn6zzLxtjSO6A=Ax>Sv0Q$vN(yc0{|FISW)=U8#j;oz}UZg8-qBnFHgOR23m9X~YC zcjaKH>G)U<<Kf#m3E&!&#*~-_0V<+mWX*~R7^>hv7aCP*9uBFsy2&cebHi?ADk|}D z+MKedjY$r$Bw?F|F*jZ@M%wX&COJ*QO-{|3Hc;wll;$={osCkLG;l;#z;?bED2MKs zJ?U=08g`D3=T*Qb`+=<bQ-Dm&2~ILwkm_!>t|nLm;D?7*>FBs0cif}$Sk`fK%JtiV zO1>!71Ur749pm*mWI+XD_=avCS9!-ds`6Pm<j1OY9f69C!_AMfIYLpDLy#lQ-VMAU z*=^bE;AqZqvRT!~rLH6(m%j?ub{3&vh?t1rzg^hoJeJH3GlsU9D0eq8NN+Y<C>Fd= zU*7G#nr^G^z`%;wV;tBtfpZ-(D&~}M&1pd*9gZlACw@VjX@aN&#t}=JINeac`UGZC ze+>ZX)mZOk!yE#hYi&b*So4=Uz;?ZhKJRd%t(q9lyV=1~QD*u54Q{9%T?}E1Zqa$( z#5T$co43%?zrTeG>l~=La2F<#VuCzp9Bxzdq&bBRPNT&YQ)VSJEglmmjOqz7WXh1V zj@xeNBzSKRo(apfD%K?RdbqzFy|3?zEeCp5lw(2RQ+*RF27zbe&qOJLrwfm>0bm%s zd0_2?a1B?S#0gKDlR{apc~VS;Dq>pbTJWW6K4l)U27)8O0VXh9^OJ8KER{wz`@sra zjb>lS(`LYCVU-`wKh(qwTk!pdJUbhOh%n8hk^G>1Mf16h792MV_RNAco!)JurNP{& zv!jX<J`9$|#&@K{egqn0Eaz(0_oHkDQB;&jJq(Zz2h6E?-3IGB4;I`*nJCbKR4CuB z&mW+6DKOH^>L*%O{}*D~f%yzkwLoa{X}gG=k3?EY-7@Hg7E-riaGl)Fmr)`AJ_Z6S z*kfrP*L)31hCo}^LIGW&#aXj-EiA*b%4)b)jp&cHTCr2B{V3XO3JE3)SA?_|8WRy$ zRKigI_M_%P%`ykp-EU4rCu|^LR_w!;s_0U<5|y#}2878#i>O37_DH1?0b|9>cct#g zsj8EApn(sjKow~5k~)^6;{eTDq6*Kug|WQW$AgFRgNL0|Za7ycI&KO?3iOax*mb21 zili=JO&-$#9%mkaX3Zp76MHcx&1%4YX>qO{>j!GScX(r3XeI=|Y0it*=vot8%oFG! zvfWQfOd3zH-T89Qz>GQS)i`Ilg&Brx;x}Si#lYaNE=3wu>NJgNCs7>{L{^GZHDQ53 zXgA}((F<kmQ_<Z)ndoNP8(t5&H&8ZcpTYTO4V)#A{_qo9icuPkc@sck%9w^tlSZH* zYX@g)rv|d3U{&)HDEvE@`VW8_I_f{^byay*zi6y`v1KV{diTxCx>z#UJ(+D<)RztP zE|%cA`H3M_8Y?)ZKzvUJ=b)(H>)ZGKXpOCLIN!j*FpUegPCce^NTHy&zM#H!H=uzS zG3Og~EZ&Zz8!S}#Q0b_0pz7D6uBd<gr~`h2hEQoN7+_JIv0gk4tY71t55uGjot)Ow z02!b;K>~=?Wc@URHbOWIYG0-8&q}O%S_EdOzHHS{To)E>hGX@}ew9Ky(AQQ=agq*< zPqzi?ZW+q3&rbHJJJ{q>(_iu}c-M}46GJlCF)TR@|4~jbbNIbH0NU)TA8!(gchT(d zyo^x9v|zN&^AWQdCIyW+T0^#2AePXW&;JndYM~?fhyr;X9_L{I>PAq}sFIan0pX3& z#KRJzGzoPHjn!K*V4MQ`XjPy#Cry|^x@M7zMUw^X7k0)gp{b~A<M=JD2iY@xv7Y>{ z9XN1PIuf{Gs+s~a<><Bh*7goM59pIG&sm$jmZC$4l&$HQ>g0JXCpd!sY6DB!#o}HR zHAX;U63b6ETq_-MrK17K^c<v{S_P0v=w4NNV8f%OgPEpLnu<~B!ZU*Ov?>d~(}8!= zx<Z`Xvj9sQAXyC7tKE!&Oi5wk;k&%FnMlnORD&?=!-@3>W~~$Jm!_3_0j;W`N<@a| zL+xKk+KUn_1$@fw13^gJM6^V<x$R?<Y*)=K{98=>A%tqL`^K6~Gto4uq&9i4T-8;b z%OkCK(?nnVQ6x!{B$v)6Nz3>ag3z;QNDsr?IzhA*67U#KnOcHEO%h~5f;OWgz0I(U zphpNUWJpFYderbCARAHLqYXVGH93oDIg7$;2zuDMM+`k|x2<BsZG5_7S1~lEBb#`? zTF#Rc>T)tVo*>TNkCyeQl`Yu8wSj-Wg7u^`m@noZD9C)NcWiu9I{tA)?VFtT#{hnO z9AcW?n|}xnqocmXrE3aumbBTlznXw;ENRDo#whcH>LbSAVn|<2ke<39J)a)ZatY?9 zcqJ)bBB{kI5Hh-o_N<7YWqb=QTQuS0n6pmHCgH07N;P4SE~tB$@EV)-qD{RBeWtBX zIkuajV3U-^*JgDS)nd!Ga6~%b(1v;yD%wyloq$oSdnt`DE{ZU&ZYE7%3<s7n<cCK~ z4<ZKMYjj|JYSOy5a-iX@>?J9fZi~P^o1){qWt7*?7Nj74)neEPDw=$}kF6U5B>Z?V z-{y}}=CPoUX0TfSAhrzj!yNKM6I$E3jUY(AxWZ1F<;5Sdt(K#2;iyM&HgKfjpGw4d z@KjAVGzBHALp+AmQx+{M1U8IvM5ipG=#H3Tp)m!SW;DwBNKCgvqe$y8zs08w8HRp= z4ng9Bwl>U?jL_;<Fk3cJi2x?GB8c{v!<uh0vHgWAylpo(HtzW0>mNWqupAp;z9O|B z&4G?!z@DBHm{cvfGj??m#qw2rS8oCMar*9~g(CA7YJ|-@3AFS5TP!_GKvzl41<*Pj z{G5qtpFdCmdJd<i1Z>7F{Fq5JA{ZE6nQ3cOGf@_qR+(js+ez0tLbsEsftrAVfRc!r z9Z(qU=OZgk^8+gcJpo^;ykIt6>*Kv`=|$JC0QMjZ+<K&`>gg(D%B<BxeDi?SMs!4Q zeib*mncy0NZ3N%Z3CbsW!|OH6I$0vdD4ZD#{+H*?#v$nF{l)4<)ted^fS6zuBqm@) zG|M3SqHAGY1F>~2s%r=|r`o2C`<V>`>ty3IShj1*sU5^kCv!Kl)I~tEtGZd*N^prV z=G8L1^HBGr)WPs(^_IH5)Z*|qCJ~1e<Iy~~MP0y56HO|NYKzc}BJr4==|3pEe_pVl z`dtiQDv;ji2qK3GIGz;~xuq|-WPF`z8bM4>UmhkiXoDCu+N@d|M4-`zxE(?edd`q= zO*ITZXS@)eh`M1otI<ki8U|C(UPZ(`6`kf#d$BRmR)G~|ig7;5#ZKL&4L#c2*t8h2 z^mif^+eSSaNADo*n3xVRk2#g(2u%QJ+5zKqyx+I(&F7RBD?fZ&spuZ^Lwj<^%J#~8 z*RLE~UtKBHG8}<#jyZ*YLs;Zn`@#HbJ=!3F=kN+h^L>_d(ostk4%<*G0sMHO=;qaU zX%xX_5L{_7)NxLXRBQ;J&T~WEp5?GMQX=bAk;L__!O|$=lw&~D503>n%G@zO0-twi z%++ruZ&)uK9I5%rbd@PAv!$?uiQyTSSPqSnGnQge7;KXa->v^G!1f-1q}d7k-6_gT zTI68XEdL+)MO_Di)yWR&Gt_Ld>)3{LS2y$XT9!Tzz11!3zL(%ug4+oC32rC2gWyhp zOwDv&jIaq1wtui@IOWi<e*{^ZsoIj8)w~gyUO7}F>1)`;MODC)|7R*75Pyh1RKYCK z*GBjP(T{1-*S8Ke(NDCBen84(Ai-Lt&L$;EM@m+I1ypQ=MWmxMTSDh(D%1m<svLTT zqzr831rjP2)y1<#o3~6sQer3dKUx7AIH)tj8NnAE4fLXGB7cAqGYd@5NaIm+$Xr;3 z@*N3l{cns!kjX-}{gg?2i>}n_VI53`4_k1BkX)EH)Ms6Ls!c}RSS2(mrje@Gju01t z6$FgaA)+<7a3;4MV-qrpFf2+0aM^^S5oBr?7>j|~q`>0(x<j@>&l3&WfaG+oyv=au zz+l108tITR`djYYDcXt-8RyPmX1b=j5lGi&ofhsTyZ}5U<}(3Zn~&Sl`Qs|@1nr8c z6Ua8On}^#$+!9u7wqQCMB@UVkWWrPPb!`D!7HZB5Pc7nCZFICRlJ4wR9!`Eg{B`Yz z@$yykSfLO4BJl81Y@?g!cT_cx&>@7MMN^_ygJizV=5nbBqk^jH0casIEl}f5gU)k3 z8+(!-FUZU7)^o>*H?P11c#$V+frKA=AUAqi+~vpgeOZ$+VJb`~;=V@4WUPH*>R2-% z9TGWov9QAPqN}PwtP>!ig#ZX>l{u4f)GlOWy{|XcLqHQ4;l)b(J|SF(=sdD-6`W!& zI9Y9q^`-V_i1DKUek7-I#o;_NQ^+PdZ2tqcTVwfLG0@uDs_*6@TjR&vvbviG%=1)h zSmF_>lLQ}OzoW$*-D1br0dyedPjVgmAl$5+8g|qzRW5<7WGU;G{fJv;{wl~$!uacp z11F<>IwWEP9dQrW=V{=clJ++^-{0e@Y(}e&xX*XMk#0xEQQyBY`)IYBtKjr?BTE@W zMsf)gUTja9-DrkkZ%IlvO-vuVxC`@Y{uYN#r(2hd!?s!i4>q_Sp5<?&-KzQD;ky5w zHiHCWIuf`$JGqa21p5gF2=WBl8LtML%Z>6{^>4Vj;6kxmyW`p=zYyb~cZ&#+P<*b# zqp$Ud{D?r5jVvl$*d<gz(i#p;n8g^gJ~GO31sV?u1{#X@qX_YD0%jTe?!0mfxN^%x zZ>1}Tf<2HMF1T)Pv>dr7wTIXDk$Y?JjD8~@J>w;i8EaRmbt|s@y1>(8v2F*LP|!q9 z@~X6EjWPXM9tD+m5SfsQ<weatS!;KQG4J|SJJ3&y{<OFuJx{&Qmc}nF{Ae~SOM|$U zex2<)cw8>H{svOxV&Re|-7bE;mOv9x(@#@U6LE+i4gvUy+Pyc@BBS-YZ!OXD*+q+W z9HnpIaTv?l!ZMScm)p@;BDTQpz<($HJM4%ZwY%}(Wv{Sp6&GjjUcXMPprfoxoaqRG zo_I!Ia7W)a&wQ!x79o`P&Xr5?E1r%GY4c6f+q&UFo8w3h@2hBg$MoLX@M5IQX^weM zY}_XvV5Tx|ZyU{x9hA8pKR2J=v|T*x?by`2Q?EFVuD(44uRc4L8&?&M{1kykWdv*c zHqy>Rd3W$o{YNo>Nqno`A8uN1{>1cN+w}5=hdJUgHb27BODs7oy@*n6SjKuNE1isW z3{af_S}D^Ad1tTc+ce6aV*n(rJ&UvfMIT}pjoe9=KC6F3v+yv=->WkdZf~P@(zar8 zb=Vu;{JwbH%U-?VGJUv@bI?W3H6HT*`08TN=gEuoc>-;7L*Djlo)AxZuU>O$@I$9( zc&1)x%Oz%a$Xm2!mpJ9!x23Y=6enQPRz1TK6f-Uyf=hr?-n(0R&4nRv`6s@)<Y|0G zzEXF8mOZJrgCxi^UVQ5s{I-cdHM()@Jac)-%Wkc7KF?mi!C|$ZVco(WPjBlq*M+?9 zZ9gzg;ho#oC!X<owy#L&&2mnh<yr6k?f02|A@3{OpS$#noaQbrHieRJ$)jWAioG@e z9NE!h?g)8b+_7=rQ*5hUWhH0P1H%e~Za|gAoQr3qev=>w8(DgRKri>ZEWM=375N;> zv5>cG=K}FL@7|p&#ei4cxi+jmiFWVhom;?{Z|~e5R#&sWao0%vA36VOjj;FoyKasA zgvH0bbGw#_-}Wxuy+HiEw|V!fO=>Y0`7?g}3c;Tfe4XI02(%fbcq;OXA{MGoAMx+( TPKu)U<=q9lqY5k9m0bV77n4^z diff --git a/modules/datapipeline.py b/modules/datapipeline.py index 7c17bc4..a05ec34 100644 --- a/modules/datapipeline.py +++ b/modules/datapipeline.py @@ -455,279 +455,6 @@ class DataSynchronizer(): self.start_ind = start_ind self.end_ind = end_ind -class SubjectData(DataImporter): - ''' Loads in data for the rigid body, breathing rate, summary files - and syncs them accordingly ''' - def __init__(self, condition='M', subject='S01'): - super().__init__() - self.condition = condition - self.subject = subject - if subject[0] != 'S': - self.subject_id = subject - else: - self.subject_id = int(re.search(r'\d+', subject).group()) - self.study_start = 0 - self.study_end = 0 - - self.subject_dir = path_join(self.parent_dir, self.subject) - - self.pressure_df = pd.DataFrame() - self.summary_df = pd.DataFrame() - self.accel_df = pd.DataFrame() - self.imu_df = pd.DataFrame() - - def get_cond_file(self, files): - for f in files: - if self.condition in f.split(sep)[-1] and \ - self.subject in f.split(sep)[-1]: - return f - return '' - - def list_sub_dirs(self, parent_dir, endswith=None): - reg_str = r'[0-9]+$' - if endswith is not None: - reg_str = r'[0-9]+{0}$'.format(endswith) - regex = re.compile(reg_str) - sub_dirs = [ - path_join(parent_dir, d) for d in listdir(parent_dir) if \ - ( - isdir(path_join(parent_dir,d)) and bool(regex.search(d)) - ) - ] - return sorted(sub_dirs) - - def check_times(self, sub_dirs, is_utc=False): - ''' Parses sub directory names to datetime and checks against mat_start - and end ''' - sep = self.sep - - if is_utc: - imu_hdr_files = [path_join(sub_dir, 'recording.g3')\ - for sub_dir in sub_dirs] - hdrs = [pd.read_json(imu_hdr_file, orient='index')\ - for imu_hdr_file in imu_hdr_files] - times = [hdr.to_dict().pop(0)['created'] \ - for hdr in hdrs] - times = [datetime.fromisoformat(time[:-1]) for time in times] - times = [(time.timestamp()+ timedelta(hours=11).seconds) for time in times] - else: - times = [datetime_to_sec(sub_dir.split(sep)[-1])\ - for sub_dir in sub_dirs] - - sel_dir = sub_dirs[-1] - for i, time in enumerate(times[:-1]): - if self.study_start > time and self.study_start < times[i+1]: - sel_dir = sub_dirs[i] - return sel_dir - - def set_pressure_fname(self): - subject_dir = self.subject_dir - sub_dirs = self.list_sub_dirs(subject_dir) - sub_dir = sub_dirs[0] - if len(sub_dirs)> 1: - # Check directory times with timeline - sub_dir = self.check_times(sub_dirs) - - pressure_glob = path_join(sub_dir, 'BR*.csv') - pressure_files = sorted(glob.glob(pressure_glob)) - if not pressure_files: - dt_info = sub_dir.split(sep)[-1] - pressure_glob = path_join(sub_dir, '*_Breathing.csv') - pressure_files = sorted(glob.glob(pressure_glob)) - self.pressure_fname = pressure_files[-1] - - def set_summary_fname(self): - subject_dir = self.subject_dir - sub_dirs = self.list_sub_dirs(subject_dir) - sub_dir = sub_dirs[0] - if len(sub_dirs)> 1: - # Check directory times with timeline - sub_dir = self.check_times(sub_dirs) - - summary_glob = path_join(sub_dir, 'Summary*.csv') - summary_files = sorted(glob.glob(summary_glob)) - if not summary_files: - dt_info = sub_dir.split(sep)[-1] - summary_glob = path_join(sub_dir, dt_info+'_Summary*.csv') - summary_files = sorted(glob.glob(summary_glob)) - self.summary_fname = summary_files[-1] - - def set_imu_fname(self): - subject_dir = self.subject_dir - sub_dirs = self.list_sub_dirs(subject_dir, endswith='Z') - sub_dir = sub_dirs[0] - if len(sub_dirs)> 1: - sub_dir = self.check_times(sub_dirs, is_utc=True) - - imu_glob = path_join(sub_dir, 'imu*') - imu_files = sorted(glob.glob(imu_glob)) - self.imu_fname = imu_files[-1] - - imu_hdr_glob = path_join(sub_dir, 'recording.g3') - imu_hdr_files = sorted(glob.glob(imu_hdr_glob)) - self.imu_hdr_fname = imu_hdr_files[-1] - video_fname = path_join(sub_dir, 'scenevideo.mp4') - if path_exists(video_fname): - self.video_fname = video_fname - - def set_accel_fname(self): - subject_dir = self.subject_dir - sub_dirs = self.list_sub_dirs(subject_dir) - sub_dir = sub_dirs[0] - if len(sub_dirs)> 1: - # Check directory times with timeline - sub_dir = self.check_times(sub_dirs) - - accel_glob = path_join(sub_dir, 'Accel*.csv') - accel_files = sorted(glob.glob(accel_glob)) - if not accel_files: - dt_info = sub_dir.split(sep)[-1] - accel_glob = path_join(sub_dir, '*_Accel.csv') - accel_files = sorted(glob.glob(accel_glob)) - accel_files = [f for f in accel_files if 'g' not in \ - f.lower().split(sep)[-1]] - self.accel_fname = accel_files[-1] - - def set_timeline(self): - times_glob = path_join(self.subject_dir,f'*.csv') - times_files = sorted(glob.glob(times_glob)) - self.timeline_fname = self.get_cond_file(times_files) - self.timeline_df = self.import_time_data() - - mat_time = self.timeline_df['Timestamps'].map(mat_to_sec) - mat_start_ind = self.timeline_df.index[ - self.timeline_df['Event']=='Start Test' - ].tolist()[0] - mat_start = mat_time.values[mat_start_ind] - mat_end = mat_time.values[-1] - - self.study_start = mat_start - self.study_end = mat_end - - def set_fnames(self): - self.set_pressure_fname() - self.set_summary_fname() - self.set_imu_fname() - self.set_accel_fname() - - def load_dataframes(self): - self.timeline_df = self.import_time_data() - self.pressure_df = self.import_labels(self.pressure_fname) - self.summary_df = self.import_labels(self.summary_fname) - self.accel_df = self.import_labels(self.accel_fname) - self.imu_df, self.imu_hdr = self.import_imu_data() - - def sync_pressure_df(self): - data_sync = DataSynchronizer() - - cols = self.pressure_df.columns - if 'Year' in cols: - year = int(self.pressure_df['Year'].values[0]) - month = int(self.pressure_df['Month'].values[0]) - day = int(self.pressure_df['Day'].values[0]) - dt_fmt = "%Y/%m/%d" - dt_str = f"{year}/{month}/{day}" - dt_obj = datetime.strptime(dt_str, dt_fmt) - pressure_time = self.pressure_df['ms'].interpolate().values/1000 - pressure_time = pressure_time + dt_obj.timestamp() - else: - pressure_time = self.pressure_df['Time'].map(datetime_to_sec).values - - self.pressure_df['sec'] = pressure_time - data_sync.set_bounds(pressure_time, self.study_start, self.study_end) - self.pressure_df = data_sync.sync_df(self.pressure_df) - - def sync_accel_df(self): - data_sync = DataSynchronizer() - - cols = self.accel_df.columns - if 'Year' in cols: - year = int(self.accel_df['Year'].values[0]) - month = int(self.accel_df['Month'].values[0]) - day = int(self.accel_df['Day'].values[0]) - dt_fmt = "%Y/%m/%d" - dt_str = f"{year}/{month}/{day}" - dt_obj = datetime.strptime(dt_str, dt_fmt) - accel_time = self.accel_df['ms'].interpolate().values/1000 - accel_time = accel_time + dt_obj.timestamp() - else: - accel_time = self.accel_df['Time'].map(datetime_to_sec).values - - self.accel_df['sec'] = accel_time - data_sync.set_bounds(accel_time, self.study_start, self.study_end) - self.accel_df = data_sync.sync_df(self.accel_df) - - def sync_summary_df(self): - data_sync = DataSynchronizer() - - cols = self.summary_df.columns - if 'Year' in cols: - year = int(self.summary_df['Year'].values[0]) - month = int(self.summary_df['Month'].values[0]) - day = int(self.summary_df['Day'].values[0]) - dt_fmt = "%Y/%m/%d" - dt_str = f"{year}/{month}/{day}" - dt_obj = datetime.strptime(dt_str, dt_fmt) - summary_times = self.summary_df['ms'].values/1000 + dt_obj.timestamp() - else: - summary_times = self.summary_df['Time'].map(datetime_to_sec).values - - self.summary_df['sec'] = summary_times - data_sync.set_bounds(summary_times, self.study_start, self.study_end) - self.summary_df = data_sync.sync_df(self.summary_df) - - def sync_imu_df(self): - na_inds = self.imu_df\ - .loc[pd.isna(self.imu_df['accelerometer']), :].index.values - self.imu_df.drop(index=na_inds, inplace=True) - imu_times = self.imu_df['timestamp'].values - - ''' S21, S30 has strange time recordings ''' - mask = imu_times > 3*60*60 - if mask.any(): - bad_args = np.arange(0, len(mask))[mask] - self.imu_df.drop(index=self.imu_df.iloc[bad_args].index, - inplace=True) - # self.imu_df['timestamp'] = self.imu_df['timestamp'].values - \ - # self.imu_df['timestamp'].values[0] - imu_times = self.imu_df['timestamp'].values - - print(np.mean(1/(imu_times[1:] - imu_times[:-1]))) - self.imu_df['timestamp_interp'] = imu_times - self.imu_df['timestamp_interp'] = self.imu_df['timestamp_interp']\ - .interpolate() - - data_sync = DataSynchronizer() - - iso_tz = self.imu_hdr['created'] - tzinfo = pytz.timezone(self.imu_hdr['timezone']) - # adjust for UTC - start_time = datetime.fromisoformat(iso_tz[:-1]) + timedelta(hours=11) - imu_times = self.imu_df['timestamp_interp'].values - - imu_datetimes = [start_time + timedelta(seconds=val) \ - for val in imu_times] - imu_sec = np.array([time.timestamp() for time in imu_datetimes]) - self.imu_df['sec'] = imu_sec - data_sync.set_bounds(imu_sec, self.study_start, self.study_end) - self.imu_df = data_sync.sync_df(self.imu_df) - - def sync_all_df(self): - if self.study_start == 0 or self.study_start is None: - self.set_timeline() - self.sync_pressure_df() - self.sync_summary_df() - self.sync_accel_df() - self.sync_imu_df() - - def get_accel_data(self): - accel_cols = self.accel_df.columns - if 'Time' in accel_cols: - data_cols = ['Vertical', 'Lateral', 'Sagittal'] - else: - data_cols = ['X Data', 'Y Data', 'Z Data'] - return self.accel_df[data_cols].values - class TFDataPipeline(): def __init__(self, window_size=60, batch_size=32): self.window_size = window_size diff --git a/regress_rr.py b/regress_rr.py index 0255ce1..c420b0c 100644 --- a/regress_rr.py +++ b/regress_rr.py @@ -426,7 +426,10 @@ def df_win_task(w_inds, df, i, cols): for col in cols: data = w_df[col].values # DSP - sd_data = (data - np.mean(data, axis=0))/np.std(data, axis=0) + if sum(np.abs(data)) > 0: + sd_data = (data - np.mean(data, axis=0))/np.std(data, axis=0) + else: + sd_data = data.copy() # ys = cubic_interp(sd_data, BR_FS, FS_RESAMPLE) if col != 'bvp': filt_out.append(imu_signal_processing(sd_data, fs)) @@ -455,7 +458,7 @@ def df_win_task(w_inds, df, i, cols): if 'bvp' in cols: xf, yf = do_pad_fft(bvp_filt, fs=fs) bv_freq = int(xf[yf.argmax()]*60) - y_out['bvp_est'] = bv_freq + # y_out['bvp_est'] = bv_freq return x_out, y_out @@ -590,9 +593,8 @@ def load_and_sync_xsens(subject, sens_list:list=['imu', 'bvp']): return xsens_df def load_tsfresh(xsens_df, home_dir, - sens_list:list=['imu', 'bvp'], window_size=12, window_shift=0.2, fs=IMU_FS, - overwrite=False, data_cols=None): + overwrite=False, data_cols=None, prefix=None): """ Loads the tsfresh pickle file, or generates if it does not exist for the given configuration @@ -607,16 +609,26 @@ def load_tsfresh(xsens_df, home_dir, pd.DataFrame """ - raise NotImplementedError("To be implemented") + assert data_cols is not None, "invalid selection for data columns" + assert 'acc_x' in xsens_df.columns.tolist() and \ + 'gyro_x' in xsens_df.columns.tolist() and \ + 'bvp' in xsens_df.columns.tolist(), \ + "Does not include the full required dataset. Must have both IMU and BVP" - # make home directory + # raise NotImplementedError("To be implemented") - assert data_cols is not None, "invalid selection for data columns" - pkl_file = join(project_dir, 'tsfresh.pkl') + if prefix is not None: + pkl_fname = f'{prefix}__winsize_{window_size}__winshift_{window_shift}__tsfresh.pkl' + else: + pkl_fname = f'winsize_{window_size}__winshift_{window_shift}__tsfresh.pkl' + + pkl_dir = join(home_dir, + f'tsfresh__winsize_{window_size}__winshift_{window_shift}') + pkl_file = join(pkl_dir, pkl_fname) + if not exists(pkl_dir): mkdir(pkl_dir) if exists(pkl_file) and not overwrite: return pd.read_pickle(pkl_file) - ipdb.set_trace() x_df, y_df = get_df_windows(xsens_df, df_win_task, window_size=window_size, @@ -630,6 +642,9 @@ def load_tsfresh(xsens_df, home_dir, # default_fc_parameters=tsfresh_settings.MinimalFCParameters(), ) x_features_df.fillna(0, inplace=True) + x_features_df.reset_index(drop=True, inplace=True) + x_features_df = x_features_df.reindex(sorted(x_features_df.columns.values), + axis=1) cols = x_features_df.columns.values @@ -964,7 +979,7 @@ def imu_rr_dsp(subject, do_minirocket = False use_tsfresh = False - overwrite_tsfresh = True + overwrite_tsfresh = False train_size = int(train_len) config = {'window_size' : window_size, @@ -1050,8 +1065,6 @@ def sens_rr_model(subject, test_standing=False, data_input:str='imu+bvp', ): - # TODO: - # implement tsfresh """Loads, preprocesses, and trains a select model using the configured settings. Attributes @@ -1106,7 +1119,7 @@ def sens_rr_model(subject, do_minirocket = False use_tsfresh = False - overwrite_tsfresh = True + overwrite_tsfresh = overwrite train_size = int(train_len) if feature_method == 'tsfresh': @@ -1143,25 +1156,38 @@ def sens_rr_model(subject, cal_df = get_cal_data(event_df, xsens_df) + # include standing or not + test_df_tmp = get_test_data(cal_df, activity_df, xsens_df, test_standing) + test_df = pd.concat([df for df in test_df_tmp['data']], axis=0) + if use_tsfresh: - xsens_df = load_tsfresh(xsens_df, - project_dir, - sens_list=sens_list, + cal_df_list = [] + test_df = load_tsfresh(test_df, + pfh.home_directory, + window_size=window_size, + window_shift=window_shift, + fs=fs, + overwrite=overwrite_tsfresh, + data_cols=data_cols, + prefix='test', + ) + for index, row in cal_df.iterrows(): + data = load_tsfresh(row['data'], + pfh.home_directory, window_size=window_size, window_shift=window_shift, fs=fs, overwrite=overwrite_tsfresh, data_cols=data_cols, + prefix=f"calcpm_{row['cpm']}" ) + cal_df_list.append({'cpm': row['cpm'], 'data': data}) - # include standing or not - test_df_tmp = get_test_data(cal_df, activity_df, xsens_df, test_standing) - test_df = pd.concat([df for df in test_df_tmp['data']], axis=0) - - x_test_df, y_test_df = get_df_windows( - test_df, df_win_task, window_size=window_size, - window_shift=window_shift, fs=fs, cols=data_cols) - + cal_df = pd.DataFrame(cal_df_list) + else: + x_test_df, y_test_df = get_df_windows( + test_df, df_win_task, window_size=window_size, + window_shift=window_shift, fs=fs, cols=data_cols) for combi in combinations(cal_df[cal_str].values, train_len): combi_str = "-".join([str(x) for x in combi]) @@ -1178,7 +1204,7 @@ def sens_rr_model(subject, train_df_list.append(data_df) train_df = pd.concat(train_df_list) - assert np.isin(train_df.index.values, test_df.index.values).any()==False,\ + assert np.isin(train_df.sec.values, test_df.sec.values).any()==False,\ "overlapping test and train data" print("train") @@ -1209,10 +1235,13 @@ def sens_rr_model(subject, x_train = minirocket.fit_transform(x_train) x_test = minirocket.transform(x_test) elif use_tsfresh: - x_train = train_df.iloc[:, 3:].values + y_cols = ['sec', 'br', 'pss', 'cpm'] + x_cols = [col for col in train_df.columns.values if col not in y_cols] + x_train = train_df[x_cols].values y_train = train_df['cpm'].values.reshape(-1, 1) - x_test = test_df.iloc[:, 3:].values + x_test = test_df[x_cols].values y_test = test_df[lbl_str].values.reshape(-1, 1) + y_test_df = test_df[y_cols[:-1]] else: x_train_df, y_train_df = get_df_windows(train_df, df_win_task, @@ -1303,7 +1332,7 @@ def arg_parser(): default='pss', ) parser.add_argument('-tl', '--train_len', type=int, - default=3, + default=5, help='minutes of data to use for calibration' ) parser.add_argument('-d', '--data_input', type=str, @@ -1311,7 +1340,7 @@ def arg_parser(): help='imu, bvp, imu+bvp: select data cols for input' ) parser.add_argument('-ts', '--test_standing', type=int, - default=0, + default=1, help='1 or 0 input, choose if standing data will be '\ 'recorded or not' ) @@ -1320,7 +1349,7 @@ def arg_parser(): if __name__ == '__main__': np.random.seed(100) - n_subject_max = 2 + n_subject_max = 3 args = arg_parser() # Load command line arguments @@ -1356,7 +1385,7 @@ if __name__ == '__main__': ) else: subjects = [subject_pre_string+str(i).zfill(2) for i in \ - range(1, n_subject_max+1) if i not in imu_issues] + range(2, n_subject_max+1)] rr_func = partial(sens_rr_model, window_size=window_size, @@ -1370,12 +1399,6 @@ if __name__ == '__main__': data_input=data_input, ) - if mdl_str in ['fnn', 'lstm', 'cnn1d', 'elastic', 'ard', 'xgboost']: - for subject in subjects: - rr_func(subject) - else: - ncpu = min(len(subjects), cpu_count()) - with Pool(ncpu) as p: - p.map(rr_func, subjects) - + for subject in subjects: + rr_func(subject) print(args) -- GitLab