new doc string

cc41ef43 · Raymond Chia · dd9ea695 · cc41ef43 · cc41ef43 · cc41ef43
Commit cc41ef43 authored 1 year ago by Raymond Chia
--- a/logs/singularity_110105.out
+++ b/logs/singularity_110105.out
--- a/logs/singularity_116254.out
+++ b/logs/singularity_116254.out
--- a/logs/singularity_1404675.out
+++ b/logs/singularity_1404675.out
--- a/logs/singularity_1617572.out
+++ b/logs/singularity_1617572.out
--- a/logs/singularity_1823495.out
+++ b/logs/singularity_1823495.out
--- a/logs/singularity_200468.out
+++ b/logs/singularity_200468.out
--- a/logs/singularity_200748.out
+++ b/logs/singularity_200748.out
--- a/logs/singularity_3739913.out
+++ b/logs/singularity_3739913.out
--- a/regress_rr.py
+++ b/regress_rr.py
@@ -574,9 +574,23 @@ def load_tsfresh(xsens_df, project_dir,
                 sens_list:list=['imu', 'bvp'],
                 window_size=12, window_shift=0.2, fs=IMU_FS,
                 overwrite=False, data_cols=None):
+    """
+    Loads the tsfresh pickle file, or generates it if it does not exist for
+    the given configuration
+
+    Arguments
+    ---------
+    xsens_df : pandas.DataFrame
+        synchronised and frequency matched DataFrame with all data and labels
+    
+    Returns
+    -------
+    pd.DataFrame
+    """

-    assert data_cols is not None, "invalid selection for data columns"
+    raise NotImplementedError("To be implemented")

+    assert data_cols is not None, "invalid selection for data columns"
    pkl_file = join(project_dir, 'tsfresh.pkl')
    if exists(pkl_file) and not overwrite:
        return pd.read_pickle(pkl_file)
@@ -602,16 +616,58 @@ def load_tsfresh(xsens_df, project_dir,
    return df_out

 def get_activity_log(subject):
+    """
+    Loads and retrieves the sit and stand file
+
+    Arguments
+    ---------
+    subject: str
+        subject to retrieve (e.g. 'Pilot02', 'S02')
+    
+    Returns
+    -------
+    pd.DataFrame
+    """
    activity_list = get_file_list('activity*.csv', sbj=subject)
    activity_dfs = [pd.read_csv(f) for f in activity_list]
    return pd.concat(activity_dfs, axis=0)

 def get_respiration_log(subject):
+    """
+    Loads and retrieves the respiration calibration events, timestamps,
+    inhale/exhale
+
+    Arguments
+    ---------
+    subject: str
+        subject to retrieve (e.g. 'Pilot02', 'S02')
+    
+    Returns
+    -------
+    pd.DataFrame
+    """
+
    log_list = get_file_list('*.json', sbj=subject)
    log_dfs = [pd.read_json(f) for f in log_list]
    return pd.concat(log_dfs, axis=0)

 def get_cal_data(event_df, xsens_df):
+    """
+    Loads and retrieves the respiration calibration data
+
+    Arguments
+    ---------
+    event_df : pandas.DataFrame
+        timestamp, inhalation, exhalation, and event data from calibration
+        process
+    xsens_df : pandas.DataFrame
+        synchronised and frequency matched DataFrame with all data and labels
+    
+    Returns
+    -------
+    pd.DataFrame
+    """
+
    fmt ="%Y-%m-%d %H.%M.%S" 
    cal_list = []
    cpms = []
@@ -648,6 +704,26 @@ def get_cal_data(event_df, xsens_df):
    return pd.DataFrame(cal_list)

 def get_test_data(cal_df, activity_df, xsens_df, test_standing):
+    """
+    Loads and retrieves the activity timestamps from sitting and standing
+    events
+
+    Arguments
+    ---------
+    cal_df : pandas.DataFrame
+        synchronised and frequency matched respiration calibration data
+    activity_df : pandas.DataFrame
+        timestamps of activity events
+    xsens_df : pandas.DataFrame
+        synchronised and frequency matched DataFrame with all data and labels
+    test_standing : bool
+        boolean to use standing data
+    
+    Returns
+    -------
+    pd.DataFrame
+    """
+
    fmt = "%d/%m/%Y %H:%M:%S"
    start_time = cal_df.iloc[-1]['data'].sec.values[-1]
    data_df = xsens_df[xsens_df.sec > start_time]
@@ -676,6 +752,28 @@ def get_test_data(cal_df, activity_df, xsens_df, test_standing):
    return pd.DataFrame(activity_list)

 def dsp_win_func(w_inds, df, i, cols):
+    """
+    Runs artefact rejection, PCA, and Hernandez DSP for a window of data
+
+    Arguments
+    ---------
+    w_inds : numpy.ndarray
+        set of indexes for a given window
+    df : pandas.DataFrame
+        synchronised and frequency matched DataFrame with all data and labels
+    i : int
+        window index
+    cols : list
+        list of column str
+    
+    Returns
+    -------
+    y_hat : pandas.DataFrame
+        estimated respiration rate from Hernandez method
+    y_out : pandas.DataFrame
+        max PSS frequency and median breathing rate from bioharness summary 
+        file
+    """
    time = df['sec'].values
    if w_inds[-1] == 0: return
    w_df = df.iloc[w_inds]
@@ -686,9 +784,6 @@ def dsp_win_func(w_inds, df, i, cols):
    if diff_chk:
        return

-    # cols = ['acc_x', 'acc_y', 'acc_z',
-    #         'gyr_x', 'gyr_y', 'gyr_z']
-
    data = w_df[cols].values

    if reject_artefact((data-np.mean(data,axis=0))/np.std(data,axis=0)):
@@ -727,6 +822,36 @@ def dsp_win_func(w_inds, df, i, cols):
 # save evaluation metrics in single file that handles the models for the
 # subject and config
 class EvalHandler():
+    """
+    Handles the evaluation metric for each subject and configuration.
+    ...
+
+    Attributes
+    ----------
+    y_true : numpy.ndarray
+        a numpy array of the respiration rate ground truth values from the
+        bioharness
+    y_pred : numpy.ndarray
+        a numpy array of the predicted respiration rate
+    subject : str
+        the subject in format Pilot01, S01 etc.
+    pfh : ProjectFileHandler
+        custom class detailing the directories, metafile, and configurations
+    mdl_str : str
+        a string to inform what model was used
+    overwrite : bool
+        overwrites the evaluations (default False)
+
+    Methods
+    -------
+    load_eval_history()
+        loads the evaluation file
+    save_eval_history()
+        saves the evaluation file
+    update_eval_history()
+        updates the evaluation file using the new entry if there is no matching
+        model or configuration for the given subject
+    """
    def __init__(self, y_true, y_pred, subject, pfh, mdl_str, overwrite=False):
        self.subject = subject
        self.config = pfh.config
@@ -783,7 +908,32 @@ def imu_rr_dsp(subject,
               train_len:int=3,
               test_standing=False,
              ):
-    # window_size, window_shift, intra, inter
+    # TODO: 
+        # implement evaluation saving
+    """Loads, preprocesses, and performs Hernandez digital signal processing
+    pipeline on the selected subject. Uses the specified parameters. Runs on
+    both accelerometer and gyroscope.
+
+    Arguments
+    ---------
+    subject: str
+        specify the subject code (e.g. 'Pilot02', 'S02')
+    window_size : float
+        length of each analysis window (presumably in seconds -- TODO
+        confirm)
+    window_shift : float
+        a portion of the window size between 0 and 1
+    mdl_str : str
+        a string to inform what model was used
+    overwrite : bool
+        overwrites the evaluations, models, and graphs (default False)
+    test_standing : bool
+        boolean to use standing data
+
+    Returns
+    -------
+    None
+    """
    cal_str = 'cpm'
    fs = IMU_FS
    tmp = []
@@ -883,7 +1033,36 @@ def sens_rr_model(subject,
                  test_standing=False,
                  data_input:str='imu+bvp',
                 ):
-    # window_size, window_shift, intra, inter
+    # TODO: 
+        # implement tsfresh
+    """Loads, preprocesses, and trains a select model using the configured
+    settings.
+
+    Arguments
+    ---------
+    subject: str
+        specify the subject code (e.g. 'Pilot02', 'S02')
+    window_size : float
+        length of each analysis window (presumably in seconds -- TODO
+        confirm)
+    window_shift : float
+        a portion of the window size between 0 and 1
+    mdl_str : str
+        a string to inform what model was used
+    overwrite : bool
+        overwrites the evaluations, models, and graphs (default False)
+    feature_method : str
+        choose between 'minirocket', 'tsfresh', or 'None'
+    train_len : int
+        number of minutes to sample from, choose between 1 to 7
+    test_standing : bool
+        boolean to use standing data
+    data_input : str
+        sensors to use, choose from 'imu', 'bvp', 'imu+bvp'
+
+    Returns
+    -------
+    None
+    """
    cal_str = 'cpm'
    tmp = []
    imu_cols = IMU_COLS
@@ -947,7 +1126,6 @@ def sens_rr_model(subject,

    cal_df = get_cal_data(event_df, xsens_df)

-    # TODO: needs to be fixed
    if use_tsfresh:
        xsens_df = load_tsfresh(xsens_df,
                                project_dir,
@@ -1078,6 +1256,8 @@ def sens_rr_model(subject,
        plt.close()

 def arg_parser():
+    """Returns arguments in a Namespace to configure the subject specific model
+    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", '--model', type=str,
                        default='linreg',
@@ -1122,12 +1302,11 @@ def arg_parser():
    return args

 if __name__ == '__main__':
-    # choose either intra or inter subject features to use for model training
-    # '[!M]*'
    np.random.seed(100)
    n_subject_max = 2
    args = arg_parser()

+    # Load command line arguments
    mdl_str        = args.model
    subject        = args.subject
    feature_method = args.feature_method