Source code for vbi.feature_extraction.calc_features

import os
import vbi
import sys
import tqdm
import importlib
import numpy as np
import pandas as pd
from multiprocessing import Pool
import vbi.feature_extraction.features
from vbi.feature_extraction.features_settings import Data_F



[docs]
def calc_features(
    ts: np.ndarray,
    fs: float,
    cfg: dict,
    preprocess=None,
    preprocess_args: dict = {},
    verbose: bool = False,
):
    """
    Extract features from time series data.

    Parameters
    ----------
    ts : np.ndarray
        Time series data
    fs : int, float
        Sampling frequency
    cfg : dict
        Dictionary of features configurations
    preprocess : function
        Function for preprocessing the time series
    preprocess_args : dictionary
        Arguments for preprocessing function

    Returns
    -------
    features : list of numpy arrays

    """

    features_path = cfg["features_path"] if ("features_path" in cfg.keys()) else None

    if features_path:
        module_name = features_path.split(os.sep)[-1][:-3]
        sys.path.append(features_path[: -len(features_path.split(os.sep)[-1]) - 1])
        exec("import " + module_name)
        importlib.reload(sys.modules[features_path.split(os.sep)[-1][:-3]])
        exec("from " + module_name + " import *")

    # module = sys.modules[module_name]
    # print(module.calc_mean)
    # print(module.calc_mean([1,2,3], 1, 2))

    def length(x):
        return (len(x)) if (len(x) > 0) else 0

    labels = []
    features = []
    info = {}

    domain = list(cfg.keys())
    # remove features_path from domain if exists
    if "features_path" in domain:
        domain.remove("features_path")

    for _type in domain:
        domain_feats = cfg[_type]
        for fe in domain_feats:
            if cfg[_type][fe]["use"] == "yes":
                c = length(features)
                func_name = fe
                func = cfg[_type][fe]["function"]
                params = cfg[_type][fe]["parameters"]
                
                if "verbose" in params.keys():
                    params["verbose"] = verbose

                if params is None:
                    params = {}

                if "fs" in params.keys():
                    params["fs"] = fs

                if preprocess is not None:
                    ts = preprocess(ts, **preprocess_args)
                val, lab = eval(func)(ts, **params)

                if isinstance(val, (np.ndarray, list)):
                    labels.extend(lab)
                    features.extend(val)
                else:
                    labels.append(func_name)
                    features.append(val)
                info[func_name] = {"index": [c, length(features)]}

    return features, labels, info




[docs]
def extract_features(
    ts: np.ndarray, fs: float, cfg: dict, output_type=Data_F, **kwargs
):
    """
    Extract features from time series data

    Parameters
    ----------
    ts : list of np.ndarray [[n_regions x n_samples]]
        Input from which the features are extracted
    fs : int, float
        Sampling frequency
    cfg : dictionary
        Dictionary of features to extract
    output_format : string
        Output format, either
        'list' (list of numpy arrays)
        'dataframe' (pandas dataframe)
        (default is 'list')

    **kwargs
    - n_workers : int
        Number of workers for parallelization, default is 1
        Parallelization is done by ensembles (first dimension of ts)
    - dtype : type
        Data type of the features extracted, default is np.float32
    - verbose : boolean
        If True, print the some information
    - preprocess : function
        Function for preprocessing the time series
    - preprocess_args : dictionary
        Arguments for preprocessing function
    - n_trial: int
        Number of trials

    Returns
    -------
    Data Object with the following attributes:
    values: list of numpy arrays or pandas dataframe
        extracted features
    labels: list of strings
        List of labels of the features
    info: dictionary
        Dictionary with the information of the features extracted

    """

    labels = []
    features = []

    n_workers = kwargs.get("n_workers", 1)
    dtype = kwargs.get("dtype", np.float32)
    verbose = kwargs.get("verbose", True)
    preprocess = kwargs.get("preprocess", None)
    preprocess_args = kwargs.get("preprocess_args", {})
    n_trial = kwargs.get("n_trial", len(ts))

    def update_bar(verbose):
        if verbose:
            pbar.update()
        else:
            pass

    # ts, n_trial = prepare_input(ts)
    labels = None
    info = None

    if n_workers == 1:
        features = []
        for i in tqdm.tqdm(range(n_trial), disable=not verbose):
            values, _labels, _info = calc_features(
                ts[i], fs, cfg, preprocess, preprocess_args
            )
            features.append(np.array(values).astype(dtype))
            if (labels is None) and (not np.isnan(values).any()):
                labels = _labels
                info = _info
    else:
        for i in range(n_trial):
            values, _labels, _info = calc_features(
                ts[i],
                fs,
                cfg,
                preprocess=preprocess,
                preprocess_args=preprocess_args
            )
            if (labels is None) and (not np.isnan(values).any()):
                labels = _labels
                info = _info
                break
        with Pool(processes=n_workers) as pool:
            with tqdm.tqdm(total=n_trial, disable=not verbose) as pbar:
                async_res = [
                    pool.apply_async(
                        calc_features,
                        args=(ts[i], fs, cfg, preprocess, preprocess_args),
                        # kwds=dict(kwargs),
                        callback=update_bar,
                    )
                    for i in range(n_trial)
                ]
                features = [np.array(res.get()[0]).astype(dtype) for res in async_res]

    if output_type == "dataframe":
        df = pd.DataFrame(features)
        if labels is not None:
            df.columns = labels
        return df
    elif output_type == "list":
        return features, labels

    data = Data_F(values=features, labels=labels, info=info)

    return data




[docs]
def extract_features_df(ts: np.ndarray, fs: float, cfg: dict, **kwargs):
    """
    Extract features from time series data and return a pandas dataframe

    Parameters
    ----------
    ts : list of np.ndarray [[n_regions x n_samples]]
        Input from which the features are extracted
    fs : int, float
        Sampling frequency
    cfg : dictionary
        Dictionary of features to extract

    **kwargs
    - n_workers : int
        Number of workers for parallelization, default is 1
        Parallelization is done by ensembles (first dimension of ts)
    - dtype : type
        Data type of the features extracted, default is np.float32
    - verbose : boolean
        If True, print the some information
    - preprocess : function
        Function for preprocessing the time series
    - preprocess_args : dictionary
        Arguments for preprocessing function

    Returns
    -------

    Data Object with the following attributes:
    - values: pandas dataframe
        extracted features
    - labels: list of strings
        List of labels of the features
    - info: dictionary
        Dictionary with the information of the features extracted
    """
    return extract_features(ts, fs, cfg, "dataframe", **kwargs)




[docs]
def extract_features_list(ts, fs, cfg, **kwargs):
    """
    extract features from time series data and return a list of features and labels

    Parameters
    ----------
    ts : list of np.ndarray [[n_regions x n_samples]]
        Input from which the features are extracted
    fs : int, float
        Sampling frequency
    cfg : dictionary
        Dictionary of features to extract

    **kwargs
    - n_workers : int
        Number of workers for parallelization, default is 1
        Parallelization is done by ensembles (first dimension of ts)
    - dtype : type
        Data type of the features extracted, default is np.float32
    - verbose : boolean
        If True, print the some information
    - preprocess : function
        Function for preprocessing the time series
    - preprocess_args : dictionary
        Arguments for preprocessing function

    Returns
    -------
    Data Object with the following attributes:
    - values: list of numpy arrays
        extracted features
    - labels: list of strings
        List of labels of the features
    - info: dictionary
        Dictionary with the information of the features extracted
    """
    return extract_features(ts, fs, cfg, "list", **kwargs)