Skip to content

Forecast Features - Definitions

ForecastFeatureDefinitions(perfdb)

Class used for handling forecast feature definitions. Can be accessed via perfdb.forecasts.features.definitions.

Parameters:

  • perfdb

    (PerfDB) –

    Top level object carrying all functionality and the connection handler.

Source code in echo_postgres/perfdb_root.py
Python
def __init__(self, perfdb: e_pg.PerfDB) -> None:
    """Initialize the base class by storing the top level PerfDB object.

    Parameters
    ----------
    perfdb : PerfDB
        Top level object carrying all functionality and the connection handler.

    """
    # keep a reference to the top level object on the instance
    self._perfdb: e_pg.PerfDB = perfdb

get(forecast_models=None, feature_names=None, data_source_types=None, filter_type='and', output_type='DataFrame')

Gets all feature definitions.

The most useful keys/columns returned are:

  • display_name
  • description
  • forecast_model_name
  • data_source_type_name
  • name_in_data_source
  • id_in_data_source

Parameters:

  • forecast_models

    (list[str] | None, default: None ) –

    List of forecast model names to filter the results. By default None

  • feature_names

    (list[str] | None, default: None ) –

    List of feature names to filter the results. By default None

  • data_source_types

    (list[str] | None, default: None ) –

    List of data source type names to filter the results. By default None

  • filter_type

    (Literal['and', 'or'], default: 'and' ) –

    How to treat multiple filters. Can be one of ["and", "or"]. By default "and"

  • output_type

    (Literal['dict', 'DataFrame', 'pl.DataFrame'], default: 'DataFrame' ) –

    Output type of the data. Can be one of ["dict", "DataFrame", "pl.DataFrame"]. By default "DataFrame"

Returns:

  • DataFrame

    If output_type is "DataFrame", returns a pandas DataFrame with the following format: index=MultiIndex[forecast_model_name, feature_name], columns=[attribute, ...]

  • DataFrame

    If output_type is "pl.DataFrame", returns a Polars DataFrame

  • dict[str, dict[str, dict[str, Any]]]

    If output_type is "dict", returns a dictionary with the following format: {forecast_model_name: {feature_name: {attribute: value, ...}, ...}, ...}

Source code in echo_postgres/forecast_feature_definitions.py
Python
@validate_call
def get(
    self,
    forecast_models: list[str] | None = None,
    feature_names: list[str] | None = None,
    data_source_types: list[str] | None = None,
    filter_type: Literal["and", "or"] = "and",
    output_type: Literal["dict", "DataFrame", "pl.DataFrame"] = "DataFrame",
) -> pd.DataFrame | pl.DataFrame | dict[str, dict[str, dict[str, Any]]]:
    """Read feature definitions from ``performance.v_forecast_model_features``.

    Rows are ordered by forecast_model_name and feature_name. The
    ``forecast_model_id`` column, when present, is removed before the output
    is converted.

    The most useful keys/columns returned are:

    - display_name
    - description
    - forecast_model_name
    - data_source_type_name
    - name_in_data_source
    - id_in_data_source

    Parameters
    ----------
    forecast_models : list[str] | None, optional
        List of forecast model names to filter the results.
        By default None
    feature_names : list[str] | None, optional
        List of feature names to filter the results.
        By default None
    data_source_types : list[str] | None, optional
        List of data source type names to filter the results.
        By default None
    filter_type : Literal["and", "or"], optional
        How to treat multiple filters. Can be one of ["and", "or"].
        By default "and"
    output_type : Literal["dict", "DataFrame", "pl.DataFrame"], optional
        Output type of the data. Can be one of ["dict", "DataFrame", "pl.DataFrame"].
        By default "DataFrame"

    Returns
    -------
    pd.DataFrame
        If output_type is "DataFrame", returns a pandas DataFrame with the following format: index=MultiIndex[forecast_model_name, feature_name], columns=[attribute, ...]
    pl.DataFrame
        If output_type is "pl.DataFrame", returns a Polars DataFrame
    dict[str, dict[str, dict[str, Any]]]
        If output_type is "dict", returns a dictionary with the following format: {forecast_model_name: {feature_name: {attribute: value, ...}, ...}, ...}
    """
    # the argument checker hands back the WHERE clause (falsy when there is nothing to filter)
    where_clause = self._check_get_args(
        forecast_models=forecast_models,
        feature_names=feature_names,
        data_source_types=data_source_types,
        filter_type=filter_type,
    )

    # assembling the statement: SELECT, optional WHERE, ORDER BY
    statement_parts = [sql.SQL("SELECT * FROM performance.v_forecast_model_features ")]
    if where_clause:
        statement_parts.append(where_clause)
    statement_parts.append(sql.SQL(" ORDER BY forecast_model_name, feature_name"))

    features = self._perfdb.conn.read_to_polars(sql.Composed(statement_parts))

    # the model id is dropped before conversion
    if "forecast_model_id" in features.columns:
        features = features.drop("forecast_model_id")

    return convert_output(
        features,
        output_type,
        index_col=["forecast_model_name", "feature_name"],
        nest_by_index=True,
    )

get_ids(forecast_models=None, feature_names=None, data_source_types=None, filter_type='and')

Gets all feature definitions and their respective ids.

Parameters:

  • forecast_models

    (list[str] | None, default: None ) –

    List of forecast model names to filter the results. By default None

  • feature_names

    (list[str] | None, default: None ) –

    List of feature names to filter the results. By default None

  • data_source_types

    (list[str] | None, default: None ) –

    List of data source type names to filter the results. By default None

  • filter_type

    (Literal['and', 'or'], default: 'and' ) –

    How to treat multiple filters. Can be one of ["and", "or"]. By default "and"

Returns:

  • dict[str, dict[str, int]]

    Dictionary with the following format: {forecast_model_name: {feature_name: feature_id, ...}, ...}

Source code in echo_postgres/forecast_feature_definitions.py
Python
@validate_call
def get_ids(
    self,
    forecast_models: list[str] | None = None,
    feature_names: list[str] | None = None,
    data_source_types: list[str] | None = None,
    filter_type: Literal["and", "or"] = "and",
) -> dict[str, dict[str, int]]:
    """Map every feature definition to its id, grouped by forecast model.

    Reads forecast_model_name, feature_name and feature_id from
    ``performance.v_forecast_model_features``, ordered by forecast model name
    and feature name.

    Parameters
    ----------
    forecast_models : list[str] | None, optional
        List of forecast model names to filter the results.
        By default None
    feature_names : list[str] | None, optional
        List of feature names to filter the results.
        By default None
    data_source_types : list[str] | None, optional
        List of data source type names to filter the results.
        By default None
    filter_type : Literal["and", "or"], optional
        How to treat multiple filters. Can be one of ["and", "or"].
        By default "and"

    Returns
    -------
    dict[str, dict[str, int]]
        Dictionary with the following format:
        {forecast_model_name: {feature_name: feature_id, ...}, ...}
    """
    # the argument checker hands back the WHERE clause (falsy when there is nothing to filter)
    where_clause = self._check_get_args(
        forecast_models=forecast_models,
        feature_names=feature_names,
        data_source_types=data_source_types,
        filter_type=filter_type,
    )

    # assembling the statement: SELECT, optional WHERE, ORDER BY
    statement_parts = [
        sql.SQL("SELECT forecast_model_name, feature_name, feature_id FROM performance.v_forecast_model_features "),
    ]
    if where_clause:
        statement_parts.append(where_clause)
    statement_parts.append(sql.SQL(" ORDER BY forecast_model_name, feature_name"))

    rows = self._perfdb.conn.read_to_polars(sql.Composed(statement_parts))

    # nesting the flat rows: one inner dict per forecast model
    ids_by_model = {}
    for record in rows.iter_rows(named=True):
        model_features = ids_by_model.setdefault(record["forecast_model_name"], {})
        model_features[record["feature_name"]] = record["feature_id"]

    return ids_by_model