Forecast Features - Definitions¶
ForecastFeatureDefinitions(perfdb)
¶
Class used for handling forecast feature definitions. Can be accessed via perfdb.forecasts.features.definitions.
Parameters:
-
(perfdb¶PerfDB) –Top level object carrying all functionality and the connection handler.
Source code in echo_postgres/perfdb_root.py
def __init__(self, perfdb: e_pg.PerfDB) -> None:
    """Keep a reference to the top-level PerfDB object.

    Parameters
    ----------
    perfdb : PerfDB
        Top level object carrying all functionality and the connection handler.

    """
    # Kept as a private attribute so later calls can reach the shared PerfDB object.
    self._perfdb: e_pg.PerfDB = perfdb
get(forecast_models=None, feature_names=None, data_source_types=None, filter_type='and', output_type='DataFrame')
¶
Gets all feature definitions.
The most useful keys/columns returned are:
- display_name
- description
- forecast_model_name
- data_source_type_name
- name_in_data_source
- id_in_data_source
Parameters:
-
(forecast_models¶list[str] | None, default:None) –List of forecast model names to filter the results. By default None
-
(feature_names¶list[str] | None, default:None) –List of feature names to filter the results. By default None
-
(data_source_types¶list[str] | None, default:None) –List of data source type names to filter the results. By default None
-
(filter_type¶Literal['and', 'or'], default:'and') –How to treat multiple filters. Can be one of ["and", "or"]. By default "and"
-
(output_type¶Literal['dict', 'DataFrame', 'pl.DataFrame'], default:'DataFrame') –Output type of the data. Can be one of ["dict", "DataFrame", "pl.DataFrame"]. By default "DataFrame"
Returns:
-
pd.DataFrame–If output_type is "DataFrame", returns a pandas DataFrame with the following format: index=MultiIndex[forecast_model_name, feature_name], columns=[attribute, ...]
-
pl.DataFrame–If output_type is "pl.DataFrame", returns a Polars DataFrame
-
dict[str, dict[str, dict[str, Any]]]–If output_type is "dict", returns a dictionary with the following format: {forecast_model_name: {feature_name: {attribute: value, ...}, ...}, ...}
Source code in echo_postgres/forecast_feature_definitions.py
@validate_call
def get(
    self,
    forecast_models: list[str] | None = None,
    feature_names: list[str] | None = None,
    data_source_types: list[str] | None = None,
    filter_type: Literal["and", "or"] = "and",
    output_type: Literal["dict", "DataFrame", "pl.DataFrame"] = "DataFrame",
) -> pd.DataFrame | pl.DataFrame | dict[str, dict[str, dict[str, Any]]]:
    """Fetch forecast feature definitions, optionally filtered.

    Rows are read from the ``performance.v_forecast_model_features`` view,
    ordered by forecast model name and then feature name.

    The most useful keys/columns returned are:

    - display_name
    - description
    - forecast_model_name
    - data_source_type_name
    - name_in_data_source
    - id_in_data_source

    Parameters
    ----------
    forecast_models : list[str] | None, optional
        Forecast model names used to filter the results.
        By default None
    feature_names : list[str] | None, optional
        Feature names used to filter the results.
        By default None
    data_source_types : list[str] | None, optional
        Data source type names used to filter the results.
        By default None
    filter_type : Literal["and", "or"], optional
        How multiple filters are combined. Can be one of ["and", "or"].
        By default "and"
    output_type : Literal["dict", "DataFrame", "pl.DataFrame"], optional
        Output type of the data. Can be one of ["dict", "DataFrame", "pl.DataFrame"].
        By default "DataFrame"

    Returns
    -------
    pd.DataFrame
        If output_type is "DataFrame", a pandas DataFrame with the following format:
        index=MultiIndex[forecast_model_name, feature_name], columns=[attribute, ...]
    pl.DataFrame
        If output_type is "pl.DataFrame", a Polars DataFrame
    dict[str, dict[str, dict[str, Any]]]
        If output_type is "dict", a dictionary with the following format:
        {forecast_model_name: {feature_name: {attribute: value, ...}, ...}, ...}

    """
    # The argument checker hands back the filter clause to splice into the query.
    filter_clause = self._check_get_args(
        forecast_models=forecast_models,
        feature_names=feature_names,
        data_source_types=data_source_types,
        filter_type=filter_type,
    )

    # Assemble SELECT [+ filter] + ORDER BY; the filter is only included when present.
    select_part = sql.SQL("SELECT * FROM performance.v_forecast_model_features ")
    order_part = sql.SQL(" ORDER BY forecast_model_name, feature_name")
    if filter_clause:
        pieces = [select_part, filter_clause, order_part]
    else:
        pieces = [select_part, order_part]
    statement = sql.Composed(pieces)

    features = self._perfdb.conn.read_to_polars(statement)

    # The model id is dropped before conversion so it does not show up in the output.
    if "forecast_model_id" in features.columns:
        features = features.drop("forecast_model_id")

    return convert_output(
        features,
        output_type,
        index_col=["forecast_model_name", "feature_name"],
        nest_by_index=True,
    )
get_ids(forecast_models=None, feature_names=None, data_source_types=None, filter_type='and')
¶
Gets all feature definitions and their respective ids.
Parameters:
-
(forecast_models¶list[str] | None, default:None) –List of forecast model names to filter the results. By default None
-
(feature_names¶list[str] | None, default:None) –List of feature names to filter the results. By default None
-
(data_source_types¶list[str] | None, default:None) –List of data source type names to filter the results. By default None
-
(filter_type¶Literal['and', 'or'], default:'and') –How to treat multiple filters. Can be one of ["and", "or"]. By default "and"
Returns:
-
dict[str, dict[str, int]]–Dictionary with the following format: {forecast_model_name: {feature_name: feature_id, ...}, ...}
Source code in echo_postgres/forecast_feature_definitions.py
@validate_call
def get_ids(
    self,
    forecast_models: list[str] | None = None,
    feature_names: list[str] | None = None,
    data_source_types: list[str] | None = None,
    filter_type: Literal["and", "or"] = "and",
) -> dict[str, dict[str, int]]:
    """Map each forecast model's feature names to their ids.

    Rows are read from the ``performance.v_forecast_model_features`` view,
    ordered by forecast model name and then feature name.

    Parameters
    ----------
    forecast_models : list[str] | None, optional
        Forecast model names used to filter the results.
        By default None
    feature_names : list[str] | None, optional
        Feature names used to filter the results.
        By default None
    data_source_types : list[str] | None, optional
        Data source type names used to filter the results.
        By default None
    filter_type : Literal["and", "or"], optional
        How multiple filters are combined. Can be one of ["and", "or"].
        By default "and"

    Returns
    -------
    dict[str, dict[str, int]]
        Dictionary with the following format:
        {forecast_model_name: {feature_name: feature_id, ...}, ...}

    """
    # The argument checker hands back the filter clause to splice into the query.
    filter_clause = self._check_get_args(
        forecast_models=forecast_models,
        feature_names=feature_names,
        data_source_types=data_source_types,
        filter_type=filter_type,
    )

    # Assemble SELECT [+ filter] + ORDER BY; the filter is only included when present.
    select_part = sql.SQL("SELECT forecast_model_name, feature_name, feature_id FROM performance.v_forecast_model_features ")
    order_part = sql.SQL(" ORDER BY forecast_model_name, feature_name")
    if filter_clause:
        pieces = [select_part, filter_clause, order_part]
    else:
        pieces = [select_part, order_part]
    statement = sql.Composed(pieces)

    rows = self._perfdb.conn.read_to_polars(statement)

    # Build the nested {model: {feature: id}} mapping one row at a time.
    ids_by_model = {}
    for record in rows.iter_rows(named=True):
        model_name = record["forecast_model_name"]
        if model_name not in ids_by_model:
            ids_by_model[model_name] = {}
        ids_by_model[model_name][record["feature_name"]] = record["feature_id"]
    return ids_by_model