Feature Definitions - Attributes¶
FeatureDefinitionAttributes(perfdb)
¶
Class used for handling feature definition attributes. Can be accessed via perfdb.features.definitions.attributes.
Parameters:
- perfdb (PerfDB) – Top level object carrying all functionality and the connection handler.
Source code in echo_postgres/perfdb_root.py
def __init__(self, perfdb: e_pg.PerfDB) -> None:
    """Store the shared PerfDB handle; every subclass is expected to inherit this initializer.

    Parameters
    ----------
    perfdb : PerfDB
        Top level object carrying all functionality and the connection handler.

    """
    # Kept on a private attribute so the methods of this object can reach the
    # connection handler and the sibling accessors through a single reference.
    self._perfdb: e_pg.PerfDB = perfdb
get(feature_names=None, object_names=None, object_models=None, data_source_types=None, attribute_names=None, attribute_values=None, filter_type='and', output_type='dict', values_only=False)
¶
Method to get the attributes of the given features.
The most useful keys/columns returned are:
- data_source_type_name
- attribute_name
- attribute_value
- data_type_name
Parameters:
- feature_names (list[str] | None, default: None) – Names of the features to get the attributes from. If set to None, will get from all. By default None.
- object_names (list[str] | None, default: None) – List of object names to filter the results. If set, the models of these objects are added to the object_models filter. By default None.
- object_models (list[str] | None, default: None) – List of object model names to filter the results. By default None.
- data_source_types (list[str] | None, default: None) – List of data source type names to filter the results. By default None.
- attribute_names (list[str] | None, default: None) – List of attribute names to filter the results. If set to None, will get all. By default None.
- attribute_values (dict[str, Any] | None, default: None) – Dictionary of attribute names and values to filter the results. It must be in the format {attribute_name: attribute_value, ...}. By default None.
- filter_type (Literal['and', 'or'], default: 'and') – How to treat multiple filters. Can be one of ["and", "or"]. By default "and".
- output_type (Literal['dict', 'DataFrame'], default: 'dict') – Output type of the data. Can be one of ["dict", "DataFrame"]. By default "dict".
- values_only (bool, default: False) – If set to True, will only return the values of the attributes, skipping display_name, id, etc. By default False.
Returns:
- dict[str, dict[str, dict[str, Any | dict[str, Any]]]] – In case output_type is "dict", returns a dictionary in the format {object_model_name: {feature_name: {attribute_name: {attribute: value, ...}, ...}, ...}, ...}. If values_only is True, the innermost dictionary will be {attribute_name: value, ...}.
- DataFrame – In case output_type is "DataFrame", returns a DataFrame with the following format: index = MultiIndex[object_model_name, feature_name, attribute_name], columns = [attribute, ...]. If values_only is True, the columns will be ["attribute_value"].
Source code in echo_postgres/feature_definition_attributes.py
@validate_call
def get(
    self,
    feature_names: list[str] | None = None,
    object_names: list[str] | None = None,
    object_models: list[str] | None = None,
    data_source_types: list[str] | None = None,
    attribute_names: list[str] | None = None,
    attribute_values: dict[str, Any] | None = None,
    filter_type: Literal["and", "or"] = "and",
    output_type: Literal["dict", "DataFrame"] = "dict",
    values_only: bool = False,
) -> dict[str, dict[str, dict[str, Any | dict[str, Any]]]] | DataFrame:
    """Method to get the attributes of the given features.

    The most useful keys/columns returned are:

    - data_source_type_name
    - attribute_name
    - attribute_value
    - data_type_name

    Parameters
    ----------
    feature_names : list[str] | None, optional
        Names of the features to get the attributes from. If set to None will get from all. By default None
    object_names : list[str] | None, optional
        List of object names to filter the results. If set, will add the models of these objects to the object_models filter.
        By default None
    object_models : list[str] | None, optional
        List of object model names to filter the results.
        By default None
    data_source_types : list[str] | None, optional
        List of data source type names to filter the results.
        By default None
    attribute_names : list[str] | None, optional
        List of attribute names to filter the results. If set to None will get all. By default None
    attribute_values : dict[str, Any] | None, optional
        Dictionary of attribute names and values to filter the results. It must be in the format {attribute_name: attribute_value, ...}.
        String values are serialized as JSON strings before being compared against the stored value.
        By default None
    filter_type : Literal["and", "or"], optional
        How to treat multiple filters. Can be one of ["and", "or"].
        By default "and"
    output_type : Literal["dict", "DataFrame"], optional
        Output type of the data. Can be one of ["dict", "DataFrame"].
        By default "dict"
    values_only : bool, optional
        If set to True, will only return the values of the attributes, skipping display_name, id, etc.
        By default False

    Returns
    -------
    dict[str, dict[str, dict[str, Any | dict[str, Any]]]]
        In case output_type is "dict", returns a dictionary in the format {object_model_name: {feature_name: {attribute_name: {attribute: value, ...}, ...}, ...}, ...}
        If values_only is True, the innermost dictionary will be {attribute_name: value, ...}
    DataFrame
        In case output_type is "DataFrame", returns a DataFrame with the following format: index = MultiIndex[object_model_name, feature_name, attribute_name], columns = [attribute, ...]
        If values_only is True, the columns will be ["attribute_value"]

    Raises
    ------
    ValueError
        If any name in attribute_names or any key of attribute_values does not exist.

    """
    # local import so this block does not depend on the module-level import list
    import json

    # getting models of the objects
    if object_names:
        obj_models = (
            self._perfdb.objects.instances.get(object_names=object_names, output_type="DataFrame")["object_model_name"]
            .unique()
            .tolist()
        )
        object_models = list(set(object_models + obj_models)) if object_models else obj_models

    # checking if all attribute names are valid and also getting its ids
    # (done exactly once; the ids are reused below when building the WHERE clause)
    existing_attributes = {}
    if attribute_names or attribute_values:
        wanted_attrs = set(attribute_names or []) | set(attribute_values or {})
        existing_attributes = self._perfdb.attributes.get_ids(attribute_names=list(wanted_attrs))
        if missing_attributes := wanted_attrs - set(existing_attributes.keys()):
            raise ValueError(f"The following attribute names do not exist: {missing_attributes}")

    # building the WHERE clause
    joiner = sql.SQL(f" {filter_type.upper()} ")
    where = []
    if feature_names:
        where.append(
            sql.SQL("feature_name IN ({names})").format(names=sql.SQL(", ").join(map(sql.Literal, feature_names))),
        )
    if object_models:
        where.append(
            sql.SQL("object_model_name IN ({names})").format(names=sql.SQL(", ").join(map(sql.Literal, object_models))),
        )
    if data_source_types:
        where.append(
            sql.SQL("data_source_type_name IN ({names})").format(names=sql.SQL(", ").join(map(sql.Literal, data_source_types))),
        )
    if attribute_names:
        # the IN list uses every validated id, i.e. attribute_names plus the keys of attribute_values
        where.append(
            sql.SQL(" attribute_id IN ({attribute_ids}) ").format(
                attribute_ids=sql.SQL(",").join(sql.Literal(aid) for aid in existing_attributes.values()),
            ),
        )
    if attribute_values:
        # NOTE(review): the per-attribute conditions are combined with the same operator as the other
        # filters, so with filter_type="and" and several attribute_values a single row would need to
        # match all of them - confirm this is the intended semantics.
        attr_vals_query = [
            sql.SQL(
                " (attribute_id = {attribute_id} AND attribute_value = {attribute_value}::TEXT::JSONB) ",
            ).format(
                attribute_id=sql.Literal(existing_attributes[an]),
                # json.dumps escapes quotes, backslashes and control characters, so any string becomes a
                # valid JSON string literal (plain wrapping in double quotes breaks on those characters)
                attribute_value=sql.Literal(json.dumps(av, ensure_ascii=False)) if isinstance(av, str) else sql.Literal(av),
            )
            for an, av in attribute_values.items()
        ]
        where.append(joiner.join(attr_vals_query))
    if where:
        where = joiner.join(where)

    # building the query
    if values_only:
        columns = sql.SQL("object_model_name, feature_name, attribute_name, attribute_value, data_type_name")
    else:
        columns = sql.SQL("*")
    query = [sql.SQL("SELECT {values} FROM performance.v_feature_attributes").format(values=columns)]
    if where:
        query.append(sql.SQL(" WHERE "))
        query.append(where)
    query.append(sql.SQL(" ORDER BY object_model_name, feature_name, attribute_name"))
    query = sql.Composed(query)

    # executing the query
    with self._perfdb.conn.reconnect() as conn:
        df = conn.read_to_pandas(query, post_convert="pyarrow")

    # casting the attribute values
    df = cast_attributes(df=df, index_cols=["object_model_name", "feature_name"])
    df = df.set_index(["object_model_name", "feature_name", "attribute_name"])

    # returning the result
    if output_type == "dict":
        # dropping unwanted columns
        if values_only:
            df = df["attribute_value"]
            temp = df.to_dict()
        else:
            temp = df[["attribute_id", "attribute_value", "data_type_id", "data_type_name", "modified_date"]].to_dict(orient="index")
        # converting from format {(object_model, feature_name, attribute_name): value} to {object_model: {feature_name: {attribute_name: value, ...}, ...}, ...}
        output = {}
        for (object_model, feature_name, attribute_name), value in temp.items():
            output.setdefault(object_model, {}).setdefault(feature_name, {})[attribute_name] = value
        return output

    if output_type == "DataFrame" and values_only:
        df = df[["attribute_value"]].copy()
    return df