Raw Data Definitions
RawDataDefinitions(perfdb)
Class used for handling raw data definitions. Can be accessed via perfdb.rawdata.definitions.
Parameters:
- perfdb (PerfDB) – Top level object carrying all functionality and the connection handler.
Source code in echo_postgres/perfdb_root.py
def __init__(self, perfdb: e_pg.PerfDB) -> None:
    """Keep a reference to the top level PerfDB object on the instance.

    Parameters
    ----------
    perfdb : PerfDB
        Top level object carrying all functionality and the connection handler.
    """
    # Stored privately; the methods of this class reach the database through it.
    self._perfdb: e_pg.PerfDB = perfdb
get(object_names=None, object_models=None, raw_data_names=None, data_source_types=None, filter_type='and', output_type='DataFrame')
Gets the raw data definitions that match the filters.
The most useful keys/columns returned are:
- raw_data_id
- display_name
- description
- data_type_name
- data_source_type_name
Parameters:
- object_names (list[str] | None, default: None) – List of object names to filter the results. If set, will add the models of these objects to the object_models filter. By default None.
- object_models (list[str] | None, default: None) – List of object model names to filter the results. By default None.
- raw_data_names (str | list[str] | None, default: None) – List of raw data definition names to filter the results. If provided as a string, it will be treated as a regex. By default None.
- data_source_types (list[str] | None, default: None) – List of data source type names to filter the results. By default None.
- filter_type (Literal['and', 'or'], default: 'and') – How to treat multiple filters. Can be one of ["and", "or"]. By default "and".
- output_type (Literal['dict', 'DataFrame'], default: 'DataFrame') – Output type of the data. Can be one of ["dict", "DataFrame"]. By default "DataFrame".
Returns:
- DataFrame – If output_type is "DataFrame", returns a DataFrame with the following format: index=MultiIndex["object_model_name", "raw_data_name"], columns=["attribute1", "attribute2", ...]
- dict[str, dict[str, dict[str, Any]]] – If output_type is "dict", returns a dictionary in the format {object_model: {raw_data_name: {attribute1: value1, attribute2: value2, ...}, ...}, ...}
Source code in echo_postgres/rawdata_definitions.py
@validate_call
def get(
    self,
    object_names: list[str] | None = None,
    object_models: list[str] | None = None,
    raw_data_names: str | list[str] | None = None,
    data_source_types: list[str] | None = None,
    filter_type: Literal["and", "or"] = "and",
    output_type: Literal["dict", "DataFrame"] = "DataFrame",
) -> DataFrame | dict[str, dict[str, dict[str, Any]]]:
    """Gets the raw data definitions that match the filters.

    The most useful keys/columns returned are:

    - raw_data_id
    - display_name
    - description
    - data_type_name
    - data_source_type_name

    Parameters
    ----------
    object_names : list[str] | None, optional
        List of object names to filter the results. If set, will add the models of these objects to the object_models filter.
        By default None
    object_models : list[str] | None, optional
        List of object model names to filter the results.
        By default None
    raw_data_names : str | list[str] | None, optional
        List of raw data definition names to filter the results. If provided as a string will be treated as regex.
        By default None
    data_source_types : list[str] | None, optional
        List of data source type names to filter the results.
        By default None
    filter_type : Literal["and", "or"], optional
        How to treat multiple filters. Can be one of ["and", "or"].
        By default "and"
    output_type : Literal["dict", "DataFrame"], optional
        Output type of the data. Can be one of ["dict", "DataFrame"].
        By default "DataFrame"

    Returns
    -------
    DataFrame
        If output_type is "DataFrame", returns a DataFrame with the following format: index=MultiIndex["object_model_name", "raw_data_name"], columns=["attribute1", "attribute2", ...]
    dict[str, dict[str, dict[str, Any]]]
        If output_type is "dict", returns a dictionary in the format {object_model: {raw_data_name: {attribute1: value1, attribute2: value2, ...}, ...}, ...}

    Raises
    ------
    ValueError
        If filter_type or output_type is not one of the accepted values.
    """
    # Validate the literal arguments BEFORE they are consumed below, so an invalid
    # filter_type never reaches the WHERE-clause builder.
    # NOTE(review): the validate_call decorator presumably rejects bad literals first; confirm.
    if filter_type not in ["and", "or"]:
        raise ValueError(f"filter_type must be one of ['and', 'or'], not {filter_type}")
    if output_type not in ["dict", "DataFrame"]:
        raise ValueError(f"output_type must be one of ['dict', 'DataFrame'], not {output_type}")

    # checking inputs and building the WHERE clause
    where = self._check_get_args(
        object_names=object_names,
        object_models=object_models,
        raw_data_names=raw_data_names,
        data_source_types=data_source_types,
        filter_type=filter_type,
    )

    # getting the definitions
    query_parts = [
        sql.SQL("SELECT * FROM performance.v_raw_data_def "),
    ]
    if where:
        query_parts.append(where)
    query_parts.append(sql.SQL(" ORDER BY object_model_name, name"))
    query = sql.Composed(query_parts)

    with self._perfdb.conn.reconnect() as conn:
        df = conn.read_to_pandas(query)

    df = df.rename(columns={"id": "raw_data_id", "name": "raw_data_name"})
    df = df.set_index(["object_model_name", "raw_data_name"])

    if output_type == "dict":
        # getting rid of unnecessary columns
        df = df.drop(columns=["object_model_id"])
        # converting to dictionary keyed by the (obj_model, raw_data_name) index tuples
        result = df.to_dict(orient="index")
        # converting from format {(obj_model, rawdata_name): {attribute: value, ...}, ...} to {obj_model: {rawdata_name: {attribute: value, ...}, ...}, ...}
        final_result = {}
        for (obj_model, raw_data_name), rawdata_data in result.items():
            final_result.setdefault(obj_model, {})[raw_data_name] = rawdata_data
        return final_result

    return df
get_ids(object_names=None, object_models=None, raw_data_names=None, data_source_types=None, filter_type='and')
Gets the IDs of the raw data definitions that match the filters.
Parameters:
- object_names (list[str] | None, default: None) – List of object names to filter the results. If set, will add the models of these objects to the object_models filter. By default None.
- object_models (list[str] | None, default: None) – List of object model names to filter the results. By default None.
- raw_data_names (str | list[str] | None, default: None) – List of raw data definition names to filter the results. If provided as a string, it will be treated as a regex. By default None.
- data_source_types (list[str] | None, default: None) – List of data source type names to filter the results. By default None.
- filter_type (Literal['and', 'or'], default: 'and') – How to treat multiple filters. Can be one of ["and", "or"]. By default "and".
Returns:
- dict[str, dict[str, int]] – Dict in the format {object_model: {raw_data_name: raw_data_id, ...}, ...}
Source code in echo_postgres/rawdata_definitions.py
@validate_call
def get_ids(
    self,
    object_names: list[str] | None = None,
    object_models: list[str] | None = None,
    raw_data_names: str | list[str] | None = None,
    data_source_types: list[str] | None = None,
    filter_type: Literal["and", "or"] = "and",
) -> dict[str, dict[str, int]]:
    """Gets the IDs of the raw data definitions that match the filters.

    Parameters
    ----------
    object_names : list[str] | None, optional
        List of object names to filter the results. If set, will add the models of these objects to the object_models filter.
        By default None
    object_models : list[str] | None, optional
        List of object model names to filter the results.
        By default None
    raw_data_names : str | list[str] | None, optional
        List of raw data definition names to filter the results. If provided as a string will be treated as regex.
        By default None
    data_source_types : list[str] | None, optional
        List of data source type names to filter the results.
        By default None
    filter_type : Literal["and", "or"], optional
        How to treat multiple filters. Can be one of ["and", "or"].
        By default "and"

    Returns
    -------
    dict[str, dict[str, int]]
        Dict in the format {object_model: {raw_data_name: raw_data_id, ...}, ...}

    Raises
    ------
    ValueError
        If filter_type is not one of the accepted values.
    """
    # Validate filter_type BEFORE it is consumed below, so an invalid value never
    # reaches the WHERE-clause builder.
    # NOTE(review): the validate_call decorator presumably rejects bad literals first; confirm.
    if filter_type not in ["and", "or"]:
        raise ValueError(f"filter_type must be one of ['and', 'or'], not {filter_type}")

    # checking inputs and building the WHERE clause
    where = self._check_get_args(
        object_names=object_names,
        object_models=object_models,
        raw_data_names=raw_data_names,
        data_source_types=data_source_types,
        filter_type=filter_type,
    )

    # getting the definitions
    query_parts = [
        sql.SQL("SELECT object_model_name, name, id FROM performance.v_raw_data_def "),
    ]
    if where:
        query_parts.append(where)
    query_parts.append(sql.SQL(" ORDER BY object_model_name, name"))
    query = sql.Composed(query_parts)

    with self._perfdb.conn.reconnect() as conn:
        df = conn.read_to_pandas(query)

    df = df.set_index(["object_model_name", "name"])

    # converting to dictionary keyed by the (obj_model, raw_data_name) index tuples
    result = df["id"].to_dict()
    # converting from format {(obj_model, raw_data_name): rawdata_id, ...} to {obj_model: {raw_data_name: rawdata_id, ...}, ...}
    final_result = {}
    for (obj_model, raw_data_name), rawdata_id in result.items():
        final_result.setdefault(obj_model, {})[raw_data_name] = rawdata_id

    return final_result