Vibration Time Series¶
For more details on vibration data see this dedicated page in the reference section.
VibrationTimeSeries(perfdb)
¶
Class used for handling Vibration Time Series. Can be accessed via perfdb.vibration.timeseries.
Parameters:
Source code in echo_postgres/perfdb_root.py
def __init__(self, perfdb: e_pg.PerfDB) -> None:
    """Store the top level PerfDB object on the instance.

    Parameters
    ----------
    perfdb : PerfDB
        Top level object carrying all functionality and the connection handler.
    """
    # kept as a private attribute; the other methods reach the database through it
    self._perfdb: e_pg.PerfDB = perfdb
get(period, object_names=None, manufacturer='Gamesa', data_type='Vibration', sensors=None, acquisition_frequencies=None, variable_names=None, filter_type='and', output_type='DataFrame')
¶
Gets the vibration time series data.
The values will be numpy ndarrays with two dimensions [2, :], where first dimension is the time in seconds (starting at zero) and the second is the value.
Assuming array is a value of one row and column "value", if you want to get time of the array, you can do array[0, :] and for the value itself array[1, :]
Parameters:
-
(period¶DateTimeRange | list[date]) –Can be a DateTimeRange or a list of dates.
- If DateTimeRange, will get the data for the entire range (limiting on start and end)
- If list of dates, will get the data for each date in the list.
-
(object_names¶list[str] | None, default:None) –Names of the objects to check. If None will check for all objects. By default None
-
(manufacturer¶Literal['Gamesa'], default:'Gamesa') –Manufacturer of the wind turbine. Must be Gamesa for time series. By default "Gamesa"
-
(data_type¶Literal['Vibration'], default:'Vibration') –Type of the data to get. Can be one of ["Vibration"]. If "Vibration", will get vibration data. By default "Vibration"
-
(sensors¶list[VIBRATION_GAMESA_ALLOWED_SENSOR_NAMES] | None, default:None) –List of the sensors to get the data for. The options are as shown below:
- Gamesa Vibration: "1 - Generator GS - Radial", "2 - Planetary - Axial", "3 - Main Bearing GS - Radial", "4 - HSS - Radial", "5 - Main Bearing RS - Axial", "6 - HSS - Axial", "7 - Generator RS - Axial", "8 - Generator RS - Radial"
By default, None
-
(acquisition_frequencies¶list[Literal['Low', 'High', 'Filter']] | None, default:None) –Acquisition frequency, only applicable for Gamesa turbines. By default, None
-
(variable_names¶list[Literal['Acceleration - X', 'Acceleration - Y', 'Position - X', 'Position - Y']] | None, default:None) –Names of the variables to filter by. By default None
-
(filter_type¶Literal['and', 'or'], default:'and') –How to treat multiple filters. Can be one of ["and", "or"]. By default "and"
-
(output_type¶Literal['dict', 'DataFrame'], default:'DataFrame') –Output type of the data. Can be one of ["dict", "DataFrame"] By default "DataFrame"
Returns:
-
DataFrame–DataFrame with a MultiIndex[object_name, sensor, acquisition_frequency, timestamp] and columns: value, metadata. The value column contains a numpy ndarray with the time series (2D array with the first dimension as time and the second as value).
-
dict[str, dict[str, dict[str, dict[datetime, dict[str, Any]]]]]–Dictionary in the format {object_name: {sensor: {acquisition_frequency: {datetime: {value: value, metadata: metadata}, ...}, ...}, ...}, ...}
Source code in echo_postgres/vibration_timeseries.py
@validate_call
def get(
    self,
    period: DateTimeRange | list[date],
    object_names: list[str] | None = None,
    manufacturer: Literal["Gamesa"] = "Gamesa",
    data_type: Literal["Vibration"] = "Vibration",
    sensors: list[VIBRATION_GAMESA_ALLOWED_SENSOR_NAMES] | None = None,
    acquisition_frequencies: list[Literal["Low", "High", "Filter"]] | None = None,
    variable_names: list[Literal["Acceleration - X", "Acceleration - Y", "Position - X", "Position - Y"]] | None = None,
    filter_type: Literal["and", "or"] = "and",
    output_type: Literal["dict", "DataFrame"] = "DataFrame",
) -> DataFrame | dict[str, dict[str, dict[str, dict[datetime, dict[str, Any]]]]]:
    """Gets the vibration time series data.

    The values will be numpy ndarrays with two dimensions [2, :], where the first row is the time in seconds
    (starting at zero) and the second row is the value.

    Assuming ``array`` is the content of one row of the "value" column, the time axis is ``array[0, :]`` and the
    measured values are ``array[1, :]``.

    The time axis is built from the sampling time stored in each row's metadata under the key
    ``"sampling_time_<suffix>"``, where ``<suffix>`` is the last "_"-separated token of the raw data name.
    Rows whose metadata does not contain that key are dropped from the result.

    Parameters
    ----------
    period : DateTimeRange | list[date]
        Can be a DateTimeRange or a list of dates.

        - If DateTimeRange, will get the data for the entire range (limiting on start and end)
        - If list of dates, will get the data for each date in the list.
    object_names : list[str] | None, optional
        Names of the objects to check. If None will check for all objects. By default None
    manufacturer : Literal["Gamesa"], optional
        Manufacturer of the wind turbine. Must be Gamesa for time series. By default "Gamesa"
    data_type : Literal["Vibration"], optional
        Type of the data to get. Can be one of ["Vibration"]. If "Vibration", will get vibration data.
        By default "Vibration"
    sensors : list[VIBRATION_GAMESA_ALLOWED_SENSOR_NAMES] | None, optional
        List of the sensors to get the data for. The options are as shown below:

        - Gamesa Vibration: "1 - Generator GS - Radial", "2 - Planetary - Axial", "3 - Main Bearing GS - Radial",
          "4 - HSS - Radial", "5 - Main Bearing RS - Axial", "6 - HSS - Axial", "7 - Generator RS - Axial",
          "8 - Generator RS - Radial"

        By default, None
    acquisition_frequencies : list[Literal["Low", "High", "Filter"]] | None, optional
        Acquisition frequency, only applicable for Gamesa turbines. By default, None
    variable_names : list[Literal["Acceleration - X", "Acceleration - Y", "Position - X", "Position - Y"]] | None, optional
        Names of the variables to filter by. By default None
    filter_type : Literal["and", "or"], optional
        How to treat multiple filters. Can be one of ["and", "or"]. By default "and"
    output_type : Literal["dict", "DataFrame"], optional
        Output type of the data. Can be one of ["dict", "DataFrame"]
        By default "DataFrame"

    Returns
    -------
    DataFrame
        DataFrame with a MultiIndex[object_name, sensor, acquisition_frequency, timestamp] and columns: value,
        metadata. The value column contains a numpy ndarray with the time series (2D array with the first
        dimension as time and the second as value).
    dict[str, dict[str, dict[str, dict[datetime, dict[str, Any]]]]]
        Dictionary in the format
        {object_name: {sensor: {acquisition_frequency: {datetime: {value: value, metadata: metadata}, ...}, ...}, ...}, ...}

    Raises
    ------
    ValueError
        If output_type is not one of ["dict", "DataFrame"].
    """
    # checking arguments
    if output_type not in ["dict", "DataFrame"]:
        raise ValueError(f"output_type must be one of ['dict', 'DataFrame'], not {output_type}")

    _, wanted_names = self._check_get_args(
        object_names=object_names,
        period=period,
        acquisition_frequencies=acquisition_frequencies,
        variable_names=variable_names,
        manufacturer=manufacturer,
        data_type=data_type,
        sensors=sensors,
        filter_type=filter_type,
    )

    # getting the raw data values
    df: DataFrame = self._perfdb.rawdata.values.get(
        object_names=wanted_names["object_names"],
        raw_data_names=wanted_names["raw_data_names"],
        period=period,
        filter_type=filter_type,
    )

    if not df.empty:
        if data_type == "Vibration":
            # metadata key holding the sampling time: "sampling_time_" + last "_"-separated token of raw_data_name
            df["sampling_time_name"] = (
                df.reset_index(drop=False)["raw_data_name"]
                .apply(
                    lambda x: "sampling_time_" + x.split("_")[-1],
                )
                .values
            )

            # getting sampling time (NA when the metadata does not carry the expected key)
            df["sampling_time"] = df[["metadata", "sampling_time_name"]].apply(
                lambda row: row["metadata"][row["sampling_time_name"]] if row["sampling_time_name"] in row["metadata"] else NA,  # noqa: SIM401
                axis=1,
            )

            # dropping rows without sampling time
            df = df.dropna(subset=["sampling_time"])

            # Only build the time axis when rows remain: on an empty frame DataFrame.apply(axis=1) returns an
            # empty DataFrame (not a Series), and assigning that to the single "value" column raises ValueError.
            if not df.empty:
                # adding time dimension to numpy array: row 0 is sample index * sampling time, row 1 the values
                df["value"] = df[["value", "sampling_time"]].apply(
                    lambda row: np.concatenate(
                        (
                            np.expand_dims(np.arange(0, row["value"].shape[0]), axis=0) * row["sampling_time"],
                            np.expand_dims(row["value"], axis=0),
                        ),
                        axis=0,
                    ),
                    axis=1,
                )
    else:
        # nothing was found: add an empty, typed sampling_time column so the column selection below still works
        df["sampling_time"] = NA
        df["sampling_time"] = df["sampling_time"].astype("Float64")

    # keeping only wanted columns
    # NOTE(review): sampling_time is kept here although the documented columns are only value and metadata;
    # confirm whether the name conversion below is expected to drop it.
    df = df[["value", "metadata", "sampling_time"]].copy()

    # converting names
    df = self._perfdb.vibration.spectrum._convert_raw_names_to_sensor_names(  # noqa: SLF001
        df=df,
        manufacturer=manufacturer,
        spectrum_type=None,
    )

    # returning in case of DataFrame output
    if output_type == "DataFrame":
        return df

    # converting to dict: each index tuple becomes one path through the nested dictionaries
    final_result = {}
    for (object_name, sensor, acquisition_frequency, timestamp), values in df.to_dict(orient="index").items():
        by_timestamp = final_result.setdefault(object_name, {}).setdefault(sensor, {}).setdefault(acquisition_frequency, {})
        by_timestamp[timestamp] = values

    return final_result
get_timestamps(object_names=None, period=None, manufacturer='Gamesa', data_type='Vibration', sensors=None, acquisition_frequencies=None, variable_names=None, value_type='date', output_type='DataFrame')
¶
Gets the timestamps/dates where there is vibration time series data available.
If you only want the timestamps as a list, you can set the output_type to 'DataFrame' and then call df["timestamp"].unique().tolist() on the result.
Parameters:
-
(object_names¶list[str] | None, default:None) –Names of the objects to check. If None will check for all objects. By default None
-
(period¶DateTimeRange | None, default:None) –Period to check. If None the entire raw_data_values table will be scanned. By default None
-
(manufacturer¶Literal['Gamesa'], default:'Gamesa') –Manufacturer of the wind turbine. Must be Gamesa for time series. By default "Gamesa"
-
(data_type¶Literal['Vibration'], default:'Vibration') –Type of the data to get. Can be one of ["Vibration"].
-
(sensors¶list[VIBRATION_GAMESA_ALLOWED_SENSOR_NAMES] | None, default:None) –List of the sensors to get the data for. The options are as shown below:
- Gamesa Vibration: "1 - Generator GS - Radial", "2 - Planetary - Axial", "3 - Main Bearing GS - Radial", "4 - HSS - Radial", "5 - Main Bearing RS - Axial", "6 - HSS - Axial", "7 - Generator RS - Axial", "8 - Generator RS - Radial"
By default, None
-
(acquisition_frequencies¶list[Literal['Low', 'High', 'Filter']] | None, default:None) –Acquisition frequency, only applicable for Gamesa turbines. By default, None
-
(variable_names¶list[Literal['Acceleration - X', 'Acceleration - Y', 'Position - X', 'Position - Y']] | None, default:None) –Names of the variables to filter by.
-
(value_type¶Literal['timestamp', 'date'], default:'date') –If timestamp, will return timestamps as datetimes, if date will return as date (removing hour, minute, second).
-
(output_type¶Literal['dict', 'DataFrame'], default:'DataFrame') –Output type of the data. Can be one of ["dict", "DataFrame"] By default "DataFrame"
Returns:
-
DataFrame–DataFrame with columns: object_name, sensor, acquisition_frequency, timestamp. Index can be ignored.
-
dict[str, dict[str, dict[str, list[date | datetime]]]]–Dictionary in the format {object_name: {sensor: {acquisition_frequency: [date | datetime], ...}, ...}, ...}
Source code in echo_postgres/vibration_timeseries.py
@validate_call
def get_timestamps(
    self,
    object_names: list[str] | None = None,
    period: DateTimeRange | None = None,
    manufacturer: Literal["Gamesa"] = "Gamesa",
    data_type: Literal["Vibration"] = "Vibration",
    sensors: list[VIBRATION_GAMESA_ALLOWED_SENSOR_NAMES] | None = None,
    acquisition_frequencies: list[Literal["Low", "High", "Filter"]] | None = None,
    variable_names: list[Literal["Acceleration - X", "Acceleration - Y", "Position - X", "Position - Y"]] | None = None,
    value_type: Literal["timestamp", "date"] = "date",
    output_type: Literal["dict", "DataFrame"] = "DataFrame",
) -> DataFrame | dict[str, dict[str, dict[str, list[date | datetime]]]]:
    """Gets the timestamps/dates where there is vibration time series data available.

    If you only want the timestamps as a list, you can set the output_type to 'DataFrame' and then call
    ``df["timestamp"].unique().tolist()`` on the result.

    Parameters
    ----------
    object_names : list[str] | None, optional
        Names of the objects to check. If None will check for all objects. By default None
    period : DateTimeRange | None, optional
        Period to check. If None the entire raw_data_values table will be scanned. By default None
    manufacturer : Literal["Gamesa"], optional
        Manufacturer of the wind turbine. Must be Gamesa for time series. By default "Gamesa"
    data_type : Literal["Vibration"], optional
        Type of the data to get. Can be one of ["Vibration"]. By default "Vibration"
    sensors : list[VIBRATION_GAMESA_ALLOWED_SENSOR_NAMES] | None, optional
        List of the sensors to get the data for. The options are as shown below:

        - Gamesa Vibration: "1 - Generator GS - Radial", "2 - Planetary - Axial", "3 - Main Bearing GS - Radial",
          "4 - HSS - Radial", "5 - Main Bearing RS - Axial", "6 - HSS - Axial", "7 - Generator RS - Axial",
          "8 - Generator RS - Radial"

        By default, None
    acquisition_frequencies : list[Literal["Low", "High", "Filter"]] | None, optional
        Acquisition frequency, only applicable for Gamesa turbines. By default, None
    variable_names : list[Literal["Acceleration - X", "Acceleration - Y", "Position - X", "Position - Y"]] | None, optional
        Names of the variables to filter by. By default None
    value_type : Literal["timestamp", "date"], optional
        If "timestamp", the query casts the timestamp column to TIMESTAMP; if "date", it casts it to DATE
        (removing hour, minute, second). By default "date"
    output_type : Literal["dict", "DataFrame"], optional
        Output type of the data. Can be one of ["dict", "DataFrame"]
        By default "DataFrame"

    Returns
    -------
    DataFrame
        DataFrame with columns: object_name, sensor, acquisition_frequency, timestamp. Index can be ignored.
    dict[str, dict[str, dict[str, list[date | datetime]]]]
        Dictionary in the format {object_name: {sensor: {acquisition_frequency: [date | datetime], ...}, ...}, ...}

    Raises
    ------
    ValueError
        If output_type is not one of ["dict", "DataFrame"] or value_type is not one of ["date", "timestamp"].
    """
    # checking arguments
    if output_type not in ["dict", "DataFrame"]:
        raise ValueError(f"output_type must be one of ['dict', 'DataFrame'], not {output_type}")
    if value_type not in ["date", "timestamp"]:
        raise ValueError(f"value_type must be one of ['date', 'timestamp']. Got {value_type}")

    # only the WHERE clause is needed here; multiple filters are always combined with "and"
    where, _ = self._check_get_args(
        object_names=object_names,
        period=period,
        acquisition_frequencies=acquisition_frequencies,
        variable_names=variable_names,
        manufacturer=manufacturer,
        data_type=data_type,
        sensors=sensors,
        filter_type="and",
    )

    # building the query; DISTINCT collapses repeated values once the timestamp is cast
    query = sql.Composed(
        [
            sql.SQL("SELECT DISTINCT object_name, raw_data_name, timestamp{type_cast} FROM v_raw_data_values ").format(
                type_cast=sql.SQL("::DATE") if value_type == "date" else sql.SQL("::TIMESTAMP"),
            ),
            where,
            sql.SQL(" ORDER BY object_name, raw_data_name, timestamp"),
        ],
    )

    # executing the query
    # NOTE(review): the timestamp column is requested as datetime64[s] for both value types — confirm that
    # "date" results are meant to come back as midnight datetimes rather than date objects.
    with self._perfdb.conn.reconnect() as conn:
        df: DataFrame = conn.read_to_pandas(
            query,
            dtype={
                "object_name": "string[pyarrow]",
                "raw_data_name": "string[pyarrow]",
                "timestamp": "datetime64[s]",
            },
        )

    # converting names (the helper is called with object_name and raw_data_name as the index)
    df = df.set_index(["object_name", "raw_data_name"])
    df = self._perfdb.vibration.spectrum._convert_raw_names_to_sensor_names(  # noqa: SLF001
        df=df,
        manufacturer=manufacturer,
        spectrum_type=None,
    )
    df = df.reset_index(drop=False)

    if output_type == "DataFrame":
        return df

    # converting to dict: one list of timestamps per (object_name, sensor, acquisition_frequency)
    grouped = df.groupby(["object_name", "sensor", "acquisition_frequency"])["timestamp"].apply(list)
    final_result = {}
    for (object_name, sensor, acquisition_frequency), timestamps in grouped.to_dict().items():
        final_result.setdefault(object_name, {}).setdefault(sensor, {})[acquisition_frequency] = timestamps

    return final_result