Source code for mvtk.supervisor.processing

import pandas as pd
import numpy as np
import public


@public.add
def replace_nulls(df, replace, column_names):
    """Fill null values in selected columns with one replacement value.

    Args:
        df: Object providing ``fillna`` (presumably a pandas DataFrame --
            confirm against callers).
        replace: Value substituted for nulls in each listed column.
        column_names: Iterable of column labels whose nulls are filled.

    Returns:
        Whatever ``df.fillna`` returns when given a ``{column: replace}``
        mapping built from ``column_names``.
    """
    # Every listed column maps to the same replacement value.
    fill_values = dict.fromkeys(column_names, replace)
    return df.fillna(fill_values)
@public.add
def normalize_ts_columns(df, column_names):
    """Normalize timestamp column values.

    Each label in ``column_names`` is handed to ``normalize_ts_column``
    together with ``df``; that function rewrites the column on ``df``
    itself.

    Args:
        df: DataFrame holding the timestamp columns.
        column_names: Iterable of column labels to normalize.

    Returns:
        The same ``df`` object that was passed in.
    """
    for name in column_names:
        normalize_ts_column(df, name)
    return df
# Convert a timestamp's time of day (HH:mm:ss) to seconds:
# pandas timedelta parses the time-formatted string and converts it to seconds.
# Divide the result by the total number of seconds in a day (86400);
# this normalizes the timestamp to a number between 0 and 1.
# Round the value to 5 decimal places.
@public.add
def normalize_ts_column(df, column_name):
    """Replace a datetime column with its time of day as a fraction of a day.

    The column is formatted as ``HH:MM:SS`` (so the date part and any
    sub-second part are dropped), parsed as a timedelta, and converted to
    seconds. Non-negative values are divided by 86400 and rounded to 5
    decimal places. Values that come out as NaN (e.g. from missing
    timestamps) are set to -1 and left unscaled.

    Args:
        df: DataFrame containing ``column_name``; the column must support
            the ``.dt`` accessor.
        column_name: Label of the column to normalize.

    Returns:
        The same ``df`` object, with ``column_name`` overwritten.
    """
    seconds_per_day = 86400

    def _as_day_fraction(seconds):
        # Negative entries are the -1 null marker and pass through unchanged.
        if seconds >= 0:
            return round(seconds / seconds_per_day, 5)
        return seconds

    clock_strings = df[column_name].dt.strftime("%H:%M:%S")
    elapsed_seconds = pd.to_timedelta(clock_strings).dt.total_seconds()
    # Mark missing values with -1 before scaling so they stay distinguishable.
    elapsed_seconds = elapsed_seconds.replace(np.nan, -1)
    df[column_name] = elapsed_seconds.apply(_as_day_fraction)
    return df