Source code for xhydro.modelling.obj_funcs

# Created on Tue Dec 12 21:55:25 2023
# @author: Richard Arsenault
"""
Objective function package for xhydro, for calibration and model evaluation.

This package provides a flexible suite of popular objective function metrics in
hydrological modelling and hydrological model calibration. The main function
'get_objective_function' returns the value of the desired objective function
while allowing users to customize many aspects:

    1-  Select the objective function to run;
    2-  Allow providing a mask to remove certain elements from the objective
    function calculation (e.g. for odd/even year calibration, or calibration
    on high or low flows only, or any custom setup).
    3-  Apply a transformation on the flows to modify the behaviour of the
    objective function calculation (e.g taking the log, inverse or square
    root transform of the flows before computing the objective function).

This function also contains some tools and inputs reserved for the calibration
toolbox, such as the ability to take the negative of the objective function to
maximize instead of minimize a metric according to the needs of the optimizing
algorithm.
"""

# Import packages

import numpy as np
import xarray as xr


__all__ = ["get_objective_function", "transform_flows"]


[docs] def get_objective_function( qobs: np.ndarray | xr.Dataset, qsim: np.ndarray | xr.Dataset, obj_func: str = "rmse", take_negative: bool = False, mask: np.ndarray | xr.Dataset | None = None, transform: str | None = None, epsilon: float | None = None, ): """ Entrypoint function for the objective function calculation. More can be added by adding the function to this file and adding the option in this function. Parameters ---------- qobs : array_like Vector containing the Observed streamflow to be used in the objective function calculation. It is the target to attain. qsim : array_like Vector containing the Simulated streamflow as generated by the hydrological model. It is modified by changing parameters and resumulating the hydrological model. obj_func : str String representing the objective function to use in the calibration. Options must be one of the accepted objective functions: - "abs_bias" : Absolute value of the "bias" metric - "abs_pbias": Absolute value of the "pbias" metric - "abs_volume_error" : Absolute value of the volume_error metric - "agreement_index": Index of agreement - "bias" : Bias metric - "correlation_coeff": Correlation coefficient - "high_flow_rel_error" : High flow relative error - "kge" : Kling Gupta Efficiency metric (2009 version) - "kge_mod" : Kling Gupta Efficiency metric (2012 version) - "kge_2021" : Kling-Gupta Efficiency (2021 version) - "lce" : Least-squares combined efficiency - "low_flow_rel_error" : Low flow relative error - "mae": Mean Absolute Error metric - "mare": Mean Absolute Relative Error metric - "mse" : Mean Square Error metric - "nse": Nash-Sutcliffe Efficiency metric - "pbias" : Percent bias (relative bias) - "persistence_index" : Measure of the relative magnitude of the residual variance to the variance of the errors - "r2" : r-squared, i.e. square of correlation_coeff. - "rmse" : Root Mean Square Error - "rrmse" : Relative Root Mean Square Error (RMSE-to-mean ratio) - "rsr" : Ratio of RMSE to standard deviation. - "volume_error": Total volume error over the period. - "volumetric_efficiency" : Fraction of volume delivered at the proper time The default is 'rmse'. take_negative : bool Used to force the objective function to be multiplied by minus one (-1) such that it is possible to maximize it if the optimizer is a minimizer and vice versa. Should always be set to False unless required by an optimization setup, which is handled internally and transparently to the user. The default is False. mask : array_like Array of 0 or 1 on which the objective function should be applied. Values of 1 indicate that the value is included in the calculation, and values of 0 indicate that the value is excluded and will have no impact on the objective function calculation. This can be useful for specific optimization strategies such as odd/even year calibration, seasonal calibration or calibration based on high/low flows. The default is None and all data are preserved. transform : str Indicates the type of transformation required. Can be one of the following values: - "sqrt" : Square root transformation of the flows [sqrt(Q)] - "log" : Logarithmic transformation of the flows [log(Q)] - "inv" : Inverse transformation of the flows [1/Q] The default value is "None", by which no transformation is performed. epsilon : float Indicates the perturbation to add to the flow time series during a transformation to avoid division by zero and logarithmic transformation. The perturbation is equal to: `perturbation = epsilon * mean(qobs)`. The default value is 0.01. Returns ------- float Value of the selected objective function (obj_fun). Notes ----- All data corresponding to NaN values in the observation set are removed from the calculation. If a mask is passed, it must be the same size as the qsim and qobs vectors. If any NaNs are present in the qobs dataset, all corresponding data in the qobs, qsim and mask will be removed prior to passing to the processing function. """ # List of available objective functions obj_func_dict = { "abs_bias": _abs_bias, "abs_pbias": _abs_pbias, "abs_volume_error": _abs_volume_error, "agreement_index": _agreement_index, "bias": _bias, "correlation_coeff": _correlation_coeff, "high_flow_rel_error": _high_flow_rel_error, # "high_flow_timing_error": _high_flow_timing_error, # FIXME: Requires xarray to work "kge": _kge, "kge_mod": _kge_mod, "kge_2021": _kge_2021, "lce": _lce, "low_flow_rel_error": _low_flow_rel_error, "mae": _mae, "mare": _mare, "mse": _mse, "nse": _nse, "pbias": _pbias, "persistence_index": _persistence_index, # "persistence_index_weekly": _persistence_index_weekly, # FIXME: Requires xarray to work "r2": _r2, "rmse": _rmse, "rrmse": _rrmse, "rsr": _rsr, "volume_error": _volume_error, "volumetric_efficiency": _volumetric_efficiency, } # If we got a dataset, change to np.array # FIXME: Implement a more flexible method if isinstance(qsim, xr.Dataset): qsim = qsim.q if isinstance(qobs, xr.Dataset): qobs = qobs.q # Basic error checking if qobs.shape[0] != qsim.shape[0]: raise ValueError("Observed and Simulated flows are not of the same size.") # Check mask size and contents if mask is not None: # Size if qobs.shape[0] != mask.shape[0]: raise ValueError("Mask is not of the same size as the streamflow data.") # All zero or one? if not np.setdiff1d(np.unique(mask), np.array([0, 1])).size == 0: raise ValueError("Mask contains values other than 0 or 1. Please modify.") # Check that the objective function is in the list of available methods if obj_func not in obj_func_dict: raise ValueError( "Selected objective function is currently unavailable. " + "Consider contributing to our project at: " + "github.com/hydrologie/xhydro" ) # Ensure there are no NaNs in the dataset if mask is not None: mask = mask[~np.isnan(qobs)] qsim = qsim[~np.isnan(qobs)] qobs = qobs[~np.isnan(qobs)] # Apply mask before transform if mask is not None: qsim = qsim[mask == 1] qobs = qobs[mask == 1] mask = mask[mask == 1] # Transform data if needed if transform is not None: qsim, qobs = transform_flows(qsim, qobs, transform, epsilon) # Compute objective function by switching to the correct algorithm. Ensure # that the function name is the same as the obj_func tag or this will fail. function_call = obj_func_dict[obj_func] obj_fun_val = function_call(qsim, qobs) # Take the negative value of the Objective Function to return to the # optimizer. if take_negative: obj_fun_val = obj_fun_val * -1 return obj_fun_val
def _get_objfun_minimize_or_maximize(obj_func: str) -> bool: """ Check whether the objective function needs to be maximized or minimized. Returns a boolean value, where True means it should be maximized, and False means that it should be minimized. Objective functions other than those programmed here will raise an error. Parameters ---------- obj_func : str Label of the desired objective function. Returns ------- bool Indicator of if the objective function needs to be maximized. """ # Define metrics that need to be maximized: if obj_func in [ "agreement_index", "correlation_coeff", "kge", "kge_mod", "kge_2021", "lce", "nse", "persistence_index", "persistence_index_weekly", "r2", "volumetric_efficiency", ]: maximize = True # Define the metrics that need to be minimized: elif obj_func in [ "abs_bias", "abs_pbias", "abs_volume_error", "mae", "mare", "mse", "rmse", "rrmse", "rsr", ]: maximize = False # Check for the metrics that exist but cannot be used for optimization elif obj_func in ["bias", "pbias", "volume_error"]: raise ValueError(f"The '{obj_func}' metric cannot be minimized or maximized. Please use 'abs_{obj_func}' instead.") elif obj_func in ["high_flow_timing_error", "high_flow_rel_error", "low_flow_rel_error"]: raise ValueError(f"The {obj_func} metric cannot be used for optimization.") else: raise NotImplementedError("The objective function is unknown.") return maximize def _get_optimizer_minimize_or_maximize(algorithm: str) -> bool: """ Find the direction in which the optimizer searches. Some optimizers try to maximize the objective function value, and others try to minimize it. Since our objective functions include some that need to be maximized and others minimized, it is imperative to ensure that the optimizer/objective-function pair work in tandem. Parameters ---------- algorithm : str Name of the optimizing algorithm. Returns ------- bool Indicator of the direction of the optimizer search, True to maximize. """ # Define metrics that need to be maximized: if algorithm in [ "DDS", ]: maximize = True # Define the metrics that need to be minimized: elif algorithm in [ "SCEUA", ]: maximize = False # Any other optimizer at this date else: raise NotImplementedError("The optimization algorithm is unknown.") return maximize
[docs] def transform_flows( qsim: np.ndarray, qobs: np.ndarray, transform: str | None = None, epsilon: float = 0.01, ) -> tuple[np.ndarray, np.ndarray]: """ Transform flows before computing the objective function. It is used to transform flows such that the objective function is computed on a transformed flow metric rather than on the original units of flow (ex: inverse, log-transformed, square-root) Parameters ---------- qsim : array_like Simulated streamflow vector. qobs : array_like Observed streamflow vector. transform : str, optional Indicates the type of transformation required. Can be one of the following values: - "sqrt" : Square root transformation of the flows [sqrt(Q)] - "log" : Logarithmic transformation of the flows [log(Q)] - "inv" : Inverse transformation of the flows [1/Q] The default value is "None", by which no transformation is performed. epsilon : float Indicates the perturbation to add to the flow time series during a transformation to avoid division by zero and logarithmic transformation. The perturbation is equal to: `perturbation = epsilon * mean(qobs)`. The default value is 0.01. Returns ------- qsim : array_like Transformed simulated flow according to user request. qobs : array_like Transformed observed flow according to user request. """ # Quick check if transform is not None: if transform not in ["log", "inv", "sqrt"]: raise NotImplementedError("Flow transformation method not recognized.") # Transform the flow series if required if transform == "log": # log transformation epsilon = epsilon * np.nanmean(qobs) qobs, qsim = np.log(qobs + epsilon), np.log(qsim + epsilon) elif transform == "inv": # inverse transformation epsilon = epsilon * np.nanmean(qobs) qobs, qsim = 1.0 / (qobs + epsilon), 1.0 / (qsim + epsilon) elif transform == "sqrt": # square root transformation qobs, qsim = np.sqrt(qobs), np.sqrt(qsim) # Return the flows after transformation (or original if no transform) return qsim, qobs
""" BEGIN OBJECTIVE FUNCTIONS DEFINITIONS """ def _abs_bias(qsim: np.ndarray, qobs: np.ndarray) -> float: """ Absolute bias metric. Parameters ---------- qsim : array_like Simulated streamflow vector. qobs : array_like Observed streamflow vector. Returns ------- float Absolute value of the "bias" metric. This metric is useful when calibrating on the bias, because bias should aim to be 0 but can take large positive or negative values. Taking the absolute value of the bias will let the optimizer minimize the value to zero. Notes ----- The abs_bias should be MINIMIZED. """ return np.abs(_bias(qsim, qobs)) def _abs_pbias(qsim: np.ndarray, qobs: np.ndarray) -> float: """ Absolute pbias metric. Parameters ---------- qsim : array_like Simulated streamflow vector. qobs : array_like Observed streamflow vector. Returns ------- float The absolute value of the "pbias" metric. This metric is useful when calibrating on the pbias, because pbias should aim to be 0 but can take large positive or negative values. Taking the absolute value of the pbias will let the optimizer minimize the value to zero. Notes ----- The abs_pbias should be MINIMIZED. """ return np.abs(_pbias(qsim, qobs)) def _abs_volume_error(qsim: np.ndarray, qobs: np.ndarray) -> float: """ Absolute value of the volume error metric. Parameters ---------- qsim : array_like Simulated streamflow vector. qobs : array_like Observed streamflow vector. Returns ------- float The absolute value of the "volume_error" metric. This metric is useful when calibrating on the volume_error, because volume_error should aim to be 0 but can take large positive or negative values. Taking the absolute value of the volume_error will let the optimizer minimize the value to zero. Notes ----- The abs_volume_error should be MINIMIZED. """ return np.abs(_volume_error(qsim, qobs)) def _agreement_index(qsim: np.ndarray, qobs: np.ndarray) -> float: """ Index of agreement metric. Parameters ---------- qsim : array_like Simulated streamflow vector. qobs : array_like Observed streamflow vector. Returns ------- float The agreement index of Willmott (1981). Varies between 0 and 1. Notes ----- The Agreement index should be MAXIMIZED. """ # Decompose into clearer chunks a = np.sum((qobs - qsim) ** 2) b = np.abs(qsim - np.mean(qobs)) + np.abs(qobs - np.mean(qobs)) c = np.sum(b**2) return 1 - (a / c) def _bias(qsim: np.ndarray, qobs: np.ndarray) -> float: """ The bias metric. Parameters ---------- qsim : array_like Simulated streamflow vector. qobs : array_like Observed streamflow vector. Returns ------- float The bias in the simulation. Can be negative or positive and gives the average error between the observed and simulated flows. This interpretation uses the definition that a positive bias value means that the simulation overestimates the true value (as opposed to many other sources on bias calculations that use the contrary interpretation). Notes ----- BIAS SHOULD AIM TO BE ZERO AND SHOULD NOT BE USED FOR CALIBRATION. FOR CALIBRATION, USE "abs_bias" TO TAKE THE ABSOLUTE VALUE. """ return np.mean(qsim - qobs) def _correlation_coeff(qsim: np.ndarray, qobs: np.ndarray) -> np.ndarray: """ Correlation coefficient metric. Parameters ---------- qsim : array_like Simulated streamflow vector. qobs : array_like Observed streamflow vector. Returns ------- array-like The correlation coefficient. Notes ----- The correlation_coeff should be MAXIMIZED. """ return np.corrcoef(qobs, qsim)[0, 1] def _high_flow_rel_error(qobs: np.ndarray, qsim: np.ndarray, exceedance_probability: int = 10) -> float: """ High Flow Relative Error. Relative error for observed flows that are exceeded a given percentage of the time. Parameters ---------- qsim : np.array Daily Simulated streamflow data. qobs : np.array Daily Observed streamflow data. exceedance_probability : int exceedance_probability for high flows, default is 10%. Returns ------- float: Relative error in flow that is exceeded a given percentage of the time. Notes ----- High Flow Relative Error should AIM TO BE ZERO, therefore it cannot be used for optimization References ---------- Sauquet, E., Evin, G., Siauve, S., Aissat, R., Arnaud, P., Bérel, M., ... & Vidal, J. P. (2025). A large transient multi-scenario multi-model ensemble of future streamflow and groundwater projections in France. EGUsphere, 2025, 1-41. """ thresh = np.nanpercentile(qobs, 100 - exceedance_probability) # Select only high flow time steps mask = qobs >= thresh qobs_high = qobs[mask] qsim_high = qsim[mask] return ((qsim_high - qobs_high).sum() / qobs_high.sum()).item() def _kge(qsim: np.ndarray, qobs: np.ndarray) -> float: """ Kling-Gupta efficiency metric (2009 version). Parameters ---------- qsim : array_like Simulated streamflow vector. qobs : array_like Observed streamflow vector. Returns ------- float The Kling-Gupta Efficiency (KGE) metric of 2009. It can take values from -inf to 1 (best case). Notes ----- The KGE should be MAXIMIZED. """ # This pops up a lot, precalculate. qsim_mean = np.mean(qsim) qobs_mean = np.mean(qobs) # Calculate the components of KGE r_num = np.sum((qsim - qsim_mean) * (qobs - qobs_mean)) r_den = np.sqrt(np.sum((qsim - qsim_mean) ** 2) * np.sum((qobs - qobs_mean) ** 2)) r = r_num / r_den a = np.std(qsim) / np.std(qobs) b = np.sum(qsim) / np.sum(qobs) # Calculate the KGE kge = 1 - np.sqrt((r - 1) ** 2 + (a - 1) ** 2 + (b - 1) ** 2) return kge def _kge_mod(qsim: np.ndarray, qobs: np.ndarray) -> float: """ Kling-Gupta efficiency metric (2012 version). Parameters ---------- qsim : array_like Simulated streamflow vector. qobs : array_like Observed streamflow vector. Returns ------- float The modified Kling-Gupta Efficiency (KGE) metric of 2012. It can take values from -inf to 1 (best case). Notes ----- The kge_mod should be MAXIMIZED. """ # These pop up a lot, precalculate qsim_mean = np.mean(qsim) qobs_mean = np.mean(qobs) # Calc KGE components r_num = np.sum((qsim - qsim_mean) * (qobs - qobs_mean)) r_den = np.sqrt(np.sum((qsim - qsim_mean) ** 2) * np.sum((qobs - qobs_mean) ** 2)) r = r_num / r_den g = (np.std(qsim) / qsim_mean) / (np.std(qobs) / qobs_mean) b = np.mean(qsim) / np.mean(qobs) # Calc the modified KGE metric kge_mod = 1 - np.sqrt((r - 1) ** 2 + (g - 1) ** 2 + (b - 1) ** 2) return kge_mod def _kge_2021(qsim: np.ndarray, qobs: np.ndarray) -> float: """ Kling-Gupta efficiency metric version of Tang et al. (2021) Parameters ---------- qsim : array_like Simulated streamflow vector. qobs : array_like Observed streamflow vector. Returns ------- float The modified Kling-Gupta Efficiency (KGE) modified metric of 2021: KGE". Notes ----- The kge_2021 should be MAXIMIZED: values from -inf to 1 (best case). ref : Tang, G., Clark, M. P., & Papalexiou, S. M. (2021). SC-Earth: A station-based serially complete Earth dataset from 1950 to 2019. Journal of Climate, 34(16), 6493-6511. """ # These pop up a lot, precalculate qsim_mean = np.mean(qsim) qobs_mean = np.mean(qobs) # Calc KGE" components r_num = np.sum((qsim - qsim_mean) * (qobs - qobs_mean)) r_den = np.sqrt(np.sum((qsim - qsim_mean) ** 2) * np.sum((qobs - qobs_mean) ** 2)) r = r_num / r_den a = np.std(qsim) / np.std(qobs) b_n = (np.mean(qsim) - np.mean(qobs)) / np.std(qobs) # Calc the KGE" metric kge_2021 = 1 - np.sqrt((a - 1) ** 2 + (b_n) ** 2 + (r - 1) ** 2) return kge_2021 def _lce(qsim: np.ndarray, qobs: np.ndarray) -> float: """ Least-squares combined efficiency: Performance criterion combining the least-squares regression coefficients from the two regression lines in both-way regression analysis between simulations and observations Performance criterion that avoids tendency towards underestimation of flow variability Parameters ---------- qsim : array_like Simulated streamflow vector. qobs : array_like Observed streamflow vector. Returns ------- float The least-squares combined efficiency. Notes ----- The LCE should be MAXIMIZED; values from -inf to 1 (best case). ref : Lee, J. S., & Choi, H. I. (2022). A rebalanced performance criterion for hydrological model calibration. Journal of Hydrology, 606, 127372. https://doi.org/10.1016/j.jhydrol.2021.127372 """ # These pop up a lot, precalculate qsim_mean = np.mean(qsim) qobs_mean = np.mean(qobs) # Calc LCE components r_num = np.sum((qsim - qsim_mean) * (qobs - qobs_mean)) r_den = np.sqrt(np.sum((qsim - qsim_mean) ** 2) * np.sum((qobs - qobs_mean) ** 2)) r = r_num / r_den b = np.mean(qsim) / np.mean(qobs) a = np.std(qsim) / np.std(qobs) return 1 - np.sqrt((r * a - 1) ** 2 + (r / a - 1) ** 2 + (b - 1) ** 2) def _low_flow_rel_error(qobs: np.ndarray, qsim: np.ndarray, exceedance_probability: int = 90) -> float: """ Low Flow Relative Error. Relative error for observed flows that are exceeded 90 % of the time. Parameters ---------- qsim : np.array Daily Simulated streamflow data. qobs : np.array Daily Observed streamflow data. exceedance_probability : int exceedance_probability for low flows, default is 90%. Returns ------- float: Relative error in flow that is exceeded 90 % of the time. Notes ----- Low Flow Relative Error should AIM TO BE ZERO, therefore it cannot be used for optimization ref : Sauquet, E., Evin, G., Siauve, S., Aissat, R., Arnaud, P., Bérel, M., ... & Vidal, J. P. (2025). A large transient multi-scenario multi-model ensemble of future streamflow and groundwater projections in France. EGUsphere, 2025, 1-41. """ threshold = np.nanpercentile(qobs, 100 - exceedance_probability) mask = qobs >= threshold qsim_low = qsim[mask] qobs_low = qobs[mask] return float(np.nansum(qsim_low - qobs_low) / np.nansum(qobs_low)) def _mae(qsim: np.ndarray, qobs: np.ndarray): """ Mean absolute error metric. Parameters ---------- qsim : array_like Simulated streamflow vector. qobs : array_like Observed streamflow vector. Returns ------- float Mean Absolute Error. It can be interpreted as the average error (absolute) between observations and simulations for any time step. Notes ----- The mae should be MINIMIZED. """ return np.mean(np.abs(qsim - qobs)) def _mare(qsim: np.ndarray, qobs: np.ndarray) -> float: """ Mean absolute relative error metric. Parameters ---------- qsim : array_like Simulated streamflow vector. qobs : array_like Observed streamflow vector. Returns ------- float Mean Absolute Relative Error. For streamflow, where qobs is always zero or positive, the MARE is always positive. Notes ----- The mare should be MINIMIZED. """ return np.sum(np.abs(qobs - qsim)) / np.sum(qobs) def _mse(qsim: np.ndarray, qobs: np.ndarray) -> float: """ Mean square error metric. Parameters ---------- qsim : array_like Simulated streamflow vector. qobs : array_like Observed streamflow vector. Returns ------- float Mean Square Error. It is the sum of squared errors for each day divided by the total number of days. Units are thus squared units, and the best possible value is 0. Notes ----- The mse should be MINIMIZED. """ return np.mean((qobs - qsim) ** 2) def _nse(qsim: np.ndarray, qobs: np.ndarray) -> float: """ Nash-Sutcliffe efficiency metric. Parameters ---------- qsim : array_like Simulated streamflow vector. qobs : array_like Observed streamflow vector. Returns ------- float Nash-Sutcliffe Efficiency (NSE) metric. It can take values from -inf to 1, with 0 being as good as using the mean observed flow as the estimator. Notes ----- The nse should be MAXIMIZED. """ num = np.sum((qobs - qsim) ** 2) den = np.sum((qobs - np.mean(qobs)) ** 2) return 1 - (num / den) def _pbias(qsim: np.ndarray, qobs: np.ndarray) -> float: """ Percent bias metric. Parameters ---------- qsim : array_like Simulated streamflow vector. qobs : array_like Observed streamflow vector. Returns ------- float Percent bias. Can be negative or positive and gives the average relative error between the observed and simulated flows. This interpretation uses the definition that a positive bias value means that the simulation overestimates the true value (as opposed to many other sources on bias calculations that use the contrary interpretation). Notes ----- PBIAS SHOULD AIM TO BE ZERO AND SHOULD NOT BE USED FOR CALIBRATION. FOR CALIBRATION, USE "abs_pbias" TO TAKE THE ABSOLUTE VALUE. """ return (np.sum(qsim - qobs) / np.sum(qobs)) * 100 def _persistence_index(qsim: np.ndarray, qobs: np.ndarray) -> float: """ Persistence index or persistence model efficiency; Measure of the relative magnitude of the residual variance (noise) to the variance of the errors obtained by the use of a simple persistence model that assumes “tomorrow's flow will be the same as today's”. Parameters ---------- qsim : array_like Simulated streamflow vector. qobs : array_like Observed streamflow vector. Returns ------- float Persistence index single value obtained by given time step Notes ----- The Persistence index should be MAXIMIZED. The optimal value is 1.0, and values should be larger than 0.0 to indicate minimally acceptable performance. Ref: Gupta, H. V., Sorooshian, S., & Yapo, P. O. (1999). Status of automatic calibration for hydrologic models: comparison with multilevel expert calibration. Journal of Hydrologic Engineering, 4(2), 135-143. http://dx.doi.org/10.1061/(ASCE)1084-0699(1999)4:2(135). """ if len(qobs) < 2: raise ValueError("At least 2 timesteps are needed to compute persistence index.") # Remove any rows with NaNs valid = ~np.isnan(qsim) & ~np.isnan(qobs) qsim = qsim[valid] qobs = qobs[valid] # Align arrays: ignore the first time step qsim = qsim[1:] qobs_now = qobs[1:] qobs_prev = qobs[:-1] return 1 - np.sum((qsim - qobs_now) ** 2) / np.sum((qobs_prev - qobs_now) ** 2) def _r2(qsim: np.ndarray, qobs: np.ndarray) -> float: """ The r-squred metric. Parameters ---------- qsim : array_like Simulated streamflow vector. qobs : array_like Observed streamflow vector. Returns ------- float The r-squared (R2) metric equal to the square of the correlation coefficient. Notes ----- The r2 should be MAXIMIZED. """ return _correlation_coeff(qsim, qobs) ** 2 def _rmse(qsim: np.ndarray, qobs: np.ndarray) -> float: """ Root-mean-square error metric. Parameters ---------- qsim : array_like Simulated streamflow vector. qobs : array_like Observed streamflow vector. Returns ------- float Root Mean Square Error. Units are the same as the timeseries data (ex. m3/s). It can take zero or positive values. Notes ----- The rmse should be MINIMIZED. """ return np.sqrt(np.mean((qobs - qsim) ** 2)) def _rrmse(qsim: np.ndarray, qobs: np.ndarray) -> float: """ Relative root-mean-square error (ratio of rmse to mean) metric. Parameters ---------- qsim : array_like Simulated streamflow vector. qobs : array_like Observed streamflow vector. Returns ------- float Ratio of the RMSE to the mean of the observations. It allows scaling RMSE values to compare results between time series of different magnitudes (ex. flows from small and large watersheds). Also known as the CVRMSE. Notes ----- The rrmse should be MINIMIZED. """ return _rmse(qsim, qobs) / np.mean(qobs) def _rsr(qsim: np.ndarray, qobs: np.ndarray): """ Ratio of root mean square error to standard deviation metric. Parameters ---------- qsim : array_like Simulated streamflow vector. qobs : array_like Observed streamflow vector. Returns ------- float Root Mean Square Error (RMSE) divided by the standard deviation of the observations. Also known as the "Ratio of the Root Mean Square Error to the Standard Deviation of Observations". Notes ----- The rsr should be MINIMIZED. """ return _rmse(qobs, qsim) / np.std(qobs) def _volume_error(qsim: np.ndarray, qobs: np.ndarray) -> float: """ Volume error metric. Parameters ---------- qsim : array_like Simulated streamflow vector. qobs : array_like Observed streamflow vector. Returns ------- float Total error in terms of volume over the entire period. Expressed in terms of the same units as input data, so for flow rates it is important to multiply by the duration of the time-step to obtain actual volumes. Notes ----- The volume_error should be MINIMIZED. """ return np.sum(qsim - qobs) / np.sum(qobs) def _volumetric_efficiency(qsim: np.ndarray, qobs: np.ndarray) -> float: """ Volumetric efficiency; Represents the fraction of water delivered at the proper time. Parameters ---------- qsim : array_like Simulated streamflow vector. qobs : array_like Observed streamflow vector. Returns ------- float Volumetric efficiency (VE) Notes ----- The Volumetric efficiency should be MAXIMIZED ; ranges from -inf to 1 For values= 0 : Simulation error equals total observed flow For negative values : Simulation error exceeds observed volume. Multiplying by the duration of the time-step computes actual volumes and not values in flow units. Ref: Criss, R. E., & Winston, W. E. (2008). Do Nash values have value? Discussion and alternate proposals. Hydrological Processes, 22(14), 2723-2725. http://dx.doi.org/10.1002/hyp.7072. """ return 1 - (np.sum(abs(qsim - qobs)) / np.sum(qobs)) # FIXME: Enable when time handling tools are available for the calibration module # def _persistence_index_weekly(qsim: np.ndarray, qobs: np.ndarray): # """ # Persistence index or persistence model efficiency based on weekly averages; # Measure of the relative magnitude of the residual variance (noise) to the variance of the errors # obtained by the use of a simple persistence model that assumes “Netx weeks flow will be the same as this week's”. # Parameters # ---------- # qsim : array_like # Simulated streamflow vector. # qobs : array_like # Observed streamflow vector. # Returns # ------- # float # Persistence index single value based on weekly averages # the optimal value is 1.0, and values should be larger than 0.0 to indicate minimally acceptable performance. # Ref: Gupta, H. V., Sorooshian, S., & Yapo, P. O. (1999). Status of automatic calibration for hydrologic models: # comparison with multilevel expert calibration. Journal of Hydrologic Engineering, 4(2), 135-143. # http://dx.doi.org/10.1061/(ASCE)1084-0699(1999)4:2(135). # Notes # ----- # The weekly persistence index should be MAXIMIZED. # The optimal value is 1.0, and values should be larger than 0.0 to indicate minimally acceptable performance. # """ # # FIXME: This should be able to maintain timestamps, timing tool development is required # # Resample to weekly means (weeks starting on Monday) # qsim_weekly = qsim.resample(time="W-MON").mean() # qobs_weekly = qobs.resample(time="W-MON").mean() # # Remove NaNs from both series simultaneously # valid = qsim_weekly.notnull() & qobs_weekly.notnull() # qsim_weekly = qsim_weekly[valid] # qobs_weekly = qobs_weekly[valid] # # Need at least two weekly values # if len(qobs_weekly) < 2: # raise ValueError("At least 2 weekly averages are needed to compute persistence index.") # # Compute differences # qsim = qsim_weekly[1:].values # qobs_now = qobs_weekly[1:].values # qobs_prev = qobs_weekly[:-1].values # # Compute Persistence Index # denom = np.sum((qobs_prev - qobs_now) ** 2) # if denom == 0: # return np.nan # avoid division by zero # return 1 - np.sum((qsim - qobs_now) ** 2) / denom # def _high_flow_timing_error( # qobs: xr.DataArray, # qsim: xr.DataArray, # percentile: int = 10, # ) -> xr.DataArray: # """ # Timing error between the circular mean of observed high flows DOY and simulated high flows DOY. # Parameters # ---------- # qobs : array_like # Daily Observed streamflow data. # qsim : array_like # Daily Simulated streamflow data. # percentile : int # frequency percentile for high flows, default is 10%. # Returns # ------- # float # Difference in Julian day occurrence of high flows, e.g. flows that are exceeded 10 % of the time. # Notes # ----- # High Flow timing Error should AIM TO BE ZERO # ref : Gupta, A., Hantush, M. M., Govindaraju, R. S., & Beven, K. (2024). Evaluation of hydrological models at # gauged and ungauged basins using machine learning-based limits-of-acceptability and hydrological signatures. # Journal of Hydrology, 641, 131774. https://doi.org/10.1016/j.jhydrol.2024.131774 # """ # # FIXME: This should be able to maintain timestamps, timing tool development is required # threshold = np.nanpercentile(qobs, 100 - percentile) # mask1 = qobs >= threshold # Select only high flow time steps for obs flows # qobs_high = qobs.where(mask1, drop=True) # date_obs = pd.DatetimeIndex(qobs_high.time.values) # doy_obs = date_obs.dayofyear # # circmean for signe DOY # mean_doy_obs = circmean(doy_obs, high=365, low=1) # # Select only high flow time steps for sim flows # mask2 = qsim >= threshold # qsim_high = qsim.where(mask2, drop=True) # date_sim = pd.DatetimeIndex(qsim_high.time.values) # doy_sim = date_sim.dayofyear # # circmean for signe DOY # mean_doy_sim = circmean(doy_sim, high=365, low=1) # return mean_doy_sim - mean_doy_obs