# Created on Tue Dec 12 21:55:25 2023
# @author: Richard Arsenault
"""
Objective function package for xhydro, for calibration and model evaluation.
This package provides a flexible suite of popular objective function metrics in
hydrological modelling and hydrological model calibration. The main function
'get_objective_function' returns the value of the desired objective function
while allowing users to customize many aspects:
1- Select the objective function to run;
2- Allow providing a mask to remove certain elements from the objective
function calculation (e.g. for odd/even year calibration, or calibration
on high or low flows only, or any custom setup).
3- Apply a transformation on the flows to modify the behaviour of the
objective function calculation (e.g taking the log, inverse or square
root transform of the flows before computing the objective function).
This function also contains some tools and inputs reserved for the calibration
toolbox, such as the ability to take the negative of the objective function to
maximize instead of minimize a metric according to the needs of the optimizing
algorithm.
"""
# Import packages
import numpy as np
import xarray as xr
__all__ = ["get_objective_function", "transform_flows"]
[docs]
def get_objective_function(
qobs: np.ndarray | xr.Dataset,
qsim: np.ndarray | xr.Dataset,
obj_func: str = "rmse",
take_negative: bool = False,
mask: np.ndarray | xr.Dataset | None = None,
transform: str | None = None,
epsilon: float | None = None,
):
"""
Entrypoint function for the objective function calculation.
More can be added by adding the function to this file and adding the option
in this function.
Parameters
----------
qobs : array_like
Vector containing the Observed streamflow to be used in the objective
function calculation. It is the target to attain.
qsim : array_like
Vector containing the Simulated streamflow as generated by the hydrological model.
It is modified by changing parameters and resumulating the hydrological model.
obj_func : str
String representing the objective function to use in the calibration.
Options must be one of the accepted objective functions:
- "abs_bias" : Absolute value of the "bias" metric
- "abs_pbias": Absolute value of the "pbias" metric
- "abs_volume_error" : Absolute value of the volume_error metric
- "agreement_index": Index of agreement
- "bias" : Bias metric
- "correlation_coeff": Correlation coefficient
- "high_flow_rel_error" : High flow relative error
- "kge" : Kling Gupta Efficiency metric (2009 version)
- "kge_mod" : Kling Gupta Efficiency metric (2012 version)
- "kge_2021" : Kling-Gupta Efficiency (2021 version)
- "lce" : Least-squares combined efficiency
- "low_flow_rel_error" : Low flow relative error
- "mae": Mean Absolute Error metric
- "mare": Mean Absolute Relative Error metric
- "mse" : Mean Square Error metric
- "nse": Nash-Sutcliffe Efficiency metric
- "pbias" : Percent bias (relative bias)
- "persistence_index" : Measure of the relative magnitude of the residual variance to the variance of the errors
- "r2" : r-squared, i.e. square of correlation_coeff.
- "rmse" : Root Mean Square Error
- "rrmse" : Relative Root Mean Square Error (RMSE-to-mean ratio)
- "rsr" : Ratio of RMSE to standard deviation.
- "volume_error": Total volume error over the period.
- "volumetric_efficiency" : Fraction of volume delivered at the proper time
The default is 'rmse'.
take_negative : bool
Used to force the objective function to be multiplied by minus one (-1)
such that it is possible to maximize it if the optimizer is a minimizer
and vice versa. Should always be set to False unless required by an
optimization setup, which is handled internally and transparently to
the user. The default is False.
mask : array_like
Array of 0 or 1 on which the objective function should be applied.
Values of 1 indicate that the value is included in the calculation, and
values of 0 indicate that the value is excluded and will have no impact
on the objective function calculation. This can be useful for specific
optimization strategies such as odd/even year calibration, seasonal
calibration or calibration based on high/low flows. The default is None
and all data are preserved.
transform : str
Indicates the type of transformation required. Can be one of the
following values:
- "sqrt" : Square root transformation of the flows [sqrt(Q)]
- "log" : Logarithmic transformation of the flows [log(Q)]
- "inv" : Inverse transformation of the flows [1/Q]
The default value is "None", by which no transformation is performed.
epsilon : float
Indicates the perturbation to add to the flow time series during a
transformation to avoid division by zero and logarithmic transformation.
The perturbation is equal to: `perturbation = epsilon * mean(qobs)`.
The default value is 0.01.
Returns
-------
float
Value of the selected objective function (obj_fun).
Notes
-----
All data corresponding to NaN values in the observation set are removed from the calculation.
If a mask is passed, it must be the same size as the qsim and qobs vectors.
If any NaNs are present in the qobs dataset, all corresponding data in the qobs, qsim and mask
will be removed prior to passing to the processing function.
"""
# List of available objective functions
obj_func_dict = {
"abs_bias": _abs_bias,
"abs_pbias": _abs_pbias,
"abs_volume_error": _abs_volume_error,
"agreement_index": _agreement_index,
"bias": _bias,
"correlation_coeff": _correlation_coeff,
"high_flow_rel_error": _high_flow_rel_error,
# "high_flow_timing_error": _high_flow_timing_error, # FIXME: Requires xarray to work
"kge": _kge,
"kge_mod": _kge_mod,
"kge_2021": _kge_2021,
"lce": _lce,
"low_flow_rel_error": _low_flow_rel_error,
"mae": _mae,
"mare": _mare,
"mse": _mse,
"nse": _nse,
"pbias": _pbias,
"persistence_index": _persistence_index,
# "persistence_index_weekly": _persistence_index_weekly, # FIXME: Requires xarray to work
"r2": _r2,
"rmse": _rmse,
"rrmse": _rrmse,
"rsr": _rsr,
"volume_error": _volume_error,
"volumetric_efficiency": _volumetric_efficiency,
}
# If we got a dataset, change to np.array
# FIXME: Implement a more flexible method
if isinstance(qsim, xr.Dataset):
qsim = qsim.q
if isinstance(qobs, xr.Dataset):
qobs = qobs.q
# Basic error checking
if qobs.shape[0] != qsim.shape[0]:
raise ValueError("Observed and Simulated flows are not of the same size.")
# Check mask size and contents
if mask is not None:
# Size
if qobs.shape[0] != mask.shape[0]:
raise ValueError("Mask is not of the same size as the streamflow data.")
# All zero or one?
if not np.setdiff1d(np.unique(mask), np.array([0, 1])).size == 0:
raise ValueError("Mask contains values other than 0 or 1. Please modify.")
# Check that the objective function is in the list of available methods
if obj_func not in obj_func_dict:
raise ValueError(
"Selected objective function is currently unavailable. " + "Consider contributing to our project at: " + "github.com/hydrologie/xhydro"
)
# Ensure there are no NaNs in the dataset
if mask is not None:
mask = mask[~np.isnan(qobs)]
qsim = qsim[~np.isnan(qobs)]
qobs = qobs[~np.isnan(qobs)]
# Apply mask before transform
if mask is not None:
qsim = qsim[mask == 1]
qobs = qobs[mask == 1]
mask = mask[mask == 1]
# Transform data if needed
if transform is not None:
qsim, qobs = transform_flows(qsim, qobs, transform, epsilon)
# Compute objective function by switching to the correct algorithm. Ensure
# that the function name is the same as the obj_func tag or this will fail.
function_call = obj_func_dict[obj_func]
obj_fun_val = function_call(qsim, qobs)
# Take the negative value of the Objective Function to return to the
# optimizer.
if take_negative:
obj_fun_val = obj_fun_val * -1
return obj_fun_val
def _get_objfun_minimize_or_maximize(obj_func: str) -> bool:
"""
Check whether the objective function needs to be maximized or minimized.
Returns a boolean value, where True means it should be maximized, and False means that it should be minimized.
Objective functions other than those programmed here will raise an error.
Parameters
----------
obj_func : str
Label of the desired objective function.
Returns
-------
bool
Indicator of if the objective function needs to be maximized.
"""
# Define metrics that need to be maximized:
if obj_func in [
"agreement_index",
"correlation_coeff",
"kge",
"kge_mod",
"kge_2021",
"lce",
"nse",
"persistence_index",
"persistence_index_weekly",
"r2",
"volumetric_efficiency",
]:
maximize = True
# Define the metrics that need to be minimized:
elif obj_func in [
"abs_bias",
"abs_pbias",
"abs_volume_error",
"mae",
"mare",
"mse",
"rmse",
"rrmse",
"rsr",
]:
maximize = False
# Check for the metrics that exist but cannot be used for optimization
elif obj_func in ["bias", "pbias", "volume_error"]:
raise ValueError(f"The '{obj_func}' metric cannot be minimized or maximized. Please use 'abs_{obj_func}' instead.")
elif obj_func in ["high_flow_timing_error", "high_flow_rel_error", "low_flow_rel_error"]:
raise ValueError(f"The {obj_func} metric cannot be used for optimization.")
else:
raise NotImplementedError("The objective function is unknown.")
return maximize
def _get_optimizer_minimize_or_maximize(algorithm: str) -> bool:
"""
Find the direction in which the optimizer searches.
Some optimizers try to maximize the objective function value, and others
try to minimize it. Since our objective functions include some that need to
be maximized and others minimized, it is imperative to ensure that the
optimizer/objective-function pair work in tandem.
Parameters
----------
algorithm : str
Name of the optimizing algorithm.
Returns
-------
bool
Indicator of the direction of the optimizer search, True to maximize.
"""
# Define metrics that need to be maximized:
if algorithm in [
"DDS",
]:
maximize = True
# Define the metrics that need to be minimized:
elif algorithm in [
"SCEUA",
]:
maximize = False
# Any other optimizer at this date
else:
raise NotImplementedError("The optimization algorithm is unknown.")
return maximize
"""
BEGIN OBJECTIVE FUNCTIONS DEFINITIONS
"""
def _abs_bias(qsim: np.ndarray, qobs: np.ndarray) -> float:
"""
Absolute bias metric.
Parameters
----------
qsim : array_like
Simulated streamflow vector.
qobs : array_like
Observed streamflow vector.
Returns
-------
float
Absolute value of the "bias" metric. This metric is useful when calibrating
on the bias, because bias should aim to be 0 but can take large positive
or negative values. Taking the absolute value of the bias will let the
optimizer minimize the value to zero.
Notes
-----
The abs_bias should be MINIMIZED.
"""
return np.abs(_bias(qsim, qobs))
def _abs_pbias(qsim: np.ndarray, qobs: np.ndarray) -> float:
"""
Absolute pbias metric.
Parameters
----------
qsim : array_like
Simulated streamflow vector.
qobs : array_like
Observed streamflow vector.
Returns
-------
float
The absolute value of the "pbias" metric. This metric is useful when
calibrating on the pbias, because pbias should aim to be 0 but can take
large positive or negative values. Taking the absolute value of the
pbias will let the optimizer minimize the value to zero.
Notes
-----
The abs_pbias should be MINIMIZED.
"""
return np.abs(_pbias(qsim, qobs))
def _abs_volume_error(qsim: np.ndarray, qobs: np.ndarray) -> float:
"""
Absolute value of the volume error metric.
Parameters
----------
qsim : array_like
Simulated streamflow vector.
qobs : array_like
Observed streamflow vector.
Returns
-------
float
The absolute value of the "volume_error" metric. This metric is useful
when calibrating on the volume_error, because volume_error should aim
to be 0 but can take large positive or negative values. Taking the
absolute value of the volume_error will let the optimizer minimize the
value to zero.
Notes
-----
The abs_volume_error should be MINIMIZED.
"""
return np.abs(_volume_error(qsim, qobs))
def _agreement_index(qsim: np.ndarray, qobs: np.ndarray) -> float:
"""
Index of agreement metric.
Parameters
----------
qsim : array_like
Simulated streamflow vector.
qobs : array_like
Observed streamflow vector.
Returns
-------
float
The agreement index of Willmott (1981). Varies between 0 and 1.
Notes
-----
The Agreement index should be MAXIMIZED.
"""
# Decompose into clearer chunks
a = np.sum((qobs - qsim) ** 2)
b = np.abs(qsim - np.mean(qobs)) + np.abs(qobs - np.mean(qobs))
c = np.sum(b**2)
return 1 - (a / c)
def _bias(qsim: np.ndarray, qobs: np.ndarray) -> float:
"""
The bias metric.
Parameters
----------
qsim : array_like
Simulated streamflow vector.
qobs : array_like
Observed streamflow vector.
Returns
-------
float
The bias in the simulation. Can be negative or positive and gives the
average error between the observed and simulated flows. This
interpretation uses the definition that a positive bias value means
that the simulation overestimates the true value (as opposed to many
other sources on bias calculations that use the contrary interpretation).
Notes
-----
BIAS SHOULD AIM TO BE ZERO AND SHOULD NOT BE USED FOR CALIBRATION. FOR
CALIBRATION, USE "abs_bias" TO TAKE THE ABSOLUTE VALUE.
"""
return np.mean(qsim - qobs)
def _correlation_coeff(qsim: np.ndarray, qobs: np.ndarray) -> np.ndarray:
"""
Correlation coefficient metric.
Parameters
----------
qsim : array_like
Simulated streamflow vector.
qobs : array_like
Observed streamflow vector.
Returns
-------
array-like
The correlation coefficient.
Notes
-----
The correlation_coeff should be MAXIMIZED.
"""
return np.corrcoef(qobs, qsim)[0, 1]
def _high_flow_rel_error(qobs: np.ndarray, qsim: np.ndarray, exceedance_probability: int = 10) -> float:
"""
High Flow Relative Error.
Relative error for observed flows that are exceeded a given percentage of the time.
Parameters
----------
qsim : np.array
Daily Simulated streamflow data.
qobs : np.array
Daily Observed streamflow data.
exceedance_probability : int
exceedance_probability for high flows, default is 10%.
Returns
-------
float:
Relative error in flow that is exceeded a given percentage of the time.
Notes
-----
High Flow Relative Error should AIM TO BE ZERO, therefore it cannot be used for optimization
References
----------
Sauquet, E., Evin, G., Siauve, S., Aissat, R., Arnaud, P., Bérel, M., ... & Vidal, J. P. (2025).
A large transient multi-scenario multi-model ensemble of future streamflow and groundwater projections in France.
EGUsphere, 2025, 1-41.
"""
thresh = np.nanpercentile(qobs, 100 - exceedance_probability)
# Select only high flow time steps
mask = qobs >= thresh
qobs_high = qobs[mask]
qsim_high = qsim[mask]
return ((qsim_high - qobs_high).sum() / qobs_high.sum()).item()
def _kge(qsim: np.ndarray, qobs: np.ndarray) -> float:
"""
Kling-Gupta efficiency metric (2009 version).
Parameters
----------
qsim : array_like
Simulated streamflow vector.
qobs : array_like
Observed streamflow vector.
Returns
-------
float
The Kling-Gupta Efficiency (KGE) metric of 2009. It can take values
from -inf to 1 (best case).
Notes
-----
The KGE should be MAXIMIZED.
"""
# This pops up a lot, precalculate.
qsim_mean = np.mean(qsim)
qobs_mean = np.mean(qobs)
# Calculate the components of KGE
r_num = np.sum((qsim - qsim_mean) * (qobs - qobs_mean))
r_den = np.sqrt(np.sum((qsim - qsim_mean) ** 2) * np.sum((qobs - qobs_mean) ** 2))
r = r_num / r_den
a = np.std(qsim) / np.std(qobs)
b = np.sum(qsim) / np.sum(qobs)
# Calculate the KGE
kge = 1 - np.sqrt((r - 1) ** 2 + (a - 1) ** 2 + (b - 1) ** 2)
return kge
def _kge_mod(qsim: np.ndarray, qobs: np.ndarray) -> float:
"""
Kling-Gupta efficiency metric (2012 version).
Parameters
----------
qsim : array_like
Simulated streamflow vector.
qobs : array_like
Observed streamflow vector.
Returns
-------
float
The modified Kling-Gupta Efficiency (KGE) metric of 2012. It can take
values from -inf to 1 (best case).
Notes
-----
The kge_mod should be MAXIMIZED.
"""
# These pop up a lot, precalculate
qsim_mean = np.mean(qsim)
qobs_mean = np.mean(qobs)
# Calc KGE components
r_num = np.sum((qsim - qsim_mean) * (qobs - qobs_mean))
r_den = np.sqrt(np.sum((qsim - qsim_mean) ** 2) * np.sum((qobs - qobs_mean) ** 2))
r = r_num / r_den
g = (np.std(qsim) / qsim_mean) / (np.std(qobs) / qobs_mean)
b = np.mean(qsim) / np.mean(qobs)
# Calc the modified KGE metric
kge_mod = 1 - np.sqrt((r - 1) ** 2 + (g - 1) ** 2 + (b - 1) ** 2)
return kge_mod
def _kge_2021(qsim: np.ndarray, qobs: np.ndarray) -> float:
"""
Kling-Gupta efficiency metric version of Tang et al. (2021)
Parameters
----------
qsim : array_like
Simulated streamflow vector.
qobs : array_like
Observed streamflow vector.
Returns
-------
float
The modified Kling-Gupta Efficiency (KGE) modified metric of 2021: KGE".
Notes
-----
The kge_2021 should be MAXIMIZED: values from -inf to 1 (best case).
ref : Tang, G., Clark, M. P., & Papalexiou, S. M. (2021). SC-Earth: A station-based serially complete Earth
dataset from 1950 to 2019. Journal of Climate, 34(16), 6493-6511.
"""
# These pop up a lot, precalculate
qsim_mean = np.mean(qsim)
qobs_mean = np.mean(qobs)
# Calc KGE" components
r_num = np.sum((qsim - qsim_mean) * (qobs - qobs_mean))
r_den = np.sqrt(np.sum((qsim - qsim_mean) ** 2) * np.sum((qobs - qobs_mean) ** 2))
r = r_num / r_den
a = np.std(qsim) / np.std(qobs)
b_n = (np.mean(qsim) - np.mean(qobs)) / np.std(qobs)
# Calc the KGE" metric
kge_2021 = 1 - np.sqrt((a - 1) ** 2 + (b_n) ** 2 + (r - 1) ** 2)
return kge_2021
def _lce(qsim: np.ndarray, qobs: np.ndarray) -> float:
"""
Least-squares combined efficiency:
Performance criterion combining the least-squares regression coefficients from the two regression lines
in both-way regression analysis between simulations and observations
Performance criterion that avoids tendency towards underestimation of flow variability
Parameters
----------
qsim : array_like
Simulated streamflow vector.
qobs : array_like
Observed streamflow vector.
Returns
-------
float
The least-squares combined efficiency.
Notes
-----
The LCE should be MAXIMIZED; values from -inf to 1 (best case).
ref : Lee, J. S., & Choi, H. I. (2022). A rebalanced performance criterion for hydrological model calibration.
Journal of Hydrology, 606, 127372. https://doi.org/10.1016/j.jhydrol.2021.127372
"""
# These pop up a lot, precalculate
qsim_mean = np.mean(qsim)
qobs_mean = np.mean(qobs)
# Calc LCE components
r_num = np.sum((qsim - qsim_mean) * (qobs - qobs_mean))
r_den = np.sqrt(np.sum((qsim - qsim_mean) ** 2) * np.sum((qobs - qobs_mean) ** 2))
r = r_num / r_den
b = np.mean(qsim) / np.mean(qobs)
a = np.std(qsim) / np.std(qobs)
return 1 - np.sqrt((r * a - 1) ** 2 + (r / a - 1) ** 2 + (b - 1) ** 2)
def _low_flow_rel_error(qobs: np.ndarray, qsim: np.ndarray, exceedance_probability: int = 90) -> float:
"""
Low Flow Relative Error.
Relative error for observed flows that are exceeded 90 % of the time.
Parameters
----------
qsim : np.array
Daily Simulated streamflow data.
qobs : np.array
Daily Observed streamflow data.
exceedance_probability : int
exceedance_probability for low flows, default is 90%.
Returns
-------
float:
Relative error in flow that is exceeded 90 % of the time.
Notes
-----
Low Flow Relative Error should AIM TO BE ZERO, therefore it cannot be used for optimization
ref : Sauquet, E., Evin, G., Siauve, S., Aissat, R., Arnaud, P., Bérel, M., ... & Vidal, J. P. (2025).
A large transient multi-scenario multi-model ensemble of future streamflow and groundwater projections in France.
EGUsphere, 2025, 1-41.
"""
threshold = np.nanpercentile(qobs, 100 - exceedance_probability)
mask = qobs >= threshold
qsim_low = qsim[mask]
qobs_low = qobs[mask]
return float(np.nansum(qsim_low - qobs_low) / np.nansum(qobs_low))
def _mae(qsim: np.ndarray, qobs: np.ndarray):
"""
Mean absolute error metric.
Parameters
----------
qsim : array_like
Simulated streamflow vector.
qobs : array_like
Observed streamflow vector.
Returns
-------
float
Mean Absolute Error. It can be interpreted as the average error
(absolute) between observations and simulations for any time step.
Notes
-----
The mae should be MINIMIZED.
"""
return np.mean(np.abs(qsim - qobs))
def _mare(qsim: np.ndarray, qobs: np.ndarray) -> float:
"""
Mean absolute relative error metric.
Parameters
----------
qsim : array_like
Simulated streamflow vector.
qobs : array_like
Observed streamflow vector.
Returns
-------
float
Mean Absolute Relative Error. For streamflow, where qobs is always zero
or positive, the MARE is always positive.
Notes
-----
The mare should be MINIMIZED.
"""
return np.sum(np.abs(qobs - qsim)) / np.sum(qobs)
def _mse(qsim: np.ndarray, qobs: np.ndarray) -> float:
"""
Mean square error metric.
Parameters
----------
qsim : array_like
Simulated streamflow vector.
qobs : array_like
Observed streamflow vector.
Returns
-------
float
Mean Square Error. It is the sum of squared errors for each day divided
by the total number of days. Units are thus squared units, and the best
possible value is 0.
Notes
-----
The mse should be MINIMIZED.
"""
return np.mean((qobs - qsim) ** 2)
def _nse(qsim: np.ndarray, qobs: np.ndarray) -> float:
"""
Nash-Sutcliffe efficiency metric.
Parameters
----------
qsim : array_like
Simulated streamflow vector.
qobs : array_like
Observed streamflow vector.
Returns
-------
float
Nash-Sutcliffe Efficiency (NSE) metric. It can take values from -inf to
1, with 0 being as good as using the mean observed flow as the estimator.
Notes
-----
The nse should be MAXIMIZED.
"""
num = np.sum((qobs - qsim) ** 2)
den = np.sum((qobs - np.mean(qobs)) ** 2)
return 1 - (num / den)
def _pbias(qsim: np.ndarray, qobs: np.ndarray) -> float:
"""
Percent bias metric.
Parameters
----------
qsim : array_like
Simulated streamflow vector.
qobs : array_like
Observed streamflow vector.
Returns
-------
float
Percent bias. Can be negative or positive and gives the average
relative error between the observed and simulated flows. This
interpretation uses the definition that a positive bias value means
that the simulation overestimates the true value (as opposed to many
other sources on bias calculations that use the contrary interpretation).
Notes
-----
PBIAS SHOULD AIM TO BE ZERO AND SHOULD NOT BE USED FOR CALIBRATION. FOR
CALIBRATION, USE "abs_pbias" TO TAKE THE ABSOLUTE VALUE.
"""
return (np.sum(qsim - qobs) / np.sum(qobs)) * 100
def _persistence_index(qsim: np.ndarray, qobs: np.ndarray) -> float:
"""
Persistence index or persistence model efficiency;
Measure of the relative magnitude of the residual variance (noise) to the variance of the errors
obtained by the use of a simple persistence model that assumes “tomorrow's flow will be the same as today's”.
Parameters
----------
qsim : array_like
Simulated streamflow vector.
qobs : array_like
Observed streamflow vector.
Returns
-------
float
Persistence index single value obtained by given time step
Notes
-----
The Persistence index should be MAXIMIZED.
The optimal value is 1.0, and values should be larger than 0.0 to indicate minimally acceptable performance.
Ref: Gupta, H. V., Sorooshian, S., & Yapo, P. O. (1999). Status of automatic calibration for hydrologic models:
comparison with multilevel expert calibration. Journal of Hydrologic Engineering, 4(2), 135-143.
http://dx.doi.org/10.1061/(ASCE)1084-0699(1999)4:2(135).
"""
if len(qobs) < 2:
raise ValueError("At least 2 timesteps are needed to compute persistence index.")
# Remove any rows with NaNs
valid = ~np.isnan(qsim) & ~np.isnan(qobs)
qsim = qsim[valid]
qobs = qobs[valid]
# Align arrays: ignore the first time step
qsim = qsim[1:]
qobs_now = qobs[1:]
qobs_prev = qobs[:-1]
return 1 - np.sum((qsim - qobs_now) ** 2) / np.sum((qobs_prev - qobs_now) ** 2)
def _r2(qsim: np.ndarray, qobs: np.ndarray) -> float:
"""
The r-squred metric.
Parameters
----------
qsim : array_like
Simulated streamflow vector.
qobs : array_like
Observed streamflow vector.
Returns
-------
float
The r-squared (R2) metric equal to the square of the correlation
coefficient.
Notes
-----
The r2 should be MAXIMIZED.
"""
return _correlation_coeff(qsim, qobs) ** 2
def _rmse(qsim: np.ndarray, qobs: np.ndarray) -> float:
"""
Root-mean-square error metric.
Parameters
----------
qsim : array_like
Simulated streamflow vector.
qobs : array_like
Observed streamflow vector.
Returns
-------
float
Root Mean Square Error. Units are the same as the timeseries data
(ex. m3/s). It can take zero or positive values.
Notes
-----
The rmse should be MINIMIZED.
"""
return np.sqrt(np.mean((qobs - qsim) ** 2))
def _rrmse(qsim: np.ndarray, qobs: np.ndarray) -> float:
"""
Relative root-mean-square error (ratio of rmse to mean) metric.
Parameters
----------
qsim : array_like
Simulated streamflow vector.
qobs : array_like
Observed streamflow vector.
Returns
-------
float
Ratio of the RMSE to the mean of the observations. It allows scaling
RMSE values to compare results between time series of different
magnitudes (ex. flows from small and large watersheds). Also known as
the CVRMSE.
Notes
-----
The rrmse should be MINIMIZED.
"""
return _rmse(qsim, qobs) / np.mean(qobs)
def _rsr(qsim: np.ndarray, qobs: np.ndarray):
"""
Ratio of root mean square error to standard deviation metric.
Parameters
----------
qsim : array_like
Simulated streamflow vector.
qobs : array_like
Observed streamflow vector.
Returns
-------
float
Root Mean Square Error (RMSE) divided by the standard deviation of the
observations. Also known as the "Ratio of the Root Mean Square Error to
the Standard Deviation of Observations".
Notes
-----
The rsr should be MINIMIZED.
"""
return _rmse(qobs, qsim) / np.std(qobs)
def _volume_error(qsim: np.ndarray, qobs: np.ndarray) -> float:
"""
Volume error metric.
Parameters
----------
qsim : array_like
Simulated streamflow vector.
qobs : array_like
Observed streamflow vector.
Returns
-------
float
Total error in terms of volume over the entire period. Expressed in
terms of the same units as input data, so for flow rates it is
important to multiply by the duration of the time-step to obtain actual
volumes.
Notes
-----
The volume_error should be MINIMIZED.
"""
return np.sum(qsim - qobs) / np.sum(qobs)
def _volumetric_efficiency(qsim: np.ndarray, qobs: np.ndarray) -> float:
"""
Volumetric efficiency;
Represents the fraction of water delivered at the proper time.
Parameters
----------
qsim : array_like
Simulated streamflow vector.
qobs : array_like
Observed streamflow vector.
Returns
-------
float
Volumetric efficiency (VE)
Notes
-----
The Volumetric efficiency should be MAXIMIZED ; ranges from -inf to 1
For values= 0 : Simulation error equals total observed flow
For negative values : Simulation error exceeds observed volume.
Multiplying by the duration of the time-step computes actual volumes and not values in flow units.
Ref: Criss, R. E., & Winston, W. E. (2008). Do Nash values have value? Discussion and alternate proposals.
Hydrological Processes, 22(14), 2723-2725. http://dx.doi.org/10.1002/hyp.7072.
"""
return 1 - (np.sum(abs(qsim - qobs)) / np.sum(qobs))
# FIXME: Enable when time handling tools are available for the calibration module
# def _persistence_index_weekly(qsim: np.ndarray, qobs: np.ndarray):
# """
# Persistence index or persistence model efficiency based on weekly averages;
# Measure of the relative magnitude of the residual variance (noise) to the variance of the errors
# obtained by the use of a simple persistence model that assumes “Netx weeks flow will be the same as this week's”.
# Parameters
# ----------
# qsim : array_like
# Simulated streamflow vector.
# qobs : array_like
# Observed streamflow vector.
# Returns
# -------
# float
# Persistence index single value based on weekly averages
# the optimal value is 1.0, and values should be larger than 0.0 to indicate minimally acceptable performance.
# Ref: Gupta, H. V., Sorooshian, S., & Yapo, P. O. (1999). Status of automatic calibration for hydrologic models:
# comparison with multilevel expert calibration. Journal of Hydrologic Engineering, 4(2), 135-143.
# http://dx.doi.org/10.1061/(ASCE)1084-0699(1999)4:2(135).
# Notes
# -----
# The weekly persistence index should be MAXIMIZED.
# The optimal value is 1.0, and values should be larger than 0.0 to indicate minimally acceptable performance.
# """
# # FIXME: This should be able to maintain timestamps, timing tool development is required
# # Resample to weekly means (weeks starting on Monday)
# qsim_weekly = qsim.resample(time="W-MON").mean()
# qobs_weekly = qobs.resample(time="W-MON").mean()
# # Remove NaNs from both series simultaneously
# valid = qsim_weekly.notnull() & qobs_weekly.notnull()
# qsim_weekly = qsim_weekly[valid]
# qobs_weekly = qobs_weekly[valid]
# # Need at least two weekly values
# if len(qobs_weekly) < 2:
# raise ValueError("At least 2 weekly averages are needed to compute persistence index.")
# # Compute differences
# qsim = qsim_weekly[1:].values
# qobs_now = qobs_weekly[1:].values
# qobs_prev = qobs_weekly[:-1].values
# # Compute Persistence Index
# denom = np.sum((qobs_prev - qobs_now) ** 2)
# if denom == 0:
# return np.nan # avoid division by zero
# return 1 - np.sum((qsim - qobs_now) ** 2) / denom
# def _high_flow_timing_error(
# qobs: xr.DataArray,
# qsim: xr.DataArray,
# percentile: int = 10,
# ) -> xr.DataArray:
# """
# Timing error between the circular mean of observed high flows DOY and simulated high flows DOY.
# Parameters
# ----------
# qobs : array_like
# Daily Observed streamflow data.
# qsim : array_like
# Daily Simulated streamflow data.
# percentile : int
# frequency percentile for high flows, default is 10%.
# Returns
# -------
# float
# Difference in Julian day occurrence of high flows, e.g. flows that are exceeded 10 % of the time.
# Notes
# -----
# High Flow timing Error should AIM TO BE ZERO
# ref : Gupta, A., Hantush, M. M., Govindaraju, R. S., & Beven, K. (2024). Evaluation of hydrological models at
# gauged and ungauged basins using machine learning-based limits-of-acceptability and hydrological signatures.
# Journal of Hydrology, 641, 131774. https://doi.org/10.1016/j.jhydrol.2024.131774
# """
# # FIXME: This should be able to maintain timestamps, timing tool development is required
# threshold = np.nanpercentile(qobs, 100 - percentile)
# mask1 = qobs >= threshold # Select only high flow time steps for obs flows
# qobs_high = qobs.where(mask1, drop=True)
# date_obs = pd.DatetimeIndex(qobs_high.time.values)
# doy_obs = date_obs.dayofyear
# # circmean for signe DOY
# mean_doy_obs = circmean(doy_obs, high=365, low=1)
# # Select only high flow time steps for sim flows
# mask2 = qsim >= threshold
# qsim_high = qsim.where(mask2, drop=True)
# date_sim = pd.DatetimeIndex(qsim_high.time.values)
# doy_sim = date_sim.dayofyear
# # circmean for signe DOY
# mean_doy_sim = circmean(doy_sim, high=365, low=1)
# return mean_doy_sim - mean_doy_obs