Source code for xhydro.modelling.obj_funcs

# Created on Tue Dec 12 21:55:25 2023
# @author: Richard Arsenault
"""
Objective function package for xhydro, for calibration and model evaluation.

This package provides a flexible suite of popular objective function metrics in
hydrological modelling and hydrological model calibration. The main function
'get_objective_function' returns the value of the desired objective function
while allowing users to customize many aspects:

    1-  Select the objective function to run;
    2-  Allow providing a mask to remove certain elements from the objective
    function calculation (e.g. for odd/even year calibration, or calibration
    on high or low flows only, or any custom setup).
    3-  Apply a transformation on the flows to modify the behaviour of the
    objective function calculation (e.g taking the log, inverse or square
    root transform of the flows before computing the objective function).

This function also contains some tools and inputs reserved for the calibration
toolbox, such as the ability to take the negative of the objective function to
maximize instead of minimize a metric according to the needs of the optimizing
algorithm.
"""

# Import packages

import numpy as np
import xarray as xr


__all__ = ["get_objective_function", "transform_flows"]



[docs]
def get_objective_function(
    qobs: np.ndarray | xr.Dataset,
    qsim: np.ndarray | xr.Dataset,
    obj_func: str = "rmse",
    take_negative: bool = False,
    mask: np.ndarray | xr.Dataset | None = None,
    transform: str | None = None,
    epsilon: float | None = None,
):
    """
    Entrypoint function for the objective function calculation.

    More can be added by adding the function to this file and adding the option
    in this function.

    Parameters
    ----------
    qobs : array_like
        Vector containing the Observed streamflow to be used in the objective
        function calculation. It is the target to attain.
    qsim : array_like
        Vector containing the Simulated streamflow as generated by the hydrological model.
        It is modified by changing parameters and resumulating the hydrological model.
    obj_func : str
        String representing the objective function to use in the calibration.
        Options must be one of the accepted objective functions:
        - "abs_bias" : Absolute value of the "bias" metric
        - "abs_pbias": Absolute value of the "pbias" metric
        - "abs_volume_error" : Absolute value of the volume_error metric
        - "agreement_index": Index of agreement
        - "bias" : Bias metric
        - "correlation_coeff": Correlation coefficient
        - "high_flow_rel_error" : High flow relative error
        - "kge" : Kling Gupta Efficiency metric (2009 version)
        - "kge_mod" : Kling Gupta Efficiency metric (2012 version)
        - "kge_2021" : Kling-Gupta Efficiency (2021 version)
        - "lce" : Least-squares combined efficiency
        - "low_flow_rel_error" : Low flow relative error
        - "mae": Mean Absolute Error metric
        - "mare": Mean Absolute Relative Error metric
        - "mse" : Mean Square Error metric
        - "nse": Nash-Sutcliffe Efficiency metric
        - "pbias" : Percent bias (relative bias)
        - "persistence_index" : Measure of the relative magnitude of the residual variance to the variance of the errors
        - "r2" : r-squared, i.e. square of correlation_coeff.
        - "rmse" : Root Mean Square Error
        - "rrmse" : Relative Root Mean Square Error (RMSE-to-mean ratio)
        - "rsr" : Ratio of RMSE to standard deviation.
        - "volume_error": Total volume error over the period.
        - "volumetric_efficiency" : Fraction of volume delivered at the proper time
        The default is 'rmse'.
    take_negative : bool
        Used to force the objective function to be multiplied by minus one (-1)
        such that it is possible to maximize it if the optimizer is a minimizer
        and vice versa. Should always be set to False unless required by an
        optimization setup, which is handled internally and transparently to
        the user. The default is False.
    mask : array_like
        Array of 0 or 1 on which the objective function should be applied.
        Values of 1 indicate that the value is included in the calculation, and
        values of 0 indicate that the value is excluded and will have no impact
        on the objective function calculation. This can be useful for specific
        optimization strategies such as odd/even year calibration, seasonal
        calibration or calibration based on high/low flows. The default is None
        and all data are preserved.
    transform : str
        Indicates the type of transformation required. Can be one of the
        following values:
        - "sqrt" : Square root transformation of the flows [sqrt(Q)]
        - "log" : Logarithmic transformation of the flows [log(Q)]
        - "inv" : Inverse transformation of the flows [1/Q]
        The default value is "None", by which no transformation is performed.
    epsilon : float
        Indicates the perturbation to add to the flow time series during a
        transformation to avoid division by zero and logarithmic transformation.
        The perturbation is equal to: `perturbation = epsilon * mean(qobs)`.
        The default value is 0.01.

    Returns
    -------
    float
        Value of the selected objective function (obj_fun).

    Notes
    -----
    All data corresponding to NaN values in the observation set are removed from the calculation.
    If a mask is passed, it must be the same size as the qsim and qobs vectors.
    If any NaNs are present in the qobs dataset, all corresponding data in the qobs, qsim and mask
    will be removed prior to passing to the processing function.
    """
    # List of available objective functions
    obj_func_dict = {
        "abs_bias": _abs_bias,
        "abs_pbias": _abs_pbias,
        "abs_volume_error": _abs_volume_error,
        "agreement_index": _agreement_index,
        "bias": _bias,
        "correlation_coeff": _correlation_coeff,
        "high_flow_rel_error": _high_flow_rel_error,
        # "high_flow_timing_error": _high_flow_timing_error,  # FIXME: Requires xarray to work
        "kge": _kge,
        "kge_mod": _kge_mod,
        "kge_2021": _kge_2021,
        "lce": _lce,
        "low_flow_rel_error": _low_flow_rel_error,
        "mae": _mae,
        "mare": _mare,
        "mse": _mse,
        "nse": _nse,
        "pbias": _pbias,
        "persistence_index": _persistence_index,
        # "persistence_index_weekly": _persistence_index_weekly,  # FIXME: Requires xarray to work
        "r2": _r2,
        "rmse": _rmse,
        "rrmse": _rrmse,
        "rsr": _rsr,
        "volume_error": _volume_error,
        "volumetric_efficiency": _volumetric_efficiency,
    }

    # If we got a dataset, change to np.array
    # FIXME: Implement a more flexible method
    if isinstance(qsim, xr.Dataset):
        qsim = qsim.q

    if isinstance(qobs, xr.Dataset):
        qobs = qobs.q

    # Basic error checking
    if qobs.shape[0] != qsim.shape[0]:
        raise ValueError("Observed and Simulated flows are not of the same size.")

    # Check mask size and contents
    if mask is not None:
        # Size
        if qobs.shape[0] != mask.shape[0]:
            raise ValueError("Mask is not of the same size as the streamflow data.")

        # All zero or one?
        if not np.setdiff1d(np.unique(mask), np.array([0, 1])).size == 0:
            raise ValueError("Mask contains values other than 0 or 1. Please modify.")

    # Check that the objective function is in the list of available methods
    if obj_func not in obj_func_dict:
        raise ValueError(
            "Selected objective function is currently unavailable. " + "Consider contributing to our project at: " + "github.com/hydrologie/xhydro"
        )

    # Ensure there are no NaNs in the dataset
    if mask is not None:
        mask = mask[~np.isnan(qobs)]
    qsim = qsim[~np.isnan(qobs)]
    qobs = qobs[~np.isnan(qobs)]

    # Apply mask before transform
    if mask is not None:
        qsim = qsim[mask == 1]
        qobs = qobs[mask == 1]
        mask = mask[mask == 1]

    # Transform data if needed
    if transform is not None:
        qsim, qobs = transform_flows(qsim, qobs, transform, epsilon)

    # Compute objective function by switching to the correct algorithm. Ensure
    # that the function name is the same as the obj_func tag or this will fail.
    function_call = obj_func_dict[obj_func]
    obj_fun_val = function_call(qsim, qobs)

    # Take the negative value of the Objective Function to return to the
    # optimizer.
    if take_negative:
        obj_fun_val = obj_fun_val * -1

    return obj_fun_val



def _get_objfun_minimize_or_maximize(obj_func: str) -> bool:
    """
    Check whether the objective function needs to be maximized or minimized.

    Returns a boolean value, where True means it should be maximized, and False means that it should be minimized.
    Objective functions other than those programmed here will raise an error.

    Parameters
    ----------
    obj_func : str
        Label of the desired objective function.

    Returns
    -------
    bool
        Indicator of if the objective function needs to be maximized.
    """
    # Define metrics that need to be maximized:
    if obj_func in [
        "agreement_index",
        "correlation_coeff",
        "kge",
        "kge_mod",
        "kge_2021",
        "lce",
        "nse",
        "persistence_index",
        "persistence_index_weekly",
        "r2",
        "volumetric_efficiency",
    ]:
        maximize = True

    # Define the metrics that need to be minimized:
    elif obj_func in [
        "abs_bias",
        "abs_pbias",
        "abs_volume_error",
        "mae",
        "mare",
        "mse",
        "rmse",
        "rrmse",
        "rsr",
    ]:
        maximize = False

    # Check for the metrics that exist but cannot be used for optimization
    elif obj_func in ["bias", "pbias", "volume_error"]:
        raise ValueError(f"The '{obj_func}' metric cannot be minimized or maximized. Please use 'abs_{obj_func}' instead.")
    elif obj_func in ["high_flow_timing_error", "high_flow_rel_error", "low_flow_rel_error"]:
        raise ValueError(f"The {obj_func} metric cannot be used for optimization.")
    else:
        raise NotImplementedError("The objective function is unknown.")

    return maximize


def _get_optimizer_minimize_or_maximize(algorithm: str) -> bool:
    """
    Find the direction in which the optimizer searches.

    Some optimizers try to maximize the objective function value, and others
    try to minimize it. Since our objective functions include some that need to
    be maximized and others minimized, it is imperative to ensure that the
    optimizer/objective-function pair work in tandem.

    Parameters
    ----------
    algorithm : str
        Name of the optimizing algorithm.

    Returns
    -------
    bool
        Indicator of the direction of the optimizer search, True to maximize.
    """
    # Define metrics that need to be maximized:
    if algorithm in [
        "DDS",
    ]:
        maximize = True

    # Define the metrics that need to be minimized:
    elif algorithm in [
        "SCEUA",
    ]:
        maximize = False

    # Any other optimizer at this date
    else:
        raise NotImplementedError("The optimization algorithm is unknown.")

    return maximize



[docs]
def transform_flows(
    qsim: np.ndarray,
    qobs: np.ndarray,
    transform: str | None = None,
    epsilon: float = 0.01,
) -> tuple[np.ndarray, np.ndarray]:
    """
    Transform flows before computing the objective function.

    It is used to transform flows such that the objective function is computed
    on a transformed flow metric rather than on the original units of flow
    (ex: inverse, log-transformed, square-root)

    Parameters
    ----------
    qsim : array_like
        Simulated streamflow vector.
    qobs : array_like
        Observed streamflow vector.
    transform : str, optional
        Indicates the type of transformation required. Can be one of the
        following values:
        - "sqrt" : Square root transformation of the flows [sqrt(Q)]
        - "log" : Logarithmic transformation of the flows [log(Q)]
        - "inv" : Inverse transformation of the flows [1/Q]
        The default value is "None", by which no transformation is performed.
    epsilon : float
        Indicates the perturbation to add to the flow time series during a
        transformation to avoid division by zero and logarithmic transformation.
        The perturbation is equal to: `perturbation = epsilon * mean(qobs)`.
        The default value is 0.01.

    Returns
    -------
    qsim : array_like
        Transformed simulated flow according to user request.
    qobs : array_like
        Transformed observed flow according to user request.
    """
    # Quick check
    if transform is not None:
        if transform not in ["log", "inv", "sqrt"]:
            raise NotImplementedError("Flow transformation method not recognized.")

    # Transform the flow series if required
    if transform == "log":  # log transformation
        epsilon = epsilon * np.nanmean(qobs)
        qobs, qsim = np.log(qobs + epsilon), np.log(qsim + epsilon)

    elif transform == "inv":  # inverse transformation
        epsilon = epsilon * np.nanmean(qobs)
        qobs, qsim = 1.0 / (qobs + epsilon), 1.0 / (qsim + epsilon)

    elif transform == "sqrt":  # square root transformation
        qobs, qsim = np.sqrt(qobs), np.sqrt(qsim)

    # Return the flows after transformation (or original if no transform)
    return qsim, qobs



"""
BEGIN OBJECTIVE FUNCTIONS DEFINITIONS
"""


def _abs_bias(qsim: np.ndarray, qobs: np.ndarray) -> float:
    """
    Absolute bias metric.

    Parameters
    ----------
    qsim : array_like
        Simulated streamflow vector.
    qobs : array_like
        Observed streamflow vector.

    Returns
    -------
    float
        Absolute value of the "bias" metric. This metric is useful when calibrating
        on the bias, because bias should aim to be 0 but can take large positive
        or negative values. Taking the absolute value of the bias will let the
        optimizer minimize the value to zero.

    Notes
    -----
    The abs_bias should be MINIMIZED.
    """
    return np.abs(_bias(qsim, qobs))


def _abs_pbias(qsim: np.ndarray, qobs: np.ndarray) -> float:
    """
    Absolute pbias metric.

    Parameters
    ----------
    qsim : array_like
        Simulated streamflow vector.
    qobs : array_like
        Observed streamflow vector.

    Returns
    -------
    float
        The absolute value of the "pbias" metric. This metric is useful when
        calibrating on the pbias, because pbias should aim to be 0 but can take
        large positive or negative values. Taking the absolute value of the
        pbias will let the optimizer minimize the value to zero.

    Notes
    -----
    The abs_pbias should be MINIMIZED.
    """
    return np.abs(_pbias(qsim, qobs))


def _abs_volume_error(qsim: np.ndarray, qobs: np.ndarray) -> float:
    """
    Absolute value of the volume error metric.

    Parameters
    ----------
    qsim : array_like
        Simulated streamflow vector.
    qobs : array_like
        Observed streamflow vector.

    Returns
    -------
    float
        The absolute value of the "volume_error" metric. This metric is useful
        when calibrating on the volume_error, because volume_error should aim
        to be 0 but can take large positive or negative values. Taking the
        absolute value of the volume_error will let the optimizer minimize the
        value to zero.

    Notes
    -----
    The abs_volume_error should be MINIMIZED.
    """
    return np.abs(_volume_error(qsim, qobs))


def _agreement_index(qsim: np.ndarray, qobs: np.ndarray) -> float:
    """
    Index of agreement metric.

    Parameters
    ----------
    qsim : array_like
        Simulated streamflow vector.
    qobs : array_like
        Observed streamflow vector.

    Returns
    -------
    float
        The agreement index of Willmott (1981). Varies between 0 and 1.

    Notes
    -----
    The Agreement index should be MAXIMIZED.
    """
    # Decompose into clearer chunks
    a = np.sum((qobs - qsim) ** 2)
    b = np.abs(qsim - np.mean(qobs)) + np.abs(qobs - np.mean(qobs))
    c = np.sum(b**2)

    return 1 - (a / c)


def _bias(qsim: np.ndarray, qobs: np.ndarray) -> float:
    """
    The bias metric.

    Parameters
    ----------
    qsim : array_like
        Simulated streamflow vector.
    qobs : array_like
        Observed streamflow vector.

    Returns
    -------
    float
        The bias in the simulation. Can be negative or positive and gives the
        average error between the observed and simulated flows. This
        interpretation uses the definition that a positive bias value means
        that the simulation overestimates the true value (as opposed to many
        other sources on bias calculations that use the contrary interpretation).

    Notes
    -----
    BIAS SHOULD AIM TO BE ZERO AND SHOULD NOT BE USED FOR CALIBRATION. FOR
    CALIBRATION, USE "abs_bias" TO TAKE THE ABSOLUTE VALUE.
    """
    return np.mean(qsim - qobs)


def _correlation_coeff(qsim: np.ndarray, qobs: np.ndarray) -> np.ndarray:
    """
    Correlation coefficient metric.

    Parameters
    ----------
    qsim : array_like
        Simulated streamflow vector.
    qobs : array_like
        Observed streamflow vector.

    Returns
    -------
    array-like
        The correlation coefficient.

    Notes
    -----
    The correlation_coeff should be MAXIMIZED.
    """
    return np.corrcoef(qobs, qsim)[0, 1]


def _high_flow_rel_error(qobs: np.ndarray, qsim: np.ndarray, exceedance_probability: int = 10) -> float:
    """
    High Flow Relative Error.
    Relative error for observed flows that are exceeded a given percentage of the time.

    Parameters
    ----------
    qsim : np.array
        Daily Simulated streamflow data.
    qobs : np.array
        Daily Observed streamflow data.
    exceedance_probability : int
        exceedance_probability for high flows, default is 10%.

    Returns
    -------
    float:
        Relative error in flow that is exceeded a given percentage of the time.

    Notes
    -----
    High Flow Relative Error should AIM TO BE ZERO, therefore it cannot be used for optimization

    References
    ----------
    Sauquet, E., Evin, G., Siauve, S., Aissat, R., Arnaud, P., Bérel, M., ... & Vidal, J. P. (2025).
    A large transient multi-scenario multi-model ensemble of future streamflow and groundwater projections in France.
    EGUsphere, 2025, 1-41.


    """
    thresh = np.nanpercentile(qobs, 100 - exceedance_probability)

    # Select only high flow time steps
    mask = qobs >= thresh

    qobs_high = qobs[mask]
    qsim_high = qsim[mask]

    return ((qsim_high - qobs_high).sum() / qobs_high.sum()).item()


def _kge(qsim: np.ndarray, qobs: np.ndarray) -> float:
    """
    Kling-Gupta efficiency metric (2009 version).

    Parameters
    ----------
    qsim : array_like
        Simulated streamflow vector.
    qobs : array_like
        Observed streamflow vector.

    Returns
    -------
    float
        The Kling-Gupta Efficiency (KGE) metric of 2009. It can take values
        from -inf to 1 (best case).

    Notes
    -----
    The KGE should be MAXIMIZED.
    """
    # This pops up a lot, precalculate.
    qsim_mean = np.mean(qsim)
    qobs_mean = np.mean(qobs)

    # Calculate the components of KGE
    r_num = np.sum((qsim - qsim_mean) * (qobs - qobs_mean))
    r_den = np.sqrt(np.sum((qsim - qsim_mean) ** 2) * np.sum((qobs - qobs_mean) ** 2))
    r = r_num / r_den
    a = np.std(qsim) / np.std(qobs)
    b = np.sum(qsim) / np.sum(qobs)

    # Calculate the KGE
    kge = 1 - np.sqrt((r - 1) ** 2 + (a - 1) ** 2 + (b - 1) ** 2)

    return kge


def _kge_mod(qsim: np.ndarray, qobs: np.ndarray) -> float:
    """
    Kling-Gupta efficiency metric (2012 version).

    Parameters
    ----------
    qsim : array_like
        Simulated streamflow vector.
    qobs : array_like
        Observed streamflow vector.

    Returns
    -------
    float
        The modified Kling-Gupta Efficiency (KGE) metric of 2012. It can take
        values from -inf to 1 (best case).

    Notes
    -----
    The kge_mod should be MAXIMIZED.
    """
    # These pop up a lot, precalculate
    qsim_mean = np.mean(qsim)
    qobs_mean = np.mean(qobs)

    # Calc KGE components
    r_num = np.sum((qsim - qsim_mean) * (qobs - qobs_mean))
    r_den = np.sqrt(np.sum((qsim - qsim_mean) ** 2) * np.sum((qobs - qobs_mean) ** 2))
    r = r_num / r_den
    g = (np.std(qsim) / qsim_mean) / (np.std(qobs) / qobs_mean)
    b = np.mean(qsim) / np.mean(qobs)

    # Calc the modified KGE metric
    kge_mod = 1 - np.sqrt((r - 1) ** 2 + (g - 1) ** 2 + (b - 1) ** 2)

    return kge_mod


def _kge_2021(qsim: np.ndarray, qobs: np.ndarray) -> float:
    """
    Kling-Gupta efficiency metric version of Tang et al. (2021)

    Parameters
    ----------
    qsim : array_like
        Simulated streamflow vector.
    qobs : array_like
        Observed streamflow vector.

    Returns
    -------
    float
        The modified Kling-Gupta Efficiency (KGE) modified metric of 2021: KGE".

    Notes
    -----
    The kge_2021 should be MAXIMIZED: values from -inf to 1 (best case).
    ref : Tang, G., Clark, M. P., & Papalexiou, S. M. (2021). SC-Earth: A station-based serially complete Earth
    dataset from 1950 to 2019. Journal of Climate, 34(16), 6493-6511.

    """
    # These pop up a lot, precalculate
    qsim_mean = np.mean(qsim)
    qobs_mean = np.mean(qobs)

    # Calc KGE" components
    r_num = np.sum((qsim - qsim_mean) * (qobs - qobs_mean))
    r_den = np.sqrt(np.sum((qsim - qsim_mean) ** 2) * np.sum((qobs - qobs_mean) ** 2))
    r = r_num / r_den
    a = np.std(qsim) / np.std(qobs)
    b_n = (np.mean(qsim) - np.mean(qobs)) / np.std(qobs)

    # Calc the KGE" metric
    kge_2021 = 1 - np.sqrt((a - 1) ** 2 + (b_n) ** 2 + (r - 1) ** 2)

    return kge_2021


def _lce(qsim: np.ndarray, qobs: np.ndarray) -> float:
    """
    Least-squares combined efficiency:
    Performance criterion combining the least-squares regression coefficients from the two regression lines
    in both-way regression analysis between simulations and observations
    Performance criterion that avoids tendency towards underestimation of flow variability

    Parameters
    ----------
    qsim : array_like
        Simulated streamflow vector.
    qobs : array_like
        Observed streamflow vector.

    Returns
    -------
    float
        The least-squares combined efficiency.

    Notes
    -----
    The LCE should be MAXIMIZED; values from -inf to 1 (best case).
    ref : Lee, J. S., & Choi, H. I. (2022). A rebalanced performance criterion for hydrological model calibration.
    Journal of Hydrology, 606, 127372. https://doi.org/10.1016/j.jhydrol.2021.127372

    """
    # These pop up a lot, precalculate
    qsim_mean = np.mean(qsim)
    qobs_mean = np.mean(qobs)

    # Calc LCE components
    r_num = np.sum((qsim - qsim_mean) * (qobs - qobs_mean))
    r_den = np.sqrt(np.sum((qsim - qsim_mean) ** 2) * np.sum((qobs - qobs_mean) ** 2))
    r = r_num / r_den
    b = np.mean(qsim) / np.mean(qobs)
    a = np.std(qsim) / np.std(qobs)

    return 1 - np.sqrt((r * a - 1) ** 2 + (r / a - 1) ** 2 + (b - 1) ** 2)


def _low_flow_rel_error(qobs: np.ndarray, qsim: np.ndarray, exceedance_probability: int = 90) -> float:
    """
    Low Flow Relative Error.
    Relative error for observed flows that are exceeded 90 % of the time.

    Parameters
    ----------
    qsim : np.array
        Daily Simulated streamflow data.
    qobs : np.array
        Daily Observed streamflow data.
    exceedance_probability : int
        exceedance_probability for low flows, default is 90%.

    Returns
    -------
    float:
        Relative error in flow that is exceeded 90 % of the time.

    Notes
    -----
    Low Flow Relative Error should AIM TO BE ZERO, therefore it cannot be used for optimization
    ref : Sauquet, E., Evin, G., Siauve, S., Aissat, R., Arnaud, P., Bérel, M., ... & Vidal, J. P. (2025).
    A large transient multi-scenario multi-model ensemble of future streamflow and groundwater projections in France.
    EGUsphere, 2025, 1-41.

    """
    threshold = np.nanpercentile(qobs, 100 - exceedance_probability)
    mask = qobs >= threshold

    qsim_low = qsim[mask]
    qobs_low = qobs[mask]

    return float(np.nansum(qsim_low - qobs_low) / np.nansum(qobs_low))


def _mae(qsim: np.ndarray, qobs: np.ndarray):
    """
    Mean absolute error metric.

    Parameters
    ----------
    qsim : array_like
        Simulated streamflow vector.
    qobs : array_like
        Observed streamflow vector.

    Returns
    -------
    float
        Mean Absolute Error. It can be interpreted as the average error
        (absolute) between observations and simulations for any time step.

    Notes
    -----
    The mae should be MINIMIZED.
    """
    return np.mean(np.abs(qsim - qobs))


def _mare(qsim: np.ndarray, qobs: np.ndarray) -> float:
    """
    Mean absolute relative error metric.

    Parameters
    ----------
    qsim : array_like
        Simulated streamflow vector.
    qobs : array_like
        Observed streamflow vector.

    Returns
    -------
    float
        Mean Absolute Relative Error. For streamflow, where qobs is always zero
        or positive, the MARE is always positive.

    Notes
    -----
    The mare should be MINIMIZED.
    """
    return np.sum(np.abs(qobs - qsim)) / np.sum(qobs)


def _mse(qsim: np.ndarray, qobs: np.ndarray) -> float:
    """
    Mean square error metric.

    Parameters
    ----------
    qsim : array_like
        Simulated streamflow vector.
    qobs : array_like
        Observed streamflow vector.

    Returns
    -------
    float
        Mean Square Error. It is the sum of squared errors for each day divided
        by the total number of days. Units are thus squared units, and the best
        possible value is 0.

    Notes
    -----
    The mse should be MINIMIZED.
    """
    return np.mean((qobs - qsim) ** 2)


def _nse(qsim: np.ndarray, qobs: np.ndarray) -> float:
    """
    Nash-Sutcliffe efficiency metric.

    Parameters
    ----------
    qsim : array_like
        Simulated streamflow vector.
    qobs : array_like
        Observed streamflow vector.

    Returns
    -------
    float
        Nash-Sutcliffe Efficiency (NSE) metric. It can take values from -inf to
        1, with 0 being as good as using the mean observed flow as the estimator.

    Notes
    -----
    The nse should be MAXIMIZED.
    """
    num = np.sum((qobs - qsim) ** 2)
    den = np.sum((qobs - np.mean(qobs)) ** 2)

    return 1 - (num / den)


def _pbias(qsim: np.ndarray, qobs: np.ndarray) -> float:
    """
    Percent bias metric.

    Parameters
    ----------
    qsim : array_like
        Simulated streamflow vector.
    qobs : array_like
        Observed streamflow vector.

    Returns
    -------
    float
        Percent bias. Can be negative or positive and gives the average
        relative error between the observed and simulated flows. This
        interpretation uses the definition that a positive bias value means
        that the simulation overestimates the true value (as opposed to many
        other sources on bias calculations that use the contrary interpretation).

    Notes
    -----
    PBIAS SHOULD AIM TO BE ZERO AND SHOULD NOT BE USED FOR CALIBRATION. FOR
    CALIBRATION, USE "abs_pbias" TO TAKE THE ABSOLUTE VALUE.
    """
    return (np.sum(qsim - qobs) / np.sum(qobs)) * 100


def _persistence_index(qsim: np.ndarray, qobs: np.ndarray) -> float:
    """
    Persistence index or persistence model efficiency;
    Measure of the relative magnitude of the residual variance (noise) to the variance of the errors
    obtained by the use of a simple persistence model that assumes “tomorrow's flow will be the same as today's”.

    Parameters
    ----------
    qsim : array_like
        Simulated streamflow vector.
    qobs : array_like
        Observed streamflow vector.

    Returns
    -------
    float
       Persistence index single value obtained by given time step

    Notes
    -----
    The Persistence index should be MAXIMIZED.
    The optimal value is 1.0, and values should be larger than 0.0 to indicate minimally acceptable performance.
    Ref: Gupta, H. V., Sorooshian, S., & Yapo, P. O. (1999). Status of automatic calibration for hydrologic models:
    comparison with multilevel expert calibration. Journal of Hydrologic Engineering, 4(2), 135-143.
    http://dx.doi.org/10.1061/(ASCE)1084-0699(1999)4:2(135).

    """
    if len(qobs) < 2:
        raise ValueError("At least 2 timesteps are needed to compute persistence index.")

    # Remove any rows with NaNs
    valid = ~np.isnan(qsim) & ~np.isnan(qobs)
    qsim = qsim[valid]
    qobs = qobs[valid]

    # Align arrays: ignore the first time step
    qsim = qsim[1:]
    qobs_now = qobs[1:]
    qobs_prev = qobs[:-1]

    return 1 - np.sum((qsim - qobs_now) ** 2) / np.sum((qobs_prev - qobs_now) ** 2)


def _r2(qsim: np.ndarray, qobs: np.ndarray) -> float:
    """
    The r-squred metric.

    Parameters
    ----------
    qsim : array_like
        Simulated streamflow vector.
    qobs : array_like
        Observed streamflow vector.

    Returns
    -------
    float
        The r-squared (R2) metric equal to the square of the correlation
        coefficient.

    Notes
    -----
    The r2 should be MAXIMIZED.
    """
    return _correlation_coeff(qsim, qobs) ** 2


def _rmse(qsim: np.ndarray, qobs: np.ndarray) -> float:
    """
    Root-mean-square error metric.

    Parameters
    ----------
    qsim : array_like
        Simulated streamflow vector.
    qobs : array_like
        Observed streamflow vector.

    Returns
    -------
    float
        Root Mean Square Error. Units are the same as the timeseries data
        (ex. m3/s). It can take zero or positive values.

    Notes
    -----
    The rmse should be MINIMIZED.
    """
    return np.sqrt(np.mean((qobs - qsim) ** 2))


def _rrmse(qsim: np.ndarray, qobs: np.ndarray) -> float:
    """
    Relative root-mean-square error (ratio of rmse to mean) metric.

    Parameters
    ----------
    qsim : array_like
        Simulated streamflow vector.
    qobs : array_like
        Observed streamflow vector.

    Returns
    -------
    float
        Ratio of the RMSE to the mean of the observations. It allows scaling
        RMSE values to compare results between time series of different
        magnitudes (ex. flows from small and large watersheds). Also known as
        the CVRMSE.

    Notes
    -----
    The rrmse should be MINIMIZED.
    """
    return _rmse(qsim, qobs) / np.mean(qobs)


def _rsr(qsim: np.ndarray, qobs: np.ndarray):
    """
    Ratio of root mean square error to standard deviation metric.

    Parameters
    ----------
    qsim : array_like
        Simulated streamflow vector.
    qobs : array_like
        Observed streamflow vector.

    Returns
    -------
    float
        Root Mean Square Error (RMSE) divided by the standard deviation of the
        observations. Also known as the "Ratio of the Root Mean Square Error to
        the Standard Deviation of Observations".

    Notes
    -----
    The rsr should be MINIMIZED.
    """
    return _rmse(qobs, qsim) / np.std(qobs)


def _volume_error(qsim: np.ndarray, qobs: np.ndarray) -> float:
    """
    Volume error metric.

    Parameters
    ----------
    qsim : array_like
        Simulated streamflow vector.
    qobs : array_like
        Observed streamflow vector.

    Returns
    -------
    float
        Total error in terms of volume over the entire period. Expressed in
        terms of the same units as input data, so for flow rates it is
        important to multiply by the duration of the time-step to obtain actual
        volumes.

    Notes
    -----
    The volume_error should be MINIMIZED.
    """
    return np.sum(qsim - qobs) / np.sum(qobs)


def _volumetric_efficiency(qsim: np.ndarray, qobs: np.ndarray) -> float:
    """
    Volumetric efficiency;
    Represents the fraction of water delivered at the proper time.

    Parameters
    ----------
    qsim : array_like
        Simulated streamflow vector.
    qobs : array_like
        Observed streamflow vector.

    Returns
    -------
    float
        Volumetric efficiency (VE)


    Notes
    -----
    The Volumetric efficiency should be MAXIMIZED ; ranges from -inf to 1
    For values= 0 : Simulation error equals total observed flow
    For negative values : Simulation error exceeds observed volume.
    Multiplying by the duration of the time-step computes actual volumes and not values in flow units.

    Ref: Criss, R. E., & Winston, W. E. (2008). Do Nash values have value? Discussion and alternate proposals.
    Hydrological Processes, 22(14), 2723-2725. http://dx.doi.org/10.1002/hyp.7072.


    """
    return 1 - (np.sum(abs(qsim - qobs)) / np.sum(qobs))


# FIXME: Enable when time handling tools are available for the calibration module

# def _persistence_index_weekly(qsim: np.ndarray, qobs: np.ndarray):
#     """
#     Persistence index or persistence model efficiency based on weekly averages;
#     Measure of the relative magnitude of the residual variance (noise) to the variance of the errors
#     obtained by the use of a simple persistence model that assumes “Netx weeks flow will be the same as this week's”.

#     Parameters
#     ----------
#     qsim : array_like
#         Simulated streamflow vector.
#     qobs : array_like
#         Observed streamflow vector.

#     Returns
#     -------
#     float
#         Persistence index single value based on weekly averages
#         the optimal value is 1.0, and values should be larger than 0.0 to indicate minimally acceptable performance.

#         Ref: Gupta, H. V., Sorooshian, S., & Yapo, P. O. (1999). Status of automatic calibration for hydrologic models:
#         comparison with multilevel expert calibration. Journal of Hydrologic Engineering, 4(2), 135-143.
#         http://dx.doi.org/10.1061/(ASCE)1084-0699(1999)4:2(135).

#     Notes
#     -----
#     The weekly persistence index should be MAXIMIZED.
#     The optimal value is 1.0, and values should be larger than 0.0 to indicate minimally acceptable performance.
#     """
#     # FIXME: This should be able to maintain timestamps, timing tool development is required
#     # Resample to weekly means (weeks starting on Monday)
#     qsim_weekly = qsim.resample(time="W-MON").mean()
#     qobs_weekly = qobs.resample(time="W-MON").mean()

#     # Remove NaNs from both series simultaneously
#     valid = qsim_weekly.notnull() & qobs_weekly.notnull()
#     qsim_weekly = qsim_weekly[valid]
#     qobs_weekly = qobs_weekly[valid]

#     # Need at least two weekly values
#     if len(qobs_weekly) < 2:
#         raise ValueError("At least 2 weekly averages are needed to compute persistence index.")

#     # Compute differences
#     qsim = qsim_weekly[1:].values
#     qobs_now = qobs_weekly[1:].values
#     qobs_prev = qobs_weekly[:-1].values

#     # Compute Persistence Index
#     denom = np.sum((qobs_prev - qobs_now) ** 2)
#     if denom == 0:
#         return np.nan  # avoid division by zero
#     return 1 - np.sum((qsim - qobs_now) ** 2) / denom


# def _high_flow_timing_error(
#     qobs: xr.DataArray,
#     qsim: xr.DataArray,
#     percentile: int = 10,
# ) -> xr.DataArray:
#     """
#     Timing error between the circular mean of observed high flows DOY and simulated high flows DOY.

#     Parameters
#     ----------
#     qobs : array_like
#         Daily Observed streamflow data.
#     qsim : array_like
#         Daily Simulated streamflow data.
#     percentile : int
#         frequency percentile for high flows, default is 10%.

#     Returns
#     -------
#     float
#         Difference in Julian day occurrence of high flows, e.g. flows that are exceeded 10 % of the time.

#     Notes
#     -----
#     High Flow timing Error should AIM TO BE ZERO
#     ref : Gupta, A., Hantush, M. M., Govindaraju, R. S., & Beven, K. (2024). Evaluation of hydrological models at
#     gauged and ungauged basins using machine learning-based limits-of-acceptability and hydrological signatures.
#     Journal of Hydrology, 641, 131774. https://doi.org/10.1016/j.jhydrol.2024.131774

#     """
#     # FIXME: This should be able to maintain timestamps, timing tool development is required
#     threshold = np.nanpercentile(qobs, 100 - percentile)

#     mask1 = qobs >= threshold  # Select only high flow time steps for obs flows

#     qobs_high = qobs.where(mask1, drop=True)
#     date_obs = pd.DatetimeIndex(qobs_high.time.values)
#     doy_obs = date_obs.dayofyear
#     # circmean for signe DOY
#     mean_doy_obs = circmean(doy_obs, high=365, low=1)

#     # Select only high flow time steps for sim flows
#     mask2 = qsim >= threshold
#     qsim_high = qsim.where(mask2, drop=True)
#     date_sim = pd.DatetimeIndex(qsim_high.time.values)
#     doy_sim = date_sim.dayofyear

#     # circmean for signe DOY
#     mean_doy_sim = circmean(doy_sim, high=365, low=1)

#     return mean_doy_sim - mean_doy_obs