Source code for risksyn.risk

import math
import warnings

from dpmm.models.base.mechanisms.cdp2adp import cdp_delta
from riskcal.analysis import get_advantage_from_zcdp, get_beta_from_zcdp

# Bisection defaults used by the rho searches in this module.
# Tolerance is applied to the absolute difference between the achieved and the
# target quantity (advantage or beta), not to rho itself.
_DEFAULT_TOL = 1e-4
# Upper bound on the number of bisection steps before giving up and returning
# the midpoint of the final bracket.
_DEFAULT_MAX_ITER = 100

# Risk.from_advantage_at_baseline warns when the multiplicative increase in
# success rate, (advantage + baseline) / baseline, is at least this factor.
ADVANTAGE_WARN_MULTIPLIER = 3


class Risk:
    """Specification of the target level of risk.

    Internally converts to zCDP. Use factory methods to create:

    - Risk.from_zcdp(rho)
    - Risk.from_advantage(advantage)
    - Risk.from_err_rates(tpr, fpr)
    - Risk.from_success_at_baseline(success, baseline)
    - Risk.from_advantage_at_baseline(advantage, baseline)
    """

    def __init__(self, rho: float):
        """Store the zCDP parameter ``rho`` that represents this risk level."""
        self._rho = rho

    @staticmethod
    def from_zcdp(rho: float) -> "Risk":
        """Build a Risk directly from a zCDP parameter ``rho``."""
        return Risk(rho)

    @staticmethod
    def from_advantage(
        advantage: float,
        tol: float = _DEFAULT_TOL,
        max_iter: int = _DEFAULT_MAX_ITER,
    ) -> "Risk":
        """
        From worst-case advantage, maximum difference between
        success rate - baseline or TPR - FPR in the case of membership
        inference attacks.

        ``tol`` and ``max_iter`` are forwarded to the bisection search that
        looks for the matching rho.
        """
        rho = _find_rho_for_advantage(advantage, tol, max_iter)
        return Risk(rho)

    @staticmethod
    def from_err_rates(
        tpr: float,
        fpr: float,
        tol: float = _DEFAULT_TOL,
        max_iter: int = _DEFAULT_MAX_ITER,
    ) -> "Risk":
        """
        From membership inference TPR and FPR.

        The pair is translated to error rates ``alpha = fpr`` and
        ``beta = 1 - tpr`` before searching for rho; ``tol`` and ``max_iter``
        are forwarded to that search.
        """
        alpha = fpr
        beta = 1 - tpr
        rho = _find_rho_for_alpha_beta(alpha, beta, tol, max_iter)
        return Risk(rho)

    @staticmethod
    def from_success_at_baseline(
        success: float,
        baseline: float,
        tol: float = _DEFAULT_TOL,
        max_iter: int = _DEFAULT_MAX_ITER,
    ) -> "Risk":
        """
        From success rate (TPR) at a given baseline (FPR).

        Thin alias for :meth:`from_err_rates` with ``tpr=success`` and
        ``fpr=baseline``.
        """
        return Risk.from_err_rates(
            tpr=success, fpr=baseline, tol=tol, max_iter=max_iter
        )

    @staticmethod
    def from_advantage_at_baseline(
        advantage: float,
        baseline: float,
        tol: float = _DEFAULT_TOL,
        max_iter: int = _DEFAULT_MAX_ITER,
    ) -> "Risk":
        """
        From advantage (success rate - baseline) and baseline success rate.

        Emits a ``UserWarning`` when the implied success rate is at least
        ``ADVANTAGE_WARN_MULTIPLIER`` times the baseline.

        Raises
        ------
        ValueError
            If ``advantage + baseline`` does not leave ``beta`` in ``[0, 1]``,
            or if ``baseline`` is not strictly positive.
        """
        # advantage = success - baseline = (1 - beta) - alpha
        beta = 1 - advantage - baseline
        if not 0 <= beta <= 1:
            raise ValueError("Invalid: We must have advantage + baseline <= 1.")

        # The multiplicative increase below divides by the baseline; reject a
        # zero or negative baseline explicitly instead of failing with a bare
        # ZeroDivisionError or passing a negative false-positive rate along.
        if baseline <= 0:
            raise ValueError(
                f"Invalid: baseline must be strictly positive, got {baseline}."
            )

        times = (advantage + baseline) / baseline
        if times >= ADVANTAGE_WARN_MULTIPLIER:
            warnings.warn(
                f"The current advantage corresponds to the adversary's success "
                f"increasing {times:.1f}x. Consider setting success and baseline "
                f"requirements exactly.",
                UserWarning,
                stacklevel=2,
            )

        rho = _find_rho_for_alpha_beta(baseline, beta, tol, max_iter)
        return Risk(rho)

    @property
    def zcdp(self) -> float:
        """Get the converted zCDP parameter."""
        return self._rho

    def __or__(self, other: "Risk") -> "Risk":
        """Combine risk specs — takes minimum rho (most restrictive)."""
        if not isinstance(other, Risk):
            # Let Python try the reflected operation / raise TypeError itself.
            return NotImplemented
        return Risk(min(self._rho, other._rho))

    def __repr__(self) -> str:
        return f"Risk(rho={self._rho:.6f})"
def _find_rho_for_advantage(
    target_advantage: float,
    tol: float = _DEFAULT_TOL,
    max_iter: int = _DEFAULT_MAX_ITER,
) -> float:
    """Bisect over rho until the advantage is within ``tol`` of the target.

    The search bracket is ``[1e-6, 100.0]``. A larger advantage than the
    target moves the upper bound down; otherwise the lower bound moves up.
    If ``max_iter`` steps pass without meeting the tolerance, the midpoint
    of the final bracket is returned.
    """
    lower = 1e-6
    upper = 100.0
    for _ in range(max_iter):
        candidate = (lower + upper) / 2
        achieved = get_advantage_from_zcdp(candidate)
        if abs(achieved - target_advantage) < tol:
            return candidate
        if achieved > target_advantage:
            # Too much advantage: the candidate rho is too large.
            upper = candidate
        else:
            lower = candidate
    return (lower + upper) / 2


def _find_rho_for_alpha_beta(
    alpha: float,
    target_beta: float,
    tol: float = _DEFAULT_TOL,
    max_iter: int = _DEFAULT_MAX_ITER,
) -> float:
    """Bisect over rho until beta at ``alpha`` is within ``tol`` of the target.

    The search bracket is ``[1e-6, 100.0]``. A larger beta than the target
    moves the lower bound up; otherwise the upper bound moves down. If
    ``max_iter`` steps pass without meeting the tolerance, the midpoint of
    the final bracket is returned.
    """
    lower = 1e-6
    upper = 100.0
    for _ in range(max_iter):
        candidate = (lower + upper) / 2
        achieved = get_beta_from_zcdp(candidate, alpha)
        if abs(achieved - target_beta) < tol:
            return candidate
        if achieved > target_beta:
            # Beta still above the target: the candidate rho is too small.
            lower = candidate
        else:
            upper = candidate
    return (lower + upper) / 2


# Fixed epsilon used when calibrating (epsilon, delta) parameters.
# Low-level implementation detail; not the actual privacy guarantee.
_CALIBRATION_EPSILON = 1.0


def _epsilon_to_rho(epsilon: float) -> float:
    """Convert pure-DP epsilon to zCDP rho: rho = epsilon^2 / 2."""
    squared = epsilon**2
    return squared / 2
def calibrate_parameters_to_risk(risk: Risk, proc_epsilon: float | None = None) -> dict:
    """Calibrate (epsilon, delta) parameters for dpmm from a Risk specification.

    Converts a Risk object into the (epsilon, delta) parameters needed by
    differential privacy pipelines. Optionally accounts for preprocessing
    budget. This calibration is specific to dpmm, and does not necessarily
    apply to other implementations.

    Epsilon is always the fixed module constant ``_CALIBRATION_EPSILON``;
    delta is obtained from ``cdp_delta`` for the rho left over after the
    optional preprocessing deduction.

    Parameters
    ----------
    risk : Risk
        Risk specification.
    proc_epsilon : float, optional
        Epsilon budget to reserve for preprocessing. If provided, it is
        converted to rho (``epsilon^2 / 2``), deducted from the total
        privacy budget and included in the output. Must be non-negative.

    Returns
    -------
    dict
        ``{"epsilon": ..., "delta": ...}`` and optionally ``"proc_epsilon"``.

    Raises
    ------
    ValueError
        If ``proc_epsilon`` is negative, or if the privacy budget is
        insufficient.
    """
    total_rho = risk.zcdp

    if proc_epsilon is None:
        gen_rho = total_rho
    else:
        # rho = epsilon^2 / 2 is symmetric in epsilon, so a negative value
        # would silently be charged as if it were positive. Reject it.
        if proc_epsilon < 0:
            raise ValueError(
                f"proc_epsilon must be non-negative, got {proc_epsilon}."
            )
        proc_rho = _epsilon_to_rho(proc_epsilon)
        gen_rho = total_rho - proc_rho
        if gen_rho <= 0:
            raise ValueError(
                f"Insufficient privacy budget: risk.zcdp={total_rho:.6f} <= "
                f"proc_rho={proc_rho:.6f}. Provide domain bounds for numeric "
                "columns, relax the risk requirement, or decrease proc_epsilon."
            )

    delta = cdp_delta(gen_rho, _CALIBRATION_EPSILON)

    result = {"epsilon": _CALIBRATION_EPSILON, "delta": delta}
    if proc_epsilon is not None:
        result["proc_epsilon"] = proc_epsilon
    return result