Source code for gwGenealogy.hosts.host_retention

#! /usr/bin/env python
#-*- coding: utf-8 -*-
#==============================================================================
#
#    FILE: host_retention.py
#
#    Environment escape-speed distributions for retention analysis.
#    Provides escape-speed CDFs for common astrophysical environments
#    and a generic interface for computing environment-marginalised
#    retention fractions from kick samples.
#
#    Predefined environment presets:
#      GC:  log-normal v_esc with mu=1.5, sig=0.30 (base-10)
#           from Antonini & Rasio (2016), Mapelli et al. (2021)
#      NSC: log-normal v_esc with mu=2.2, sig=0.36 (base-10)
#           from Antonini & Rasio (2016), Mapelli et al. (2021)
#      EG:  uniform v_esc in [400, 2500] km/s (early-type galaxies)
#      DG:  uniform v_esc in [100, 250] km/s (dwarf galaxies)
#      MW:  Gaussian v_esc with mean=497, std=8 km/s
#           from Koppelman & Helmi (2021)
#      M31: Gaussian v_esc with mean=470, std=40 km/s
#           from Kafle et al. (2018)
#
#    References:
#      Antonini & Rasio (2016): https://arxiv.org/abs/1606.04889
#      Mapelli et al. (2021): https://arxiv.org/abs/2106.07179
#      Koppelman & Helmi (2021): https://arxiv.org/abs/2006.16283
#      Kafle et al. (2018): https://doi.org/10.1093/mnras/sty082
#
#    AUTHOR: Tousif Islam
#    CREATED: 06-06-2026
#    LAST MODIFIED:
#    REVISION: ---
#==============================================================================
__author__ = "Tousif Islam"

import numpy as np
from scipy.stats import lognorm, norm, gaussian_kde
from scipy.integrate import trapezoid

GC_ENVIRONMENT = {'kind': 'lognormal', 'mu': 1.5, 'sig': 0.30, 'label': 'GCs'}
NSC_ENVIRONMENT = {'kind': 'lognormal', 'mu': 2.2, 'sig': 0.36, 'label': 'NSCs'}
EG_ENVIRONMENT = {'kind': 'uniform', 'vmin': 400, 'vmax': 2500, 'label': 'EGs'}
DG_ENVIRONMENT = {'kind': 'uniform', 'vmin': 100, 'vmax': 250, 'label': 'DGs'}
MW_ENVIRONMENT = {'kind': 'gaussian', 'mean': 497, 'std': 8, 'label': 'MW'}
M31_ENVIRONMENT = {'kind': 'gaussian', 'mean': 470, 'std': 40, 'label': 'M31'}

ENVIRONMENTS = {'GC': GC_ENVIRONMENT, 'NSC': NSC_ENVIRONMENT,
                'EG': EG_ENVIRONMENT, 'DG': DG_ENVIRONMENT,
                'MW': MW_ENVIRONMENT, 'M31': M31_ENVIRONMENT}


[docs] def sample_escape_speed(n_samples, environment, seed=None): """Draw escape velocity samples from an environment's distribution. Parameters ---------- n_samples : int Number of samples to draw. environment : dict Environment specification (see ``escape_speed_cdf``). seed : int or None Random seed. Returns ------- v_esc : numpy array Escape velocity samples [km/s] """ rng = np.random.default_rng(seed) kind = environment['kind'] if kind == 'lognormal': s = environment['sig'] * np.log(10) scale = np.exp(environment['mu'] * np.log(10)) return lognorm.rvs(s, scale=scale, size=n_samples, random_state=rng) elif kind == 'uniform': return rng.uniform(environment['vmin'], environment['vmax'], size=n_samples) elif kind == 'gaussian': return np.abs(rng.normal(environment['mean'], environment['std'], size=n_samples)) else: raise ValueError(f"Unknown environment kind: {kind!r}")
[docs] def sample_multi_escape_speed(n_samples, environments=None, seed=None): """Draw escape velocity samples for multiple environments. Parameters ---------- n_samples : int Number of samples per environment. environments : dict of {name: environment_dict}, optional Defaults to all predefined environments. seed : int or None Random seed. Returns ------- dict of {name: numpy array of v_esc} """ if environments is None: environments = ENVIRONMENTS rng = np.random.default_rng(seed) return {name: sample_escape_speed(n_samples, env, seed=int(rng.integers(0, 2**31))) for name, env in environments.items()}
[docs] def escape_speed_cdf(v, environment): """CDF of the escape-speed distribution for a given environment. Parameters ---------- v : float or array Escape speed value(s) [km/s] environment : dict Environment specification with 'kind' key: - ``{'kind': 'lognormal', 'mu': float, 'sig': float}`` mu and sig are base-10 log mean and std dev (v_esc ~ 10^N(mu, sig)) - ``{'kind': 'uniform', 'vmin': float, 'vmax': float}`` - ``{'kind': 'gaussian', 'mean': float, 'std': float}`` Returns ------- cdf : float or array """ kind = environment['kind'] if kind == 'lognormal': return lognorm.cdf(v, s=environment['sig'] * np.log(10), scale=np.exp(environment['mu'] * np.log(10))) elif kind == 'uniform': return np.clip( (np.asarray(v, dtype=float) - environment['vmin']) / (environment['vmax'] - environment['vmin']), 0.0, 1.0) elif kind == 'gaussian': return norm.cdf(v, loc=environment['mean'], scale=environment['std']) else: raise ValueError(f"Unknown environment kind: {kind!r}")
[docs] def compute_environment_retention(v_kick, environment): """Per-kick retention probability for a given environment. p_ret_i = 1 - CDF(v_kick_i) Parameters ---------- v_kick : array Kick velocity samples [km/s] environment : dict Environment specification (see ``escape_speed_cdf``) Returns ------- p_ret : numpy array Individual retention probability for each kick sample """ p_ret = 1.0 - escape_speed_cdf(np.asarray(v_kick), environment) return p_ret
[docs] def compute_multi_environment_retention(v_kick, environments=None): """Per-kick retention probabilities across multiple environments. Parameters ---------- v_kick : array Kick velocity samples [km/s] environments : dict of {name: environment_dict}, optional Defaults to all predefined environments (GC, NSC, EG, DG) Returns ------- dict of {name: numpy array of p_ret} """ if environments is None: environments = ENVIRONMENTS return {name: compute_environment_retention(v_kick, env) for name, env in environments.items()}
[docs] def retention_curve(v_kick, v_esc_array): """Retention probability as a function of escape speed. p_ret(v_esc) = P(v_kick < v_esc), the empirical CDF of kick samples evaluated on v_esc_array. Parameters ---------- v_kick : array Kick velocity samples [km/s] v_esc_array : array Escape speed values at which to evaluate retention [km/s] Returns ------- p_ret : numpy array Retention probability at each v_esc value """ v_sorted = np.sort(np.asarray(v_kick)) cdf = np.arange(1, len(v_sorted) + 1) / len(v_sorted) return np.interp(v_esc_array, v_sorted, cdf, left=0.0, right=1.0)
[docs] def compute_environment_cumulative_retention(v_kick, environment, method='kde', vmax=5000.0, ngrid=5000): """Integrated retention fraction P_ret for a given environment. P_ret = ∫ p_kick(v) [1 - CDF_esc(v)] dv Parameters ---------- v_kick : array Kick velocity samples [km/s] environment : dict Environment specification (see ``escape_speed_cdf``) method : str 'kde' (default): fit a KDE to the kick samples and integrate analytically. More accurate for posterior/MCMC samples. 'mc': Monte Carlo mean (1/N) Σ p_ret_i. Exact when samples are iid draws from the kick distribution. vmax : float Upper integration limit for KDE method [km/s] (default: 5000). ngrid : int Number of grid points for KDE integration (default: 5000). Returns ------- P_ret : float Integrated retention fraction """ v_kick = np.asarray(v_kick) if method == 'kde': kde = gaussian_kde(v_kick) v_grid = np.linspace(0.0, vmax, ngrid) p_kick = kde(v_grid) survival = 1.0 - escape_speed_cdf(v_grid, environment) P_ret = float(trapezoid(p_kick * survival, v_grid)) elif method == 'mc': p_ret = compute_environment_retention(v_kick, environment) P_ret = float(np.mean(p_ret)) else: raise ValueError(f"Unknown method: '{method}'. Choose 'kde' or 'mc'.") return P_ret
[docs] def compute_multi_environment_cumulative_retention(v_kick, environments=None, method='kde', **kwargs): """Integrated retention fraction P_ret across multiple environments. Parameters ---------- v_kick : array Kick velocity samples [km/s] environments : dict of {name: environment_dict}, optional Defaults to all predefined environments. method : str 'kde' (default) or 'mc'. See ``compute_environment_cumulative_retention``. **kwargs Additional keyword arguments passed to ``compute_environment_cumulative_retention`` (vmax, ngrid). Returns ------- dict of {name: P_ret} """ if environments is None: environments = ENVIRONMENTS return {name: compute_environment_cumulative_retention(v_kick, env, method=method, **kwargs) for name, env in environments.items()}