import math as m
import numpy as npProbability interval boundaries with Python
fastai
Probability interval boundaries with Python
In the previous post we calculated proabilities and z values with Python. Here we continue with finding the interval boundaries belonging to a particular tail or two-sided probabiltiy.
Libraries and helper functions
def to_string(number: float, column_width: int = 20) -> str:
# return str(number).ljust(column_width)
return f"{str(number): <{column_width}}"def calc_normal_cdf(x: float, mu: float = 0, sigma: float = 1) -> float:
return (1 + m.erf((x - mu) / m.sqrt(2) / sigma)) / 2def calc_inverse_normal_cdf(p: float, mu:float = 0, sigma: float = 1, tolerance: float = 1E-5, show_steps=False) -> float:
if p == 0: return -np.inf
if p == 1: return np.inf
# In case it is not a standard normal distribution, calculate the standard normal first and then rescale
if mu != 0 or sigma != 1:
return mu + sigma * calc_inverse_normal_cdf(p, tolerance=tolerance)
low_z = -10
hi_z = 10
if show_steps: print(f"{'':<19}".join(['low_z', 'mid_z', 'hi_z']), "\n")
while hi_z - low_z > tolerance:
mid_z = (low_z + hi_z) / 2
mid_p = calc_normal_cdf(mid_z)
if mid_p < p:
low_z = mid_z
else:
hi_z = mid_z
if show_steps: print("\t".join(map(to_string, [low_z, mid_z, hi_z])))
return mid_zUpper bound
def normal_upper_bound(probabilty: float, mu: float = 0, sigma: float = 1) -> float:
"""Return z for which P(Z <= z) = probability"""
return calc_inverse_normal_cdf(probabilty, mu, sigma)def normal_lower_bound(probabilty: float, mu: float = 0, sigma: float = 1) -> float:
"""Return z for which P(Z >= z) = probability"""
return calc_inverse_normal_cdf(1 - probabilty, mu, sigma)for i in np.arange(0, 1.1, .1):
print(f"{i:.1f}: {normal_upper_bound(i): >10.4f}, {normal_lower_bound(i): >10.4f}")0.0: -inf, inf
0.1: -1.2816, 1.2816
0.2: -0.8416, 0.8416
0.3: -0.5244, 0.5244
0.4: -0.2533, 0.2533
0.5: -0.0000, -0.0000
0.6: 0.2533, -0.2533
0.7: 0.5244, -0.5244
0.8: 0.8416, -0.8416
0.9: 1.2816, -1.2816
1.0: inf, -inf
Two sided bounds
Two variations
def normal_two_sided_bounds(probability: float, mu: float = 0, sigma: float = 1) -> float:
if probability == 0: return 0, 0
tail_probability = (1 - probability) / 2
lower_bound = normal_upper_bound(tail_probability, mu, sigma)
upper_bound = normal_lower_bound(tail_probability, mu, sigma)
return lower_bound, upper_bounddef normal_two_sided_bounds2(probability: float, mu: float = 0, sigma: float = 1) -> float:
if probability == 0: return 0, 0
if mu != 0:
raise ValueError("Requires standard normal distribution")
tail_probability = (1 - probability) / 2
z = normal_upper_bound(tail_probability, mu, sigma)
return -z, zBy default they do not produce the same results at the extremes because of how the inverse cdf function defines the theoretical uppern and lower boundaries (as in these cases it should produce +/- infinity).
Accordingly, it is useful to define extreme values (i.e. probablities of 0 and 1)
for i in np.arange(0, 1.1, .1):
print(f"{i:.1f}: {normal_two_sided_bounds(i) == normal_two_sided_bounds2(i)}, {normal_two_sided_bounds(i)}, {normal_two_sided_bounds2(i)}")0.0: True, (0, 0), (0, 0)
0.1: False, (-0.12566566467285156, 0.12566566467285156), (0.12566566467285156, -0.12566566467285156)
0.2: False, (-0.2533435821533203, 0.2533435821533203), (0.2533435821533203, -0.2533435821533203)
0.3: False, (-0.3853130340576172, 0.3853130340576172), (0.3853130340576172, -0.3853130340576172)
0.4: False, (-0.5243968963623047, 0.5243968963623047), (0.5243968963623047, -0.5243968963623047)
0.5: False, (-0.6744861602783203, 0.6744861602783203), (0.6744861602783203, -0.6744861602783203)
0.6: False, (-0.8416271209716797, 0.8416271209716797), (0.8416271209716797, -0.8416271209716797)
0.7: False, (-1.0364246368408203, 1.0364246368408203), (1.0364246368408203, -1.0364246368408203)
0.8: False, (-1.2815570831298828, 1.2815570831298828), (1.2815570831298828, -1.2815570831298828)
0.9: False, (-1.6448497772216797, 1.6448497772216797), (1.6448497772216797, -1.6448497772216797)
1.0: False, (-inf, inf), (inf, -inf)