Probability interval boundaries with Python

fastai

Published

October 10, 2020

Probability interval boundaries with Python

In the previous post we calculated proabilities and z values with Python. Here we continue with finding the interval boundaries belonging to a particular tail or two-sided probabiltiy.

Libraries and helper functions

import math as m
import numpy as np

def to_string(number: float, column_width: int = 20) -> str:
    # return str(number).ljust(column_width)
    return f"{str(number): <{column_width}}"

def calc_normal_cdf(x: float, mu: float = 0, sigma: float = 1) -> float:
    return (1 + m.erf((x - mu) / m.sqrt(2) / sigma)) / 2

def calc_inverse_normal_cdf(p: float, mu:float = 0, sigma: float = 1, tolerance: float = 1E-5, show_steps=False) -> float:

    if p == 0: return -np.inf
    if p == 1: return np.inf

    # In case it is not a standard normal distribution, calculate the standard normal first and then rescale
    if mu != 0 or sigma != 1:
        return mu + sigma * calc_inverse_normal_cdf(p, tolerance=tolerance)

    low_z = -10
    hi_z = 10

    if show_steps: print(f"{'':<19}".join(['low_z', 'mid_z', 'hi_z']), "\n")

    while hi_z - low_z > tolerance:
        mid_z = (low_z + hi_z) / 2
        mid_p = calc_normal_cdf(mid_z)

        if mid_p < p:
            low_z = mid_z
        else:
            hi_z = mid_z

        if show_steps: print("\t".join(map(to_string, [low_z, mid_z, hi_z])))

    return mid_z

Upper bound

def normal_upper_bound(probabilty: float, mu: float = 0, sigma: float = 1) -> float:
    """Return z for which P(Z <= z) = probability"""
    return calc_inverse_normal_cdf(probabilty, mu, sigma)

def normal_lower_bound(probabilty: float, mu: float = 0, sigma: float = 1) -> float:
    """Return z for which P(Z >= z) = probability"""
    return calc_inverse_normal_cdf(1 - probabilty, mu, sigma)

for i in np.arange(0, 1.1, .1):
    print(f"{i:.1f}: {normal_upper_bound(i): >10.4f}, {normal_lower_bound(i): >10.4f}")

0.0:       -inf,        inf
0.1:    -1.2816,     1.2816
0.2:    -0.8416,     0.8416
0.3:    -0.5244,     0.5244
0.4:    -0.2533,     0.2533
0.5:    -0.0000,    -0.0000
0.6:     0.2533,    -0.2533
0.7:     0.5244,    -0.5244
0.8:     0.8416,    -0.8416
0.9:     1.2816,    -1.2816
1.0:        inf,       -inf

Two sided bounds

Two variations

def normal_two_sided_bounds(probability: float, mu: float = 0, sigma: float = 1) -> float:
    if probability == 0: return 0, 0

    tail_probability = (1 - probability) / 2
    
    lower_bound = normal_upper_bound(tail_probability, mu, sigma)
    upper_bound = normal_lower_bound(tail_probability, mu, sigma)

    return lower_bound, upper_bound

def normal_two_sided_bounds2(probability: float, mu: float = 0, sigma: float = 1) -> float:
    if probability == 0: return 0, 0

    if mu != 0:
        raise ValueError("Requires standard normal distribution")

    tail_probability = (1 - probability) / 2
    z = normal_upper_bound(tail_probability, mu, sigma)
    
    return -z, z

By default they do not produce the same results at the extremes because of how the inverse cdf function defines the theoretical uppern and lower boundaries (as in these cases it should produce +/- infinity).

Accordingly, it is useful to define extreme values (i.e. probablities of 0 and 1)

for i in np.arange(0, 1.1, .1):
    print(f"{i:.1f}:  {normal_two_sided_bounds(i) == normal_two_sided_bounds2(i)}, {normal_two_sided_bounds(i)}, {normal_two_sided_bounds2(i)}")

0.0:  True, (0, 0), (0, 0)
0.1:  False, (-0.12566566467285156, 0.12566566467285156), (0.12566566467285156, -0.12566566467285156)
0.2:  False, (-0.2533435821533203, 0.2533435821533203), (0.2533435821533203, -0.2533435821533203)
0.3:  False, (-0.3853130340576172, 0.3853130340576172), (0.3853130340576172, -0.3853130340576172)
0.4:  False, (-0.5243968963623047, 0.5243968963623047), (0.5243968963623047, -0.5243968963623047)
0.5:  False, (-0.6744861602783203, 0.6744861602783203), (0.6744861602783203, -0.6744861602783203)
0.6:  False, (-0.8416271209716797, 0.8416271209716797), (0.8416271209716797, -0.8416271209716797)
0.7:  False, (-1.0364246368408203, 1.0364246368408203), (1.0364246368408203, -1.0364246368408203)
0.8:  False, (-1.2815570831298828, 1.2815570831298828), (1.2815570831298828, -1.2815570831298828)
0.9:  False, (-1.6448497772216797, 1.6448497772216797), (1.6448497772216797, -1.6448497772216797)
1.0:  False, (-inf, inf), (inf, -inf)