from typing import Tuple
import math as m
import pandas as pd
import numpy as np

Probability test
fastai
Probability test
Calculating probabilities of values in relation to threshold values.
The code is based upon the respective example from Data Science from Scratch.
Libraries
Probability below a threshold
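$ p = \frac{1 + \operatorname{erf}\left(\frac{x - \mu}{\sigma \sqrt{2}}\right)}{2} $
where
$ \operatorname{erf}(z) = \frac{2}{\sqrt{\pi}} \int_0^z e^{-t^2} \, dt $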
is the error function.
def calc_normal_cdf(x: float, mu: float = 0, sigma: float = 1) -> float:
    """Return the normal cumulative distribution function evaluated at ``x``.

    Computes ``(1 + erf((x - mu) / (sqrt(2) * sigma))) / 2``.

    Args:
        x: Threshold at which the CDF is evaluated.
        mu: Mean of the distribution (defaults to 0).
        sigma: Standard deviation of the distribution (defaults to 1).

    Returns:
        The probability that a normal variate with the given parameters
        falls below ``x``.

    Raises:
        ZeroDivisionError: If ``sigma`` is 0.
    """
    # Standardise first, then rescale for the error function.
    z = (x - mu) / m.sqrt(2) / sigma
    return 0.5 * (1 + m.erf(z))


# "Probability below a threshold" is exactly the CDF, so it is a plain alias.
normal_probability_below = calc_normal_cdf

# Tabulate the probability for thresholds -1.0, -0.9, ..., 1.0.
for i in (n / 10 for n in range(-10, 11)):
    print("\t".join((str(i), f"{normal_probability_below(i):.4f}")))

# Output:
# -1.0 0.1587
# -0.9 0.1841
# -0.8 0.2119
# -0.7 0.2420
# -0.6 0.2743
# -0.5 0.3085
# -0.4 0.3446
# -0.3 0.3821
# -0.2 0.4207
# -0.1 0.4602
# 0.0 0.5000
# 0.1 0.5398
# 0.2 0.5793
# 0.3 0.6179
# 0.4 0.6554
# 0.5 0.6915
# 0.6 0.7257
# 0.7 0.7580
# 0.8 0.7881
# 0.9 0.8159
# 1.0 0.8413
Probability above a threshold
def normal_probability_above(lo: float, mu: float = 0, sigma: float = 1) -> float:
    """Return the probability that a normal variate exceeds ``lo``.

    Computes ``erfc((lo - mu) / (sqrt(2) * sigma)) / 2``, which is
    mathematically equal to one minus the normal CDF at ``lo``.

    Args:
        lo: Lower threshold.
        mu: Mean of the distribution (defaults to 0).
        sigma: Standard deviation of the distribution (defaults to 1).

    Returns:
        The upper-tail probability ``P(X > lo)``.

    Raises:
        ZeroDivisionError: If ``sigma`` is 0.
    """
    # Use the complementary error function instead of ``1 - cdf``: the
    # subtraction loses all significant digits once the CDF rounds to 1.0,
    # so far-tail probabilities would collapse to exactly 0.0.
    return m.erfc((lo - mu) / m.sqrt(2) / sigma) / 2


# Tabulate the probability for thresholds -1.0, -0.9, ..., 1.0.
for i in [n / 10 for n in range(-10, 10 + 1, 1)]:
    print("\t".join([str(i), f"{normal_probability_above(i):.4f}"]))

# Output:
# -1.0 0.8413
# -0.9 0.8159
# -0.8 0.7881
# -0.7 0.7580
# -0.6 0.7257
# -0.5 0.6915
# -0.4 0.6554
# -0.3 0.6179
# -0.2 0.5793
# -0.1 0.5398
# 0.0 0.5000
# 0.1 0.4602
# 0.2 0.4207
# 0.3 0.3821
# 0.4 0.3446
# 0.5 0.3085
# 0.6 0.2743
# 0.7 0.2420
# 0.8 0.2119
# 0.9 0.1841
# 1.0 0.1587
Probability between thresholds
# Threshold grid -1.0, -0.8, ..., 1.0: generate integer steps first and scale
# them afterwards, so every grid point results from a single division.
domain = np.true_divide(np.arange(-10, 11, 2), 10)
domain

# Output:
# array([-1. , -0.8, -0.6, -0.4, -0.2, 0. , 0.2, 0.4, 0.6, 0.8, 1. ])
def normal_probability_between(lo: float, hi: float, mu: float = 0, sigma: float = 1) -> float:
    """Return the probability that a normal variate lies between ``lo`` and ``hi``.

    Computed as the probability below ``hi`` minus the probability below
    ``lo``. No check is made that ``lo <= hi``; swapping the bounds yields
    the negated value.

    Args:
        lo: Lower threshold.
        hi: Upper threshold.
        mu: Mean of the distribution (defaults to 0).
        sigma: Standard deviation of the distribution (defaults to 1).

    Returns:
        ``normal_probability_below(hi) - normal_probability_below(lo)``.
    """
    return normal_probability_below(hi, mu, sigma) - normal_probability_below(lo, mu, sigma)


# Rows are lower thresholds, columns are upper thresholds.
probabilities_between = pd.DataFrame(
    [[normal_probability_between(i, j) for j in domain] for i in domain],
    index=domain,
    columns=domain,
)
# Keep only the cells with lo <= hi. The mask is positional rather than
# value-based so that legitimate zeros (the diagonal, where lo == hi) survive,
# and it avoids the ``np.NaN`` alias that NumPy 2.0 removed.
probabilities_between = probabilities_between.where(
    np.triu(np.ones(probabilities_between.shape, dtype=bool))
)
probabilities_between

# Output:
# | | -1.0 | -0.8 | -0.6 | -0.4 | -0.2 | 0.0 | 0.2 | 0.4 | 0.6 | 0.8 | 1.0 |
# |---|---|---|---|---|---|---|---|---|---|---|---|
# | -1.0 | 0.0 | 0.0532 | 0.115598 | 0.185923 | 0.262085 | 0.341345 | 0.420604 | 0.496766 | 0.567092 | 0.629489 | 0.682689 |
# | -0.8 | NaN | 0.0000 | 0.062398 | 0.132723 | 0.208885 | 0.288145 | 0.367404 | 0.443566 | 0.513891 | 0.576289 | 0.629489 |
# | -0.6 | NaN | NaN | 0.000000 | 0.070325 | 0.146487 | 0.225747 | 0.305007 | 0.381169 | 0.451494 | 0.513891 | 0.567092 |
# | -0.4 | NaN | NaN | NaN | 0.000000 | 0.076162 | 0.155422 | 0.234681 | 0.310843 | 0.381169 | 0.443566 | 0.496766 |
# | -0.2 | NaN | NaN | NaN | NaN | 0.000000 | 0.079260 | 0.158519 | 0.234681 | 0.305007 | 0.367404 | 0.420604 |
# | 0.0 | NaN | NaN | NaN | NaN | NaN | 0.000000 | 0.079260 | 0.155422 | 0.225747 | 0.288145 | 0.341345 |
# | 0.2 | NaN | NaN | NaN | NaN | NaN | NaN | 0.000000 | 0.076162 | 0.146487 | 0.208885 | 0.262085 |
# | 0.4 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.000000 | 0.070325 | 0.132723 | 0.185923 |
# | 0.6 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.000000 | 0.062398 | 0.115598 |
# | 0.8 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.000000 | 0.053200 |
# | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.000000 |
Probability outside a threshold range
def normal_probability_outside(lo: float, hi: float, mu: float = 0, sigma: float = 1) -> float:
    """Return the probability that a normal variate lies outside ``lo``..``hi``.

    Computed as the complement of ``normal_probability_between``.

    Args:
        lo: Lower threshold.
        hi: Upper threshold.
        mu: Mean of the distribution (defaults to 0).
        sigma: Standard deviation of the distribution (defaults to 1).

    Returns:
        ``1 - normal_probability_between(lo, hi, mu, sigma)``.
    """
    return 1 - normal_probability_between(lo, hi, mu, sigma)


# Rows are lower thresholds, columns are upper thresholds. The stored frame
# holds every (lo, hi) combination, including those with lo > hi.
probabilities_outside = pd.DataFrame(
    [[normal_probability_outside(i, j) for j in domain] for i in domain],
    index=domain,
    columns=domain,
)
# Display only the cells with lo <= hi. The mask is positional rather than
# value-based so a genuine 0 would not be blanked, and it avoids the
# ``np.NaN`` alias that NumPy 2.0 removed.
probabilities_outside.where(np.triu(np.ones(probabilities_outside.shape, dtype=bool)))

# Output:
# | | -1.0 | -0.8 | -0.6 | -0.4 | -0.2 | 0.0 | 0.2 | 0.4 | 0.6 | 0.8 | 1.0 |
# |---|---|---|---|---|---|---|---|---|---|---|---|
# | -1.0 | 1.0 | 0.9468 | 0.884402 | 0.814077 | 0.737915 | 0.658655 | 0.579396 | 0.503234 | 0.432908 | 0.370511 | 0.317311 |
# | -0.8 | NaN | 1.0000 | 0.937602 | 0.867277 | 0.791115 | 0.711855 | 0.632596 | 0.556434 | 0.486109 | 0.423711 | 0.370511 |
# | -0.6 | NaN | NaN | 1.000000 | 0.929675 | 0.853513 | 0.774253 | 0.694993 | 0.618831 | 0.548506 | 0.486109 | 0.432908 |
# | -0.4 | NaN | NaN | NaN | 1.000000 | 0.923838 | 0.844578 | 0.765319 | 0.689157 | 0.618831 | 0.556434 | 0.503234 |
# | -0.2 | NaN | NaN | NaN | NaN | 1.000000 | 0.920740 | 0.841481 | 0.765319 | 0.694993 | 0.632596 | 0.579396 |
# | 0.0 | NaN | NaN | NaN | NaN | NaN | 1.000000 | 0.920740 | 0.844578 | 0.774253 | 0.711855 | 0.658655 |
# | 0.2 | NaN | NaN | NaN | NaN | NaN | NaN | 1.000000 | 0.923838 | 0.853513 | 0.791115 | 0.737915 |
# | 0.4 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.000000 | 0.929675 | 0.867277 | 0.814077 |
# | 0.6 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.000000 | 0.937602 | 0.884402 |
# | 0.8 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.000000 | 0.946800 |
# | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.000000 |