Source code for benford.expected

from pandas import DataFrame
from numpy import array, arange, log10
from .checks import _check_digs_
from .viz import plot_expected


[docs]class First(DataFrame): """Holds the expected probabilities of the First, First Two, or First Three digits according to Benford's distribution. Args: digs: 1, 2 or 3 - tells which of the first digits to consider: 1 for the First Digit, 2 for the First Two Digits and 3 for the First Three Digits. plot: option to plot a bar chart of the Expected proportions. Defaults to True. save_plot: string with the path/name of the file in which the generated plot will be saved. Uses matplotlib.pyplot.savefig(). File format is infered by the file name extension. Only available when plot=True. save_plot_kwargs: dict with any of the kwargs accepted by matplotlib.pyplot.savefig() https://matplotlib.org/api/_as_gen/matplotlib.pyplot.savefig.html Only available when plot=True and save_plot is a string with the figure file path/name. """ def __init__(self, digs, plot=True, save_plot=None, save_plot_kwargs=None): _check_digs_(digs) dig_name = f'First_{digs}_Dig' exp_array, dig_array = _gen_first_digits_(digs) DataFrame.__init__(self, {'Expected': exp_array}, index=dig_array) self.index.names = [dig_name] if plot: plot_expected(self, digs, save_plot=save_plot, save_plot_kwargs=save_plot_kwargs)
[docs]class Second(DataFrame): """Holds the expected probabilities of the Second Digits according to Benford's distribution. Args: plot: option to plot a bar chart of the Expected proportions. Defaults to True. save_plot: string with the path/name of the file in which the generated plot will be saved. Uses matplotlib.pyplot.savefig(). File format is infered by the file name extension. Only available when plot=True. save_plot_kwargs: dict with any of the kwargs accepted by matplotlib.pyplot.savefig() https://matplotlib.org/api/_as_gen/matplotlib.pyplot.savefig.html Only available when plot=True and save_plot is a string with the figure file path/name. """ def __init__(self, plot=True, save_plot=None, save_plot_kwargs=None): exp, sec_digs = _gen_second_digits_() DataFrame.__init__(self, {'Expected': exp, 'Sec_Dig': sec_digs}) self.set_index("Sec_Dig", inplace=True) if plot: plot_expected(self, 22, save_plot=save_plot, save_plot_kwargs=save_plot_kwargs)
[docs]class LastTwo(DataFrame): """Holds the expected probabilities of the Last Two Digits according to Benford's distribution. Args: plot: option to plot a bar chart of the Expected proportions. Defaults to True. save_plot: string with the path/name of the file in which the generated plot will be saved. Uses matplotlib.pyplot.savefig(). File format is infered by the file name extension. Only available when plot=True. save_plot_kwargs: dict with any of the kwargs accepted by matplotlib.pyplot.savefig() https://matplotlib.org/api/_as_gen/matplotlib.pyplot.savefig.html Only available when plot=True and save_plot is a string with the figure file path/name. """ def __init__(self, num=False, plot=True, save_plot=None, save_plot_kwargs=None): exp, l2d = _gen_last_two_digits_(num=num) DataFrame.__init__(self, {'Expected': exp, 'Last_2_Dig': l2d}) self.set_index('Last_2_Dig', inplace=True) if plot: plot_expected(self, -2, save_plot=save_plot, save_plot_kwargs=save_plot_kwargs)
def _get_expected_digits_(digs): """Chooses the Exxpected class to be used in a test Args: digs: the int corresponding to the Expected class to be instantiated Returns: the Expected instance forthe propoer test to be performed """ if digs in [1, 2, 3]: return First(digs, plot=False) elif digs == 22: return Second(plot=False) else: return LastTwo(num=True, plot=False) def _gen_last_two_digits_(num=False): """Creates two arrays, one with the possible last two digits and one with thei respective probabilities Args: num: returns numeric (ints) values. Defaluts to False, which returns strings. Returns: exp (np.array): Array with the (constant) probabilities of occurrence of each pair of last two digits l2d (np.array): Array of ints or str, in any case representing all 100 possible combinations of last two digits """ exp = array([1 / 99.] * 100) l2d = arange(0, 100) if num: return exp, l2d l2d = l2d.astype(str) l2d[:10] = array(['00', '01', '02', '03', '04', '05', '06', '07', '08', '09']) return exp, l2d def _gen_first_digits_(digs): """Creates two arrays, one with the possible digits combinations and the other with their respective expected probabilities according to Benford Args: digs (int): 1, 2 or 3, for generation of the first, first two, or first three digits Returns: (tuple of arrays): the expected probabilities array and the digits combination array. """ dig_array = arange(10 ** (digs - 1), 10 ** digs) exp_prob = log10(1 + (1. / dig_array)) return exp_prob, dig_array def _gen_second_digits_(): """Creates two arrays, one with he possible second digits combinations and the other with their respective expected probabilities according to Benford Returns: (tuple of arrays): the expected probabilities array and the second digits array. """ exp_f2d, _ = _gen_first_digits_(2) sec_digs = range(10) sec_digs_in_f2d = array(list(range(10)) * 9) exp = array([exp_f2d[sec_digs_in_f2d == i].sum() for i in sec_digs]) return exp, array(sec_digs)