Source code for benford.viz

from numpy import array, arange, maximum, sqrt, ones
import matplotlib.pyplot as plt
from matplotlib.text import Annotation
from .constants import COLORS, MAD_CONFORM


def plot_expected(df, digs, save_plot=None, save_plot_kwargs=None):
    """Plots the Expected Benford Distributions

    Args:
        df: DataFrame with the Expected Proportions. Its ``Expected`` column
            is drawn as bars (scaled to percentages) against its index.
        digs: Test's digit. Supported values are 1, 2, 3, 22 and -2; the
            value selects the y-axis upper limit and the figure size.
        save_plot: string with the path/name of the file in which the
            generated plot will be saved. Uses matplotlib.pyplot.savefig().
            File format is inferred by the file name extension.
        save_plot_kwargs: dict with any of the kwargs accepted by
            matplotlib.pyplot.savefig()
            https://matplotlib.org/api/_as_gen/matplotlib.pyplot.savefig.html

    Raises:
        ValueError: if ``digs`` is not one of the supported values.
    """
    if digs in [1, 2, 3]:
        y_max = (df.Expected.max() + (10 ** -(digs) / 3)) * 100
        figsize = 2 * (digs ** 2 + 5), 1.5 * (digs ** 2 + 5)
    elif digs == 22:
        y_max = 13.
        figsize = 14, 10.5
    elif digs == -2:
        y_max = 1.1
        figsize = 15, 8
    else:
        # Without this branch y_max/figsize would be unbound and the failure
        # would surface later as an opaque UnboundLocalError.
        raise ValueError(
            f"Unsupported value for digs: {digs!r}. "
            "Expected one of 1, 2, 3, 22 or -2."
        )

    fig, ax = plt.subplots(figsize=figsize)
    plt.title('Expected Benford Distributions', size='xx-large')
    plt.xlabel(df.index.name, size='x-large')
    plt.ylabel('Distribution (%)', size='x-large')
    ax.set_facecolor(COLORS['b'])
    ax.set_ylim(0, y_max)
    ax.bar(df.index, df.Expected * 100, color=COLORS['t'], align='center')
    ax.set_xticks(df.index)
    ax.set_xticklabels(df.index)

    if save_plot:
        # A missing/empty kwargs dict means "use savefig defaults".
        plt.savefig(save_plot, **(save_plot_kwargs or {}))

    plt.show(block=False)
def _get_plot_args(digs): """Selects the correct arguments for the plotting functions, depending on the the test (digs) chosen. """ if digs in [1, 2, 3]: text_x = False n, m = 10 ** (digs - 1), 10 ** (digs) x = arange(n, m) figsize = (2 * (digs ** 2 + 5), 1.5 * (digs ** 2 + 5)) elif digs == 22: text_x = False x = arange(10) figsize = (14, 10) else: text_x = True x = arange(100) figsize = (15, 7) return x, figsize, text_x
def plot_digs(df, x, y_Exp, y_Found, N, figsize, conf_Z, text_x=False,
              save_plot=None, save_plot_kwargs=None):
    """Plots the digits tests results

    Args:
        df: DataFrame with the data to be plotted. Only its index is read,
            and only when ``text_x`` is True.
        x: sequence to be used in the x axis
        y_Exp: sequence of the expected proportions to be used in the y
            axis (line)
        y_Found: sequence of the found proportions to be used in the y
            axis (bars)
        N: length of sequence, to be used when plotting the confidence
            levels
        figsize: tuple to state the size of the plot figure
        conf_Z: multiplier applied to ``sqrt(y_Exp * (1 - y_Exp) / N)`` to
            build the confidence band around the expected proportions.
            When None, no band is drawn and all bars share one color.
        text_x: when True, the x tick labels are taken from ``df.index``
            as strings, with the first ten replaced by '00' ... '09', and
            drawn vertically. Defaults to False.
        save_plot: string with the path/name of the file in which the
            generated plot will be saved. Uses matplotlib.pyplot.savefig().
            File format is inferred by the file name extension.
        save_plot_kwargs: dict with any of the kwargs accepted by
            matplotlib.pyplot.savefig()
            https://matplotlib.org/api/_as_gen/matplotlib.pyplot.savefig.html
    """
    rotation = 90 if len(x) > 10 else 0

    fig, ax = plt.subplots(figsize=figsize)
    plt.title('Expected vs. Found Distributions', size='xx-large')
    plt.xlabel('Digits', size='x-large')
    plt.ylabel('Distribution (%)', size='x-large')

    if conf_Z is not None:
        sig = conf_Z * sqrt(y_Exp * (1 - y_Exp) / N)
        margin = 1 / (2 * N)
        upper = y_Exp + sig + margin
        # Proportions cannot be negative, so the lower bound is floored at 0.
        lower = maximum(y_Exp - sig - margin, 0)
        outside = (y_Found < lower) | (y_Found > upper)
        # A plain list avoids the fixed-width NumPy string dtype, which
        # would silently truncate a highlight color longer than the default.
        c = [COLORS['af'] if out else COLORS['m'] for out in outside]
        # Bounds are computed on proportions and plotted as percentages.
        lower = lower * 100.
        upper = upper * 100.
        ax.plot(x, upper, color=COLORS['s'], zorder=5)
        ax.plot(x, lower, color=COLORS['s'], zorder=5)
        ax.fill_between(x, upper, lower, color=COLORS['s'],
                        alpha=.3, label='Conf')
    else:
        c = COLORS['m']

    ax.bar(x, y_Found * 100., color=c, label='Found', zorder=3,
           align='center')
    ax.plot(x, y_Exp * 100., color=COLORS['s'], linewidth=2.5,
            label='Benford', zorder=4)
    ax.set_xticks(x)
    ax.set_xticklabels(x, rotation=rotation)
    ax.set_facecolor(COLORS['b'])

    if text_x:
        # Overrides the numeric labels set above with zero-padded text.
        ind = array(df.index).astype(str)
        ind[:10] = array(['00', '01', '02', '03', '04', '05',
                          '06', '07', '08', '09'])
        plt.xticks(x, ind, rotation='vertical')

    ax.legend()
    ax.set_ylim(0, max([y_Exp.max() * 100, y_Found.max() * 100])
                + 10 / len(x))
    ax.set_xlim(x[0] - 1, x[-1] + 1)

    if save_plot:
        # A missing/empty kwargs dict means "use savefig defaults".
        plt.savefig(save_plot, **(save_plot_kwargs or {}))

    plt.show(block=False)
def plot_sum(df, figsize, li, text_x=False, save_plot=None,
             save_plot_kwargs=None):
    """Plots the summation test results

    Args:
        df: DataFrame with the data to be plotted. Its index goes on the
            x axis and its ``Percent`` column gives the bar heights.
        figsize: sets the dimensions of the plot figure
        li: value with which to draw the horizontal line
        text_x: when True, the x tick labels are the index values as
            strings, with the first ten replaced by '00' ... '09', and
            drawn vertically. Defaults to False.
        save_plot: string with the path/name of the file in which the
            generated plot will be saved. Uses matplotlib.pyplot.savefig().
            File format is inferred by the file name extension.
        save_plot_kwargs: dict with any of the kwargs accepted by
            matplotlib.pyplot.savefig()
            https://matplotlib.org/api/_as_gen/matplotlib.pyplot.savefig.html
    """
    digits = df.index
    if len(digits) > 10:
        tick_rotation = 90
    else:
        tick_rotation = 0

    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(1, 1, 1)
    plt.title('Expected vs. Found Sums')
    plt.xlabel('Digits')
    plt.ylabel('Sums')

    ax.bar(digits, df.Percent, color=COLORS['m'], label='Found Sums',
           zorder=3, align='center')
    # Leave one unit of padding on each side of the bars.
    ax.set_xlim(digits[0] - 1, digits[-1] + 1)
    ax.axhline(li, color=COLORS['s'], linewidth=2, label='Expected',
               zorder=4)
    ax.set_xticks(digits)
    ax.set_xticklabels(digits, rotation=tick_rotation)
    ax.set_facecolor(COLORS['b'])

    if text_x:
        # Overrides the numeric labels set above with zero-padded text.
        labels = array(digits).astype(str)
        labels[:10] = array(['00', '01', '02', '03', '04', '05',
                             '06', '07', '08', '09'])
        plt.xticks(digits, labels, rotation='vertical')

    ax.legend()

    if save_plot:
        plt.savefig(save_plot, **(save_plot_kwargs or {}))

    plt.show(block=False)
def plot_ordered_mantissas(col, figsize=(12, 12), save_plot=None,
                           save_plot_kwargs=None):
    """Plots the ordered mantissas and compares them to the expected,
    straight line that should be formed in a Benford-compliant set.

    Args:
        col (Series): column of mantissas to plot.
        figsize (tuple): sets the dimensions of the plot figure.
        save_plot: string with the path/name of the file in which the
            generated plot will be saved. Uses matplotlib.pyplot.savefig().
            File format is inferred by the file name extension.
        save_plot_kwargs: dict with any of the kwargs accepted by
            matplotlib.pyplot.savefig()
            https://matplotlib.org/api/_as_gen/matplotlib.pyplot.savefig.html
    """
    size = len(col)
    positions = arange(1, size + 1)
    # Running total of equal 1/size steps: the reference line ending at 1.
    expected = (ones(size) / size).cumsum()

    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(positions, col.sort_values(), linestyle='--',
            color=COLORS['s'], linewidth=3, label='Mantissas')
    ax.plot(positions, expected, color=COLORS['m'], linewidth=2,
            label='Expected')
    plt.ylim((0, 1.))
    plt.xlim((1, size + 1))
    ax.set_facecolor(COLORS['b'])
    ax.set_title("Ordered Mantissas")
    plt.legend(loc='upper left')

    if save_plot:
        plt.savefig(save_plot, **(save_plot_kwargs or {}))

    plt.show(block=False)
def plot_mantissa_arc_test(df, gravity_center, grid=True, figsize=12,
                           save_plot=None, save_plot_kwargs=None):
    """Draws the Mantissa Arc Test after computing X and Y circular
    coordinates for every mantissa and the center of gravity for the set

    Args:
        df (DataFrame): pandas DataFrame with the mantissas and the X and Y
            coordinates (read from its ``mant_x`` and ``mant_y`` columns).
        gravity_center (tuple): coordinates for plotting the gravity center
        grid (bool): show grid. Defaults to True.
        figsize (int): figure dimensions. No need to be a tuple, since the
            figure is a square.
        save_plot: string with the path/name of the file in which the
            generated plot will be saved. Uses matplotlib.pyplot.savefig().
            File format is inferred by the file name extension.
        save_plot_kwargs: dict with any of the kwargs accepted by
            matplotlib.pyplot.savefig()
            https://matplotlib.org/api/_as_gen/matplotlib.pyplot.savefig.html
    """
    center_x, center_y = gravity_center[0], gravity_center[1]

    plt.figure(figsize=(figsize, figsize))
    ax = plt.subplot()
    ax.set_facecolor(COLORS['b'])
    ax.scatter(df.mant_x, df.mant_y, label="ARC TEST", color=COLORS['m'])
    ax.scatter(center_x, center_y, color=COLORS['s'])

    # The label is offset down and to the left of the gravity-center marker.
    text_annotation = Annotation(
        "  Gravity Center: "
        f"x({round(center_x, 3)}),"
        f" y({round(center_y, 3)})",
        xy=(center_x - 0.65, center_y - 0.1),
        xycoords='data')
    ax.add_artist(text_annotation)

    # Honor the caller's choice; the grid used to be drawn unconditionally.
    ax.grid(grid, which='both')
    ax.axhline(y=0, color='k')
    ax.axvline(x=0, color='k')
    ax.legend(loc='lower left')
    ax.set_title("Mantissas Arc Test")

    if save_plot:
        # A missing/empty kwargs dict means "use savefig defaults".
        plt.savefig(save_plot, **(save_plot_kwargs or {}))

    plt.show(block=False)
def plot_roll_mse(roll_series, figsize, save_plot=None,
                  save_plot_kwargs=None):
    """Shows the rolling MSE plot

    Args:
        roll_series: pd.Series resultant from rolling mse.
        figsize: the figure dimensions.
        save_plot: string with the path/name of the file in which the
            generated plot will be saved. Uses matplotlib.pyplot.savefig().
            File format is inferred by the file name extension.
        save_plot_kwargs: dict with any of the kwargs accepted by
            matplotlib.pyplot.savefig()
            https://matplotlib.org/api/_as_gen/matplotlib.pyplot.savefig.html
    """
    _, axes = plt.subplots(figsize=figsize)
    axes.set_facecolor(COLORS['b'])
    axes.plot(roll_series, color=COLORS['m'])

    if save_plot:
        savefig_options = save_plot_kwargs if save_plot_kwargs else {}
        plt.savefig(save_plot, **savefig_options)

    plt.show(block=False)
def plot_roll_mad(roll_mad, figsize, save_plot=None, save_plot_kwargs=None):
    """Shows the rolling MAD plot

    Args:
        roll_mad: rolling MAD result. Its ``roll_series`` attribute is
            plotted and its ``test`` attribute selects the entry of
            MAD_CONFORM used for the horizontal reference lines.
        figsize: the figure dimensions.
        save_plot: string with the path/name of the file in which the
            generated plot will be saved. Uses matplotlib.pyplot.savefig().
            File format is inferred by the file name extension.
        save_plot_kwargs: dict with any of the kwargs accepted by
            matplotlib.pyplot.savefig()
            https://matplotlib.org/api/_as_gen/matplotlib.pyplot.savefig.html
    """
    _, axes = plt.subplots(figsize=figsize)
    axes.set_facecolor(COLORS['b'])
    axes.plot(roll_mad.roll_series, color=COLORS['m'])

    # No reference lines are drawn when the test is -2.
    if roll_mad.test != -2:
        for position, color_key in enumerate(('af', 'h2', 's')):
            plt.axhline(y=MAD_CONFORM[roll_mad.test][position],
                        color=COLORS[color_key], linewidth=3)

    if save_plot:
        savefig_options = save_plot_kwargs if save_plot_kwargs else {}
        plt.savefig(save_plot, **savefig_options)

    plt.show(block=False)