Source code for benford.viz

from numpy import array, arange, maximum, sqrt, ones
import matplotlib.pyplot as plt
from matplotlib.text import Annotation
from .constants import COLORS, MAD_CONFORM


[docs]def plot_expected(df, digs, save_plot=None, save_plot_kwargs=None):
    """Plots the Expected Benford Distributions

    Args:
        df: DataFrame with the Expected Proportions
        digs: Test's digit
        save_plot: string with the path/name of the file in which the generated
            plot will be saved. Uses matplotlib.pyplot.savefig(). File format
            is infered by the file name extension.
        save_plot_kwargs: dict with any of the kwargs accepted by
            matplotlib.pyplot.savefig()
            https://matplotlib.org/api/_as_gen/matplotlib.pyplot.savefig.html
    """
    if digs in [1, 2, 3]:
        y_max = (df.Expected.max() + (10 ** -(digs) / 3)) * 100
        figsize = 2 * (digs ** 2 + 5), 1.5 * (digs ** 2 + 5)
    elif digs == 22:
        y_max = 13.
        figsize = 14, 10.5
    elif digs == -2:
        y_max = 1.1
        figsize = 15, 8
    fig, ax = plt.subplots(figsize=figsize)
    plt.title('Expected Benford Distributions', size='xx-large')
    plt.xlabel(df.index.name, size='x-large')
    plt.ylabel('Distribution (%)', size='x-large')
    ax.set_facecolor(COLORS['b'])
    ax.set_ylim(0, y_max)
    ax.bar(df.index, df.Expected * 100, color=COLORS['t'], align='center')
    ax.set_xticks(df.index)
    ax.set_xticklabels(df.index)

    if save_plot:
        if not save_plot_kwargs:
            save_plot_kwargs = {}
        plt.savefig(save_plot, **save_plot_kwargs)

    plt.show(block=False)


def _get_plot_args(digs):
    """Selects the correct arguments for the plotting functions, depending on the
    the test (digs) chosen.
    """
    if digs in [1, 2, 3]:
        text_x = False
        n, m = 10 ** (digs - 1), 10 ** (digs)
        x = arange(n, m)
        figsize = (2 * (digs ** 2 + 5), 1.5 * (digs ** 2 + 5))
    elif digs == 22:
        text_x = False
        x = arange(10)
        figsize = (14, 10)
    else:
        text_x = True
        x = arange(100)
        figsize = (15, 7)
    return x, figsize, text_x

[docs]def plot_digs(df, x, y_Exp, y_Found, N, figsize, conf_Z, text_x=False,
              save_plot=None, save_plot_kwargs=None):
    """Plots the digits tests results

    Args:
        df: DataFrame with the data to be plotted
        x: sequence to be used in the x axis
        y_Exp: sequence of the expected proportions to be used in the y axis
            (line)
        y_Found: sequence of the found proportions to be used in the y axis
            (bars)
        N: lenght of sequence, to be used when plotting the confidence levels
        figsize: tuple to state the size of the plot figure
        conf_Z: Confidence level
        save_pic: file path to save figure
        text_x: Forces to show all x ticks labels. Defaluts to True.
        save_plot: string with the path/name of the file in which the generated
            plot will be saved. Uses matplotlib.pyplot.savefig(). File format
            is infered by the file name extension.
        save_plot_kwargs: dict with any of the kwargs accepted by
            matplotlib.pyplot.savefig()
            https://matplotlib.org/api/_as_gen/matplotlib.pyplot.savefig.html
        
    """
    if len(x) > 10:
        rotation = 90
    else:
        rotation = 0
    fig, ax = plt.subplots(figsize=figsize)
    plt.title('Expected vs. Found Distributions', size='xx-large')
    plt.xlabel('Digits', size='x-large')
    plt.ylabel('Distribution (%)', size='x-large')
    if conf_Z is not None:
        sig = conf_Z * sqrt(y_Exp * (1 - y_Exp) / N)
        upper = y_Exp + sig + (1 / (2 * N))
        lower_zeros = array([0]*len(upper))
        lower = maximum(y_Exp - sig - (1 / (2 * N)), lower_zeros)
        u = (y_Found < lower) | (y_Found > upper)
        c = array([COLORS['m']] * len(u))
        c[u] = COLORS['af']
        lower *= 100.
        upper *= 100.
        ax.plot(x, upper, color=COLORS['s'], zorder=5)
        ax.plot(x, lower, color=COLORS['s'], zorder=5)
        ax.fill_between(x, upper, lower, color=COLORS['s'],
                        alpha=.3, label='Conf')
    else:
        c = COLORS['m']
    ax.bar(x, y_Found * 100., color=c, label='Found', zorder=3, align='center')
    ax.plot(x, y_Exp * 100., color=COLORS['s'], linewidth=2.5,
            label='Benford', zorder=4)
    ax.set_xticks(x)
    ax.set_xticklabels(x, rotation=rotation)
    ax.set_facecolor(COLORS['b'])
    if text_x:
        ind = array(df.index).astype(str)
        ind[:10] = array(['00', '01', '02', '03', '04', '05',
                          '06', '07', '08', '09'])
        plt.xticks(x, ind, rotation='vertical')
    ax.legend()
    ax.set_ylim(0, max([y_Exp.max() * 100, y_Found.max() * 100]) + 10 / len(x))
    ax.set_xlim(x[0] - 1, x[-1] + 1)

    if save_plot:
        if not save_plot_kwargs:
            save_plot_kwargs = {}
        plt.savefig(save_plot, **save_plot_kwargs)

    plt.show(block=False)


[docs]def plot_sum(df, figsize, li, text_x=False, save_plot=None, save_plot_kwargs=None):
    """Plots the summation test results

    Args:
        df: DataFrame with the data to be plotted
        figsize: sets the dimensions of the plot figure
        li: value with which to draw the horizontal line
        save_plot: string with the path/name of the file in which the generated
            plot will be saved. Uses matplotlib.pyplot.savefig(). File format
            is infered by the file name extension.
        save_plot_kwargs: dict with any of the kwargs accepted by
            matplotlib.pyplot.savefig()
            https://matplotlib.org/api/_as_gen/matplotlib.pyplot.savefig.html
    """
    x = df.index
    rotation = 90 if len(x) > 10 else 0
    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(111)
    plt.title('Expected vs. Found Sums')
    plt.xlabel('Digits')
    plt.ylabel('Sums')
    ax.bar(x, df.Percent, color=COLORS['m'],
           label='Found Sums', zorder=3, align='center')
    ax.set_xlim(x[0] - 1, x[-1] + 1)
    ax.axhline(li, color=COLORS['s'], linewidth=2, label='Expected', zorder=4)
    ax.set_xticks(x)
    ax.set_xticklabels(x, rotation=rotation)
    ax.set_facecolor(COLORS['b'])
    if text_x:
        ind = array(x).astype(str)
        ind[:10] = array(['00', '01', '02', '03', '04', '05',
                          '06', '07', '08', '09'])
        plt.xticks(x, ind, rotation='vertical')
    ax.legend()

    if save_plot:
        if not save_plot_kwargs:
            save_plot_kwargs = {}
        plt.savefig(save_plot, **save_plot_kwargs)

    plt.show(block=False)

[docs]def plot_ordered_mantissas(col, figsize=(12, 12),
                           save_plot=None, save_plot_kwargs=None):
    """Plots the ordered mantissas and compares them to the expected, straight
        line that should be formed in a Benford-cmpliant set.

    Args:
        col (Series): column of mantissas to plot.
        figsize (tuple): sets the dimensions of the plot figure.
        save_plot: string with the path/name of the file in which the generated
            plot will be saved. Uses matplotlib.pyplot.savefig(). File format
            is infered by the file name extension.
        save_plot_kwargs: dict with any of the kwargs accepted by
            matplotlib.pyplot.savefig()
            https://matplotlib.org/api/_as_gen/matplotlib.pyplot.savefig.html
 
    """
    ld = len(col)
    x = arange(1, ld + 1)
    n = ones(ld) / ld
    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(111)
    ax.plot(x, col.sort_values(), linestyle='--',
            color=COLORS['s'], linewidth=3, label='Mantissas')
    ax.plot(x, n.cumsum(), color=COLORS['m'],
            linewidth=2, label='Expected')
    plt.ylim((0, 1.))
    plt.xlim((1, ld + 1))
    ax.set_facecolor(COLORS['b'])
    ax.set_title("Ordered Mantissas")
    plt.legend(loc='upper left')

    if save_plot:
        if not save_plot_kwargs:
            save_plot_kwargs = {}
        plt.savefig(save_plot, **save_plot_kwargs)

    plt.show(block=False);

[docs]def plot_mantissa_arc_test(df, gravity_center, grid=True, figsize=12,
                           save_plot=None, save_plot_kwargs=None):
    """Draws thee Mantissa Arc Test after computing X and Y circular coordinates
    for every mantissa and the center of gravity for the set

    Args:
        df (DataFrame): pandas DataFrame with the mantissas and the X and Y
            coordinates.
        gravity_center (tuple): coordinates for plottling the gravity center
        grid (bool): show grid. Defaults to True.
        figsize (int): figure dimensions. No need to be a tuple, since the
            figure is a square.
        save_plot: string with the path/name of the file in which the generated
            plot will be saved. Uses matplotlib.pyplot.savefig(). File format
            is infered by the file name extension.
        save_plot_kwargs: dict with any of the kwargs accepted by
            matplotlib.pyplot.savefig()
            https://matplotlib.org/api/_as_gen/matplotlib.pyplot.savefig.html
    """
    fig = plt.figure(figsize=(figsize, figsize))
    ax = plt.subplot()
    ax.set_facecolor(COLORS['b'])
    ax.scatter(df.mant_x, df.mant_y, label="ARC TEST",
               color=COLORS['m'])
    ax.scatter(gravity_center[0], gravity_center[1],
               color=COLORS['s'])
    text_annotation = Annotation(
        "  Gravity Center: "
        f"x({round(gravity_center[0], 3)}),"
        f" y({round(gravity_center[1], 3)})",
        xy=(gravity_center[0] - 0.65,
            gravity_center[1] - 0.1),
        xycoords='data')
    ax.add_artist(text_annotation)
    ax.grid(True, which='both')
    ax.axhline(y=0, color='k')
    ax.axvline(x=0, color='k')
    ax.legend(loc='lower left')
    ax.set_title("Mantissas Arc Test")

    if save_plot:
        if not save_plot_kwargs:
            save_plot_kwargs = {}
        plt.savefig(save_plot, **save_plot_kwargs)

    plt.show(block=False);

[docs]def plot_roll_mse(roll_series, figsize, save_plot=None, save_plot_kwargs=None):
    """Shows the rolling MSE plot

    Args:
        roll_series: pd.Series resultant form rolling mse.
        figsize: the figure dimensions.
        save_plot: string with the path/name of the file in which the generated
            plot will be saved. Uses matplotlib.pyplot.savefig(). File format
            is infered by the file name extension.
        save_plot_kwargs: dict with any of the kwargs accepted by
            matplotlib.pyplot.savefig()
            https://matplotlib.org/api/_as_gen/matplotlib.pyplot.savefig.html
    """
    fig, ax = plt.subplots(figsize=figsize)
    ax.set_facecolor(COLORS['b'])
    ax.plot(roll_series, color=COLORS['m'])

    if save_plot:
        if not save_plot_kwargs:
            save_plot_kwargs = {}
        plt.savefig(save_plot, **save_plot_kwargs)

    plt.show(block=False)

[docs]def plot_roll_mad(roll_mad, figsize, save_plot=None, save_plot_kwargs=None):
    """Shows the rolling MAD plot

    Args:
        roll_mad: pd.Series resultant form rolling mad.
        figsize: the figure dimensions.
        save_plot: string with the path/name of the file in which the generated
            plot will be saved. Uses matplotlib.pyplot.savefig(). File format
            is infered by the file name extension.
        save_plot_kwargs: dict with any of the kwargs accepted by
            matplotlib.pyplot.savefig()
            https://matplotlib.org/api/_as_gen/matplotlib.pyplot.savefig.html
    """
    fig, ax = plt.subplots(figsize=figsize)
    ax.set_facecolor(COLORS['b'])
    ax.plot(roll_mad.roll_series, color=COLORS['m'])

    if roll_mad.test != -2:
        plt.axhline(y=MAD_CONFORM[roll_mad.test][0], color=COLORS['af'], linewidth=3)
        plt.axhline(y=MAD_CONFORM[roll_mad.test][1], color=COLORS['h2'], linewidth=3)
        plt.axhline(y=MAD_CONFORM[roll_mad.test][2], color=COLORS['s'], linewidth=3)

    if save_plot:
        if not save_plot_kwargs:
            save_plot_kwargs = {}
        plt.savefig(save_plot, **save_plot_kwargs)

    plt.show(block=False)