import matplotlib.pyplot as plt
import numpy as np

# Get the likelyhood for a binning
def L_simple(N, bins, alpha = 5):
    D = bins[1:] - bins[:-1]
    s = sum(n + alpha for n in N)
    return sum(n * np.log((n + alpha - 1.0) / (d * (s - 1.0))) for n,d in zip(N, D))

# Find optimal binning of a data set
def optimal_binning(x):
    # grid search over likelyhood and alpha
    bin_numbers = [int(x) for x in 10 ** np.linspace(1, 4, 100)]
    alphas = [1.1, 5.0, 10.0, 100.0]
    Likelyhoods = np.zeros((len(bin_numbers), len(alphas)))

    for i, bins in enumerate(bin_numbers):
        for j, alpha in enumerate(alphas):
            N, bin_edges = np.histogram(x, bins = bins)
            L = L_simple(N, bin_edges, alpha)
            Likelyhoods[i][j] = L
    
    i, j = np.unravel_index(np.argmax(Likelyhoods), Likelyhoods.shape)
    return bin_numbers[i], alphas[j]


x = np.loadtxt("SJC_data.txt", unpack=True)
bins, alpha = optimal_binning(x)
print(bins, alpha)
plt.title("bins = %d" % (bins))
N, B, _ = plt.hist(x, bins = bins)
plt.show()