import matplotlib.pyplot as plt import numpy as np # Get the likelyhood for a binning def L_simple(N, bins, alpha = 5): D = bins[1:] - bins[:-1] s = sum(n + alpha for n in N) return sum(n * np.log((n + alpha - 1.0) / (d * (s - 1.0))) for n,d in zip(N, D)) # Find optimal binning of a data set def optimal_binning(x): # grid search over likelyhood and alpha bin_numbers = [int(x) for x in 10 ** np.linspace(1, 4, 100)] alphas = [1.1, 5.0, 10.0, 100.0] Likelyhoods = np.zeros((len(bin_numbers), len(alphas))) for i, bins in enumerate(bin_numbers): for j, alpha in enumerate(alphas): N, bin_edges = np.histogram(x, bins = bins) L = L_simple(N, bin_edges, alpha) Likelyhoods[i][j] = L i, j = np.unravel_index(np.argmax(Likelyhoods), Likelyhoods.shape) return bin_numbers[i], alphas[j] x = np.loadtxt("SJC_data.txt", unpack=True) bins, alpha = optimal_binning(x) print(bins, alpha) plt.title("bins = %d" % (bins)) N, B, _ = plt.hist(x, bins = bins) plt.show()