import numpy as np
import scipy.stats as sps
import matplotlib.pyplot as plt
from scipy.special import logsumexp  # logsumexp moved from scipy.misc to scipy.special
#%matplotlib qt5
%matplotlib inline
plt.rcParams.update({'figure.figsize': (10.0, 6.0), 'font.size': 18})
A Gaussian mixture model (GMM) defines the pdf
$$p(x) = \sum_{c=1}^{C} \pi_c \, \mathcal{N}(x \mid \mu_c, \sigma_c^2),$$
where $\pi_c$, $\mu_c$ and $\sigma_c$ are the weight, mean and standard deviation of component $c$ (with $\sum_c \pi_c = 1$), and the single variate Gaussian distribution is
$$\mathcal{N}(x \mid \mu, \sigma^2) = \frac{1}{\sqrt{2 \pi \sigma^2}} \exp\left(-\frac{(x - \mu)^2}{2 \sigma^2}\right).$$
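To make the formula concrete, here is a quick numeric check (not part of the original notebook; the parameter values and variable names are made up for illustration) that the explicit sum of weighted Gaussian densities matches scipy's vectorized evaluation:
# Illustrative two-component GMM; mus_ex, sigmas_ex, pis_ex are hypothetical values
mus_ex, sigmas_ex, pis_ex = [0.0, 3.0], [1.0, 0.5], [0.7, 0.3]
x0 = 1.0
# sum_c pi_c * N(x0 | mu_c, sigma_c), written out from the formula above
p_manual = sum(pi * np.exp(-(x0 - mu)**2 / (2 * s**2)) / np.sqrt(2 * np.pi * s**2)
               for mu, s, pi in zip(mus_ex, sigmas_ex, pis_ex))
p_scipy = np.sum(sps.norm.pdf(x0, mus_ex, sigmas_ex) * pis_ex)
assert np.isclose(p_manual, p_scipy)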
# Plot the GMM pdf together with the individual component pdfs; return the GMM pdf values
def plot_GMM(t, mus, sigmas, pis):
    p_xz = sps.norm.pdf(t[:,np.newaxis], mus, sigmas) * pis # all GMM components evaluated at once; shape (len(t), C)
    px = np.sum(p_xz, axis=1) # marginal pdf p(x) is the sum over the weighted components
    plt.plot(t, p_xz, ':')
    plt.plot(t, px, 'k')
    return px
# Handcraft some GMM parameters
mus = [-4.0, 0.0, 4.0, 5]
sigmas = [1.0, 1.4, 1.2, 1]
pis = [0.1, 0.4, 0.2, 0.3]
t = np.linspace(-10,10,1000)
true_GMM_pdf = plot_GMM(t, mus, sigmas, pis)
# Generate N datapoints from this GMM
N = 100
Nc = sps.multinomial.rvs(N, pis) # draw the number of observations for each component from a multinomial distribution
x = sps.norm.rvs(np.repeat(mus, Nc), np.repeat(sigmas, Nc)) # sample each observation from its component's Gaussian
np.random.shuffle(x) # shuffle so the observations are not ordered by component
plt.plot(x, np.zeros_like(x), '+k');
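The multinomial-counts trick above is equivalent to ancestral sampling: draw a component index for every observation, then sample from that component's Gaussian. A sketch of that alternative (the names z and x_alt are mine, not from the notebook):
z = np.random.choice(len(pis), size=N, p=pis)  # component index for each observation
x_alt = sps.norm.rvs(np.array(mus)[z], np.array(sigmas)[z])  # sample from the selected components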
#Choose some initial parameters
C = 3 # number of GMM components
mus = x[:C] # use the first few observations as the initial means
sigmas = np.repeat(np.std(x), C) # sigma for all components is set to the std of the training data
pis = np.ones(C)/C
# Plot the initial GMM estimate (colored) against the true pdf (gray)
plt.clf()
plt.plot(t, true_GMM_pdf, 'gray')
plot_GMM(t, mus, sigmas, pis);
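A common alternative initialization (not used here, just a sketch assuming NumPy's Generator API; mus_rand is a hypothetical name) is to pick C random datapoints as the initial means instead of the first C:
rng = np.random.default_rng(0)                   # seeded only for reproducibility
mus_rand = rng.choice(x, size=C, replace=False)  # C distinct datapoints as initial means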
for _ in range(50):
    # E-step: per-component log-densities weighted by the mixture weights
    log_p_xz = sps.norm.logpdf(x[:,np.newaxis], mus, sigmas) + np.log(pis)
    log_p_x = logsumexp(log_p_xz, axis=1, keepdims=True) # log marginal likelihood of each datapoint
    print("Training data log likelihood:", log_p_x.sum())
    gammas = np.exp(log_p_xz - log_p_x) # responsibilities: posterior probability of each component given the datapoint
    # M-step: re-estimate the parameters from responsibility-weighted statistics
    Nc = gammas.sum(axis=0) # effective number of observations per component
    mus = x.dot(gammas) / Nc
    sigmas = np.sqrt((x**2).dot(gammas) / Nc - mus**2) # we use std, not variance!
    pis = Nc / Nc.sum()
    plot_GMM(t, mus, sigmas, pis)
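Two cheap sanity checks on the quantities left behind by the loop (a sketch; EM also guarantees that the printed log likelihood never decreases between iterations):
assert np.allclose(gammas.sum(axis=1), 1.0)  # responsibilities of each datapoint sum to one
assert np.isclose(pis.sum(), 1.0)            # mixture weights remain normalized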
# Plot the final GMM estimate (colored) against the true pdf (gray) and the training data
plt.clf()
plt.plot(t, true_GMM_pdf, 'gray')
plot_GMM(t, mus, sigmas, pis);
plt.plot(x, np.zeros_like(x), '+k');