#前言
这个笔记是北大那位老师课程的学习笔记,讲的概念浅显易懂,非常有利于我们掌握基本的概念,从而掌握相关的技术。
#basic concepts
EM算法的核心是,首先假设模型符合什么分布,然后计算相关参数,再根据计算出的结果,重新划分样本分布,然后再计算相关参数,直到收敛为止。
公式证明比较繁琐,这里就不贴了,附上一个python实现的EM
#! -*- coding=utf-8 -*-#模拟两个正态分布的均值估计from numpy import *import numpy as npimport randomimport copySIGMA = 6EPS = 0.0001#生成方差相同,均值不同的样本def generate_data():Miu1 = 20Miu2 = 40N = 1000X = mat(zeros((N,1)))for i in range(N):temp = random.uniform(0,1)if(temp > 0.5):X[i] = temp*SIGMA + Miu1else:X[i] = temp*SIGMA + Miu2return X#EM算法def my_EM(X):k = 2N = len(X)Miu = np.random.rand(k,1)Posterior = mat(zeros((N,2)))dominator = 0numerator = 0#先求后验概率for iter in range(1000):for i in range(N):dominator = 0for j in range(k):dominator = dominator + np.exp(-1.0/(2.0*SIGMA**2) * (X[i] - Miu[j])**2)#print dominator,-1/(2*SIGMA**2) * (X[i] - Miu[j])**2,2*SIGMA**2,(X[i] - Miu[j])**2#returnfor j in range(k):numerator = np.exp(-1.0/(2.0*SIGMA**2) * (X[i] - Miu[j])**2)Posterior[i,j] = numerator/dominatoroldMiu = copy.deepcopy(Miu)#最大化for j in range(k):numerator = 0dominator = 0for i in range(N):numerator = numerator + Posterior[i,j] * X[i]dominator = dominator + Posterior[i,j]Miu[j] = numerator/dominatorprint (abs(Miu - oldMiu)).sum() #print '\n'if (abs(Miu - oldMiu)).sum() < EPS:print Miu,iterbreakif __name__ == '__main__':X = generate_data()my_EM(X)