ChengMingbo 2017-07-07
$$\color{yellow}{X\sim \mathcal{N}(\mu, \sigma^2)}$$ $$\color{yellow}{f(x;\mu,\sigma^2)=\frac{1}{\sqrt{2\pi}\sigma}e^{-\frac{1}{2}\frac{(x-\mu)^2}{\sigma^2} } }$$
$$X\sim \mathcal{N}\left(\begin{bmatrix}0\\0\end{bmatrix}, \begin{bmatrix}1 & 0\\0& 1\end{bmatrix}\right)$$
$$X\sim \mathcal{N}\left(\begin{bmatrix}0\\0\end{bmatrix}, \begin{bmatrix}1 & 0\\0& 1\end{bmatrix}\right)$$ $$\Downarrow$$ $$f(x;\mu,\Sigma)=\frac{1}{{(2\pi)}^{\frac{2}{2}}\left|\begin{matrix}1 & 0\\0& 1\end{matrix}\right|^{\frac{1}{2}}}e^{-\frac{1}{2}{\left(x-\begin{bmatrix}0\\0\end{bmatrix}\right)^T}{\begin{bmatrix}1 & 0\\0& 1\end{bmatrix}^{-1}}{\left(x-\begin{bmatrix}0\\0\end{bmatrix}\right)}}$$ $$x=\begin{bmatrix}0\\0\end{bmatrix},\quad f\left(\begin{bmatrix}0\\0\end{bmatrix};\begin{bmatrix}0\\0\end{bmatrix},\begin{bmatrix}1 & 0\\0& 1\end{bmatrix}\right)=\frac{1}{2\pi}$$
$$\mu=\begin{bmatrix}0\\0\end{bmatrix}, \quad\Sigma=\begin{bmatrix}\color{red}{4} & 0\\0& \color{red}{4}\end{bmatrix}\quad?$$
$$\mu=\begin{bmatrix}0\\0\end{bmatrix}, \quad\Sigma=\begin{bmatrix}\color{red}{1} & 0\\0&\color{blue}{5}\end{bmatrix}\quad?$$ $$\mu=\begin{bmatrix}0\\0\end{bmatrix}, \quad\Sigma=\begin{bmatrix}\color{blue}{5} & 0\\0& \color{red}{1}\end{bmatrix}\quad?$$
$$\begin{aligned}&\mu= \begin{bmatrix}0\\0\end{bmatrix}, \quad\Sigma= \begin{bmatrix}1 & \color{red}{0.5}\\\color{red}{0.5}& 1\end{bmatrix} \quad?\\\\&\mu=\begin{bmatrix}0\\0\end{bmatrix}, \quad\Sigma= \begin{bmatrix}1 & \color{red}{-0.5}\\\color{red}{-0.5}& 1\end{bmatrix}\quad? \end{aligned}$$
$$\begin{aligned}&\mu= \begin{bmatrix}0\\0\end{bmatrix}, \quad\Sigma= \begin{bmatrix}1 & -0.0001\\-0.0001& 1\end{bmatrix}\quad?\\\\&\mu= \begin{bmatrix}0\\0\end{bmatrix}, \quad\Sigma= \begin{bmatrix}1 & 0.99\\0.99& 1\end{bmatrix}\quad? \end{aligned}$$
$$\mu=\begin{bmatrix}0\\0\end{bmatrix}, \quad\Sigma=\begin{bmatrix}1 & 1\\1& 1\end{bmatrix}\quad?$$
$$f(x;\mu,\Sigma)=\frac{1}{{(2\pi)}^{\frac{2}{2}}\left|\color{red}{\begin{matrix}1 & 1\\1& 1\end{matrix}}\right|^{\frac{1}{2}}} e^{-\frac{1}{2}{\left(x-\begin{bmatrix}0\\0\end{bmatrix}\right)^T}\color{red}{\begin{bmatrix}1 & 1\\1& 1\end{bmatrix}^{-1}}{\left(x-\begin{bmatrix}0\\0\end{bmatrix}\right)}}$$
$$f(x;\mu,\Sigma)=\frac{1}{{(2\pi)}^{\frac{d}{2}}|\Sigma|^{\frac{1}{2}}}e^{-\frac{1}{2}{(x-\mu)^T}{\Sigma^{-1}}{(x-\mu)}}$$
$$x=\begin{bmatrix}x_1\\x_2\end{bmatrix},\quad \mu=\begin{bmatrix}\mu_1\\\mu_2\end{bmatrix},\quad \Sigma=\begin{bmatrix}\sigma_1^2&0\\0&\sigma_2^2\end{bmatrix}$$
$$\begin{aligned}f(x;\mu,\Sigma)&=\frac{1}{{2\pi}\left|\begin{matrix}\sigma_1^2 & 0\\0& \sigma_2^2\end{matrix}\right|^{\frac{1}{2}}}e^{-\frac{1}{2}{\left(\begin{bmatrix}x_1\\x_2\end{bmatrix}-\begin{bmatrix}\mu_1\\\mu_2\end{bmatrix}\right)^T}{\begin{bmatrix}\frac{1}{\sigma_1^2} & 0\\0& \frac{1}{\sigma_2^2}\end{bmatrix}}{\left(\begin{bmatrix}x_1\\x_2\end{bmatrix}-\begin{bmatrix}\mu_1\\\mu_2\end{bmatrix}\right)}}\\&=\frac{1}{2\pi\sigma_1\sigma_2}e^{-\frac{1}{2\sigma_1^2}(x_1-\mu_1)^2-\frac{1}{2\sigma_2^2}(x_2-\mu_2)^2}\end{aligned}$$
$$\mu_0=\begin{bmatrix}1\\1\end{bmatrix},\mu_1=\begin{bmatrix}4\\4\end{bmatrix},\Sigma=\begin{bmatrix}1&0\\0&1\end{bmatrix}$$
$$\begin{aligned}&p(y=\text{blue})=\phi\\&p(y=\text{red})=1-\phi\end{aligned}$$
$$\begin{aligned}&p(y=1)=\phi\\&p(y=0)=1-\phi\end{aligned}$$
$$\qquad\quad p(y;\phi)=\phi^y(1-\phi)^{(1-y)}$$
$$\begin{aligned}&p(X,y;\phi,\mu_0,\mu_1,\Sigma)\\ =&p(x^{(1)},x^{(2)},\cdots,x^{(m)},y^{(1)},y^{(2)},\cdots,y^{(m)};\phi,\mu_0,\mu_1,\Sigma)\\ \triangleq&\prod_{i=1}^{m}p(x^{(i)},y^{(i)};\phi,\mu_0,\mu_1,\Sigma) \end{aligned}$$
$$\begin{aligned}&\arg\max p(X,y;\phi,\mu_0,\mu_1,\Sigma)\\ =&\arg\max\prod_{i=1}^{m}p(x^{(i)},y^{(i)};\phi,\mu_0,\mu_1,\Sigma)\\ =&\arg\max\prod_{i=1}^{m}p(x^{(i)}|y^{(i)};\mu_0,\mu_1,\Sigma)p(y^{(i)};\phi)\\ =&\arg\max\sum_{i=1}^{m}\log p(x^{(i)}|y^{(i)};\mu_0,\mu_1,\Sigma)+\sum_{i=1}^{m}\log p(y^{(i)};\phi)\end{aligned}$$
$$\begin{aligned}&\frac{\partial\sum_{i=1}^{m} \log p(x^{(i)}|y^{(i)};\mu_0,\mu_1,\Sigma)+\sum_{i=1}^{m} \log p(y^{(i)};\phi)}{\partial \phi}=0\\ \Rightarrow&\frac{\partial\sum_{i=1}^{m}\log p(y^{(i)};\phi)}{\partial \phi}=0\\ \Rightarrow&\frac{\partial\sum_{i=1}^{m}\log \phi^{y^{(i)}}(1-\phi)^{(1-y^{(i)})}}{\partial \phi}=0\\ \Rightarrow&\frac{\partial\sum_{i=1}^{m}{y^{(i)}}\log \phi+{(1-y^{(i)})}\log(1-\phi)}{\partial \phi}=0\\ \Rightarrow&\frac{\partial\sum_{i=1}^{m}{{1}{\{y^{(i)}=1\}}}\log \phi+{1}{\{y^{(i)}=0\}}\log(1-\phi)}{\partial \phi}=0\\ \Rightarrow&\phi=\frac{1}{m}\sum_{i=1}^{m}1\{y^{(i)}=1\}\end{aligned}$$
$$\begin{aligned}&\frac{\partial\sum_{i=1}^{m} \log p(x^{(i)}|y^{(i)};\mu_0,\mu_1,\Sigma)+\sum_{i=1}^{m} \log p(y^{(i)};\phi)}{\partial \mu_0}=0\\ \Rightarrow&\frac{\partial\sum_{i=1}^{m} \log p(x^{(i)}|y^{(i)};\mu_0,\mu_1,\Sigma)}{\partial \mu_0}=0\\ \Rightarrow&\frac{\partial \sum_{i=1}^{m}1\{y^{(i)}=0\}\log\frac{1}{(2\pi)^{\frac{d}{2}}|\Sigma|^{\frac{1}{2}}}e^{-\frac{1}{2}(x^{(i)}-\mu_0)^T\Sigma^{-1}(x^{(i)}-\mu_0)}}{\partial \mu_0}=0\\ \Rightarrow&0+\frac{\partial \sum_{i=1}^{m}1\{y^{(i)}=0\}\left(-\frac{1}{2}(x^{(i)}-\mu_0)^T\Sigma^{-1}(x^{(i)}-\mu_0)\right)}{\partial \mu_0}=0 \end{aligned}$$
$$\begin{aligned}&0+\frac{\partial \sum_{i=1}^{m}1\{y^{(i)}=0\}\left(-\frac{1}{2}(x^{(i)}-\mu_0)^T\Sigma^{-1}(x^{(i)}-\mu_0)\right)}{\partial \mu_0}=0\\ \Rightarrow&{\sum_{i=1}^{m}1\{y^{(i)}=0\}\left(-\frac{1}{2}\right)((\Sigma^{-1})^T+\Sigma^{-1})(x^{(i)}-\mu_0)\cdot(-1)}=0\\ \Rightarrow& \sum_{i=1}^{m}1\{y^{(i)}=0\}x^{(i)}=\sum_{i=1}^{m}1\{y^{(i)}=0\}\mu_0\\ \Rightarrow&\mu_0=\frac{\sum_{i=1}^{m}1\{y^{(i)}=0\}x^{(i)}}{\sum_{i=1}^{m}1\{y^{(i)}=0\}} \end{aligned}$$
$$\mu_0=\frac{\sum_{i=1}^{m}1\{y^{(i)}=0\}x^{(i)}}{\sum_{i=1}^{m}1\{y^{(i)}=0\}}$$ $$\mu_1=\frac{\sum_{i=1}^{m}1\{y^{(i)}=1\}x^{(i)}}{\sum_{i=1}^{m}1\{y^{(i)}=1\}}$$
$$\begin{aligned}&\frac{\partial\sum_{i=1}^{m} \log p(x^{(i)}|y^{(i)};\mu_0,\mu_1,\Sigma)+\sum_{i=1}^{m} \log p(y^{(i)};\phi)}{\partial \Sigma}=0\\ \Rightarrow&\frac{\partial \sum_{i=1}^{m}\log\frac{1}{(2\pi)^{\frac{d}{2}}|\Sigma|^{\frac{1}{2}}}e^{-\frac{1}{2}(x^{(i)}-\mu_{y^{(i)}})^T\Sigma^{-1}(x^{(i)}-\mu_{y^{(i)}})}}{\partial \Sigma}=0\\ \Rightarrow&\frac{\partial \sum_{i=1}^{m}-\frac{d}{2}\log2\pi}{\partial \Sigma}+\frac{\partial \sum_{i=1}^{m}-\frac{1}{2}\log|\Sigma|}{\partial \Sigma}\\&+\frac{\partial \sum_{i=1}^{m}{-\frac{1}{2}(x^{(i)}-\mu_{y^{(i)}})^T\Sigma^{-1}(x^{(i)}-\mu_{y^{(i)}})}}{\partial \Sigma}=0\\\\ \Rightarrow&\frac{\partial \sum_{i=1}^{m}-\frac{1}{2}\log|\Sigma|}{\partial \Sigma}+\frac{\partial \sum_{i=1}^{m}{-\frac{1}{2}(x^{(i)}-\mu_{y^{(i)}})^T\Sigma^{-1}(x^{(i)}-\mu_{y^{(i)}})}}{\partial \Sigma}\\ &=0 \end{aligned}$$
$$\begin{aligned}&\frac{\partial \sum_{i=1}^{m}\log|\Sigma|}{\partial \Sigma}+\frac{\partial \sum_{i=1}^{m}{(x^{(i)}-\mu_{y^{(i)}})^T\Sigma^{-1}(x^{(i)}-\mu_{y^{(i)}})}}{\partial \Sigma}=0\\ \Rightarrow&m\frac{1}{|\Sigma|}|\Sigma|\Sigma^{-1}-\Sigma^{-1}\left[\sum_{i=1}^m(x^{(i)}-\mu_{y^{(i)}})(x^{(i)}-\mu_{y^{(i)}})^T\right]\Sigma^{-1}=0\\ \Rightarrow&\Sigma=\frac{1}{m}\sum_{i=1}^{m}(x^{(i)}-\mu_{y^{(i)}})(x^{(i)}-\mu_{y^{(i)}})^T \end{aligned}$$
$$\Sigma=\frac{1}{m}\sum_{i=1}^{m}(x^{(i)}-\mu_{y^{(i)}})(x^{(i)}-\mu_{y^{(i)}})^T$$
$$\begin{aligned}&\phi=\frac{1}{m}\sum_{i=1}^{m}1\{y^{(i)}=1\}\\\\ &\mu_0=\frac{\sum_{i=1}^{m}1\{y^{(i)}=0\}x^{(i)}}{\sum_{i=1}^{m}1\{y^{(i)}=0\}}\\\\ &\mu_1=\frac{\sum_{i=1}^{m}1\{y^{(i)}=1\}x^{(i)}}{\sum_{i=1}^{m}1\{y^{(i)}=1\}}\\\\ &\Sigma=\frac{1}{m}\sum_{i=1}^{m}(x^{(i)}-\mu_{y^{(i)}})(x^{(i)}-\mu_{y^{(i)}})^T \end{aligned}$$
$$\begin{aligned} &\phi=0.5\\\\ &\mu_0=\begin{bmatrix}4.0551\\4.1008\end{bmatrix}\\\\ &\mu_1=\begin{bmatrix}0.85439\\1.03622\end{bmatrix}\\\\ &\Sigma=\begin{bmatrix}1.118822&-0.058976\\-0.058976&1.023049\end{bmatrix} \end{aligned}$$
$\phi=0.5,\quad\mu_0=\begin{bmatrix}4.055\\4.101\end{bmatrix},\quad\Sigma=\begin{bmatrix}1.1188&-0.059\\-0.059&1.023\end{bmatrix},\quad x=\begin{bmatrix}0.88\\3.95\end{bmatrix}$
$$\begin{aligned} &\frac{1}{2\pi|\Sigma|^{\frac{1}{2}}}e^{-\frac{1}{2}{\begin{bmatrix}x_1-\mu_1\\x_2-\mu_2\end{bmatrix}^T\Sigma^{-1}\begin{bmatrix}x_1-\mu_1\\x_2-\mu_2\end{bmatrix}}}\\=&\frac{1}{{2\pi}\left|\begin{matrix}1.1188&-0.059\\-0.059&1.023\end{matrix}\right|^{\frac{1}{2}}}e^{-\frac{1}{2}{\begin{bmatrix}-3.175\\-0.151\end{bmatrix}^T\begin{bmatrix}0.896&0.052\\0.052&0.98\end{bmatrix}\begin{bmatrix}-3.175\\-0.151\end{bmatrix}}}\\ =&\frac{1}{2\pi\sqrt{(1.141)}}e^{-\frac{1}{2}\times 9.11} =0.149\times 0.01=0.0015 \end{aligned}$$
$\phi=0.5,\quad\mu_1=\begin{bmatrix}0.85\\1.036\end{bmatrix},\quad\Sigma=\begin{bmatrix}1.1188&-0.059\\-0.059&1.023\end{bmatrix},\quad x=\begin{bmatrix}0.88\\3.95\end{bmatrix}$
$$\begin{aligned} &\frac{1}{2\pi|\Sigma|^{\frac{1}{2}}}e^{-\frac{1}{2}{\begin{bmatrix}x_1-\mu_1\\x_2-\mu_2\end{bmatrix}^T\Sigma^{-1}\begin{bmatrix}x_1-\mu_1\\x_2-\mu_2\end{bmatrix}}}\\=&\frac{1}{{2\pi}\left|\begin{matrix}1.1188&-0.059\\-0.059&1.023\end{matrix}\right|^{\frac{1}{2}}}e^{-\frac{1}{2}{\begin{bmatrix}0.03\\2.91\end{bmatrix}^T\begin{bmatrix}0.896&0.052\\0.052&0.98\end{bmatrix}\begin{bmatrix}0.03\\2.91\end{bmatrix}}}\\ =&\frac{1}{2\pi\sqrt{(1.141)}}e^{-\frac{1}{2}\times 8.336} =0.149\times 0.015=0.0022 \end{aligned}$$
$$\quad p\left(x=\begin{bmatrix}0.88\\\\3.95\end{bmatrix}\Bigg| y=0\right)=0.0015$$ $$\quad p\left(x=\begin{bmatrix}0.88\\\\3.95\end{bmatrix}\Bigg| y=1\right)=0.0022$$
$$\frac{p\left(x=\begin{bmatrix}0.88\\3.95\end{bmatrix} \Bigg| y=0\right)p(y=0)}{p\left(x=\begin{bmatrix}0.88\\3.95\end{bmatrix}\Bigg| y=1\right)p(y=1)}=\frac{0.0015}{0.0022}=0.68182<1$$