update

xhelin · Sep 3, 2017 · 34963ab · 34963ab
1 parent e149ef7
commit 34963ab
Show file tree

Hide file tree

Showing 9 changed files with 130 additions and 4 deletions.
diff --git a/.gitignore b/.gitignore
@@ -6,4 +6,5 @@ site
 **/.Archive
 .Rproj.user
 .Rhistory
+.RData
 .ipynb_checkpoints/
diff --git a/code/SOM/SOM_ex.R b/code/SOM/SOM_ex.R
@@ -0,0 +1,125 @@
+rm(list = lm(all=TRUE))
+## 
+## solution to Ex. 14.5
+##
+
+## ###############
+## generate data
+## ###############
+
+## center at (0, 0, 1)
+theta1 = runif(30, -pi/8, pi/8)
+phi1 = runif(30, 0, 2*pi)
+x1 = sin(theta1)*cos(phi1) + rnorm(30, 0, 0.6)
+y1 = sin(theta1)*sin(phi1) + rnorm(30, 0, 0.6)
+z1 = cos(theta1) + rnorm(30, 0, 0.6)
+
+## center at (1, 0, 0)
+theta2 = runif(30, pi/2-pi/4, pi/2+pi/4)
+phi2 = runif(30, -pi/4, pi/4)
+x2 = sin(theta2)*cos(phi2) + rnorm(30, 0, 0.6)
+y2 = sin(theta2)*sin(phi2) + rnorm(30, 0, 0.6)
+z2 = cos(theta2) + rnorm(30, 0, 0.6)
+
+## center at (0, 1, 0)
+theta3 = runif(30, pi/2-pi/4, pi/2 + pi/4)
+phi3 = runif(30, pi/2-pi/4, pi/2+pi/4)
+x3 = sin(theta3)*cos(phi3) + rnorm(30, 0, 0.6)
+y3 = sin(theta3)*sin(phi3) + rnorm(30, 0, 0.6)
+z3 = cos(theta3) + rnorm(30, 0, 0.6)
+
+## ###############
+## initialize
+## ###############
+q1 = q2 = 5 # K = 25
+x = c(x1, x2, x3)
+y = c(y1, y2, y3)
+z = c(z1, z2, z3)
+data = data.frame(x, y, z)
+idx = sample(90, 25)
+coor = data[idx, ]
+coor.grid = expand.grid(1:5, 1:5)
+
+## find the closest prototype to x in Euclidean distance in R^p
+classify <- function(x, prototype)
+{
+  d = apply(prototype, 1, function(y) sum((x-y)^2))
+  return(as.numeric(which.min(d)))
+}
+## can be optimized
+fulldist <- function(x)
+{
+   n = nrow(x)
+   res = matrix(nrow = n, ncol = n)
+   for(i in 1:n)
+   {
+     for (j in 1:n)
+     {
+       res[i, j] = sqrt(sum((x[i, ]- x[j, ])^2))
+     }
+   }
+   return(res)
+}
+## calculate the distance of vector to a matrix
+distance <- function(x, m)
+{
+  res = apply(m, 1, function(y) sqrt(sum((x-y)^2)))
+  return(as.numeric(res))
+}
+plot.som <- function(data, coor, iter)
+{
+  #plot(expand.grid(1:5, 1:5), cex = 10, xlim=c(0.5, 5.5), ylim=c(0.5, 5.5), pty="s")
+  res = apply(data, 1, function(x) classify(x, coor))
+  res = res - 0.001 ## avoid divisible
+  res.x = floor(res/5)
+  res.y = res - res.x*5
+  #res.y[res.y == 0] == 5 
+  res.x = res.x + 1 + runif(90, -0.3, 0.3) # avoid overlap
+  res.y = res.y + runif(90, -0.3, 0.3)
+  #points(res.x[1:30],  res.y[1:30], col = "red", pch = 16, cex = 0.8)
+  plot(res.x[1:30],  res.y[1:30], col = "red", pch = 16, xlim = c(0.5, 5.5), ylim = c(0.5, 5.5), pty="s", xlab = NA, ylab = NA, main = paste0("iteration ", iter))
+  points(res.x[31:60], res.y[31:60], col = "green", pch = 16)
+  points(res.x[61:90], res.y[61:90], col = "blue", pch = 16)
+  xy = expand.grid(1:5, 1:5)
+  symbols(xy[,1], xy[,2], circles = rep(0.45, nrow(xy)), add = T, inches = F)
+}
+## initial configuration
+png("iter_0.png")
+plot.som(data, coor, 0)
+dev.off()
+
+R = 2
+niter = 40
+for (iter in 1:niter)
+{
+  alpha = -1/niter*iter + 1
+  r = -1/niter*iter + 2
+  for (i in 1:90)
+  {
+    xi = data[i, ]
+    mj.idx = classify(xi, coor)
+    mj = coor.grid[mj.idx, ]
+    mk.idx = which(distance(mj, coor.grid) <= r)
+    mk = coor.grid[mk.idx, ]
+    # mj11 = t(sapply(1:R, function(x) c(mj[1]+x, mj[2])))
+    # mj12 = t(sapply(1:R, function(x) c(mj[1]-x, mj[2])))
+    # mj21 = t(sapply(1:R, function(x) c(mj[1], mj[2]+x)))
+    # mj22 = t(sapply(1:R, function(x) c(mj[1], mj[2]-x)))
+    # ## include itself
+    # mj.neighbor = rbind(mj, mj11, mj12, mj21, mj22)
+    # ## remove outside elements
+    # mj.neighbor = mj.neighbor[mj.neighbor[,1] <= 5 & mj.neighbor[,1] >= 1 & mj.neighbor[,2] <= 5 & mj.neighbor[,2] >= 1,]
+    # mj.neighbor = mj.neighbor[distance(mj, mj.neighbor) <= R, ]
+    xi.m = matrix(rep(1, length(mk.idx),nrow = length(mk.idx))) %*% as.matrix(xi)
+    coor[mk.idx, ]= coor[mk.idx, ] + alpha*(xi.m - coor[mk.idx, ])
+  } 
+  if (iter - 10*floor(iter/10) == 0)
+  {
+    png(paste0("iter_", iter, ".png"))
+    plot.som(data, coor, iter)
+    dev.off()
+  }
+}
+
+
+
diff --git a/code/SOM/iter_0.png b/code/SOM/iter_0.png
diff --git a/code/SOM/iter_10.png b/code/SOM/iter_10.png
diff --git a/code/SOM/iter_20.png b/code/SOM/iter_20.png
diff --git a/code/SOM/iter_30.png b/code/SOM/iter_30.png
diff --git a/code/SOM/iter_40.png b/code/SOM/iter_40.png
diff --git a/docs/14 Unsupervised Learning/14.4 Self-Organizing Maps.md b/docs/14 Unsupervised Learning/14.4 Self-Organizing Maps.md
@@ -5,17 +5,17 @@
 | 翻译   | szcf-weiya                               |
 | 时间   | 2017-09-03                   |
 
-这个方法可以看成是$K$均值聚类的带约束的版本，其中原型落在特征空间的一维或二维流形中。得到的流形也被称作约束拓扑图(constrained topological map)，因为原始的高纬观测可以映射到二维的坐标系统中。原始的SOM算法是online进行的——每次处理一个观测——后来提出了批量处理的版本。这个技巧也与主曲线和主曲面(principal curves and surfaces)有着密切的联系，这将在下一节中进行讨论。
+这个方法可以看成是$K$均值聚类的带约束的版本，其中原型落在特征空间的一维或二维流形中。因为原始的高纬观测可以映射到二维的坐标系统中, 所以得到的流形也被称作约束拓扑图(constrained topological map)。原始的SOM算法是online进行的——每次处理一个观测——后来提出了批量处理的版本。这个技巧也与主曲线和主曲面(principal curves and surfaces)有着密切的联系，这将在下一节中进行讨论。
 
-我们考虑$K$个原型$m_j\in R^p$的二维长方形网格的SOM（也有其它的选择，如六边形）。这$K$个原型用整数坐标对$\ell_j\in\cal Q_1\times \cal Q_2$进行参量化。这里$\cal Q_1=\\{1,2,\ldots,q_1\\}$，$\cal Q_2$也类似，并且$K=q_1\cdot q_2$。举个例子，$m_j$初始化后落在数据的二维主平面上（下一节进行讨论）。我们可以将这些原型看成是主平面上的“按键(button)”或者“线缝(sewn)”。SOM过程试图弯曲平面使得这些按键尽可能近似数据点。一旦模型拟合好，这些观测可以映射到二维网格中。
+我们考虑$K$个原型$m_j\in R^p$的二维长方形网格的SOM（也有其它的选择，如六边形网格）。这$K$个原型用整数坐标对$\ell_j\in\cal Q_1\times \cal Q_2$进行参量化。这里$\cal Q_1=\\{1,2,\ldots,q_1\\}$，$\cal Q_2$也类似，并且$K=q_1\cdot q_2$。举个例子，$m_j$初始化后落在数据的二维主平面上（下一节进行讨论）。我们可以将这些原型看成是主平面上的“按键(button)”或者“线缝(sewn)”。SOM过程试图弯曲平面使得这些按键尽可能近似数据点。一旦模型拟合好，这些观测可以映射到二维网格中。
 
 每次处理一个观测$x_i$。我们在$R^p$中寻找离$x_i$的欧式距离最近的原型$m_j$，接着对于$m_j$所有的邻居$m_k$，通过下式将$m_k$向$x_i$移动
 
 $$
 m_k\leftarrow m_k+\alpha (x_i-m_k)\qquad (14.46)
 $$
 
-$m_j$的邻居定义为使得$\ell_j$和$\ell_k$间的距离最小的所有$m_k$。最简单的方式是采用欧式距离，通过阈值$r$来决定“小”。邻域经常包含最近的原型$m_j$本身。
+$m_j$的邻居定义为使得$\ell_j$和$\ell_k$间的距离小的所有$m_k$。最简单的方式是采用欧式距离，通过阈值$r$来决定“小”。邻域经常包含最近的原型$m_j$本身。
 
 注意到这里的距离是定义在原型的整数拓扑坐标中的$\cal Q_1\times \cal Q_2$的空间中。式(14.46)的更新是将原型往数据点移动，但也是用来维持原型间的光滑的二维空间的关系。
 
@@ -28,7 +28,7 @@ m_k\leftarrow m_k + \alpha h(\Vert \ell_j-\ell_k\Vert)(x_i-m_k)\qquad (14.47)
 $$
 其中邻居函数(neighborhood function)$h$会给距离$\ell_j$更近的$\ell_k$更多的权重。
 
-如果我们取足够小的距离$r$使得每个邻域只包含一个点，则丢失了原型之间的空间联系。在这种情形下，可以证明SOM算法是K均值聚类的online版本，并且最后用K均值对找到的局部最小值进行标准化。因为SOM是K均值聚类的约束版本，则检查约束在任意给定的问题中约束是否合理是很重要的。可以计算这两种方法的重构误差$\Vert x-m_j\Vert$^2，这需要对观测进行求和，于是就可以判断合理性。
+如果我们取足够小的距离$r$使得每个邻域只包含一个点，则丢失了原型之间的空间联系。在这种情形下，可以证明SOM算法是K均值聚类的online版本，并且最后用K均值对找到的局部最小值进行标准化。因为SOM是K均值聚类的约束版本，则检查约束在任意给定的问题中约束是否合理是很重要的。可以计算这两种方法的重构误差$\Vert x-m_j\Vert^2$，这需要对观测进行求和，于是就可以判断合理性。
 
 举个例子来说明，我们在三维空间中半径为1的半球体的表面附近生成90个数据点。数据点在各自的类中——红、绿和蓝——落在(0,1,0),(0,0,1)以及(1,0,0)附近。数据点显示在图14.15中了。
 

diff --git a/docs/img/18/dtm1.png → docs/img/14/dtm1.png b/docs/img/18/dtm1.png → docs/img/14/dtm1.png