Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
szcf-weiya committed Sep 3, 2017
1 parent e149ef7 commit 34963ab
Show file tree
Hide file tree
Showing 9 changed files with 130 additions and 4 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ site
**/.Archive
.Rproj.user
.Rhistory
.RData
.ipynb_checkpoints/
125 changes: 125 additions & 0 deletions code/SOM/SOM_ex.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
rm(list = lm(all=TRUE))
##
## solution to Ex. 14.5
##

## ###############
## generate data
## ###############

## center at (0, 0, 1)
theta1 = runif(30, -pi/8, pi/8)
phi1 = runif(30, 0, 2*pi)
x1 = sin(theta1)*cos(phi1) + rnorm(30, 0, 0.6)
y1 = sin(theta1)*sin(phi1) + rnorm(30, 0, 0.6)
z1 = cos(theta1) + rnorm(30, 0, 0.6)

## center at (1, 0, 0)
theta2 = runif(30, pi/2-pi/4, pi/2+pi/4)
phi2 = runif(30, -pi/4, pi/4)
x2 = sin(theta2)*cos(phi2) + rnorm(30, 0, 0.6)
y2 = sin(theta2)*sin(phi2) + rnorm(30, 0, 0.6)
z2 = cos(theta2) + rnorm(30, 0, 0.6)

## center at (0, 1, 0)
theta3 = runif(30, pi/2-pi/4, pi/2 + pi/4)
phi3 = runif(30, pi/2-pi/4, pi/2+pi/4)
x3 = sin(theta3)*cos(phi3) + rnorm(30, 0, 0.6)
y3 = sin(theta3)*sin(phi3) + rnorm(30, 0, 0.6)
z3 = cos(theta3) + rnorm(30, 0, 0.6)

## ###############
## initialize
## ###############
q1 = q2 = 5 # K = 25
x = c(x1, x2, x3)
y = c(y1, y2, y3)
z = c(z1, z2, z3)
data = data.frame(x, y, z)
idx = sample(90, 25)
coor = data[idx, ]
coor.grid = expand.grid(1:5, 1:5)

## find the closest prototype to x in Euclidean distance in R^p
classify <- function(x, prototype)
{
d = apply(prototype, 1, function(y) sum((x-y)^2))
return(as.numeric(which.min(d)))
}
## can be optimized
fulldist <- function(x)
{
n = nrow(x)
res = matrix(nrow = n, ncol = n)
for(i in 1:n)
{
for (j in 1:n)
{
res[i, j] = sqrt(sum((x[i, ]- x[j, ])^2))
}
}
return(res)
}
## calculate the distance of vector to a matrix
distance <- function(x, m)
{
res = apply(m, 1, function(y) sqrt(sum((x-y)^2)))
return(as.numeric(res))
}
plot.som <- function(data, coor, iter)
{
#plot(expand.grid(1:5, 1:5), cex = 10, xlim=c(0.5, 5.5), ylim=c(0.5, 5.5), pty="s")
res = apply(data, 1, function(x) classify(x, coor))
res = res - 0.001 ## avoid divisible
res.x = floor(res/5)
res.y = res - res.x*5
#res.y[res.y == 0] == 5
res.x = res.x + 1 + runif(90, -0.3, 0.3) # avoid overlap
res.y = res.y + runif(90, -0.3, 0.3)
#points(res.x[1:30], res.y[1:30], col = "red", pch = 16, cex = 0.8)
plot(res.x[1:30], res.y[1:30], col = "red", pch = 16, xlim = c(0.5, 5.5), ylim = c(0.5, 5.5), pty="s", xlab = NA, ylab = NA, main = paste0("iteration ", iter))
points(res.x[31:60], res.y[31:60], col = "green", pch = 16)
points(res.x[61:90], res.y[61:90], col = "blue", pch = 16)
xy = expand.grid(1:5, 1:5)
symbols(xy[,1], xy[,2], circles = rep(0.45, nrow(xy)), add = T, inches = F)
}
## initial configuration
png("iter_0.png")
plot.som(data, coor, 0)
dev.off()

R = 2
niter = 40
for (iter in 1:niter)
{
alpha = -1/niter*iter + 1
r = -1/niter*iter + 2
for (i in 1:90)
{
xi = data[i, ]
mj.idx = classify(xi, coor)
mj = coor.grid[mj.idx, ]
mk.idx = which(distance(mj, coor.grid) <= r)
mk = coor.grid[mk.idx, ]
# mj11 = t(sapply(1:R, function(x) c(mj[1]+x, mj[2])))
# mj12 = t(sapply(1:R, function(x) c(mj[1]-x, mj[2])))
# mj21 = t(sapply(1:R, function(x) c(mj[1], mj[2]+x)))
# mj22 = t(sapply(1:R, function(x) c(mj[1], mj[2]-x)))
# ## include itself
# mj.neighbor = rbind(mj, mj11, mj12, mj21, mj22)
# ## remove outside elements
# mj.neighbor = mj.neighbor[mj.neighbor[,1] <= 5 & mj.neighbor[,1] >= 1 & mj.neighbor[,2] <= 5 & mj.neighbor[,2] >= 1,]
# mj.neighbor = mj.neighbor[distance(mj, mj.neighbor) <= R, ]
xi.m = matrix(rep(1, length(mk.idx),nrow = length(mk.idx))) %*% as.matrix(xi)
coor[mk.idx, ]= coor[mk.idx, ] + alpha*(xi.m - coor[mk.idx, ])
}
if (iter - 10*floor(iter/10) == 0)
{
png(paste0("iter_", iter, ".png"))
plot.som(data, coor, iter)
dev.off()
}
}



Binary file added code/SOM/iter_0.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added code/SOM/iter_10.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added code/SOM/iter_20.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added code/SOM/iter_30.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added code/SOM/iter_40.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
8 changes: 4 additions & 4 deletions docs/14 Unsupervised Learning/14.4 Self-Organizing Maps.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,17 @@
| 翻译 | szcf-weiya |
| 时间 | 2017-09-03 |

这个方法可以看成是$K$均值聚类的带约束的版本,其中原型落在特征空间的一维或二维流形中。得到的流形也被称作约束拓扑图(constrained topological map),因为原始的高纬观测可以映射到二维的坐标系统中。原始的SOM算法是online进行的——每次处理一个观测——后来提出了批量处理的版本。这个技巧也与主曲线和主曲面(principal curves and surfaces)有着密切的联系,这将在下一节中进行讨论。
这个方法可以看成是$K$均值聚类的带约束的版本,其中原型落在特征空间的一维或二维流形中。因为原始的高纬观测可以映射到二维的坐标系统中, 所以得到的流形也被称作约束拓扑图(constrained topological map)。原始的SOM算法是online进行的——每次处理一个观测——后来提出了批量处理的版本。这个技巧也与主曲线和主曲面(principal curves and surfaces)有着密切的联系,这将在下一节中进行讨论。

我们考虑$K$个原型$m_j\in R^p$的二维长方形网格的SOM(也有其它的选择,如六边形)。这$K$个原型用整数坐标对$\ell_j\in\cal Q_1\times \cal Q_2$进行参量化。这里$\cal Q_1=\\{1,2,\ldots,q_1\\}$,$\cal Q_2$也类似,并且$K=q_1\cdot q_2$。举个例子,$m_j$初始化后落在数据的二维主平面上(下一节进行讨论)。我们可以将这些原型看成是主平面上的“按键(button)”或者“线缝(sewn)”。SOM过程试图弯曲平面使得这些按键尽可能近似数据点。一旦模型拟合好,这些观测可以映射到二维网格中。
我们考虑$K$个原型$m_j\in R^p$的二维长方形网格的SOM(也有其它的选择,如六边形网格)。这$K$个原型用整数坐标对$\ell_j\in\cal Q_1\times \cal Q_2$进行参量化。这里$\cal Q_1=\\{1,2,\ldots,q_1\\}$,$\cal Q_2$也类似,并且$K=q_1\cdot q_2$。举个例子,$m_j$初始化后落在数据的二维主平面上(下一节进行讨论)。我们可以将这些原型看成是主平面上的“按键(button)”或者“线缝(sewn)”。SOM过程试图弯曲平面使得这些按键尽可能近似数据点。一旦模型拟合好,这些观测可以映射到二维网格中。

每次处理一个观测$x_i$。我们在$R^p$中寻找离$x_i$的欧式距离最近的原型$m_j$,接着对于$m_j$所有的邻居$m_k$,通过下式将$m_k$向$x_i$移动

$$
m_k\leftarrow m_k+\alpha (x_i-m_k)\qquad (14.46)
$$

$m_j$的邻居定义为使得$\ell_j$和$\ell_k$间的距离最小的所有$m_k$。最简单的方式是采用欧式距离,通过阈值$r$来决定“小”。邻域经常包含最近的原型$m_j$本身。
$m_j$的邻居定义为使得$\ell_j$和$\ell_k$间的距离小的所有$m_k$。最简单的方式是采用欧式距离,通过阈值$r$来决定“小”。邻域经常包含最近的原型$m_j$本身。

注意到这里的距离是定义在原型的整数拓扑坐标中的$\cal Q_1\times \cal Q_2$的空间中。式(14.46)的更新是将原型往数据点移动,但也是用来维持原型间的光滑的二维空间的关系。

Expand All @@ -28,7 +28,7 @@ m_k\leftarrow m_k + \alpha h(\Vert \ell_j-\ell_k\Vert)(x_i-m_k)\qquad (14.47)
$$
其中邻居函数(neighborhood function)$h$会给距离$\ell_j$更近的$\ell_k$更多的权重。

如果我们取足够小的距离$r$使得每个邻域只包含一个点,则丢失了原型之间的空间联系。在这种情形下,可以证明SOM算法是K均值聚类的online版本,并且最后用K均值对找到的局部最小值进行标准化。因为SOM是K均值聚类的约束版本,则检查约束在任意给定的问题中约束是否合理是很重要的。可以计算这两种方法的重构误差$\Vert x-m_j\Vert$^2,这需要对观测进行求和,于是就可以判断合理性。
如果我们取足够小的距离$r$使得每个邻域只包含一个点,则丢失了原型之间的空间联系。在这种情形下,可以证明SOM算法是K均值聚类的online版本,并且最后用K均值对找到的局部最小值进行标准化。因为SOM是K均值聚类的约束版本,则检查约束在任意给定的问题中约束是否合理是很重要的。可以计算这两种方法的重构误差$\Vert x-m_j\Vert^2$,这需要对观测进行求和,于是就可以判断合理性。

举个例子来说明,我们在三维空间中半径为1的半球体的表面附近生成90个数据点。数据点在各自的类中——红、绿和蓝——落在(0,1,0),(0,0,1)以及(1,0,0)附近。数据点显示在图14.15中了。

Expand Down
File renamed without changes

0 comments on commit 34963ab

Please sign in to comment.