forked from PlayPurEo/ML-and-DL
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
172 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# author : 'wangzhong'; | ||
# date: 21/11/2020 18:24 | ||
|
||
""" | ||
吴恩达pca作业 | ||
""" | ||
import numpy as np | ||
from scipy.io import loadmat | ||
import matplotlib.pyplot as plt | ||
|
||
def pca_reconstruct(X, k=1):
    """Project ``X`` onto its top-``k`` principal components and map it back.

    The components are the left singular vectors of the covariance matrix of
    the mean-centred data, ordered by decreasing singular value. Nothing in
    here depends on the number of samples or features, so the routine works
    for any (m, n) input rather than only a 50x2 dataset.

    Args:
        X: 2-D array-like of shape (m, n), one sample per row.
        k: Number of leading components to keep (at least 1). Values larger
            than n simply keep all n components.

    Returns:
        A tuple ``(X_reduction, X_restore, S)``: the projection of the centred
        data onto the kept components, the (m, n) reconstruction in the
        original coordinates, and the singular values of the covariance
        matrix.

    Raises:
        ValueError: If ``X`` is not a non-empty 2-D array or ``k`` < 1.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim != 2 or X.shape[0] == 0:
        raise ValueError("X must be a non-empty 2-D array of shape (m, n)")
    if k < 1:
        raise ValueError("k must be at least 1")

    # Centre every feature on zero before measuring the covariance.
    X_mean = X.mean(axis=0)
    X_demean = X - X_mean
    # Covariance matrix of the centred data, shape (n, n).
    C = X_demean.T @ X_demean / len(X_demean)

    # C is symmetric positive semi-definite, so its SVD coincides with its
    # eigendecomposition: the columns of U are the eigenvectors and S the
    # eigenvalues. The third factor carries no extra information here.
    U, S, _ = np.linalg.svd(C)
    U_k = U[:, :k]

    # Dimensionality reduction: coordinates along the kept components.
    X_reduction = X_demean @ U_k
    # Reconstruction: back to feature space, then undo the centring.
    X_restore = X_reduction @ U_k.T + X_mean
    return X_reduction, X_restore, S


if __name__ == "__main__":
    data = loadmat('ex7data1.mat')
    X = data['X']

    X_reduction, X_restore, S = pca_reconstruct(X, k=1)

    # Original samples versus their one-component reconstruction.
    plt.scatter(X[:, 0], X[:, 1])
    plt.scatter(X_restore[:, 0], X_restore[:, 1])
    plt.show()

    # The quality of the reduction can be read directly from S, e.g. the
    # fraction of variance kept by the first component:
    # print(S[0] / S.sum())
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# author : 'wangzhong'; | ||
# date: 21/11/2020 22:33 | ||
|
||
import numpy as np | ||
from scipy.io import loadmat | ||
import matplotlib.pyplot as plt | ||
|
||
# Load the face dataset; each row of X is later reshaped to a 32x32 image.
data = loadmat('ex7faces.mat')
X = data['X']
|
||
|
||
def plot_100images(X, rows=10, cols=10, side=32):
    """Show the first ``rows * cols`` rows of ``X`` as a grid of square images.

    With the default arguments this draws the first 100 rows as a 10x10 grid
    of 32x32 images on a 10x10 inch figure.

    Args:
        X: 2-D array; row ``i`` holds one flattened ``side`` x ``side`` image.
        rows: Number of grid rows.
        cols: Number of grid columns.
        side: Edge length, in pixels, of each square image.
    """
    # squeeze=False keeps ``axes`` two-dimensional even for a 1xN or Nx1 grid.
    fig, axes = plt.subplots(
        nrows=rows, ncols=cols, figsize=(cols, rows), squeeze=False
    )
    for r in range(rows):
        for c in range(cols):
            ax = axes[r, c]
            # The image must be transposed before drawing (presumably the
            # pixels are stored column-major -- confirm against the dataset).
            ax.imshow(X[cols * r + c].reshape(side, side).T)
            ax.set_xticks([])
            ax.set_yticks([])
    plt.show()
|
||
|
||
# Uncomment to take a quick look at the raw faces first.
# plot_100images(X)

# Number of principal components to keep.
K = 36

X_mean = np.mean(X, axis=0)
X_demean = X - X_mean
# Covariance matrix of the centred data.
C = X_demean.T @ X_demean / len(X_demean)
# Columns of U are the principal directions, ordered by the values in S.
U, S, V = np.linalg.svd(C)

# Project onto the K leading components, then map back to pixel space and
# undo the centring.
U1 = U[:, :K]
X_reduction = X_demean @ U1
X_restore = X_reduction @ U1.T + X_mean

plot_100images(X_restore)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# author : 'wangzhong'; | ||
# date: 21/11/2020 22:06 | ||
|
||
""" | ||
sklearn pca实战 | ||
""" | ||
import numpy as np | ||
from scipy.io import loadmat | ||
import matplotlib.pyplot as plt | ||
from sklearn.decomposition import PCA | ||
|
||
data = loadmat('ex7data1.mat')
X = data['X']
X_mean = np.mean(X, axis=0)
X_demean = X - X_mean

# n_components may be an integer (the target dimension K) or a fraction
# (the share of the variance that has to be retained).
pca = PCA(n_components=0.80)
pca.fit(X_demean)
# Share of the variance explained by each kept component.
print(pca.explained_variance_ratio_)
# Eigenvalues.
print(pca.explained_variance_)
# Eigenvectors.
print(pca.components_)
# Reduced samples. The model is already fitted above, so only transform
# here; calling fit_transform would needlessly fit it a second time.
X_reduction = pca.transform(X_demean)
# Map the reduced samples back to the original feature space.
X_restore = pca.inverse_transform(X_reduction) + X_mean
print(X_restore)
# For a test set, reuse the training set's mean and eigenvectors.
plt.scatter(X[:, 0], X[:, 1])
plt.scatter(X_restore[:, 0], X_restore[:, 1])
plt.show()
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
## PCA算法原理 | ||
|
||
### 作者:wangzhong | ||
|
||
PCA主要用于高维数据建模,缓解维度灾难。通过减少数据的特征数(即维度),来达到提高训练效率的目的
|
||
### 协方差矩阵 | ||
|
||
$$
cov(X,Y) = E\big[(X - E(X))(Y - E(Y))\big]
$$
|
||
此处的E(X)为样本中每个维度的均值 | ||
|
||
假设X为(m,n)的矩阵,m为样本数,n为维度,则协方差矩阵求解为(先做去均值化) | ||
$$ | ||
C = \frac{1}{m}X^TX | ||
$$ | ||
对角线上分别是各个维度(如x和y)的方差,非对角线上是协方差。协方差大于0表示x和y若有一个增,另一个也倾向于增;小于0表示一个增,另一个倾向于减;协方差为0时,两者线性不相关(但不一定相互独立)。协方差绝对值越大,两者的线性关系越强,反之越弱。
|
||
协方差矩阵一定是一个对称矩阵(并且是半正定的)
|
||
### 特征值和特征向量 | ||
|
||
设A为n阶矩阵,E为n阶单位矩阵,如果数λ和n维非零列向量x使关系式
$$
(A-\lambda E)x = 0
$$
成立,则称λ为A的特征值,x为A对应于特征值λ的特征向量
|
||
上式有非零解的充要条件是系数矩阵的行列式为0,即
$$ | ||
|A-\lambda E| = 0 | ||
$$ | ||
行列式求解法则这里不详细说明,比如简单的2*2矩阵,其行列式为主对角线元素之积减去副对角线元素之积(即 ad - bc)
|
||
### 特征向量矩阵 | ||
|
||
特征向量矩阵U为n*n,各列按对应特征值从大到小排列,若要降维到k维(k<n),则取前k列
|
||
X_reduction = X*U[:,:k](其中X为去均值后的数据,*表示矩阵乘法)
|
||
还原X则为:X_restore = X_reduction*U[:,:k].T + 均值(要回到原始坐标,需要把去掉的均值加回来;k<n时得到的只是近似还原)
|
||
### 如何评判k的选取 | ||
|
||
 | ||
|
||
上式等效于用奇异值分解返回的S矩阵的计算,计算如下: | ||
$$
1-\frac{\sum_{i=1}^{k}S_i}{\sum_{i=1}^{n}S_i} < 0.01
$$
|
||
|
||
### 相关定理 | ||
|
||
若多个特征值互不相等,则它们对应的特征向量线性无关。
|
||
### 其他 | ||
|
||
代码中一般用奇异值分解去求得特征向量矩阵和奇异值矩阵(协方差矩阵是对称半正定矩阵,其奇异值就是特征值,左奇异向量就是特征向量)
|
Binary file not shown.