-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpca.html
1337 lines (807 loc) · 44.6 KB
/
pca.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html class="theme-next mist use-motion" lang="zh-Hans">
<head>
<meta charset="UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1"/>
<meta name="theme-color" content="#222">
<meta http-equiv="Cache-Control" content="no-transform" />
<meta http-equiv="Cache-Control" content="no-siteapp" />
<link href="/lib/fancybox/source/jquery.fancybox.css?v=2.1.5" rel="stylesheet" type="text/css" />
<link href="/lib/font-awesome/css/font-awesome.min.css?v=4.6.2" rel="stylesheet" type="text/css" />
<link href="/css/main.css?v=5.1.3" rel="stylesheet" type="text/css" />
<link rel="apple-touch-icon" sizes="180x180" href="/images/apple-touch-icon.png?v=5.1.3">
<link rel="icon" type="image/png" sizes="32x32" href="/images/favicon-32x32.png?v=5.1.3">
<link rel="icon" type="image/png" sizes="16x16" href="/images/favicon-16x16.png?v=5.1.3">
<link rel="mask-icon" href="/images/logo.svg?v=5.1.3" color="#222">
<meta name="keywords" content="PCA," />
<meta name="description" content="NG机器学习课程中关于PCA算法的理解及记录">
<meta property="og:type" content="article">
<meta property="og:title" content="机器学习|降维之主成分分析(PCA)">
<meta property="og:url" content="http://dhdsjy.github.io/posts/pca.html">
<meta property="og:site_name" content="机器之恋">
<meta property="og:description" content="NG机器学习课程中关于PCA算法的理解及记录">
<meta property="og:image" content="https://wx4.sinaimg.cn/mw690/a6facf89ly1ggziebcv3hj20kb0ip0t3.jpg">
<meta property="og:image" content="https://wx4.sinaimg.cn/mw690/a6facf89ly1ggyunjwgxkj20g10f8q3o.jpg">
<meta property="article:published_time" content="2020-07-10T11:41:05.000Z">
<meta property="article:modified_time" content="2020-07-22T12:28:06.814Z">
<meta property="article:author" content="Simon">
<meta property="article:tag" content="PCA">
<meta name="twitter:card" content="summary">
<meta name="twitter:image" content="https://wx4.sinaimg.cn/mw690/a6facf89ly1ggziebcv3hj20kb0ip0t3.jpg">
<script type="text/javascript" id="hexo.configurations">
// Reuse the global NexT object if one is already set on window; otherwise start with an empty object.
var NexT = window.NexT || {};
// Global settings object for this page, written out as a plain object literal.
// NOTE(review): presumably consumed by the theme scripts loaded later in the page — confirm against those files.
var CONFIG = {
root: '/',
scheme: 'Mist',
version: '5.1.3',
sidebar: {"position":"left","display":"post","offset":12,"b2t":false,"scrollpercent":false,"onmobile":false},
fancybox: true,
tabs: true,
motion: {"enable":true,"async":false,"transition":{"post_block":"fadeIn","post_header":"slideDownIn","post_body":"slideDownIn","coll_header":"slideLeftIn","sidebar":"slideUpIn"}},
duoshuo: {
userId: '0',
author: '博主'
},
// Algolia settings: applicationID, apiKey and indexName are empty strings here.
algolia: {
applicationID: '',
apiKey: '',
indexName: '',
hits: {"per_page":10},
// The ${...} markers below sit inside ordinary double-quoted strings, so they are literal text, not template-literal substitutions.
labels: {"input_placeholder":"Search for Posts","hits_empty":"We didn't find any results for the search: ${query}","hits_stats":"${hits} results found in ${time} ms"}
}
};
</script>
<link rel="canonical" href="http://dhdsjy.github.io/posts/pca.html"/>
<title>机器学习|降维之主成分分析(PCA) | 机器之恋</title>
<meta name="generator" content="Hexo 4.2.1"></head>
<body itemscope itemtype="http://schema.org/WebPage" lang="zh-Hans">
<div class="container sidebar-position-left page-post-detail">
<div class="headband"></div>
<a href="https://github.com/dhdsjy/" target="_blank" rel="noopener" class="github-corner" aria-label="View source on GitHub"><svg width="80" height="80" viewBox="0 0 250 250" style="fill:#151513; color:#fff; position: absolute; top: 0; border: 0; right: 0;" aria-hidden="true"><path d="M0,0 L115,115 L130,115 L142,142 L250,250 L250,0 Z"></path><path d="M128.3,109.0 C113.8,99.7 119.0,89.6 119.0,89.6 C122.0,82.7 120.5,78.6 120.5,78.6 C119.2,72.0 123.4,76.3 123.4,76.3 C127.3,80.9 125.5,87.3 125.5,87.3 C122.9,97.6 130.6,101.9 134.4,103.2" fill="currentColor" style="transform-origin: 130px 106px;" class="octo-arm"></path><path d="M115.0,115.0 C114.9,115.1 118.7,116.5 119.8,115.4 L133.7,101.6 C136.9,99.2 139.9,98.4 142.2,98.6 C133.8,88.0 127.5,74.4 143.8,58.0 C148.5,53.4 154.0,51.2 159.7,51.0 C160.3,49.4 163.2,43.6 171.4,40.1 C171.4,40.1 176.1,42.5 178.8,56.2 C183.1,58.6 187.2,61.8 190.9,65.4 C194.5,69.0 197.7,73.2 200.1,77.6 C213.8,80.2 216.3,84.9 216.3,84.9 C212.7,93.1 206.9,96.0 205.4,96.6 C205.1,102.4 203.0,107.8 198.3,112.5 C181.9,128.9 168.3,122.5 157.7,114.1 C157.9,116.9 156.7,120.9 152.7,124.9 L141.0,136.5 C139.8,137.7 141.6,141.9 141.8,141.8 Z" fill="currentColor" class="octo-body"></path></svg></a><style>.github-corner:hover .octo-arm{animation:octocat-wave 560ms ease-in-out}@keyframes octocat-wave{0%,100%{transform:rotate(0)}20%,60%{transform:rotate(-25deg)}40%,80%{transform:rotate(10deg)}}@media (max-width:500px){.github-corner:hover .octo-arm{animation:none}.github-corner .octo-arm{animation:octocat-wave 560ms ease-in-out}}</style>
<header id="header" class="header" itemscope itemtype="http://schema.org/WPHeader">
<div class="header-inner"><div class="site-brand-wrapper">
<div class="site-meta ">
<div class="custom-logo-site-title">
<a href="/" class="brand" rel="start">
<span class="logo-line-before"><i></i></span>
<span class="site-title">机器之恋</span>
<span class="logo-line-after"><i></i></span>
</a>
</div>
<p class="site-subtitle">Make AI not cool again!</p>
</div>
<div class="site-nav-toggle">
<button>
<span class="btn-bar"></span>
<span class="btn-bar"></span>
<span class="btn-bar"></span>
</button>
</div>
</div>
<nav class="site-nav">
<ul id="menu" class="menu">
<li class="menu-item menu-item-home">
<a href="/" rel="section">
<i class="menu-item-icon fa fa-fw fa-home"></i> <br />
首页
</a>
</li>
<li class="menu-item menu-item-tags">
<a href="/tags/" rel="section">
<i class="menu-item-icon fa fa-fw fa-tags"></i> <br />
标签
</a>
</li>
<li class="menu-item menu-item-categories">
<a href="/categories/" rel="section">
<i class="menu-item-icon fa fa-fw fa-th"></i> <br />
分类
</a>
</li>
<li class="menu-item menu-item-archives">
<a href="/archives/" rel="section">
<i class="menu-item-icon fa fa-fw fa-archive"></i> <br />
归档
</a>
</li>
<li class="menu-item menu-item-search">
<a href="javascript:;" class="popup-trigger">
<i class="menu-item-icon fa fa-search fa-fw"></i> <br />
搜索
</a>
</li>
</ul>
<div class="site-search">
<div class="popup search-popup local-search-popup">
<div class="local-search-header clearfix">
<span class="search-icon">
<i class="fa fa-search"></i>
</span>
<span class="popup-btn-close">
<i class="fa fa-times-circle"></i>
</span>
<div class="local-search-input-wrapper">
<input autocomplete="off"
placeholder="搜索..." spellcheck="false"
type="text" id="local-search-input">
</div>
</div>
<div id="local-search-result"></div>
</div>
</div>
</nav>
</div>
</header>
<main id="main" class="main">
<div class="main-inner">
<div class="content-wrap">
<div id="content" class="content">
<div id="posts" class="posts-expand">
<article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
<div class="post-block">
<link itemprop="mainEntityOfPage" href="http://dhdsjy.github.io/posts/pca.html">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="name" content="Simon">
<meta itemprop="description" content="">
<meta itemprop="image" content="/images/avatar.gif">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="机器之恋">
</span>
<header class="post-header">
<h1 class="post-title" itemprop="name headline">机器学习|降维之主成分分析(PCA)</h1>
<div class="post-meta">
<span class="post-time">
<span class="post-meta-item-icon">
<i class="fa fa-calendar-o"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建于" itemprop="dateCreated datePublished" datetime="2020-07-10T19:41:05+08:00">
2020-07-10
</time>
<span class="post-meta-divider">|</span>
<span class="post-meta-item-icon">
<i class="fa fa-calendar-check-o"></i>
</span>
<span class="post-meta-item-text">更新于:</span>
<time title="更新于" itemprop="dateModified" datetime="2020-07-22T20:28:06+08:00">
2020-07-22
</time>
</span>
<span class="post-category" >
<span class="post-meta-divider">|</span>
<span class="post-meta-item-icon">
<i class="fa fa-folder-o"></i>
</span>
<span class="post-meta-item-text">分类于</span>
<span itemprop="about" itemscope itemtype="http://schema.org/Thing">
<a href="/categories/uncategorized/" itemprop="url" rel="index">
<span itemprop="name">uncategorized</span>
</a>
</span>
</span>
<span class="post-meta-divider">|</span>
<span class="page-pv"><i class="fa fa-file-o"></i>
<span class="busuanzi-value" id="busuanzi_value_page_pv" ></span>
</span>
<div class="post-wordcount">
<span class="post-meta-item-icon">
<i class="fa fa-file-word-o"></i>
</span>
<span class="post-meta-item-text">字数统计:</span>
<span title="字数统计">
2.3k
</span>
<span class="post-meta-divider">|</span>
<span class="post-meta-item-icon">
<i class="fa fa-clock-o"></i>
</span>
<span class="post-meta-item-text">阅读时长 ≈</span>
<span title="阅读时长">
8
</span>
</div>
<div class="post-description">
NG机器学习课程中关于PCA算法的理解及记录
</div>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<p>在这个大数据时代,我们能采集到成千上万的特征数据,甚至通过特征工程构建更多的特征数据,这时候数据压缩就显得非常有必要了。压缩后的数据可以占用更少的计算机内存或磁盘空间,同时也有利于加速学习算法,甚至有利于可视化,增强可解释性。</p>
<p>降维,顾名思义:就是将高维数据降到低维。但是,维度降低了,信息不也损失了吗(虽然实际数据本身常常存在相关性)?那么怎么才能让信息损失得尽可能少呢?甚至是剔除冗余信息,精炼我们的数据呢?主成分分析(Principal Component Analysis,PCA)便应运而生了:PCA把可能具有相关性的高维变量合成线性无关的低维变量,称为主成分。新的低维数据集会尽可能地保留原始数据的方差(即信息)。</p>
<h2 id="白话PCA-鱼和熊掌可以兼得吗"><a href="#白话PCA-鱼和熊掌可以兼得吗" class="headerlink" title="白话PCA - 鱼和熊掌可以兼得吗"></a>白话PCA - 鱼和熊掌可以兼得吗</h2><p>PCA相比于其它的算法,涉及到的数学知识还是蛮多的。网上关于PCA的文章虽然有很多,但数学味儿太浓了(还是陡峭的高等数学),一般人理解起来还是很困难的。但是数学就不能以一种更好的方式呈现出来吗?那你一定没看过<a href="https://space.bilibili.com/88461692/channel/detail?cid=9450" target="_blank" rel="noopener">3Blue1Brown</a> 的线性代数课程吧?去看看吧,这是我听过最棒的一门数学课程!如果早点听了这门课,大学线代竞赛也不至于是二等奖了吧?哈哈哈哈</p>
<p>幸运的是,我在网上也看到一篇很有内味儿的<a href="http://blog.codinglabs.org/articles/pca-tutorial.html" target="_blank" rel="noopener">PCA</a>文章,对PCA的概念有点模糊时,我就拿来看一下。这一次,我结合这篇文章,尝试以一个更通俗的方式来阐述PCA的工作原理。从我们非常熟悉的考试成绩入手吧,请看下面我伪造的一个班级学生的期中考试成绩单:</p>
<div class="table-container">
<table>
<thead>
<tr>
<th>学生ID</th>
<th>语文</th>
<th>数学</th>
<th>英语</th>
<th>物理</th>
<th>化学</th>
</tr>
</thead>
<tbody>
<tr>
<td>1</td>
<td>90</td>
<td>143</td>
<td>111</td>
<td>92</td>
<td>78</td>
</tr>
<tr>
<td>2</td>
<td>90</td>
<td>110</td>
<td>123</td>
<td>81</td>
<td>88</td>
</tr>
<tr>
<td>3</td>
<td>90</td>
<td>82</td>
<td>108</td>
<td>55</td>
<td>85</td>
</tr>
<tr>
<td>…</td>
<td>…</td>
<td>…</td>
<td>…</td>
<td>…</td>
<td>…</td>
</tr>
<tr>
<td>m</td>
<td>90</td>
<td>128</td>
<td>99</td>
<td>85</td>
<td>63</td>
</tr>
</tbody>
</table>
</div>
<p>仅仅根据这份数据,如果让你预测下这些学生期末考试的成绩排名,你怎么做?一个常见的思路是直接把这些学生的期中考试成绩加起来,用期中的考试成绩排名直接作为期末成绩排名的预测。没毛病,这是时间序列分析的一个基本思想。但是有必要把这些成绩都加起来吗?这还仅仅是一个五维的数据,也仅仅通过一个“加法”算法,所以对计算机的存储或者算法的运行效率并没有影响。但如果是成千上万,甚至上亿维的数据,需要矩阵运算、梯度等复杂运算的算法时,计算机还能hold住吗?</p>
<p>这时候降维就非常有必要了,观察上面的成绩数据,我们发现所有人的语文成绩竟然都是90分!那么语文成绩对我们预测期末成绩排名就没有任何用处了,对算法来说,这是完全冗余的信息,可以将这个特征直接剔除!再者,我们也很容易发现数学成绩和物理成绩是比较相关的,那么预测期末考试的成绩排名有必要同时知道这两门课的成绩吗?好像也没必要,知道一门或者把两门成绩组合成一门就行了。通过这么一处理,我们是不是已经把5维特征变成3维特征了?语文成绩的处理方法是一种叫做方差选择法(剔除方差过小的特征)的特征选择方法,而物理成绩和数学成绩合并成一个特征就是我们要说的PCA了!</p>
<p>那么PCA是怎么将物理成绩和数学成绩组合成一个特征?这其中又有多少信息损失呢?如何度量损失信息的多少呢?</p>
<p>先看一个简单的例子,如何将下面这个二维数据降为一维呢?我们可以直接将这些数据都投影到 $x$ 轴或者都投影到 $y$ 轴:投影到 $x$ 轴,最左边的2个点和中间的2个点都重叠在一起,这样在二维空间中4个各不相同的点降到一维空间只剩下2个不同的值了,这是一种严重的信息丢失;同样,如果映射到 $y$ 轴,最上面的2个点和中间的2个点也都重叠在一起。因此,$x$ 轴或者 $y$ 轴都不是一个好的选择。直观目测,如果向通过第一象限和第三象限的斜线投影,则五个点在投影后还是可以区分的。</p>
<p><del>直观上理解:我们应该让降维后的数据尽量不要重合,互相隔的越远越好,这样才能保持信息尽量少的损失</del></p>
<p><img src="https://wx4.sinaimg.cn/mw690/a6facf89ly1ggziebcv3hj20kb0ip0t3.jpg" alt="pca" style="zoom:67%;" /></p>
<p>PCA可以这样简单的理解:</p>
<p><del>这个算法的学习,我也是反反复复,经历了很长时间。我对PCA的理解,主要基于这篇文章,既</del></p>
<h2 id="PCA-知其然知其所以然"><a href="#PCA-知其然知其所以然" class="headerlink" title="PCA 知其然知其所以然"></a>PCA 知其然知其所以然</h2><p>PCA从本质上来说是通过线性变换将原始数据变换为一组<strong>各维度线性无关</strong>的表示,这些线性无关的表示即为原始数据的主要特征分量。</p>
<p>线性变换,可以将数据从一个空间(记为A空间)映射到另一个空间(记为B空间)。不同空间中的数据可以通过基和坐标唯一确定。如果PCA要实现降维,则线性变换后,基的个数必须减少。所以线性变换的目的就是降维,那么这个线性变换如何表示呢?如果你看了<a href="https://space.bilibili.com/88461692/channel/detail?cid=9450" target="_blank" rel="noopener">3Blue1Brown</a> 的视频,很容易想到矩阵。在下图中黑色坐标系下 (基为(1,0)和(0,1)),向量 $\vec{p}$ 可表示为 $\vec{p}=(3,2 )$ , 那么向量 $\vec{p}$ 在蓝色坐标系下(基为(1,1)和(-1,1))如何表示呢?其实这就是一种线性变换,将数据从一个空间变换到另一个空间,这可以通过矩阵获得(左边的矩阵是经过单位化的基矩阵):</p>
<p>$\left[ \begin{matrix} \frac{1}{\sqrt{2}} & \frac{1}{\sqrt{2}} \\ -\frac{1}{\sqrt{2}} & \frac{1}{\sqrt{2}} \end{matrix} \right]$$\left[ \begin{matrix} 3 \\ 2 \end{matrix} \right]$ = $\left[ \begin{matrix} \frac{5}{\sqrt{2}} \\ -\frac{1}{\sqrt{2}} \end{matrix} \right]$ </p>
<p>该矩阵相乘的物理含义可这样理解:用B空间的基矩阵左乘A空间的数据点的向量表示即可获得该数据点在B空间的向量表示。所以变换后的向量 $\vec{p}$ 可表示为 $\vec{p}=(\frac{5}{\sqrt{2}},-\frac{1}{\sqrt{2}} )$ 。</p>
<p><img src="https://wx4.sinaimg.cn/mw690/a6facf89ly1ggyunjwgxkj20g10f8q3o.jpg" alt="base" style="zoom:80%;" /></p>
<p>如果想让线性变换后的维度降低,那么应该让基矩阵的维度降低。一般地,<strong>如果有M个N维向量,想将其变换为由R个N维向量表示的新空间中,那么首先将R个基按行组成矩阵A,然后将向量按列组成矩阵B,那么两矩阵的乘积AB就是变换结果,其中AB的第m列为B中第m列变换后的结果</strong> 。</p>
<p>那么这个基矩阵如何选择呢?一个合适的基矩阵除了能降低原始数据的维度,还应该保证降维后损失的信息尽量少。如果要保证损失的信息尽量少,至少降维后的数据不能重合;如果投影后的数据点相互之间距离较远(数据点比较分散,即方差尽可能大),那就更好了。</p>
<p>PCA的损失函数可以概述为:找出能够最小化投影距离的方式。</p>
<p>这个和支持向量机有什么不同呢?支持向量机只关注距离决策边界最近的点(即支持向量),而PCA是计算所有的点。</p>
<h2 id="PCA-Q-amp-A"><a href="#PCA-Q-amp-A" class="headerlink" title="PCA Q&A"></a>PCA Q&A</h2><p><strong>Q1:</strong> PCA、线性回归、支持向量机区别在哪里?</p>
<p><strong>A1:</strong> 首先,PCA是一种无监督学习算法,通常用于数据预处理,而线性回归和支持向量机是有监督学习算法,通常用于预测;其次,PCA的损失函数是最小化所有数据点到投影面的投影距离(点垂直于投影面),线性回归的损失函数是最小化所有数据点到拟合直线(超平面)的平方误差(误差沿因变量方向度量,并不垂直于拟合直线);至于支持向量机,它的损失函数只由距离决策边界最近的点(即支持向量)决定。</p>
<p><strong>Q2:</strong> 在使用PCA算法时有什么需要注意的吗?</p>
<p><strong>A2:</strong> 在进行PCA之前,一定不要忘记进行特征缩放/均值标准化</p>
<h2 id="PCA脑洞"><a href="#PCA脑洞" class="headerlink" title="PCA脑洞"></a>PCA脑洞</h2><ul>
<li>高维数据探索和可视化</li>
<li>数据预处理</li>
</ul>
<p>算法工程师有必要推导算法吗</p>
<p>算法工具包,就如屠龙宝刀一样,有的人拿它切菜,有的人拿它称霸武林,而有的人却“毁”了它,拿到了《武穆遗书》,统兵百万!</p>
<p>一个不懂算法原理的调包侠就像一个</p>
<p>明白算法原理的算法工程师更像一个有经验的医生</p>
</div>
<div>
<div>
<div style="text-align:center;color: #ccc;font-size:14px;">-------------End-------------</div>
</div>
</div>
<div>
<div class="my_post_copyright">
<script src="//cdn.bootcss.com/clipboard.js/1.5.10/clipboard.min.js"></script>
<!-- JS库 sweetalert 可修改路径 -->
<script src="https://cdn.bootcss.com/jquery/2.0.0/jquery.min.js"></script>
<script src="https://unpkg.com/sweetalert/dist/sweetalert.min.js"></script>
<p><span>本文标题:</span><a href="/posts/pca.html">机器学习|降维之主成分分析(PCA)</a></p>
<p><span>文章作者:</span><a href="/" title="访问 Simon 的个人博客">Simon</a></p>
<p><span>发布时间:</span>2020年07月10日 - 19:41</p>
<p><span>最后更新:</span>2020年07月22日 - 20:28</p>
<p><span>原始链接:</span><a href="/posts/pca.html" title="机器学习|降维之主成分分析(PCA)">http://dhdsjy.github.io/posts/pca.html</a>
<span class="copy-path" title="点击复制文章链接"><i class="fa fa-clipboard" data-clipboard-text="http://dhdsjy.github.io/posts/pca.html" aria-label="复制成功!"></i></span>
</p>
<p><span>许可协议:</span><i class="fa fa-creative-commons"></i> <a rel="license" href="https://creativecommons.org/licenses/by-nc-nd/4.0/" target="_blank" title="Attribution-NonCommercial-NoDerivatives 4.0 International (CC BY-NC-ND 4.0)">署名-非商业性使用-禁止演绎 4.0 国际</a> 转载请保留原文链接及作者。</p>
</div>
<script>
// Copy-link button: clipboard.js handles clicks on every element matching
// '.fa-clipboard' and copies that element's data-clipboard-text value.
var clipboard = new Clipboard('.fa-clipboard');

// Register the success handler exactly once.
// The previous code called clipboard.on('success', ...) inside a jQuery click
// handler, so every click added one more listener and the Nth click popped up
// N confirmation dialogs.
clipboard.on('success', function () {
  swal({
    title: "",
    text: '复制成功',
    icon: "success",
    showConfirmButton: true
  });
});
</script>
</div>
<footer class="post-footer">
<div class="post-tags">
<a href="/tags/PCA/" rel="tag"># PCA</a>
</div>
<div class="post-nav">
<div class="post-nav-next post-nav-item">
<a href="/posts/kmeans.html" rel="next" title="机器学习|聚类算法之k-means">
<i class="fa fa-chevron-left"></i> 机器学习|聚类算法之k-means
</a>
</div>
<span class="post-nav-divider"></span>
<div class="post-nav-prev post-nav-item">
<a href="/posts/commandline.html" rel="prev" title="python|命令行参数argv、argparse和getopt使用">
python|命令行参数argv、argparse和getopt使用 <i class="fa fa-chevron-right"></i>
</a>
</div>
</div>
</footer>
</div>
</article>
<div class="post-spread">
</div>
</div>
</div>
<div class="comments" id="comments">
<div id="vcomments"></div>
</div>
</div>
<div class="sidebar-toggle">
<div class="sidebar-toggle-line-wrap">
<span class="sidebar-toggle-line sidebar-toggle-line-first"></span>
<span class="sidebar-toggle-line sidebar-toggle-line-middle"></span>
<span class="sidebar-toggle-line sidebar-toggle-line-last"></span>
</div>
</div>
<aside id="sidebar" class="sidebar">
<div class="sidebar-inner">
<ul class="sidebar-nav motion-element">
<li class="sidebar-nav-toc sidebar-nav-active" data-target="post-toc-wrap">
文章目录
</li>
<li class="sidebar-nav-overview" data-target="site-overview-wrap">
站点概览
</li>
</ul>
<section class="site-overview-wrap sidebar-panel">
<div class="site-overview">
<div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
<p class="site-author-name" itemprop="name">Simon</p>
<p class="site-description motion-element" itemprop="description">数据挖掘,数据分析,爬虫,机器学习,深度学习,自然语言处理的学习记录</p>
</div>
<nav class="site-state motion-element">
<div class="site-state-item site-state-posts">
<a href="/archives/">
<span class="site-state-item-count">9</span>
<span class="site-state-item-name">日志</span>
</a>
</div>
<div class="site-state-item site-state-categories">
<a href="/categories/index.html">
<span class="site-state-item-count">6</span>
<span class="site-state-item-name">分类</span>
</a>
</div>
<div class="site-state-item site-state-tags">
<a href="/tags/index.html">
<span class="site-state-item-count">12</span>
<span class="site-state-item-name">标签</span>
</a>
</div>
</nav>
<div class="links-of-author motion-element">
</div>
</div>
</section>
<!--noindex-->
<section class="post-toc-wrap motion-element sidebar-panel sidebar-panel-active">
<div class="post-toc">
<div class="post-toc-content"><ol class="nav"><li class="nav-item nav-level-2"><a class="nav-link" href="#白话PCA-鱼和熊掌可以兼得吗"><span class="nav-number">1.</span> <span class="nav-text">白话PCA - 鱼和熊掌可以兼得吗</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#PCA-知其然知其所以然"><span class="nav-number">2.</span> <span class="nav-text">PCA 知其然知其所以然</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#PCA-Q-amp-A"><span class="nav-number">3.</span> <span class="nav-text">PCA Q&A</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#PCA脑洞"><span class="nav-number">4.</span> <span class="nav-text">PCA脑洞</span></a></li></ol></div>
</div>
</section>
<!--/noindex-->
</div>
</aside>
</div>
</main>
<footer id="footer" class="footer">
<div class="footer-inner">
<script async src="https://dn-lbstatics.qbox.me/busuanzi/2.3/busuanzi.pure.mini.js"></script>
<div class="copyright">© <span itemprop="copyrightYear">2020</span>
<span class="with-love">
<i class="fa fa-user"></i>
</span>
<span class="author" itemprop="copyrightHolder">Simon</span>
</div>
<!--
<span class="post-meta-divider">|</span>
<span class="post-meta-item-icon">
<i class="fa fa-area-chart"></i>
</span>
<span class="post-meta-item-text">Site words total count:</span>
<span title="Site words total count">7.2k</span>
</div>
<div class="powered-by">由 <a class="theme-link" target="_blank" href="https://hexo.io">Hexo</a> 强力驱动</div>
<span class="post-meta-divider">|</span>
<div class="theme-info">主题 — <a class="theme-link" target="_blank" href="https://github.com/iissnan/hexo-theme-next">NexT.Mist</a> v5.1.3</div>
-->
<div class="powered-by">
<i class="fa fa-user-md"></i><span id="busuanzi_container_site_uv">
本站访客数:<span id="busuanzi_value_site_uv"></span>
</span>
</div>
<span id="busuanzi_container_site_pv">
本站总访问量<span id="busuanzi_value_site_pv"></span>次
</span>
<div class="theme-info">
<div class="powered-by"></div>
<span class="post-count">博客全站共7.2k字</span>
</div>
<div class="busuanzi-count">
<script async src="https://dn-lbstatics.qbox.me/busuanzi/2.3/busuanzi.pure.mini.js"></script>
<span class="site-uv">
<i class="fa fa-user"></i>
<span class="busuanzi-value" id="busuanzi_value_site_uv"></span>
</span>
<span class="site-pv">
<i class="fa fa-eye"></i>
<span class="busuanzi-value" id="busuanzi_value_site_pv"></span>
</span>
</div>
</div>
</footer>
<div class="back-to-top">
<i class="fa fa-arrow-up"></i>
</div>
</div>
<script type="text/javascript">
// Replace window.Promise with null whenever its Object.prototype.toString tag
// is anything other than '[object Function]'.
(function (root) {
  var promiseTag = Object.prototype.toString.call(root.Promise);
  var looksLikeFunction = promiseTag === '[object Function]';
  if (!looksLikeFunction) {
    root.Promise = null;
  }
})(window);
</script>
<script type="text/javascript" src="/lib/jquery/index.js?v=2.1.3"></script>
<script type="text/javascript" src="/lib/fastclick/lib/fastclick.min.js?v=1.0.6"></script>
<script type="text/javascript" src="/lib/jquery_lazyload/jquery.lazyload.js?v=1.9.7"></script>
<script type="text/javascript" src="/lib/velocity/velocity.min.js?v=1.2.1"></script>
<script type="text/javascript" src="/lib/velocity/velocity.ui.min.js?v=1.2.1"></script>
<script type="text/javascript" src="/lib/fancybox/source/jquery.fancybox.pack.js?v=2.1.5"></script>
<script type="text/javascript" src="/lib/canvas-nest/canvas-nest.min.js"></script>
<script type="text/javascript" src="/js/src/utils.js?v=5.1.3"></script>
<script type="text/javascript" src="/js/src/motion.js?v=5.1.3"></script>
<script type="text/javascript" src="/js/src/scrollspy.js?v=5.1.3"></script>
<script type="text/javascript" src="/js/src/post-details.js?v=5.1.3"></script>
<script type="text/javascript" src="/js/src/bootstrap.js?v=5.1.3"></script>
<script src="//cdn1.lncld.net/static/js/3.0.4/av-min.js"></script>
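<!-- NOTE(review): the package version in this URL was destroyed by e-mail obfuscation ("[email protected]"); 1.3.10 is a reconstruction chosen to match the separate av-min.js include and the `av: AV` option — confirm the intended version. -->
<script src="//cdn.jsdelivr.net/npm/valine@1.3.10/dist/Valine.min.js"></script>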
<script type="text/javascript">
// Instantiate the Valine comment widget with the options below.
new Valine({
// AV is a global not defined in this script — presumably provided by the av-min.js include; confirm.
av: AV,
// Selector of the mount element for the widget.
el: '#vcomments' ,
verify: false,
notify: false,
// NOTE(review): these credentials are embedded in the page source and visible to every visitor —
// confirm the backend restricts their use (e.g. allowed domains).
app_id: 'prBhiPfyA4R6DBPCqsp4PrB7-9Nh9j0Va',
app_key: 'VXQ03EXbFdhmYcjaY2M8qAUI',
placeholder: 'ヾノ≧∀≦)o来啊,吐槽啊!'
});
</script>
<script type="text/javascript">
// Local-search popup state.
// isfetched starts out false; NOTE(review): presumably flipped elsewhere once the index is loaded — confirm.
var isfetched = false;

// Search index file name; an empty value falls back to "search.xml".
var search_path = "search.xml";
if (!search_path.length) {
  search_path = "search.xml";
}

// The index is treated as XML unless the file name ends in "json" (case-insensitive).
var isXml = !/json$/i.test(search_path);

// Site-absolute path of the index file.
var path = "/" + search_path;
// monitor main search box;
// Close handler for the search popup. The event argument is accepted but not used.
var onPopupClose = function (e) {
  // Hide every .popup element and empty the search input.
  $('.popup').hide();
  $('#local-search-input').val('');

  // Remove the result list, the #no-result element and the overlay in one pass.
  $('.search-result-list, #no-result, .local-search-pop-overlay').remove();

  // Clear the inline overflow value on <body>.
  $(document.body).css('overflow', '');
}