diff --git a/_book/R4DS_files/figure-html/broom-10-1.png b/_book/R4DS_files/figure-html/broom-10-1.png deleted file mode 100644 index 06fe839..0000000 Binary files a/_book/R4DS_files/figure-html/broom-10-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/broom-12-1.png b/_book/R4DS_files/figure-html/broom-12-1.png deleted file mode 100644 index d49d11e..0000000 Binary files a/_book/R4DS_files/figure-html/broom-12-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/broom-14-1.png b/_book/R4DS_files/figure-html/broom-14-1.png deleted file mode 100644 index f07277a..0000000 Binary files a/_book/R4DS_files/figure-html/broom-14-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/broom-19-1.png b/_book/R4DS_files/figure-html/broom-19-1.png deleted file mode 100644 index 7595697..0000000 Binary files a/_book/R4DS_files/figure-html/broom-19-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/broom-19-2-1.png b/_book/R4DS_files/figure-html/broom-19-2-1.png deleted file mode 100644 index 5f7c33d..0000000 Binary files a/_book/R4DS_files/figure-html/broom-19-2-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/broom-2-1.png b/_book/R4DS_files/figure-html/broom-2-1.png deleted file mode 100644 index d652b1a..0000000 Binary files a/_book/R4DS_files/figure-html/broom-2-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/broom-3-1.png b/_book/R4DS_files/figure-html/broom-3-1.png deleted file mode 100644 index 1b84513..0000000 Binary files a/_book/R4DS_files/figure-html/broom-3-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-ames-houseprice-14-1.png b/_book/R4DS_files/figure-html/eda-ames-houseprice-14-1.png deleted file mode 100644 index 6d9a5e5..0000000 Binary files a/_book/R4DS_files/figure-html/eda-ames-houseprice-14-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-ames-houseprice-15-1.png 
b/_book/R4DS_files/figure-html/eda-ames-houseprice-15-1.png deleted file mode 100644 index f96fa14..0000000 Binary files a/_book/R4DS_files/figure-html/eda-ames-houseprice-15-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-ames-houseprice-18-1.png b/_book/R4DS_files/figure-html/eda-ames-houseprice-18-1.png deleted file mode 100644 index 37c46ef..0000000 Binary files a/_book/R4DS_files/figure-html/eda-ames-houseprice-18-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-ames-houseprice-19-1.png b/_book/R4DS_files/figure-html/eda-ames-houseprice-19-1.png deleted file mode 100644 index b668f79..0000000 Binary files a/_book/R4DS_files/figure-html/eda-ames-houseprice-19-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-ames-houseprice-20-1.png b/_book/R4DS_files/figure-html/eda-ames-houseprice-20-1.png deleted file mode 100644 index 9fc6c9a..0000000 Binary files a/_book/R4DS_files/figure-html/eda-ames-houseprice-20-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-ames-houseprice-21-1.png b/_book/R4DS_files/figure-html/eda-ames-houseprice-21-1.png deleted file mode 100644 index 3b34034..0000000 Binary files a/_book/R4DS_files/figure-html/eda-ames-houseprice-21-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-ames-houseprice-22-1.png b/_book/R4DS_files/figure-html/eda-ames-houseprice-22-1.png deleted file mode 100644 index 11ab8fc..0000000 Binary files a/_book/R4DS_files/figure-html/eda-ames-houseprice-22-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-ames-houseprice-23-1.png b/_book/R4DS_files/figure-html/eda-ames-houseprice-23-1.png deleted file mode 100644 index 948a5df..0000000 Binary files a/_book/R4DS_files/figure-html/eda-ames-houseprice-23-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-anscombe-18-1.png b/_book/R4DS_files/figure-html/eda-anscombe-18-1.png deleted file mode 100644 index 
7af4c85..0000000 Binary files a/_book/R4DS_files/figure-html/eda-anscombe-18-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-career-decision-16-1.png b/_book/R4DS_files/figure-html/eda-career-decision-16-1.png deleted file mode 100644 index a0ba655..0000000 Binary files a/_book/R4DS_files/figure-html/eda-career-decision-16-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-career-decision-21-1.png b/_book/R4DS_files/figure-html/eda-career-decision-21-1.png deleted file mode 100644 index b949cb8..0000000 Binary files a/_book/R4DS_files/figure-html/eda-career-decision-21-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-career-decision-26-1.png b/_book/R4DS_files/figure-html/eda-career-decision-26-1.png deleted file mode 100644 index d189d88..0000000 Binary files a/_book/R4DS_files/figure-html/eda-career-decision-26-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-caribou-10-1.png b/_book/R4DS_files/figure-html/eda-caribou-10-1.png deleted file mode 100644 index 8400fa9..0000000 Binary files a/_book/R4DS_files/figure-html/eda-caribou-10-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-caribou-13-1.png b/_book/R4DS_files/figure-html/eda-caribou-13-1.png deleted file mode 100644 index 9b6759f..0000000 Binary files a/_book/R4DS_files/figure-html/eda-caribou-13-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-caribou-14-1.png b/_book/R4DS_files/figure-html/eda-caribou-14-1.png deleted file mode 100644 index 7b11451..0000000 Binary files a/_book/R4DS_files/figure-html/eda-caribou-14-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-caribou-15-1.png b/_book/R4DS_files/figure-html/eda-caribou-15-1.png deleted file mode 100644 index 4e2406a..0000000 Binary files a/_book/R4DS_files/figure-html/eda-caribou-15-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-caribou-17-1.png 
b/_book/R4DS_files/figure-html/eda-caribou-17-1.png deleted file mode 100644 index 6c36833..0000000 Binary files a/_book/R4DS_files/figure-html/eda-caribou-17-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-caribou-7-1.png b/_book/R4DS_files/figure-html/eda-caribou-7-1.png deleted file mode 100644 index 4712c70..0000000 Binary files a/_book/R4DS_files/figure-html/eda-caribou-7-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-caribou-8-1.png b/_book/R4DS_files/figure-html/eda-caribou-8-1.png deleted file mode 100644 index b4174a5..0000000 Binary files a/_book/R4DS_files/figure-html/eda-caribou-8-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-caribou-9-1.png b/_book/R4DS_files/figure-html/eda-caribou-9-1.png deleted file mode 100644 index 27ea1d0..0000000 Binary files a/_book/R4DS_files/figure-html/eda-caribou-9-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-covid2019-18-1.png b/_book/R4DS_files/figure-html/eda-covid2019-18-1.png deleted file mode 100644 index e59b092..0000000 Binary files a/_book/R4DS_files/figure-html/eda-covid2019-18-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-covid2019-21-1.png b/_book/R4DS_files/figure-html/eda-covid2019-21-1.png deleted file mode 100644 index 9a1e08c..0000000 Binary files a/_book/R4DS_files/figure-html/eda-covid2019-21-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-covid2019-24-1.png b/_book/R4DS_files/figure-html/eda-covid2019-24-1.png deleted file mode 100644 index d58f27f..0000000 Binary files a/_book/R4DS_files/figure-html/eda-covid2019-24-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-covid2019-25-1.png b/_book/R4DS_files/figure-html/eda-covid2019-25-1.png deleted file mode 100644 index 4b78112..0000000 Binary files a/_book/R4DS_files/figure-html/eda-covid2019-25-1.png and /dev/null differ diff --git 
a/_book/R4DS_files/figure-html/eda-covid2019-30-1.png b/_book/R4DS_files/figure-html/eda-covid2019-30-1.png deleted file mode 100644 index 05ce84c..0000000 Binary files a/_book/R4DS_files/figure-html/eda-covid2019-30-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-covid2019-34-1.png b/_book/R4DS_files/figure-html/eda-covid2019-34-1.png deleted file mode 100644 index f3212ab..0000000 Binary files a/_book/R4DS_files/figure-html/eda-covid2019-34-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-covid2019-36-1.png b/_book/R4DS_files/figure-html/eda-covid2019-36-1.png deleted file mode 100644 index dffe1f6..0000000 Binary files a/_book/R4DS_files/figure-html/eda-covid2019-36-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-covid2019-38-1.png b/_book/R4DS_files/figure-html/eda-covid2019-38-1.png deleted file mode 100644 index 0db3c93..0000000 Binary files a/_book/R4DS_files/figure-html/eda-covid2019-38-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-covid2019-39-1.png b/_book/R4DS_files/figure-html/eda-covid2019-39-1.png deleted file mode 100644 index 000464e..0000000 Binary files a/_book/R4DS_files/figure-html/eda-covid2019-39-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-covid2019-41-1.png b/_book/R4DS_files/figure-html/eda-covid2019-41-1.png deleted file mode 100644 index f7d6d51..0000000 Binary files a/_book/R4DS_files/figure-html/eda-covid2019-41-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-covid2019-44-1.png b/_book/R4DS_files/figure-html/eda-covid2019-44-1.png deleted file mode 100644 index cf7f4c9..0000000 Binary files a/_book/R4DS_files/figure-html/eda-covid2019-44-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-height-11-1.png b/_book/R4DS_files/figure-html/eda-height-11-1.png deleted file mode 100644 index 7edec67..0000000 Binary files a/_book/R4DS_files/figure-html/eda-height-11-1.png 
and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-height-12-1.png b/_book/R4DS_files/figure-html/eda-height-12-1.png deleted file mode 100644 index d27844f..0000000 Binary files a/_book/R4DS_files/figure-html/eda-height-12-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-height-13-1.png b/_book/R4DS_files/figure-html/eda-height-13-1.png deleted file mode 100644 index 34c954c..0000000 Binary files a/_book/R4DS_files/figure-html/eda-height-13-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-height-14-1.png b/_book/R4DS_files/figure-html/eda-height-14-1.png deleted file mode 100644 index 4810b97..0000000 Binary files a/_book/R4DS_files/figure-html/eda-height-14-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-height-15-1.png b/_book/R4DS_files/figure-html/eda-height-15-1.png deleted file mode 100644 index 52525b7..0000000 Binary files a/_book/R4DS_files/figure-html/eda-height-15-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-height-19-1.png b/_book/R4DS_files/figure-html/eda-height-19-1.png deleted file mode 100644 index e8a807d..0000000 Binary files a/_book/R4DS_files/figure-html/eda-height-19-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-height-4-1.png b/_book/R4DS_files/figure-html/eda-height-4-1.png deleted file mode 100644 index c8ce47d..0000000 Binary files a/_book/R4DS_files/figure-html/eda-height-4-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-height-5-1.png b/_book/R4DS_files/figure-html/eda-height-5-1.png deleted file mode 100644 index 46bfaea..0000000 Binary files a/_book/R4DS_files/figure-html/eda-height-5-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-nobel-10-1.png b/_book/R4DS_files/figure-html/eda-nobel-10-1.png deleted file mode 100644 index 17fa990..0000000 Binary files a/_book/R4DS_files/figure-html/eda-nobel-10-1.png and /dev/null differ diff --git 
a/_book/R4DS_files/figure-html/eda-nobel-11-1.png b/_book/R4DS_files/figure-html/eda-nobel-11-1.png deleted file mode 100644 index 59b6594..0000000 Binary files a/_book/R4DS_files/figure-html/eda-nobel-11-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-nobel-13-1.png b/_book/R4DS_files/figure-html/eda-nobel-13-1.png deleted file mode 100644 index ed8050c..0000000 Binary files a/_book/R4DS_files/figure-html/eda-nobel-13-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-nobel-24-1.png b/_book/R4DS_files/figure-html/eda-nobel-24-1.png deleted file mode 100644 index 6ed3123..0000000 Binary files a/_book/R4DS_files/figure-html/eda-nobel-24-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-nobel-26-1.png b/_book/R4DS_files/figure-html/eda-nobel-26-1.png deleted file mode 100644 index ed4233c..0000000 Binary files a/_book/R4DS_files/figure-html/eda-nobel-26-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-nobel-27-1.png b/_book/R4DS_files/figure-html/eda-nobel-27-1.png deleted file mode 100644 index 421f30f..0000000 Binary files a/_book/R4DS_files/figure-html/eda-nobel-27-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-nobel-28-1.png b/_book/R4DS_files/figure-html/eda-nobel-28-1.png deleted file mode 100644 index 16646c2..0000000 Binary files a/_book/R4DS_files/figure-html/eda-nobel-28-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-nobel-29-1.png b/_book/R4DS_files/figure-html/eda-nobel-29-1.png deleted file mode 100644 index 397222c..0000000 Binary files a/_book/R4DS_files/figure-html/eda-nobel-29-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-nobel-30-1.png b/_book/R4DS_files/figure-html/eda-nobel-30-1.png deleted file mode 100644 index 118ff4d..0000000 Binary files a/_book/R4DS_files/figure-html/eda-nobel-30-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-nobel-30-2.png 
b/_book/R4DS_files/figure-html/eda-nobel-30-2.png deleted file mode 100644 index 3b035dd..0000000 Binary files a/_book/R4DS_files/figure-html/eda-nobel-30-2.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-nobel-30-3.png b/_book/R4DS_files/figure-html/eda-nobel-30-3.png deleted file mode 100644 index 7ffcbd1..0000000 Binary files a/_book/R4DS_files/figure-html/eda-nobel-30-3.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-nobel-30-4.png b/_book/R4DS_files/figure-html/eda-nobel-30-4.png deleted file mode 100644 index e29a1c4..0000000 Binary files a/_book/R4DS_files/figure-html/eda-nobel-30-4.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-nobel-30-5.png b/_book/R4DS_files/figure-html/eda-nobel-30-5.png deleted file mode 100644 index 984885e..0000000 Binary files a/_book/R4DS_files/figure-html/eda-nobel-30-5.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-nobel-30-6.png b/_book/R4DS_files/figure-html/eda-nobel-30-6.png deleted file mode 100644 index 83bbe78..0000000 Binary files a/_book/R4DS_files/figure-html/eda-nobel-30-6.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-nobel-33-1.png b/_book/R4DS_files/figure-html/eda-nobel-33-1.png deleted file mode 100644 index 0c81716..0000000 Binary files a/_book/R4DS_files/figure-html/eda-nobel-33-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-nobel-34-1.png b/_book/R4DS_files/figure-html/eda-nobel-34-1.png deleted file mode 100644 index caac565..0000000 Binary files a/_book/R4DS_files/figure-html/eda-nobel-34-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-nobel-35-1.png b/_book/R4DS_files/figure-html/eda-nobel-35-1.png deleted file mode 100644 index ef42bd0..0000000 Binary files a/_book/R4DS_files/figure-html/eda-nobel-35-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-nobel-36-1.png b/_book/R4DS_files/figure-html/eda-nobel-36-1.png 
deleted file mode 100644 index 46a0560..0000000 Binary files a/_book/R4DS_files/figure-html/eda-nobel-36-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-nobel-42-1.png b/_book/R4DS_files/figure-html/eda-nobel-42-1.png deleted file mode 100644 index cd2fb9a..0000000 Binary files a/_book/R4DS_files/figure-html/eda-nobel-42-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-nobel-45-1.png b/_book/R4DS_files/figure-html/eda-nobel-45-1.png deleted file mode 100644 index 86524c9..0000000 Binary files a/_book/R4DS_files/figure-html/eda-nobel-45-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-nobel-46-1.png b/_book/R4DS_files/figure-html/eda-nobel-46-1.png deleted file mode 100644 index 42862d4..0000000 Binary files a/_book/R4DS_files/figure-html/eda-nobel-46-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-nobel-49-1.gif b/_book/R4DS_files/figure-html/eda-nobel-49-1.gif deleted file mode 100644 index 8e867ad..0000000 Binary files a/_book/R4DS_files/figure-html/eda-nobel-49-1.gif and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-nobel-52-1.png b/_book/R4DS_files/figure-html/eda-nobel-52-1.png deleted file mode 100644 index ad918fe..0000000 Binary files a/_book/R4DS_files/figure-html/eda-nobel-52-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-nobel-9-1.png b/_book/R4DS_files/figure-html/eda-nobel-9-1.png deleted file mode 100644 index 018ab57..0000000 Binary files a/_book/R4DS_files/figure-html/eda-nobel-9-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-olympics-15-1.png b/_book/R4DS_files/figure-html/eda-olympics-15-1.png deleted file mode 100644 index 82b1483..0000000 Binary files a/_book/R4DS_files/figure-html/eda-olympics-15-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-olympics-17-1.png b/_book/R4DS_files/figure-html/eda-olympics-17-1.png deleted file mode 100644 index 
2208c79..0000000 Binary files a/_book/R4DS_files/figure-html/eda-olympics-17-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-olympics-4-1.png b/_book/R4DS_files/figure-html/eda-olympics-4-1.png deleted file mode 100644 index 0c30c94..0000000 Binary files a/_book/R4DS_files/figure-html/eda-olympics-4-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-olympics-7-1.png b/_book/R4DS_files/figure-html/eda-olympics-7-1.png deleted file mode 100644 index 0340368..0000000 Binary files a/_book/R4DS_files/figure-html/eda-olympics-7-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-penguins-11-1.png b/_book/R4DS_files/figure-html/eda-penguins-11-1.png deleted file mode 100644 index b2a53d1..0000000 Binary files a/_book/R4DS_files/figure-html/eda-penguins-11-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-penguins-12-1.png b/_book/R4DS_files/figure-html/eda-penguins-12-1.png deleted file mode 100644 index ff1749b..0000000 Binary files a/_book/R4DS_files/figure-html/eda-penguins-12-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-penguins-13-1.png b/_book/R4DS_files/figure-html/eda-penguins-13-1.png deleted file mode 100644 index 1a26eaf..0000000 Binary files a/_book/R4DS_files/figure-html/eda-penguins-13-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-penguins-14-1.png b/_book/R4DS_files/figure-html/eda-penguins-14-1.png deleted file mode 100644 index 1733e2a..0000000 Binary files a/_book/R4DS_files/figure-html/eda-penguins-14-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-penguins-15-1.png b/_book/R4DS_files/figure-html/eda-penguins-15-1.png deleted file mode 100644 index a251747..0000000 Binary files a/_book/R4DS_files/figure-html/eda-penguins-15-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-penguins-16-1.png b/_book/R4DS_files/figure-html/eda-penguins-16-1.png deleted file mode 
100644 index 213c9cf..0000000 Binary files a/_book/R4DS_files/figure-html/eda-penguins-16-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-penguins-17-1.png b/_book/R4DS_files/figure-html/eda-penguins-17-1.png deleted file mode 100644 index 9420a99..0000000 Binary files a/_book/R4DS_files/figure-html/eda-penguins-17-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-penguins-18-1.png b/_book/R4DS_files/figure-html/eda-penguins-18-1.png deleted file mode 100644 index 13a25ae..0000000 Binary files a/_book/R4DS_files/figure-html/eda-penguins-18-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-penguins-19-1.png b/_book/R4DS_files/figure-html/eda-penguins-19-1.png deleted file mode 100644 index bc429c8..0000000 Binary files a/_book/R4DS_files/figure-html/eda-penguins-19-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-penguins-20-1.png b/_book/R4DS_files/figure-html/eda-penguins-20-1.png deleted file mode 100644 index 5cca3c9..0000000 Binary files a/_book/R4DS_files/figure-html/eda-penguins-20-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-penguins-21-1.png b/_book/R4DS_files/figure-html/eda-penguins-21-1.png deleted file mode 100644 index 93f17fa..0000000 Binary files a/_book/R4DS_files/figure-html/eda-penguins-21-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-penguins-22-1.png b/_book/R4DS_files/figure-html/eda-penguins-22-1.png deleted file mode 100644 index 7059056..0000000 Binary files a/_book/R4DS_files/figure-html/eda-penguins-22-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-penguins-23-1.png b/_book/R4DS_files/figure-html/eda-penguins-23-1.png deleted file mode 100644 index ee34008..0000000 Binary files a/_book/R4DS_files/figure-html/eda-penguins-23-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-penguins-24-1.png b/_book/R4DS_files/figure-html/eda-penguins-24-1.png 
deleted file mode 100644 index d4111f0..0000000 Binary files a/_book/R4DS_files/figure-html/eda-penguins-24-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-penguins-25-1.png b/_book/R4DS_files/figure-html/eda-penguins-25-1.png deleted file mode 100644 index d94a94d..0000000 Binary files a/_book/R4DS_files/figure-html/eda-penguins-25-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-penguins-26-1.png b/_book/R4DS_files/figure-html/eda-penguins-26-1.png deleted file mode 100644 index ced7e39..0000000 Binary files a/_book/R4DS_files/figure-html/eda-penguins-26-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-penguins-27-1.png b/_book/R4DS_files/figure-html/eda-penguins-27-1.png deleted file mode 100644 index 5e77c45..0000000 Binary files a/_book/R4DS_files/figure-html/eda-penguins-27-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-penguins-29-1.png b/_book/R4DS_files/figure-html/eda-penguins-29-1.png deleted file mode 100644 index 5e4694c..0000000 Binary files a/_book/R4DS_files/figure-html/eda-penguins-29-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-penguins-37-1.png b/_book/R4DS_files/figure-html/eda-penguins-37-1.png deleted file mode 100644 index 9f70c46..0000000 Binary files a/_book/R4DS_files/figure-html/eda-penguins-37-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-penguins-42-1.png b/_book/R4DS_files/figure-html/eda-penguins-42-1.png deleted file mode 100644 index a6e6d07..0000000 Binary files a/_book/R4DS_files/figure-html/eda-penguins-42-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/eda-vaccine-effectiveness-14-1.png b/_book/R4DS_files/figure-html/eda-vaccine-effectiveness-14-1.png deleted file mode 100644 index c9f64ac..0000000 Binary files a/_book/R4DS_files/figure-html/eda-vaccine-effectiveness-14-1.png and /dev/null differ diff --git 
a/_book/R4DS_files/figure-html/eda-vaccine-effectiveness-8-1.png b/_book/R4DS_files/figure-html/eda-vaccine-effectiveness-8-1.png deleted file mode 100644 index 0bb957f..0000000 Binary files a/_book/R4DS_files/figure-html/eda-vaccine-effectiveness-8-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/forcats-13-1.png b/_book/R4DS_files/figure-html/forcats-13-1.png deleted file mode 100644 index 580c701..0000000 Binary files a/_book/R4DS_files/figure-html/forcats-13-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/forcats-14-1.png b/_book/R4DS_files/figure-html/forcats-14-1.png deleted file mode 100644 index a0d10d0..0000000 Binary files a/_book/R4DS_files/figure-html/forcats-14-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/forcats-15-1.png b/_book/R4DS_files/figure-html/forcats-15-1.png deleted file mode 100644 index f038cc8..0000000 Binary files a/_book/R4DS_files/figure-html/forcats-15-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/forcats-16-1.png b/_book/R4DS_files/figure-html/forcats-16-1.png deleted file mode 100644 index 5eba1ce..0000000 Binary files a/_book/R4DS_files/figure-html/forcats-16-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/forcats-17-1.png b/_book/R4DS_files/figure-html/forcats-17-1.png deleted file mode 100644 index 580c701..0000000 Binary files a/_book/R4DS_files/figure-html/forcats-17-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/forcats-18-1.png b/_book/R4DS_files/figure-html/forcats-18-1.png deleted file mode 100644 index c5f13bc..0000000 Binary files a/_book/R4DS_files/figure-html/forcats-18-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/forcats-19-1.png b/_book/R4DS_files/figure-html/forcats-19-1.png deleted file mode 100644 index 5dc6422..0000000 Binary files a/_book/R4DS_files/figure-html/forcats-19-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-aes2-15-1.png 
b/_book/R4DS_files/figure-html/ggplot2-aes2-15-1.png deleted file mode 100644 index 6249a9e..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-aes2-15-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-aes2-16-1.png b/_book/R4DS_files/figure-html/ggplot2-aes2-16-1.png deleted file mode 100644 index 72612e6..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-aes2-16-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-aes2-21-1.png b/_book/R4DS_files/figure-html/ggplot2-aes2-21-1.png deleted file mode 100644 index 1c8f313..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-aes2-21-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-aes2-27-1.png b/_book/R4DS_files/figure-html/ggplot2-aes2-27-1.png deleted file mode 100644 index 6249a9e..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-aes2-27-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-aes2-28-1.png b/_book/R4DS_files/figure-html/ggplot2-aes2-28-1.png deleted file mode 100644 index c5eb512..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-aes2-28-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-aes2-29-1.png b/_book/R4DS_files/figure-html/ggplot2-aes2-29-1.png deleted file mode 100644 index d090d98..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-aes2-29-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-aes2-3-1.png b/_book/R4DS_files/figure-html/ggplot2-aes2-3-1.png deleted file mode 100644 index a7f2358..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-aes2-3-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-aes2-3-2.png b/_book/R4DS_files/figure-html/ggplot2-aes2-3-2.png deleted file mode 100644 index ca968ad..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-aes2-3-2.png and /dev/null differ diff --git 
a/_book/R4DS_files/figure-html/ggplot2-aes2-30-1.png b/_book/R4DS_files/figure-html/ggplot2-aes2-30-1.png deleted file mode 100644 index e18eaa5..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-aes2-30-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-aes2-31-1.png b/_book/R4DS_files/figure-html/ggplot2-aes2-31-1.png deleted file mode 100644 index 72612e6..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-aes2-31-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-aes2-32-1.png b/_book/R4DS_files/figure-html/ggplot2-aes2-32-1.png deleted file mode 100644 index 72612e6..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-aes2-32-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-aes2-36-1.png b/_book/R4DS_files/figure-html/ggplot2-aes2-36-1.png deleted file mode 100644 index 4a65961..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-aes2-36-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-aes2-37-1.png b/_book/R4DS_files/figure-html/ggplot2-aes2-37-1.png deleted file mode 100644 index 816363d..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-aes2-37-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-aes2-38-1.png b/_book/R4DS_files/figure-html/ggplot2-aes2-38-1.png deleted file mode 100644 index 9af216b..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-aes2-38-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-aes2-9-1.png b/_book/R4DS_files/figure-html/ggplot2-aes2-9-1.png deleted file mode 100644 index 7ddab06..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-aes2-9-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-colors-4-1.png b/_book/R4DS_files/figure-html/ggplot2-colors-4-1.png deleted file mode 100644 index e7a35df..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-colors-4-1.png and /dev/null 
differ diff --git a/_book/R4DS_files/figure-html/ggplot2-colors-5-1.png b/_book/R4DS_files/figure-html/ggplot2-colors-5-1.png deleted file mode 100644 index f16f5a9..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-colors-5-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-colors-6-1.png b/_book/R4DS_files/figure-html/ggplot2-colors-6-1.png deleted file mode 100644 index 4a3a884..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-colors-6-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-colors-7-1.png b/_book/R4DS_files/figure-html/ggplot2-colors-7-1.png deleted file mode 100644 index 4d8fb2f..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-colors-7-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-customize-11-1.png b/_book/R4DS_files/figure-html/ggplot2-customize-11-1.png deleted file mode 100644 index 6587b2a..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-customize-11-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-customize-12-1.png b/_book/R4DS_files/figure-html/ggplot2-customize-12-1.png deleted file mode 100644 index 643bc8d..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-customize-12-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-customize-13-1.png b/_book/R4DS_files/figure-html/ggplot2-customize-13-1.png deleted file mode 100644 index 1a9d10d..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-customize-13-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-customize-14-1.png b/_book/R4DS_files/figure-html/ggplot2-customize-14-1.png deleted file mode 100644 index 3324fe7..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-customize-14-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-customize-15-1.png b/_book/R4DS_files/figure-html/ggplot2-customize-15-1.png deleted file mode 100644 index 
dbd9fb6..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-customize-15-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-customize-17-1.png b/_book/R4DS_files/figure-html/ggplot2-customize-17-1.png deleted file mode 100644 index e264cc4..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-customize-17-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-customize-18-1.png b/_book/R4DS_files/figure-html/ggplot2-customize-18-1.png deleted file mode 100644 index f481de9..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-customize-18-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-customize-19-1.png b/_book/R4DS_files/figure-html/ggplot2-customize-19-1.png deleted file mode 100644 index a12416f..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-customize-19-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-customize-21-1.png b/_book/R4DS_files/figure-html/ggplot2-customize-21-1.png deleted file mode 100644 index 0cb9023..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-customize-21-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-customize-23-1.png b/_book/R4DS_files/figure-html/ggplot2-customize-23-1.png deleted file mode 100644 index 825ae53..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-customize-23-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-customize-24-1.png b/_book/R4DS_files/figure-html/ggplot2-customize-24-1.png deleted file mode 100644 index 1021849..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-customize-24-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-customize-25-1.png b/_book/R4DS_files/figure-html/ggplot2-customize-25-1.png deleted file mode 100644 index 6cc896b..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-customize-25-1.png and /dev/null differ diff --git 
a/_book/R4DS_files/figure-html/ggplot2-customize-26-1.png b/_book/R4DS_files/figure-html/ggplot2-customize-26-1.png deleted file mode 100644 index 3f71789..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-customize-26-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-customize-27-1.png b/_book/R4DS_files/figure-html/ggplot2-customize-27-1.png deleted file mode 100644 index bbbef35..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-customize-27-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-customize-29-1.png b/_book/R4DS_files/figure-html/ggplot2-customize-29-1.png deleted file mode 100644 index 9e0c593..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-customize-29-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-customize-30-1.png b/_book/R4DS_files/figure-html/ggplot2-customize-30-1.png deleted file mode 100644 index a74ad61..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-customize-30-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-customize-31-1.png b/_book/R4DS_files/figure-html/ggplot2-customize-31-1.png deleted file mode 100644 index 3b22e9f..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-customize-31-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-customize-32-1.png b/_book/R4DS_files/figure-html/ggplot2-customize-32-1.png deleted file mode 100644 index e132e93..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-customize-32-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-customize-34-1.png b/_book/R4DS_files/figure-html/ggplot2-customize-34-1.png deleted file mode 100644 index cf86fe1..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-customize-34-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-customize-35-1.png b/_book/R4DS_files/figure-html/ggplot2-customize-35-1.png deleted file 
mode 100644 index ef4e54d..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-customize-35-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-customize-36-1.png b/_book/R4DS_files/figure-html/ggplot2-customize-36-1.png deleted file mode 100644 index 38dbd6f..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-customize-36-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-customize-4-1.png b/_book/R4DS_files/figure-html/ggplot2-customize-4-1.png deleted file mode 100644 index 9ce1656..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-customize-4-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-customize-6-1.png b/_book/R4DS_files/figure-html/ggplot2-customize-6-1.png deleted file mode 100644 index f16a905..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-customize-6-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-customize-7-1.png b/_book/R4DS_files/figure-html/ggplot2-customize-7-1.png deleted file mode 100644 index 3809ba0..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-customize-7-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-customize-8-1.png b/_book/R4DS_files/figure-html/ggplot2-customize-8-1.png deleted file mode 100644 index 25f7c2d..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-customize-8-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-14-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-14-1.png deleted file mode 100644 index 317b666..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-14-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-15-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-15-1.png deleted file mode 100644 index ed1a40c..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-15-1.png and /dev/null differ diff --git 
a/_book/R4DS_files/figure-html/ggplot2-geom-16-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-16-1.png deleted file mode 100644 index 3db588f..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-16-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-17-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-17-1.png deleted file mode 100644 index 317b666..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-17-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-19-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-19-1.png deleted file mode 100644 index 4b717ea..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-19-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-20-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-20-1.png deleted file mode 100644 index cc59781..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-20-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-21-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-21-1.png deleted file mode 100644 index b812ee8..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-21-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-22-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-22-1.png deleted file mode 100644 index 2dfa772..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-22-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-23-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-23-1.png deleted file mode 100644 index 6adce2a..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-23-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-24-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-24-1.png deleted file mode 100644 index f62baf9..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-24-1.png and /dev/null 
differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-25-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-25-1.png deleted file mode 100644 index 7ed8ddc..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-25-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-26-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-26-1.png deleted file mode 100644 index 880d94a..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-26-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-27-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-27-1.png deleted file mode 100644 index 3b4c4aa..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-27-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-28-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-28-1.png deleted file mode 100644 index 880d94a..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-28-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-28-2.png b/_book/R4DS_files/figure-html/ggplot2-geom-28-2.png deleted file mode 100644 index 3631df7..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-28-2.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-29-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-29-1.png deleted file mode 100644 index d77ee43..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-29-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-30-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-30-1.png deleted file mode 100644 index 4bd80d7..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-30-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-31-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-31-1.png deleted file mode 100644 index d22c505..0000000 Binary files 
a/_book/R4DS_files/figure-html/ggplot2-geom-31-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-32-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-32-1.png deleted file mode 100644 index 42fc9f5..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-32-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-33-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-33-1.png deleted file mode 100644 index 931788d..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-33-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-34-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-34-1.png deleted file mode 100644 index feb3af8..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-34-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-35-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-35-1.png deleted file mode 100644 index bd938b2..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-35-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-36-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-36-1.png deleted file mode 100644 index 7bec8b4..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-36-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-37-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-37-1.png deleted file mode 100644 index 5444520..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-37-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-38-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-38-1.png deleted file mode 100644 index 1f53ff3..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-38-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-39-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-39-1.png deleted file mode 100644 index 
a62efb6..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-39-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-40-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-40-1.png deleted file mode 100644 index 0e4e838..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-40-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-41-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-41-1.png deleted file mode 100644 index 7263fde..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-41-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-42-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-42-1.png deleted file mode 100644 index 374a9ef..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-42-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-43-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-43-1.png deleted file mode 100644 index d9c3c55..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-43-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-44-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-44-1.png deleted file mode 100644 index ea8071a..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-44-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-45-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-45-1.png deleted file mode 100644 index 57224d0..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-45-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-46-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-46-1.png deleted file mode 100644 index 777cb86..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-46-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-47-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-47-1.png deleted file 
mode 100644 index 0d8f20f..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-47-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-48-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-48-1.png deleted file mode 100644 index fce73c5..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-48-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-49-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-49-1.png deleted file mode 100644 index 44f287a..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-49-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-5-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-5-1.png deleted file mode 100644 index 97b8798..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-5-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-50-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-50-1.png deleted file mode 100644 index 4c1b673..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-50-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-51-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-51-1.png deleted file mode 100644 index cc85037..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-51-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-52-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-52-1.png deleted file mode 100644 index 3bee4f4..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-52-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-53-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-53-1.png deleted file mode 100644 index 7bcd3dc..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-53-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-54-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-54-1.png 
deleted file mode 100644 index 764e3ae..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-54-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-55-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-55-1.png deleted file mode 100644 index 142f7fa..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-55-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-56-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-56-1.png deleted file mode 100644 index c755105..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-56-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-57-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-57-1.png deleted file mode 100644 index bb6c63d..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-57-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-58-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-58-1.png deleted file mode 100644 index daca301..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-58-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-59-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-59-1.png deleted file mode 100644 index da73277..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-59-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-60-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-60-1.png deleted file mode 100644 index 4fe7902..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-60-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-62-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-62-1.png deleted file mode 100644 index f5bfe8a..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-62-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-63-1.png 
b/_book/R4DS_files/figure-html/ggplot2-geom-63-1.png deleted file mode 100644 index 64c2e37..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-63-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-64-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-64-1.png deleted file mode 100644 index 7d178d9..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-64-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-65-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-65-1.png deleted file mode 100644 index 8ee778f..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-65-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-66-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-66-1.png deleted file mode 100644 index a919cd3..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-66-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-68-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-68-1.png deleted file mode 100644 index b7b4688..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-68-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-70-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-70-1.png deleted file mode 100644 index 0b6e804..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-70-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-71-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-71-1.png deleted file mode 100644 index fe84d7b..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-71-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-72-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-72-1.png deleted file mode 100644 index 3ec3f74..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-72-1.png and /dev/null differ diff --git 
a/_book/R4DS_files/figure-html/ggplot2-geom-73-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-73-1.png deleted file mode 100644 index a1bd9e5..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-73-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-74-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-74-1.png deleted file mode 100644 index 756b2f0..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-74-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-75-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-75-1.png deleted file mode 100644 index 2465589..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-75-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-76-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-76-1.png deleted file mode 100644 index 0fe2121..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-76-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-77-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-77-1.png deleted file mode 100644 index bb6c63d..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-77-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-78-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-78-1.png deleted file mode 100644 index 31ca5ba..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-78-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-79-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-79-1.png deleted file mode 100644 index bb6c63d..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-79-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-80-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-80-1.png deleted file mode 100644 index 7182989..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-80-1.png and /dev/null 
differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-81-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-81-1.png deleted file mode 100644 index 146415f..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-81-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-82-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-82-1.png deleted file mode 100644 index 7863f33..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-82-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-83-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-83-1.png deleted file mode 100644 index f7c9036..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-83-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-84-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-84-1.png deleted file mode 100644 index f928780..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-84-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-85-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-85-1.png deleted file mode 100644 index db08aba..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-85-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-86-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-86-1.png deleted file mode 100644 index e6e155b..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-86-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-geom-87-1.png b/_book/R4DS_files/figure-html/ggplot2-geom-87-1.png deleted file mode 100644 index c9a8006..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-geom-87-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-guides-1-1.png b/_book/R4DS_files/figure-html/ggplot2-guides-1-1.png deleted file mode 100644 index aab8de3..0000000 Binary files 
a/_book/R4DS_files/figure-html/ggplot2-guides-1-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-guides-3-1.png b/_book/R4DS_files/figure-html/ggplot2-guides-3-1.png deleted file mode 100644 index 0b90d08..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-guides-3-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-guides-4-1.png b/_book/R4DS_files/figure-html/ggplot2-guides-4-1.png deleted file mode 100644 index 426acb2..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-guides-4-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-guides-5-1.png b/_book/R4DS_files/figure-html/ggplot2-guides-5-1.png deleted file mode 100644 index babebc8..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-guides-5-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-guides-6-1.png b/_book/R4DS_files/figure-html/ggplot2-guides-6-1.png deleted file mode 100644 index 5893b89..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-guides-6-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-guides-7-1.png b/_book/R4DS_files/figure-html/ggplot2-guides-7-1.png deleted file mode 100644 index 98da3da..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-guides-7-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-guides-8-1.png b/_book/R4DS_files/figure-html/ggplot2-guides-8-1.png deleted file mode 100644 index 8f1f974..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-guides-8-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-scales-1-1.png b/_book/R4DS_files/figure-html/ggplot2-scales-1-1.png deleted file mode 100644 index 71008ff..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-scales-1-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-scales-10-1.png b/_book/R4DS_files/figure-html/ggplot2-scales-10-1.png deleted file mode 
100644 index a264422..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-scales-10-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-scales-11-1.png b/_book/R4DS_files/figure-html/ggplot2-scales-11-1.png deleted file mode 100644 index ca471b1..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-scales-11-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-scales-12-1.png b/_book/R4DS_files/figure-html/ggplot2-scales-12-1.png deleted file mode 100644 index 7b0274c..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-scales-12-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-scales-13-1.png b/_book/R4DS_files/figure-html/ggplot2-scales-13-1.png deleted file mode 100644 index 4211db1..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-scales-13-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-scales-14-1.png b/_book/R4DS_files/figure-html/ggplot2-scales-14-1.png deleted file mode 100644 index 1168873..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-scales-14-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-scales-15-1.png b/_book/R4DS_files/figure-html/ggplot2-scales-15-1.png deleted file mode 100644 index 4605545..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-scales-15-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-scales-2-1.png b/_book/R4DS_files/figure-html/ggplot2-scales-2-1.png deleted file mode 100644 index 71008ff..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-scales-2-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-scales-3-1.png b/_book/R4DS_files/figure-html/ggplot2-scales-3-1.png deleted file mode 100644 index 743e7fc..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-scales-3-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-scales-9-1.png 
b/_book/R4DS_files/figure-html/ggplot2-scales-9-1.png deleted file mode 100644 index 6db95c8..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-scales-9-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-stat-layer-1-1.png b/_book/R4DS_files/figure-html/ggplot2-stat-layer-1-1.png deleted file mode 100644 index b469ba0..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-stat-layer-1-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-stat-layer-11-1.png b/_book/R4DS_files/figure-html/ggplot2-stat-layer-11-1.png deleted file mode 100644 index 5619f55..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-stat-layer-11-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-stat-layer-13-1.png b/_book/R4DS_files/figure-html/ggplot2-stat-layer-13-1.png deleted file mode 100644 index 00927f4..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-stat-layer-13-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-stat-layer-14-1.png b/_book/R4DS_files/figure-html/ggplot2-stat-layer-14-1.png deleted file mode 100644 index ae48c94..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-stat-layer-14-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-stat-layer-20-1.png b/_book/R4DS_files/figure-html/ggplot2-stat-layer-20-1.png deleted file mode 100644 index ae48c94..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-stat-layer-20-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-stat-layer-21-1.png b/_book/R4DS_files/figure-html/ggplot2-stat-layer-21-1.png deleted file mode 100644 index 614f358..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-stat-layer-21-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-stat-layer-24-1.png b/_book/R4DS_files/figure-html/ggplot2-stat-layer-24-1.png deleted file mode 100644 index 800b07b..0000000 Binary files 
a/_book/R4DS_files/figure-html/ggplot2-stat-layer-24-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-stat-layer-26-1.png b/_book/R4DS_files/figure-html/ggplot2-stat-layer-26-1.png deleted file mode 100644 index 9aae8da..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-stat-layer-26-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-stat-layer-5-1.png b/_book/R4DS_files/figure-html/ggplot2-stat-layer-5-1.png deleted file mode 100644 index 172481d..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-stat-layer-5-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-stat-layer-8-1.png b/_book/R4DS_files/figure-html/ggplot2-stat-layer-8-1.png deleted file mode 100644 index d39230e..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-stat-layer-8-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-stat-layer-9-1.png b/_book/R4DS_files/figure-html/ggplot2-stat-layer-9-1.png deleted file mode 100644 index 05bda72..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-stat-layer-9-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-theme-10-1.png b/_book/R4DS_files/figure-html/ggplot2-theme-10-1.png deleted file mode 100644 index 9079f70..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-theme-10-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-theme-11-1.png b/_book/R4DS_files/figure-html/ggplot2-theme-11-1.png deleted file mode 100644 index 4cce604..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-theme-11-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-theme-12-1.png b/_book/R4DS_files/figure-html/ggplot2-theme-12-1.png deleted file mode 100644 index b95a78f..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-theme-12-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-theme-13-1.png 
b/_book/R4DS_files/figure-html/ggplot2-theme-13-1.png deleted file mode 100644 index 24fa4b6..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-theme-13-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-theme-14-1.png b/_book/R4DS_files/figure-html/ggplot2-theme-14-1.png deleted file mode 100644 index 524b3ee..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-theme-14-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-theme-15-1.png b/_book/R4DS_files/figure-html/ggplot2-theme-15-1.png deleted file mode 100644 index 6e2e513..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-theme-15-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-theme-18-1.png b/_book/R4DS_files/figure-html/ggplot2-theme-18-1.png deleted file mode 100644 index 0daf2fd..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-theme-18-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-theme-6-1.png b/_book/R4DS_files/figure-html/ggplot2-theme-6-1.png deleted file mode 100644 index ddd7b96..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-theme-6-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-theme-7-1.png b/_book/R4DS_files/figure-html/ggplot2-theme-7-1.png deleted file mode 100644 index afcf196..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-theme-7-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-theme-8-1.png b/_book/R4DS_files/figure-html/ggplot2-theme-8-1.png deleted file mode 100644 index 76f28b8..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-theme-8-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ggplot2-theme-9-1.png b/_book/R4DS_files/figure-html/ggplot2-theme-9-1.png deleted file mode 100644 index b59c1e1..0000000 Binary files a/_book/R4DS_files/figure-html/ggplot2-theme-9-1.png and /dev/null differ diff --git 
a/_book/R4DS_files/figure-html/infer-13-1.png b/_book/R4DS_files/figure-html/infer-13-1.png deleted file mode 100644 index 410132a..0000000 Binary files a/_book/R4DS_files/figure-html/infer-13-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/infer-14-1.png b/_book/R4DS_files/figure-html/infer-14-1.png deleted file mode 100644 index cba3988..0000000 Binary files a/_book/R4DS_files/figure-html/infer-14-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/infer-17-1.png b/_book/R4DS_files/figure-html/infer-17-1.png deleted file mode 100644 index 1620f6f..0000000 Binary files a/_book/R4DS_files/figure-html/infer-17-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/infer-22-1.png b/_book/R4DS_files/figure-html/infer-22-1.png deleted file mode 100644 index c5046b0..0000000 Binary files a/_book/R4DS_files/figure-html/infer-22-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/infer-30-1.png b/_book/R4DS_files/figure-html/infer-30-1.png deleted file mode 100644 index c37ee3c..0000000 Binary files a/_book/R4DS_files/figure-html/infer-30-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/infer-5-1.png b/_book/R4DS_files/figure-html/infer-5-1.png deleted file mode 100644 index 10e0f3d..0000000 Binary files a/_book/R4DS_files/figure-html/infer-5-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/infer-6-1.png b/_book/R4DS_files/figure-html/infer-6-1.png deleted file mode 100644 index ccf9b27..0000000 Binary files a/_book/R4DS_files/figure-html/infer-6-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/lazyman-17-1.png b/_book/R4DS_files/figure-html/lazyman-17-1.png deleted file mode 100644 index 864f300..0000000 Binary files a/_book/R4DS_files/figure-html/lazyman-17-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/lazyman-19-1.png b/_book/R4DS_files/figure-html/lazyman-19-1.png deleted file mode 100644 index d2976c7..0000000 Binary 
files a/_book/R4DS_files/figure-html/lazyman-19-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/lazyman-gganonymize-1.png b/_book/R4DS_files/figure-html/lazyman-gganonymize-1.png deleted file mode 100644 index 854e53b..0000000 Binary files a/_book/R4DS_files/figure-html/lazyman-gganonymize-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/lincoln-temp-all-points-1.png b/_book/R4DS_files/figure-html/lincoln-temp-all-points-1.png deleted file mode 100644 index 36a8d63..0000000 Binary files a/_book/R4DS_files/figure-html/lincoln-temp-all-points-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/lincoln-temp-boxplots-1.png b/_book/R4DS_files/figure-html/lincoln-temp-boxplots-1.png deleted file mode 100644 index 1a36ff3..0000000 Binary files a/_book/R4DS_files/figure-html/lincoln-temp-boxplots-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/lincoln-temp-jittered-1.png b/_book/R4DS_files/figure-html/lincoln-temp-jittered-1.png deleted file mode 100644 index be39aef..0000000 Binary files a/_book/R4DS_files/figure-html/lincoln-temp-jittered-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/lincoln-temp-points-errorbars-1.png b/_book/R4DS_files/figure-html/lincoln-temp-points-errorbars-1.png deleted file mode 100644 index e5c4165..0000000 Binary files a/_book/R4DS_files/figure-html/lincoln-temp-points-errorbars-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/lincoln-temp-sina-1.png b/_book/R4DS_files/figure-html/lincoln-temp-sina-1.png deleted file mode 100644 index c327f80..0000000 Binary files a/_book/R4DS_files/figure-html/lincoln-temp-sina-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/lincoln-temp-violins-1.png b/_book/R4DS_files/figure-html/lincoln-temp-violins-1.png deleted file mode 100644 index 4fb45f4..0000000 Binary files a/_book/R4DS_files/figure-html/lincoln-temp-violins-1.png and /dev/null differ diff --git 
a/_book/R4DS_files/figure-html/lm-13-1.png b/_book/R4DS_files/figure-html/lm-13-1.png deleted file mode 100644 index 1d61dc9..0000000 Binary files a/_book/R4DS_files/figure-html/lm-13-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/lm-20-1.png b/_book/R4DS_files/figure-html/lm-20-1.png deleted file mode 100644 index f3d0e3a..0000000 Binary files a/_book/R4DS_files/figure-html/lm-20-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/lm-31-1.png b/_book/R4DS_files/figure-html/lm-31-1.png deleted file mode 100644 index 75eedbb..0000000 Binary files a/_book/R4DS_files/figure-html/lm-31-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/lm-39-1.png b/_book/R4DS_files/figure-html/lm-39-1.png deleted file mode 100644 index 0ca8091..0000000 Binary files a/_book/R4DS_files/figure-html/lm-39-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/lm-44-1.png b/_book/R4DS_files/figure-html/lm-44-1.png deleted file mode 100644 index 08b9571..0000000 Binary files a/_book/R4DS_files/figure-html/lm-44-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/lm-45-1.png b/_book/R4DS_files/figure-html/lm-45-1.png deleted file mode 100644 index 9b2050e..0000000 Binary files a/_book/R4DS_files/figure-html/lm-45-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/lm-50-1.png b/_book/R4DS_files/figure-html/lm-50-1.png deleted file mode 100644 index 1cc5493..0000000 Binary files a/_book/R4DS_files/figure-html/lm-50-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/lm-51-1.png b/_book/R4DS_files/figure-html/lm-51-1.png deleted file mode 100644 index b7e82d9..0000000 Binary files a/_book/R4DS_files/figure-html/lm-51-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/lm-9-1.png b/_book/R4DS_files/figure-html/lm-9-1.png deleted file mode 100644 index 24ff2a7..0000000 Binary files a/_book/R4DS_files/figure-html/lm-9-1.png and /dev/null differ diff --git 
a/_book/R4DS_files/figure-html/lmm-12-1.png b/_book/R4DS_files/figure-html/lmm-12-1.png deleted file mode 100644 index 129e9d9..0000000 Binary files a/_book/R4DS_files/figure-html/lmm-12-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/lmm-15-1.png b/_book/R4DS_files/figure-html/lmm-15-1.png deleted file mode 100644 index f28e718..0000000 Binary files a/_book/R4DS_files/figure-html/lmm-15-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/lmm-25-1.png b/_book/R4DS_files/figure-html/lmm-25-1.png deleted file mode 100644 index fb5f412..0000000 Binary files a/_book/R4DS_files/figure-html/lmm-25-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/lmm-6-1.png b/_book/R4DS_files/figure-html/lmm-6-1.png deleted file mode 100644 index f0b8971..0000000 Binary files a/_book/R4DS_files/figure-html/lmm-6-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/lmm-9-1.png b/_book/R4DS_files/figure-html/lmm-9-1.png deleted file mode 100644 index f6b640b..0000000 Binary files a/_book/R4DS_files/figure-html/lmm-9-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/logistic-regression-12-1.png b/_book/R4DS_files/figure-html/logistic-regression-12-1.png deleted file mode 100644 index 4d6fd19..0000000 Binary files a/_book/R4DS_files/figure-html/logistic-regression-12-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/logistic-regression-6-1.png b/_book/R4DS_files/figure-html/logistic-regression-6-1.png deleted file mode 100644 index 993ef52..0000000 Binary files a/_book/R4DS_files/figure-html/logistic-regression-6-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/logistic-regression-9-1.png b/_book/R4DS_files/figure-html/logistic-regression-9-1.png deleted file mode 100644 index 4638f07..0000000 Binary files a/_book/R4DS_files/figure-html/logistic-regression-9-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/ordinal-12-1.png 
b/_book/R4DS_files/figure-html/ordinal-12-1.png deleted file mode 100644 index b13d107..0000000 Binary files a/_book/R4DS_files/figure-html/ordinal-12-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/poisson-regression-16-1.png b/_book/R4DS_files/figure-html/poisson-regression-16-1.png deleted file mode 100644 index 1d991ba..0000000 Binary files a/_book/R4DS_files/figure-html/poisson-regression-16-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/poisson-regression-19-1.png b/_book/R4DS_files/figure-html/poisson-regression-19-1.png deleted file mode 100644 index 976a746..0000000 Binary files a/_book/R4DS_files/figure-html/poisson-regression-19-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/poisson-regression-22-1.png b/_book/R4DS_files/figure-html/poisson-regression-22-1.png deleted file mode 100644 index 9d28553..0000000 Binary files a/_book/R4DS_files/figure-html/poisson-regression-22-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/poisson-regression-4-1.png b/_book/R4DS_files/figure-html/poisson-regression-4-1.png deleted file mode 100644 index a4ff199..0000000 Binary files a/_book/R4DS_files/figure-html/poisson-regression-4-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/poisson-regression-6-1.png b/_book/R4DS_files/figure-html/poisson-regression-6-1.png deleted file mode 100644 index edcf88f..0000000 Binary files a/_book/R4DS_files/figure-html/poisson-regression-6-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/poisson-regression-7-1.png b/_book/R4DS_files/figure-html/poisson-regression-7-1.png deleted file mode 100644 index 56da214..0000000 Binary files a/_book/R4DS_files/figure-html/poisson-regression-7-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/sampling-12-1.png b/_book/R4DS_files/figure-html/sampling-12-1.png deleted file mode 100644 index 84f70f4..0000000 Binary files 
a/_book/R4DS_files/figure-html/sampling-12-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/sampling-13-1.png b/_book/R4DS_files/figure-html/sampling-13-1.png deleted file mode 100644 index a6e6eec..0000000 Binary files a/_book/R4DS_files/figure-html/sampling-13-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/sampling-14-1.png b/_book/R4DS_files/figure-html/sampling-14-1.png deleted file mode 100644 index 258baa9..0000000 Binary files a/_book/R4DS_files/figure-html/sampling-14-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/sampling-21-1.png b/_book/R4DS_files/figure-html/sampling-21-1.png deleted file mode 100644 index dce1f75..0000000 Binary files a/_book/R4DS_files/figure-html/sampling-21-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/sampling-23-1.png b/_book/R4DS_files/figure-html/sampling-23-1.png deleted file mode 100644 index 3eaa94f..0000000 Binary files a/_book/R4DS_files/figure-html/sampling-23-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/sampling-25-1.png b/_book/R4DS_files/figure-html/sampling-25-1.png deleted file mode 100644 index 36d57c7..0000000 Binary files a/_book/R4DS_files/figure-html/sampling-25-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/sampling-28-1.png b/_book/R4DS_files/figure-html/sampling-28-1.png deleted file mode 100644 index 3370ebe..0000000 Binary files a/_book/R4DS_files/figure-html/sampling-28-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/sampling-35-1.png b/_book/R4DS_files/figure-html/sampling-35-1.png deleted file mode 100644 index 393f7df..0000000 Binary files a/_book/R4DS_files/figure-html/sampling-35-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/sampling-36-1.png b/_book/R4DS_files/figure-html/sampling-36-1.png deleted file mode 100644 index 1f3b573..0000000 Binary files a/_book/R4DS_files/figure-html/sampling-36-1.png and /dev/null differ diff 
--git a/_book/R4DS_files/figure-html/sampling-4-1.png b/_book/R4DS_files/figure-html/sampling-4-1.png deleted file mode 100644 index 371077f..0000000 Binary files a/_book/R4DS_files/figure-html/sampling-4-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/sampling-40-1.png b/_book/R4DS_files/figure-html/sampling-40-1.png deleted file mode 100644 index a5d2f90..0000000 Binary files a/_book/R4DS_files/figure-html/sampling-40-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/sampling-43-1.png b/_book/R4DS_files/figure-html/sampling-43-1.png deleted file mode 100644 index 32d1d60..0000000 Binary files a/_book/R4DS_files/figure-html/sampling-43-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/sampling-5-1.png b/_book/R4DS_files/figure-html/sampling-5-1.png deleted file mode 100644 index 12b9374..0000000 Binary files a/_book/R4DS_files/figure-html/sampling-5-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/sampling-7-1.png b/_book/R4DS_files/figure-html/sampling-7-1.png deleted file mode 100644 index f160bce..0000000 Binary files a/_book/R4DS_files/figure-html/sampling-7-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/sampling-8-1.png b/_book/R4DS_files/figure-html/sampling-8-1.png deleted file mode 100644 index 3d73f50..0000000 Binary files a/_book/R4DS_files/figure-html/sampling-8-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/temp-ridgeline-1.png b/_book/R4DS_files/figure-html/temp-ridgeline-1.png deleted file mode 100644 index e520eeb..0000000 Binary files a/_book/R4DS_files/figure-html/temp-ridgeline-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/temp-ridgeline-colorbar-1.png b/_book/R4DS_files/figure-html/temp-ridgeline-colorbar-1.png deleted file mode 100644 index 58fd720..0000000 Binary files a/_book/R4DS_files/figure-html/temp-ridgeline-colorbar-1.png and /dev/null differ diff --git 
a/_book/R4DS_files/figure-html/tests-as-linear-42-1.png b/_book/R4DS_files/figure-html/tests-as-linear-42-1.png deleted file mode 100644 index 686cb7e..0000000 Binary files a/_book/R4DS_files/figure-html/tests-as-linear-42-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/tests-as-linear-5-1.png b/_book/R4DS_files/figure-html/tests-as-linear-5-1.png deleted file mode 100644 index cb95a92..0000000 Binary files a/_book/R4DS_files/figure-html/tests-as-linear-5-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/tidygraph-12-1.png b/_book/R4DS_files/figure-html/tidygraph-12-1.png deleted file mode 100644 index 33fb157..0000000 Binary files a/_book/R4DS_files/figure-html/tidygraph-12-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/tidygraph-19-1.png b/_book/R4DS_files/figure-html/tidygraph-19-1.png deleted file mode 100644 index e672d31..0000000 Binary files a/_book/R4DS_files/figure-html/tidygraph-19-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/tidygraph-22-1.png b/_book/R4DS_files/figure-html/tidygraph-22-1.png deleted file mode 100644 index 1325460..0000000 Binary files a/_book/R4DS_files/figure-html/tidygraph-22-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/tidygraph-24-1.png b/_book/R4DS_files/figure-html/tidygraph-24-1.png deleted file mode 100644 index a4f306b..0000000 Binary files a/_book/R4DS_files/figure-html/tidygraph-24-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/tidygraph-28-1.png b/_book/R4DS_files/figure-html/tidygraph-28-1.png deleted file mode 100644 index b3963f2..0000000 Binary files a/_book/R4DS_files/figure-html/tidygraph-28-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/tidygraph-9-1.png b/_book/R4DS_files/figure-html/tidygraph-9-1.png deleted file mode 100644 index d72e4df..0000000 Binary files a/_book/R4DS_files/figure-html/tidygraph-9-1.png and /dev/null differ diff --git 
a/_book/R4DS_files/figure-html/tidymodels-17-1.png b/_book/R4DS_files/figure-html/tidymodels-17-1.png deleted file mode 100644 index 4be493c..0000000 Binary files a/_book/R4DS_files/figure-html/tidymodels-17-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/tidyr-12-1.png b/_book/R4DS_files/figure-html/tidyr-12-1.png deleted file mode 100644 index e686ce7..0000000 Binary files a/_book/R4DS_files/figure-html/tidyr-12-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/tidystats-6-1.png b/_book/R4DS_files/figure-html/tidystats-6-1.png deleted file mode 100644 index 5e2f6be..0000000 Binary files a/_book/R4DS_files/figure-html/tidystats-6-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/tidyverse-tips-32-1.png b/_book/R4DS_files/figure-html/tidyverse-tips-32-1.png deleted file mode 100644 index f00709b..0000000 Binary files a/_book/R4DS_files/figure-html/tidyverse-tips-32-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/tidyverse-tips-33-1.png b/_book/R4DS_files/figure-html/tidyverse-tips-33-1.png deleted file mode 100644 index ada490b..0000000 Binary files a/_book/R4DS_files/figure-html/tidyverse-tips-33-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/tidyverse-tips-36-1.png b/_book/R4DS_files/figure-html/tidyverse-tips-36-1.png deleted file mode 100644 index 9189fde..0000000 Binary files a/_book/R4DS_files/figure-html/tidyverse-tips-36-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/tidyverse-tips-37-1.png b/_book/R4DS_files/figure-html/tidyverse-tips-37-1.png deleted file mode 100644 index 3ec535a..0000000 Binary files a/_book/R4DS_files/figure-html/tidyverse-tips-37-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/tidyverse-tips-39-1.png b/_book/R4DS_files/figure-html/tidyverse-tips-39-1.png deleted file mode 100644 index c76f844..0000000 Binary files a/_book/R4DS_files/figure-html/tidyverse-tips-39-1.png and /dev/null differ 
diff --git a/_book/R4DS_files/figure-html/tidyverse-tips-40-1.png b/_book/R4DS_files/figure-html/tidyverse-tips-40-1.png deleted file mode 100644 index 5ff9950..0000000 Binary files a/_book/R4DS_files/figure-html/tidyverse-tips-40-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/tidyverse-tips-41-1.png b/_book/R4DS_files/figure-html/tidyverse-tips-41-1.png deleted file mode 100644 index 44f287a..0000000 Binary files a/_book/R4DS_files/figure-html/tidyverse-tips-41-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/tidyverse-tips-42-1.png b/_book/R4DS_files/figure-html/tidyverse-tips-42-1.png deleted file mode 100644 index cc85037..0000000 Binary files a/_book/R4DS_files/figure-html/tidyverse-tips-42-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/tidyverse-tips-45-1.png b/_book/R4DS_files/figure-html/tidyverse-tips-45-1.png deleted file mode 100644 index 84acb1b..0000000 Binary files a/_book/R4DS_files/figure-html/tidyverse-tips-45-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/tidyverse-tips-46-1.png b/_book/R4DS_files/figure-html/tidyverse-tips-46-1.png deleted file mode 100644 index 320db36..0000000 Binary files a/_book/R4DS_files/figure-html/tidyverse-tips-46-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/tidyverse-tips-60-1.png b/_book/R4DS_files/figure-html/tidyverse-tips-60-1.png deleted file mode 100644 index 91753fb..0000000 Binary files a/_book/R4DS_files/figure-html/tidyverse-tips-60-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/tidyverse-tips-62-1.png b/_book/R4DS_files/figure-html/tidyverse-tips-62-1.png deleted file mode 100644 index 21ef4ad..0000000 Binary files a/_book/R4DS_files/figure-html/tidyverse-tips-62-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/tidyverse-workflow-11-1.png b/_book/R4DS_files/figure-html/tidyverse-workflow-11-1.png deleted file mode 100644 index a354de1..0000000 Binary files 
a/_book/R4DS_files/figure-html/tidyverse-workflow-11-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/tidyverse-workflow-12-1.png b/_book/R4DS_files/figure-html/tidyverse-workflow-12-1.png deleted file mode 100644 index 1e4453c..0000000 Binary files a/_book/R4DS_files/figure-html/tidyverse-workflow-12-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/tidyverse-workflow-13-1.png b/_book/R4DS_files/figure-html/tidyverse-workflow-13-1.png deleted file mode 100644 index 07c0499..0000000 Binary files a/_book/R4DS_files/figure-html/tidyverse-workflow-13-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/unnamed-chunk-10-1.png b/_book/R4DS_files/figure-html/unnamed-chunk-10-1.png deleted file mode 100644 index 0d4fb53..0000000 Binary files a/_book/R4DS_files/figure-html/unnamed-chunk-10-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/unnamed-chunk-20-1.png b/_book/R4DS_files/figure-html/unnamed-chunk-20-1.png deleted file mode 100644 index 022bec8..0000000 Binary files a/_book/R4DS_files/figure-html/unnamed-chunk-20-1.png and /dev/null differ diff --git a/_book/R4DS_files/figure-html/unnamed-chunk-9-1.png b/_book/R4DS_files/figure-html/unnamed-chunk-9-1.png deleted file mode 100644 index 8e7c101..0000000 Binary files a/_book/R4DS_files/figure-html/unnamed-chunk-9-1.png and /dev/null differ diff --git a/_book/author.html b/_book/author.html deleted file mode 100644 index 4c0720b..0000000 --- a/_book/author.html +++ /dev/null @@ -1,1281 +0,0 @@ - - - - - - - 作者简介 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

作者简介

-

王敏杰,四川师范大学研究生公选课《数据科学中的R语言》和《社会科学中的统计学》授课老师,西南交通大学量子物理学博士,爱好数据科学,喜欢用R和Stan统计编程, -联系方式

- -
- - - -
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/bayesian-models.html b/_book/bayesian-models.html deleted file mode 100644 index c600951..0000000 --- a/_book/bayesian-models.html +++ /dev/null @@ -1,1343 +0,0 @@ - - - - - - - 第 38 章 贝叶斯建模 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 38 章 贝叶斯建模

-
library(tidyverse)
-library(tidybayes)
-library(rstan)
-library(brms)
-rstan_options(auto_write = TRUE)
-options(mc.cores = parallel::detectCores())
-

之前我们讲了线性模型和混合线性模型,今天我们往前一步,应该说是一大步。因为这一步迈向了贝叶斯分析,与频率学派的分析有本质的区别,这种区别类似经典物理和量子物理的区别。

-
    -
  • 频率学派,是从数据出发
  • -
  • 贝叶斯学派,先假定参数有一个分布,看到数据后,再重新分配可能性。
  • -
-
-

38.1 生活中的贝叶斯

-

事实上,贝叶斯在生活中应用很广泛,我们自觉和不自觉中都在使用贝叶斯分析。

-
-
-

38.2 贝叶斯公式

-

参数是假设,数据是证据。对于参数 \(\theta\) 和数据 \(D\),贝叶斯公式可以写为

-

\[ -\underbrace{p(\theta|D)}_\text{posterior} \; \propto \; \underbrace{p(D|\theta)}_\text{likelihood} \;\; \underbrace{p(\theta)}_\text{prior} \;. -\]

-
-
-

38.3 三张图讲贝叶斯分析

-

-
    -
  • 第一张图: 在看到数据之前,我们预先认为参数,应该在某个范围且服从某种分布

  • -
  • 第二张图: 曲线与数据匹配得怎么样? 相似性概率

  • -
  • 第三张图: 看到数据之后,可能的曲线

  • -
-

观察到数据点后,我们认为服从线性模型,这个线性模型不是一条直线,而是很多条,有些线的可能性大,有些线的可能性低,但都是有可能的。那么,综合这些有可能的线,(截距和斜率)构成了一种分布,即后验概率分布

-

因为我们是R语言课,我们跳过很多理论推导。事实上,我在学习贝叶斯数据分析的时候,也是先从代码操作入手,然后理解贝叶斯推断相关理论,有时候更直观更容易理解。当然,我不是说我的方法一定正确,只是供大家参考的一个选项。我会用到brms和stan,但我个人更喜欢stan.

-
-
-

38.4 线性模型

-

从最简单的线性模型开始 -\[ -y_n = \alpha + \beta x_n + \epsilon_n \quad \text{where}\quad -\epsilon_n \sim \operatorname{normal}(0,\sigma). -\]

-

等价于

-

\[ -y_n - (\alpha + \beta x_n) \sim \operatorname{normal}(0,\sigma), -\]

-

进一步等价

-

\[ -y_n \sim \operatorname{normal}(\alpha + \beta x_n, \, \sigma). -\]

-
stan_program <- "
-data {
-  int<lower=0> N;
-  vector[N] x;
-  vector[N] y;
-}
-parameters {
-  real alpha;
-  real beta;
-  real<lower=0> sigma;
-}
-model {
-  y ~ normal(alpha + beta * x, sigma);
-}
-"
- -
-
- - - -
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/beauty-of-across.html b/_book/beauty-of-across.html deleted file mode 100644 index d22cee6..0000000 --- a/_book/beauty-of-across.html +++ /dev/null @@ -1,1928 +0,0 @@ - - - - - - - 第 23 章 tidyverse中的across()之美 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 23 章 tidyverse中的across()之美

-

dplyr 1.0版本增加了across()函数,这个函数集中体现了dplyr宏包的强大和简约,今天我用企鹅数据,来领略它的美。

-
library(tidyverse)
-library(palmerpenguins)
-penguins
-
## # A tibble: 344 x 8
-##    species island bill_length_mm bill_depth_mm
-##    <fct>   <fct>           <dbl>         <dbl>
-##  1 Adelie  Torge~           39.1          18.7
-##  2 Adelie  Torge~           39.5          17.4
-##  3 Adelie  Torge~           40.3          18  
-##  4 Adelie  Torge~           NA            NA  
-##  5 Adelie  Torge~           36.7          19.3
-##  6 Adelie  Torge~           39.3          20.6
-##  7 Adelie  Torge~           38.9          17.8
-##  8 Adelie  Torge~           39.2          19.6
-##  9 Adelie  Torge~           34.1          18.1
-## 10 Adelie  Torge~           42            20.2
-## # ... with 334 more rows, and 4 more variables:
-## #   flipper_length_mm <int>, body_mass_g <int>,
-## #   sex <fct>, year <int>
-

看到数据框里有很多缺失值,需要统计每一列缺失值的数量,按照常规的写法

-
penguins %>%
-  summarise(
-    na_in_species = sum(is.na(species)),
-    na_in_island  = sum(is.na(island)),
-    na_in_length  = sum(is.na(bill_length_mm)),
-    na_in_depth   = sum(is.na(bill_depth_mm)),
-    na_in_flipper = sum(is.na(flipper_length_mm)),
-    na_in_body    = sum(is.na(body_mass_g)),
-    na_in_sex     = sum(is.na(sex)),
-    na_in_year    = sum(is.na(year))
-  )
-
## # A tibble: 1 x 8
-##   na_in_species na_in_island na_in_length na_in_depth
-##           <int>        <int>        <int>       <int>
-## 1             0            0            2           2
-## # ... with 4 more variables: na_in_flipper <int>,
-## #   na_in_body <int>, na_in_sex <int>,
-## #   na_in_year <int>
-

幸亏数据框的列数不够多,只有8列,如果数据框有几百列,那就成体力活了,同时代码复制粘贴也容易出错。想偷懒,我们自然想到用summarise_all()

-
penguins %>%
-  summarise_all(
-    ~ sum(is.na(.))
-  )
-
## # A tibble: 1 x 8
-##   species island bill_length_mm bill_depth_mm
-##     <int>  <int>          <int>         <int>
-## 1       0      0              2             2
-## # ... with 4 more variables: flipper_length_mm <int>,
-## #   body_mass_g <int>, sex <int>, year <int>
-

挺好。接着探索,我们想先按企鹅类型分组,然后统计出各体征数据的均值,这个好说,直接写代码

-
penguins %>%
-  group_by(species) %>%
-  summarise(
-    mean_length   = mean(bill_length_mm, na.rm = TRUE),
-    mean_depth    = mean(bill_depth_mm, na.rm = TRUE),
-    mean_flipper  = mean(flipper_length_mm, na.rm = TRUE),
-    mean_body     = mean(body_mass_g, na.rm = TRUE)
-  )
-
## # A tibble: 3 x 5
-##   species mean_length mean_depth mean_flipper mean_body
-##   <fct>         <dbl>      <dbl>        <dbl>     <dbl>
-## 1 Adelie         38.8       18.3         190.     3701.
-## 2 Chinst~        48.8       18.4         196.     3733.
-## 3 Gentoo         47.5       15.0         217.     5076.
-

或者用summarise_if()偷懒

-
d1 <- penguins %>%
-  group_by(species) %>%
-  summarise_if(is.numeric, mean, na.rm = TRUE)
-d1
-
## # A tibble: 3 x 6
-##   species bill_length_mm bill_depth_mm flipper_length_~
-##   <fct>            <dbl>         <dbl>            <dbl>
-## 1 Adelie            38.8          18.3             190.
-## 2 Chinst~           48.8          18.4             196.
-## 3 Gentoo            47.5          15.0             217.
-## # ... with 2 more variables: body_mass_g <dbl>,
-## #   year <dbl>
-

方法不错,从语义上还算很好理解。 但多了一列year, 我想在summarise_if()中用 is.numeric & !year去掉year,却没成功。人类的欲望是无穷的,我们还需要统计每组下企鹅的个数,然后合并到一起。因此,我们再接再厉

-
d2 <- penguins %>%
-  group_by(species) %>%
-  summarise(
-    n = n()
-  )
-d2
-
## # A tibble: 3 x 2
-##   species       n
-##   <fct>     <int>
-## 1 Adelie      152
-## 2 Chinstrap    68
-## 3 Gentoo      124
-

最后合并

-
d1 %>% left_join(d2, by = "species")
-
## # A tibble: 3 x 7
-##   species bill_length_mm bill_depth_mm flipper_length_~
-##   <fct>            <dbl>         <dbl>            <dbl>
-## 1 Adelie            38.8          18.3             190.
-## 2 Chinst~           48.8          18.4             196.
-## 3 Gentoo            47.5          15.0             217.
-## # ... with 3 more variables: body_mass_g <dbl>,
-## #   year <dbl>, n <int>
-

结果应该没问题,然鹅,总让人感觉怪怪的,过程有点折腾,希望不这么麻烦。

-
-

23.1 across()横空出世

-

across()的出现,让这一切变得简单和清晰,上面三步完成的动作,一步搞定

-

-
penguins %>%
-  group_by(species) %>%
-  summarise(
-    across(where(is.numeric) & !year, mean, na.rm = TRUE),
-    n = n()
-  )
-
## # A tibble: 3 x 6
-##   species bill_length_mm bill_depth_mm flipper_length_~
-##   <fct>            <dbl>         <dbl>            <dbl>
-## 1 Adelie            38.8          18.3             190.
-## 2 Chinst~           48.8          18.4             196.
-## 3 Gentoo            47.5          15.0             217.
-## # ... with 2 more variables: body_mass_g <dbl>,
-## #   n <int>
-

是不是很强大。大爱Hadley Wickham !!!

-
-
-

23.2 across()函数形式

-

across()函数,它有三个主要的参数:

-
across(.cols = , .fns = , .names = )
-
    -
  • 第一个参数.cols = ,选取我们需要的若干列,选取多列的语法与select()的语法一致,选择方法非常丰富和人性化

    -
      -
    • 基本语法 -
        -
      • :,变量在位置上是连续的,可以使用类似 1:3 或者species:island
      • -
      • !,变量名前加!,意思是求这个变量的补集,等价于去掉这个变量,比如!species
      • -
      • &|,两组变量集的交集和并集,比如 is.numeric & !year, 就是选取数值类型变量,但不包括year; 再比如 is.numeric | is.factor就是选取数值型变量和因子型变量
      • -
      • c(),选取变量的组合,比如c(a, b, x)
      • -
    • -
    • 通过人性化的语句 -
        -
      • everything(): 选取所有的变量
      • -
      • last_col(): 选取最后一列,也就是说倒数第一列,也可以last_col(offset = 1L) 就是倒数第二列
      • -
    • -
    • 通过变量名的特征 -
        -
      • starts_with(): 指定一组变量名的前缀,也就是选取具有这一前缀的变量,starts_with("bill_")
      • -
      • ends_with(): 指定一组变量名的后缀,也就是选取具有这一后缀的变量,ends_with("_mm")
      • -
      • contains(): 指定变量名含有特定的字符串,也就是选取含有指定字符串的变量,contains("length")
      • -
      • matches(): 同上,字符串可以是正则表达式
      • -
    • -
    • 通过字符串向量 -
        -
      • all_of(): 选取字符串向量对应的变量名,比如all_of(c("species", "sex", "year")),当然前提是,数据框中要有这些变量,否则会报错。
      • -
      • any_of(): 同all_of(),只不过数据框中没有字符串向量对应的变量,也不会报错,比如数据框中没有people这一列,代码any_of(c("species", "sex", "year", "people"))也正常运行,挺人性化的
      • -
    • -
    • 通过函数 -
        -
      • 常见的有数据类型函数 where(is.numeric), where(is.factor), where(is.character), where(lubridate::is.Date)
      • -
    • -
  • -
  • 第二个参数.fns =,我们要执行的函数(或者多个函数),函数的语法有三种形式可选:

    -
      -
    • A function, e.g. mean.
    • -
    • A purrr-style lambda, e.g. ~ mean(.x, na.rm = TRUE)
    • -
    • A list of functions/lambdas, e.g. list(mean = mean, n_miss = ~ sum(is.na(.x)))
    • -
  • -
  • 第三个参数.names =, 如果.fns是单个函数就默认保留原来数据列的名称,即"{.col}" ;如果.fns是多个函数,就在数据列的列名后面跟上函数名,比如"{.col}_{.fn}";当然,我们也可以简单调整列名和函数之间的顺序或者增加一个标识的字符串,比如弄成"{.fn}_{.col}""{.col}_{.fn}_aa"

  • -
-
-
-

23.3 across()应用举例

-

下面通过一些小案例,继续呈现across()函数的功能

-
-

23.3.1 求每一列的缺失值数量

-

就是本章开始的需求

-
penguins %>%
-  summarise(
-    na_in_species = sum(is.na(species)),
-    na_in_island  = sum(is.na(island)),
-    na_in_length  = sum(is.na(bill_length_mm)),
-    na_in_depth   = sum(is.na(bill_depth_mm)),
-    na_in_flipper = sum(is.na(flipper_length_mm)),
-    na_in_body    = sum(is.na(body_mass_g)),
-    na_in_sex     = sum(is.na(sex)),
-    na_in_year    = sum(is.na(year))
-  )
-
# using across()
-penguins %>%
-  summarise(
-    across(everything(), function(x) sum(is.na(x)))
-  )
-
## # A tibble: 1 x 8
-##   species island bill_length_mm bill_depth_mm
-##     <int>  <int>          <int>         <int>
-## 1       0      0              2             2
-## # ... with 4 more variables: flipper_length_mm <int>,
-## #   body_mass_g <int>, sex <int>, year <int>
-
# or
-penguins %>%
-  summarise(
-    across(everything(), ~ sum(is.na(.)))
-  )
-
## # A tibble: 1 x 8
-##   species island bill_length_mm bill_depth_mm
-##     <int>  <int>          <int>         <int>
-## 1       0      0              2             2
-## # ... with 4 more variables: flipper_length_mm <int>,
-## #   body_mass_g <int>, sex <int>, year <int>
-
-
-

23.3.2 每个类型变量下有多少组?

-
penguins %>%
-  summarise(
-    distinct_species = n_distinct(species),
-    distinct_island  = n_distinct(island),
-    distinct_sex     = n_distinct(sex)
-  )
-
## # A tibble: 1 x 3
-##   distinct_species distinct_island distinct_sex
-##              <int>           <int>        <int>
-## 1                3               3            3
-
# using across()
-penguins %>%
-  summarise(
-    across(c(species, island, sex), n_distinct)
-  )
-
## # A tibble: 1 x 3
-##   species island   sex
-##     <int>  <int> <int>
-## 1       3      3     3
-
-
-

23.3.3 多列多个统计函数

-
penguins %>%
-  group_by(species) %>%
-  summarise(
-    length_mean  = mean(bill_length_mm, na.rm = TRUE),
-    length_sd    = sd(bill_length_mm, na.rm = TRUE),
-    depth_mean   = mean(bill_depth_mm, na.rm = TRUE),
-    depth_sd     = sd(bill_depth_mm, na.rm = TRUE),
-    flipper_mean = mean(flipper_length_mm, na.rm = TRUE),
-    flipper_sd   = sd(flipper_length_mm, na.rm = TRUE),
-    n            = n()
-  )
-
## # A tibble: 3 x 8
-##   species length_mean length_sd depth_mean depth_sd
-##   <fct>         <dbl>     <dbl>      <dbl>    <dbl>
-## 1 Adelie         38.8      2.66       18.3    1.22 
-## 2 Chinst~        48.8      3.34       18.4    1.14 
-## 3 Gentoo         47.5      3.08       15.0    0.981
-## # ... with 3 more variables: flipper_mean <dbl>,
-## #   flipper_sd <dbl>, n <int>
-
# using across()
-penguins %>%
-  group_by(species) %>%
-  summarise(
-    across(ends_with("_mm"), list(mean = mean, sd = sd), na.rm = TRUE),
-    n = n()
-  )
-
## # A tibble: 3 x 8
-##   species bill_length_mm_~ bill_length_mm_~
-##   <fct>              <dbl>            <dbl>
-## 1 Adelie              38.8             2.66
-## 2 Chinst~             48.8             3.34
-## 3 Gentoo              47.5             3.08
-## # ... with 5 more variables: bill_depth_mm_mean <dbl>,
-## #   bill_depth_mm_sd <dbl>,
-## #   flipper_length_mm_mean <dbl>,
-## #   flipper_length_mm_sd <dbl>, n <int>
-
-
-

23.3.4 不同分组下数据变量的多个分位数

-

事实上,这里是把across()与summarise()的强大之处结合起来

-
penguins %>%
-  group_by(species, island) %>%
-  summarise(
-    prob    = c(.25, .75),
-    length  = quantile(bill_length_mm, prob, na.rm = TRUE),
-    depth   = quantile(bill_depth_mm, prob, na.rm = TRUE),
-    flipper = quantile(flipper_length_mm, prob, na.rm = TRUE)
-  )
-
## # A tibble: 10 x 6
-## # Groups:   species, island [5]
-##    species   island     prob length depth flipper
-##    <fct>     <fct>     <dbl>  <dbl> <dbl>   <dbl>
-##  1 Adelie    Biscoe     0.25   37.7  17.6    185.
-##  2 Adelie    Biscoe     0.75   40.7  19.0    193 
-##  3 Adelie    Dream      0.25   36.8  17.5    185 
-##  4 Adelie    Dream      0.75   40.4  18.8    193 
-##  5 Adelie    Torgersen  0.25   36.7  17.4    187 
-##  6 Adelie    Torgersen  0.75   41.1  19.2    195 
-##  7 Chinstrap Dream      0.25   46.3  17.5    191 
-##  8 Chinstrap Dream      0.75   51.1  19.4    201 
-##  9 Gentoo    Biscoe     0.25   45.3  14.2    212 
-## 10 Gentoo    Biscoe     0.75   49.6  15.7    221
-
# using across()
-penguins %>%
-  group_by(species, island) %>%
-  summarise(
-    prob = c(.25, .75),
-    across(
-      c(bill_length_mm, bill_depth_mm, flipper_length_mm),
-      ~ quantile(., prob, na.rm = TRUE)
-    )
-  )
-
## # A tibble: 10 x 6
-## # Groups:   species, island [5]
-##    species island  prob bill_length_mm bill_depth_mm
-##    <fct>   <fct>  <dbl>          <dbl>         <dbl>
-##  1 Adelie  Biscoe  0.25           37.7          17.6
-##  2 Adelie  Biscoe  0.75           40.7          19.0
-##  3 Adelie  Dream   0.25           36.8          17.5
-##  4 Adelie  Dream   0.75           40.4          18.8
-##  5 Adelie  Torge~  0.25           36.7          17.4
-##  6 Adelie  Torge~  0.75           41.1          19.2
-##  7 Chinst~ Dream   0.25           46.3          17.5
-##  8 Chinst~ Dream   0.75           51.1          19.4
-##  9 Gentoo  Biscoe  0.25           45.3          14.2
-## 10 Gentoo  Biscoe  0.75           49.6          15.7
-## # ... with 1 more variable: flipper_length_mm <dbl>
-
# or
-penguins %>%
-  group_by(species, island) %>%
-  summarise(
-    prob = c(.25, .75),
-    across(where(is.numeric) & !year, ~ quantile(., prob, na.rm = TRUE))
-  )
-
## # A tibble: 10 x 7
-## # Groups:   species, island [5]
-##    species island  prob bill_length_mm bill_depth_mm
-##    <fct>   <fct>  <dbl>          <dbl>         <dbl>
-##  1 Adelie  Biscoe 0.375           37.7          17.6
-##  2 Adelie  Biscoe 0.625           40.7          19.0
-##  3 Adelie  Dream  0.375           36.8          17.5
-##  4 Adelie  Dream  0.625           40.4          18.8
-##  5 Adelie  Torge~ 0.375           36.7          17.4
-##  6 Adelie  Torge~ 0.625           41.1          19.2
-##  7 Chinst~ Dream  0.375           46.3          17.5
-##  8 Chinst~ Dream  0.625           51.1          19.4
-##  9 Gentoo  Biscoe 0.375           45.3          14.2
-## 10 Gentoo  Biscoe 0.625           49.6          15.7
-## # ... with 2 more variables: flipper_length_mm <dbl>,
-## #   body_mass_g <dbl>
-
-
-

23.3.5 不同分组下更复杂的统计

-
# using across()
-penguins %>%
-  group_by(species) %>%
-  summarise(
-    n = n(),
-    across(starts_with("bill_"), mean, na.rm = TRUE),
-    Area = mean(bill_length_mm * bill_depth_mm, na.rm = TRUE),
-    across(ends_with("_g"), mean, na.rm = TRUE),
-  )
-
## # A tibble: 3 x 6
-##   species     n bill_length_mm bill_depth_mm  Area
-##   <fct>   <int>          <dbl>         <dbl> <dbl>
-## 1 Adelie    152           38.8          18.3  712.
-## 2 Chinst~    68           48.8          18.4  900.
-## 3 Gentoo    124           47.5          15.0  712.
-## # ... with 1 more variable: body_mass_g <dbl>
-
-
-

23.3.6 数据标准化处理

-
std <- function(x) {
-  (x - mean(x, na.rm = TRUE)) / sd(x, na.rm = TRUE)
-}
-
-# using across()
-penguins %>%
-  summarise(
-    across(where(is.numeric), std),
-    across(where(is.character), as.factor)
-  )
-
## # A tibble: 344 x 5
-##    bill_length_mm bill_depth_mm flipper_length_~
-##             <dbl>         <dbl>            <dbl>
-##  1         -0.883         0.784           -1.42 
-##  2         -0.810         0.126           -1.06 
-##  3         -0.663         0.430           -0.421
-##  4         NA            NA               NA    
-##  5         -1.32          1.09            -0.563
-##  6         -0.847         1.75            -0.776
-##  7         -0.920         0.329           -1.42 
-##  8         -0.865         1.24            -0.421
-##  9         -1.80          0.480           -0.563
-## 10         -0.352         1.54            -0.776
-## # ... with 334 more rows, and 2 more variables:
-## #   body_mass_g <dbl>, year <dbl>
-
# using across() and purrr style
-penguins %>%
-  drop_na() %>% 
-  summarise(
-    across(starts_with("bill_"), ~ (.x - mean(.x)) / sd(.x))
-  )
-
## # A tibble: 333 x 2
-##    bill_length_mm bill_depth_mm
-##             <dbl>         <dbl>
-##  1         -0.895         0.780
-##  2         -0.822         0.119
-##  3         -0.675         0.424
-##  4         -1.33          1.08 
-##  5         -0.858         1.74 
-##  6         -0.931         0.323
-##  7         -0.876         1.24 
-##  8         -0.529         0.221
-##  9         -0.986         2.05 
-## 10         -1.72          2.00 
-## # ... with 323 more rows
-
-
-

23.3.7 数据对数化处理

-
# using across()
-penguins %>%
-  drop_na() %>%
-  mutate(
-    across(where(is.numeric), log),
-    across(where(is.character), as.factor)
-  )
-
## # A tibble: 333 x 8
-##    species island bill_length_mm bill_depth_mm
-##    <fct>   <fct>           <dbl>         <dbl>
-##  1 Adelie  Torge~           3.67          2.93
-##  2 Adelie  Torge~           3.68          2.86
-##  3 Adelie  Torge~           3.70          2.89
-##  4 Adelie  Torge~           3.60          2.96
-##  5 Adelie  Torge~           3.67          3.03
-##  6 Adelie  Torge~           3.66          2.88
-##  7 Adelie  Torge~           3.67          2.98
-##  8 Adelie  Torge~           3.72          2.87
-##  9 Adelie  Torge~           3.65          3.05
-## 10 Adelie  Torge~           3.54          3.05
-## # ... with 323 more rows, and 4 more variables:
-## #   flipper_length_mm <dbl>, body_mass_g <dbl>,
-## #   sex <fct>, year <dbl>
-
# using across()
-penguins %>%
-  drop_na() %>%
-  mutate(
-    across(where(is.numeric), .fns = list(log = log), .names = "{.fn}_{.col}"),
-    across(where(is.character), as.factor)
-  )
-
## # A tibble: 333 x 13
-##    species island bill_length_mm bill_depth_mm
-##    <fct>   <fct>           <dbl>         <dbl>
-##  1 Adelie  Torge~           39.1          18.7
-##  2 Adelie  Torge~           39.5          17.4
-##  3 Adelie  Torge~           40.3          18  
-##  4 Adelie  Torge~           36.7          19.3
-##  5 Adelie  Torge~           39.3          20.6
-##  6 Adelie  Torge~           38.9          17.8
-##  7 Adelie  Torge~           39.2          19.6
-##  8 Adelie  Torge~           41.1          17.6
-##  9 Adelie  Torge~           38.6          21.2
-## 10 Adelie  Torge~           34.6          21.1
-## # ... with 323 more rows, and 9 more variables:
-## #   flipper_length_mm <int>, body_mass_g <int>,
-## #   sex <fct>, year <int>, log_bill_length_mm <dbl>,
-## #   log_bill_depth_mm <dbl>,
-## #   log_flipper_length_mm <dbl>,
-## #   log_body_mass_g <dbl>, log_year <dbl>
-
-
-

23.3.8 在分组建模中与cur_data()配合使用

-
penguins %>%
-  group_by(species) %>%
-  summarise(
-    broom::tidy(lm(bill_length_mm ~ bill_depth_mm, data = cur_data()))
-  )
-
## # A tibble: 6 x 6
-## # Groups:   species [3]
-##   species  term   estimate std.error statistic  p.value
-##   <fct>    <chr>     <dbl>     <dbl>     <dbl>    <dbl>
-## 1 Adelie   (Inte~   23.1       3.03       7.60 3.01e-12
-## 2 Adelie   bill_~    0.857     0.165      5.19 6.67e- 7
-## 3 Chinstr~ (Inte~   13.4       5.06       2.66 9.92e- 3
-## 4 Chinstr~ bill_~    1.92      0.274      7.01 1.53e- 9
-## 5 Gentoo   (Inte~   17.2       3.28       5.25 6.60e- 7
-## 6 Gentoo   bill_~    2.02      0.219      9.24 1.02e-15
-
penguins %>%
-  group_by(species) %>%
-  summarise(
-    broom::tidy(lm(bill_length_mm ~ ., data = cur_data() %>% select(is.numeric)))
-  )
-
## # A tibble: 15 x 6
-## # Groups:   species [3]
-##    species  term   estimate std.error statistic p.value
-##    <fct>    <chr>     <dbl>     <dbl>     <dbl>   <dbl>
-##  1 Adelie   (Inte~ -2.75e+2   5.09e+2    -0.539 5.90e-1
-##  2 Adelie   bill_~  2.70e-1   1.92e-1     1.40  1.63e-1
-##  3 Adelie   flipp~  2.51e-2   3.50e-2     0.717 4.74e-1
-##  4 Adelie   body_~  2.62e-3   5.25e-4     4.98  1.74e-6
-##  5 Adelie   year    1.47e-1   2.55e-1     0.576 5.66e-1
-##  6 Chinstr~ (Inte~ -4.20e+2   8.24e+2    -0.509 6.12e-1
-##  7 Chinstr~ bill_~  1.58e+0   3.76e-1     4.20  8.62e-5
-##  8 Chinstr~ flipp~  1.67e-2   6.82e-2     0.244 8.08e-1
-##  9 Chinstr~ body_~  1.43e-3   1.15e-3     1.24  2.19e-1
-## 10 Chinstr~ year    2.15e-1   4.12e-1     0.520 6.05e-1
-## 11 Gentoo   (Inte~ -6.25e+2   5.10e+2    -1.23  2.23e-1
-## 12 Gentoo   bill_~  5.89e-1   3.15e-1     1.87  6.40e-2
-## 13 Gentoo   flipp~  1.32e-1   4.58e-2     2.89  4.59e-3
-## 14 Gentoo   body_~  2.04e-3   6.07e-4     3.36  1.05e-3
-## 15 Gentoo   year    3.11e-1   2.55e-1     1.22  2.24e-1
-
penguins %>%
-  group_by(species) %>%
-  summarise(
-    broom::tidy(lm(bill_length_mm ~ .,
-                data = cur_data() %>% transmute(across(is.numeric))
-    ))
-  )
-
## # A tibble: 15 x 6
-## # Groups:   species [3]
-##    species  term   estimate std.error statistic p.value
-##    <fct>    <chr>     <dbl>     <dbl>     <dbl>   <dbl>
-##  1 Adelie   (Inte~ -2.75e+2   5.09e+2    -0.539 5.90e-1
-##  2 Adelie   bill_~  2.70e-1   1.92e-1     1.40  1.63e-1
-##  3 Adelie   flipp~  2.51e-2   3.50e-2     0.717 4.74e-1
-##  4 Adelie   body_~  2.62e-3   5.25e-4     4.98  1.74e-6
-##  5 Adelie   year    1.47e-1   2.55e-1     0.576 5.66e-1
-##  6 Chinstr~ (Inte~ -4.20e+2   8.24e+2    -0.509 6.12e-1
-##  7 Chinstr~ bill_~  1.58e+0   3.76e-1     4.20  8.62e-5
-##  8 Chinstr~ flipp~  1.67e-2   6.82e-2     0.244 8.08e-1
-##  9 Chinstr~ body_~  1.43e-3   1.15e-3     1.24  2.19e-1
-## 10 Chinstr~ year    2.15e-1   4.12e-1     0.520 6.05e-1
-## 11 Gentoo   (Inte~ -6.25e+2   5.10e+2    -1.23  2.23e-1
-## 12 Gentoo   bill_~  5.89e-1   3.15e-1     1.87  6.40e-2
-## 13 Gentoo   flipp~  1.32e-1   4.58e-2     2.89  4.59e-3
-## 14 Gentoo   body_~  2.04e-3   6.07e-4     3.36  1.05e-3
-## 15 Gentoo   year    3.11e-1   2.55e-1     1.22  2.24e-1
-
penguins %>%
-  group_by(species) %>%
-  summarise(
-    broom::tidy(lm(bill_length_mm ~ ., data = across(is.numeric)))
-  )
-
## # A tibble: 15 x 6
-## # Groups:   species [3]
-##    species  term   estimate std.error statistic p.value
-##    <fct>    <chr>     <dbl>     <dbl>     <dbl>   <dbl>
-##  1 Adelie   (Inte~ -2.75e+2   5.09e+2    -0.539 5.90e-1
-##  2 Adelie   bill_~  2.70e-1   1.92e-1     1.40  1.63e-1
-##  3 Adelie   flipp~  2.51e-2   3.50e-2     0.717 4.74e-1
-##  4 Adelie   body_~  2.62e-3   5.25e-4     4.98  1.74e-6
-##  5 Adelie   year    1.47e-1   2.55e-1     0.576 5.66e-1
-##  6 Chinstr~ (Inte~ -4.20e+2   8.24e+2    -0.509 6.12e-1
-##  7 Chinstr~ bill_~  1.58e+0   3.76e-1     4.20  8.62e-5
-##  8 Chinstr~ flipp~  1.67e-2   6.82e-2     0.244 8.08e-1
-##  9 Chinstr~ body_~  1.43e-3   1.15e-3     1.24  2.19e-1
-## 10 Chinstr~ year    2.15e-1   4.12e-1     0.520 6.05e-1
-## 11 Gentoo   (Inte~ -6.25e+2   5.10e+2    -1.23  2.23e-1
-## 12 Gentoo   bill_~  5.89e-1   3.15e-1     1.87  6.40e-2
-## 13 Gentoo   flipp~  1.32e-1   4.58e-2     2.89  4.59e-3
-## 14 Gentoo   body_~  2.04e-3   6.07e-4     3.36  1.05e-3
-## 15 Gentoo   year    3.11e-1   2.55e-1     1.22  2.24e-1
-
-
-

23.3.9cur_column()配合使用

-
# 每一列乘以各自的系数
-df   <- tibble(x = 1:3, y = 3:5, z = 5:7)
-mult <- list(x = 1, y = 10, z = 100)
-
-df %>% 
-  mutate(across(all_of(names(mult)), ~ .x * mult[[cur_column()]]))
-
## # A tibble: 3 x 3
-##       x     y     z
-##   <dbl> <dbl> <dbl>
-## 1     1    30   500
-## 2     2    40   600
-## 3     3    50   700
-
# 每一列乘以各自的权重
-df      <- tibble(x = 1:3, y = 3:5, z = 5:7)
-weights <- list(x = 0.2, y = 0.3, z = 0.5)
-
-df %>%
-  mutate(
-    across(all_of(names(weights)),
-           list(wt = ~ .x * weights[[cur_column()]]),
-          .names = "{col}.{fn}"
-    )
-  )
-
## # A tibble: 3 x 6
-##       x     y     z  x.wt  y.wt  z.wt
-##   <int> <int> <int> <dbl> <dbl> <dbl>
-## 1     1     3     5   0.2 0.900   2.5
-## 2     2     4     6   0.4 1.2     3  
-## 3     3     5     7   0.6 1.5     3.5
-
# 每一列有各自的阈值,如果在阈值之上为1,否则为 0
-df      <- tibble(x = 1:3, y = 3:5, z = 5:7)
-cutoffs <- list(x = 2, y = 3, z = 7)
-
-df %>% mutate(
-  across(all_of(names(cutoffs)), ~ if_else(.x > cutoffs[[cur_column()]], 1, 0))
-)
-
## # A tibble: 3 x 3
-##       x     y     z
-##   <dbl> <dbl> <dbl>
-## 1     0     0     0
-## 2     0     1     0
-## 3     1     1     0
-
-
-

23.3.10c_across()配合也挺默契

-

在一行中的占比

-
df <- tibble(x = 1:3, y = 3:5, z = 5:7)
-
-df %>%
-  rowwise() %>%
-  mutate(total = sum(c_across(x:z))) %>%
-  ungroup() %>%
-  mutate(across(x:z, ~ . / total))
-
## # A tibble: 3 x 4
-##       x     y     z total
-##   <dbl> <dbl> <dbl> <int>
-## 1 0.111 0.333 0.556     9
-## 2 0.167 0.333 0.5      12
-## 3 0.2   0.333 0.467    15
-

看一行中哪个最大,最大的变为1,其余的变为0

-
replace_col_max <- function(vec) {
-  if (!is.vector(vec)) {
-    stop("input of replace_col_max must be vector.")
-  }
-
-  if_else(vec == max(vec), 1L, 0L)
-}
-
-
-df %>%
-  rowwise() %>%
-  mutate(
-    new = list(replace_col_max(c_across(everything())))
-  ) %>%
-  unnest_wider(new, names_sep = "_")
-
## # A tibble: 3 x 6
-##       x     y     z new_1 new_2 new_3
-##   <int> <int> <int> <int> <int> <int>
-## 1     1     3     5     0     0     1
-## 2     2     4     6     0     0     1
-## 3     3     5     7     0     0     1
-
-
-
-

23.4 across()总结

-

我们看到了,across()函数在summarise()/mutate()/transmute()/condense()中使用,它能实现以下几个功能:

-
    -
  • 数据框中的多列执行相同操作
  • -
  • 不同性质的操作,有时可以一起写出,不用再left_join()
  • -
-
-across()函数总结图 -

-图 23.1: across()函数总结图 -

-
- -
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/broom.html b/_book/broom.html deleted file mode 100644 index c98adae..0000000 --- a/_book/broom.html +++ /dev/null @@ -1,1581 +0,0 @@ - - - - - - - 第 29 章 模型输出结果的规整 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 29 章 模型输出结果的规整

-
-

29.1 案例

-

还是用第 13 章的gapminder案例

-
library(tidyverse)
-library(gapminder)
-gapminder
-
## # A tibble: 1,704 x 6
-##    country    continent  year lifeExp     pop gdpPercap
-##    <fct>      <fct>     <int>   <dbl>   <int>     <dbl>
-##  1 Afghanist~ Asia       1952    28.8  8.43e6      779.
-##  2 Afghanist~ Asia       1957    30.3  9.24e6      821.
-##  3 Afghanist~ Asia       1962    32.0  1.03e7      853.
-##  4 Afghanist~ Asia       1967    34.0  1.15e7      836.
-##  5 Afghanist~ Asia       1972    36.1  1.31e7      740.
-##  6 Afghanist~ Asia       1977    38.4  1.49e7      786.
-##  7 Afghanist~ Asia       1982    39.9  1.29e7      978.
-##  8 Afghanist~ Asia       1987    40.8  1.39e7      852.
-##  9 Afghanist~ Asia       1992    41.7  1.63e7      649.
-## 10 Afghanist~ Asia       1997    41.8  2.22e7      635.
-## # ... with 1,694 more rows
-
-

29.1.1 可视化探索

-

画个简单的图

-
gapminder %>%
-  ggplot(aes(x = log(gdpPercap), y = lifeExp)) +
-  geom_point(alpha = 0.2)
-

-

我们想用不同的模型拟合log(gdpPercap)lifeExp的关联

-
library(colorspace)
-
-model_colors <- colorspace::qualitative_hcl(4, palette = "dark 2")
-# model_colors <- c("darkorange", "purple", "cyan4")
-
-ggplot(
-  data = gapminder,
-  mapping = aes(x = log(gdpPercap), y = lifeExp)
-) +
-  geom_point(alpha = 0.2) +
-  geom_smooth(
-    method = "lm",
-    aes(color = "OLS", fill = "OLS") # one
-  ) +
-  geom_smooth(
-    method = "lm", formula = y ~ splines::bs(x, df = 3),
-    aes(color = "Cubic Spline", fill = "Cubic Spline") # two
-  ) +
-  geom_smooth(
-    method = "loess",
-    aes(color = "LOESS", fill = "LOESS") # three
-  ) +
-  scale_color_manual(name = "Models", values = model_colors) +
-  scale_fill_manual(name = "Models", values = model_colors) +
-  theme(legend.position = "top")
-

-
-
-

29.1.2 简单模型

-

还是回到我们今天的主题。我们建立一个简单的线性模型

-
out <- lm(
-  formula = lifeExp ~ gdpPercap + pop + continent,
-  data = gapminder
-)
-out
-
## 
-## Call:
-## lm(formula = lifeExp ~ gdpPercap + pop + continent, data = gapminder)
-## 
-## Coefficients:
-##       (Intercept)          gdpPercap  
-##          4.78e+01           4.50e-04  
-##               pop  continentAmericas  
-##          6.57e-09           1.35e+01  
-##     continentAsia    continentEurope  
-##          8.19e+00           1.75e+01  
-##  continentOceania  
-##          1.81e+01
-
str(out)
-
summary(out)
-
## 
-## Call:
-## lm(formula = lifeExp ~ gdpPercap + pop + continent, data = gapminder)
-## 
-## Residuals:
-##    Min     1Q Median     3Q    Max 
-## -49.16  -4.49   0.30   5.11  25.17 
-## 
-## Coefficients:
-##                   Estimate Std. Error t value Pr(>|t|)
-## (Intercept)       4.78e+01   3.40e-01  140.82   <2e-16
-## gdpPercap         4.50e-04   2.35e-05   19.16   <2e-16
-## pop               6.57e-09   1.98e-09    3.33    9e-04
-## continentAmericas 1.35e+01   6.00e-01   22.46   <2e-16
-## continentAsia     8.19e+00   5.71e-01   14.34   <2e-16
-## continentEurope   1.75e+01   6.25e-01   27.97   <2e-16
-## continentOceania  1.81e+01   1.78e+00   10.15   <2e-16
-##                      
-## (Intercept)       ***
-## gdpPercap         ***
-## pop               ***
-## continentAmericas ***
-## continentAsia     ***
-## continentEurope   ***
-## continentOceania  ***
-## ---
-## Signif. codes:  
-## 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-## 
-## Residual standard error: 8.37 on 1697 degrees of freedom
-## Multiple R-squared:  0.582,  Adjusted R-squared:  0.581 
-## F-statistic:  394 on 6 and 1697 DF,  p-value: <2e-16
-模型的输出结果是一个复杂的list,图 29.1给出了out的结构 -
-线性模型结果的示意图 -

-图 29.1: 线性模型结果的示意图 -

-
-

我们发现out对象包含了很多元素,比如系数、残差、模型残差自由度等等,用读取列表的方法可以直接读取

-
out$coefficients
-out$residuals
-out$fitted.values
-

事实上,前面使用的suammary()函数只是选取和打印了out对象的一小部分信息,同时这些信息的结构不适合用dplyr操作和ggplot2画图。

-
-
-
-

29.2 broom

-

为规整模型结果,这里我们推荐用David Robinson 开发的broom宏包。

-
library(broom)
-

broom 宏包将常用的100多种模型的输出结果规整成数据框 -tibble()的格式,在模型比较和可视化中就可以方便使用dplyr函数了。 -broom 提供了三个主要的函数:

-
    -
  • tidy() 提取模型输出结果的主要信息,比如 coefficientst-statistics
  • -
  • glance() 把模型视为一个整体,提取如 F-statisticmodel deviance 或者 r-squared等信息
  • -
  • augment() 模型输出的信息添加到建模用的数据集中,比如fitted valuesresiduals
  • -
-
-

29.2.1 tidy

-
tidy(out)
-
## # A tibble: 7 x 5
-##   term         estimate   std.error statistic   p.value
-##   <chr>           <dbl>       <dbl>     <dbl>     <dbl>
-## 1 (Intercept)   4.78e+1     3.40e-1    141.   0.       
-## 2 gdpPercap     4.50e-4     2.35e-5     19.2  3.24e- 74
-## 3 pop           6.57e-9     1.98e-9      3.33 9.01e-  4
-## 4 continentAm~  1.35e+1     6.00e-1     22.5  5.19e- 98
-## 5 continentAs~  8.19e+0     5.71e-1     14.3  4.06e- 44
-## 6 continentEu~  1.75e+1     6.25e-1     28.0  6.34e-142
-## 7 continentOc~  1.81e+1     1.78e+0     10.1  1.59e- 23
-
out %>%
-  tidy() %>%
-  ggplot(mapping = aes(
-    x = term,
-    y = estimate
-  )) +
-  geom_point() +
-  coord_flip()
-

-

可以很方便的获取系数的置信区间

-
out %>%
-  tidy(conf.int = TRUE)
-
## # A tibble: 7 x 7
-##   term  estimate std.error statistic   p.value conf.low
-##   <chr>    <dbl>     <dbl>     <dbl>     <dbl>    <dbl>
-## 1 (Int~  4.78e+1   3.40e-1    141.   0.         4.71e+1
-## 2 gdpP~  4.50e-4   2.35e-5     19.2  3.24e- 74  4.03e-4
-## 3 pop    6.57e-9   1.98e-9      3.33 9.01e-  4  2.70e-9
-## 4 cont~  1.35e+1   6.00e-1     22.5  5.19e- 98  1.23e+1
-## 5 cont~  8.19e+0   5.71e-1     14.3  4.06e- 44  7.07e+0
-## 6 cont~  1.75e+1   6.25e-1     28.0  6.34e-142  1.62e+1
-## 7 cont~  1.81e+1   1.78e+0     10.1  1.59e- 23  1.46e+1
-## # ... with 1 more variable: conf.high <dbl>
-
out %>%
-  tidy(conf.int = TRUE) %>%
-  filter(!term %in% c("(Intercept)")) %>%
-  ggplot(aes(
-    x = reorder(term, estimate),
-    y = estimate, ymin = conf.low, ymax = conf.high
-  )) +
-  geom_pointrange() +
-  coord_flip() +
-  labs(x = "", y = "OLS Estimate")
-

-
-
-

29.2.2 augment

-

augment()会返回一个数据框,这个数据框是在原始数据框的基础上,增加了模型的拟合值(.fitted), 拟合值的标准误(.se.fit), 残差(.resid)等列。

-
augment(out)
-
## # A tibble: 1,704 x 10
-##    lifeExp gdpPercap    pop continent .fitted .resid
-##      <dbl>     <dbl>  <int> <fct>       <dbl>  <dbl>
-##  1    28.8      779. 8.43e6 Asia         56.4  -27.6
-##  2    30.3      821. 9.24e6 Asia         56.4  -26.1
-##  3    32.0      853. 1.03e7 Asia         56.5  -24.5
-##  4    34.0      836. 1.15e7 Asia         56.5  -22.4
-##  5    36.1      740. 1.31e7 Asia         56.4  -20.3
-##  6    38.4      786. 1.49e7 Asia         56.5  -18.0
-##  7    39.9      978. 1.29e7 Asia         56.5  -16.7
-##  8    40.8      852. 1.39e7 Asia         56.5  -15.7
-##  9    41.7      649. 1.63e7 Asia         56.4  -14.7
-## 10    41.8      635. 2.22e7 Asia         56.4  -14.7
-## # ... with 1,694 more rows, and 4 more variables:
-## #   .std.resid <dbl>, .hat <dbl>, .sigma <dbl>,
-## #   .cooksd <dbl>
-
out %>%
-  augment() %>%
-  ggplot(mapping = aes(x = lifeExp, y = .fitted)) +
-  geom_point()
-

-
-
-

29.2.3 glance

-

glance() 函数也会返回数据框,但这个数据框只有一行,内容实际上是summary()输出结果的最底下一行。

-
glance(out)
-
## # A tibble: 1 x 12
-##   r.squared adj.r.squared sigma statistic   p.value
-##       <dbl>         <dbl> <dbl>     <dbl>     <dbl>
-## 1     0.582         0.581  8.37      394. 3.94e-317
-## # ... with 7 more variables: df <dbl>, logLik <dbl>,
-## #   AIC <dbl>, BIC <dbl>, deviance <dbl>,
-## #   df.residual <int>, nobs <int>
-
-
-
-

29.3 应用

-

broom的三个主要函数在分组统计建模时,格外方便。

-
penguins <-
-  palmerpenguins::penguins %>%
-  drop_na()
-
penguins %>%
-  group_nest(species) %>%
-  mutate(model = purrr::map(data, ~ lm(bill_depth_mm ~ bill_length_mm, data = .))) %>%
-  mutate(glance = purrr::map(model, ~ broom::glance(.))) %>%
-  tidyr::unnest(glance)
-
## # A tibble: 3 x 15
-##   species      data model r.squared adj.r.squared sigma
-##   <fct>   <list<tb> <lis>     <dbl>         <dbl> <dbl>
-## 1 Adelie  [146 x 7] <lm>      0.149         0.143 1.13 
-## 2 Chinst~  [68 x 7] <lm>      0.427         0.418 0.866
-## 3 Gentoo  [119 x 7] <lm>      0.428         0.423 0.749
-## # ... with 9 more variables: statistic <dbl>,
-## #   p.value <dbl>, df <dbl>, logLik <dbl>, AIC <dbl>,
-## #   BIC <dbl>, deviance <dbl>, df.residual <int>,
-## #   nobs <int>
-
fit_ols <- function(df) {
-  lm(body_mass_g ~ bill_depth_mm + bill_length_mm, data = df)
-}
-
-
-out_tidy <- penguins %>%
-  group_nest(species) %>%
-  mutate(model = purrr::map(data, fit_ols)) %>%
-  mutate(tidy = purrr::map(model, ~ broom::tidy(.))) %>%
-  tidyr::unnest(tidy) %>%
-  dplyr::filter(!term %in% "(Intercept)")
-
-out_tidy
-
## # A tibble: 6 x 8
-##   species      data model term  estimate std.error
-##   <fct>   <list<tb> <lis> <chr>    <dbl>     <dbl>
-## 1 Adelie  [146 x 7] <lm>  bill~    164.       25.1
-## 2 Adelie  [146 x 7] <lm>  bill~     64.8      11.5
-## 3 Chinst~  [68 x 7] <lm>  bill~    159.       43.3
-## 4 Chinst~  [68 x 7] <lm>  bill~     23.8      14.7
-## 5 Gentoo  [119 x 7] <lm>  bill~    255.       40.0
-## 6 Gentoo  [119 x 7] <lm>  bill~     54.7      12.7
-## # ... with 2 more variables: statistic <dbl>,
-## #   p.value <dbl>
-
out_tidy %>%
-  ggplot(aes(
-    x = species, y = estimate,
-    ymin = estimate - 2 * std.error,
-    ymax = estimate + 2 * std.error,
-    color = term
-  )) +
-  geom_pointrange(position = position_dodge(width = 0.25)) +
-  theme(legend.position = "top") +
-  labs(x = NULL, y = "Estimate", color = "系数")
-

-
-
-

29.4 练习

-

假定数据是

-
df <- tibble(
-  x = runif(30, 2, 10),
-  y = -2*x + rnorm(30, 0, 5)
-  )
-df
-
## # A tibble: 30 x 2
-##        x      y
-##    <dbl>  <dbl>
-##  1  8.59 -19.3 
-##  2  7.95  -8.92
-##  3  8.06 -16.3 
-##  4  2.82  -3.84
-##  5  2.17  -4.88
-##  6  5.19  -5.14
-##  7  7.94 -16.1 
-##  8  8.01 -28.1 
-##  9  4.10  -3.55
-## 10  9.62 -12.2 
-## # ... with 20 more rows
-

broom::augment()和ggplot2做出类似的残差图

-

- -
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/colwise.html b/_book/colwise.html deleted file mode 100644 index 0a24ca5..0000000 --- a/_book/colwise.html +++ /dev/null @@ -1,2443 +0,0 @@ - - - - - - - 第 22 章 列方向和行方向 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 22 章 列方向和行方向

-

dplyr宏包是数据科学tidyverse集合的核心部件之一,Hadley Wickham大神说将会在5月15日发布dplyr 1.0版本,欢呼。

-

为迎接新时代的到来,我在线上同大家一起分享dplyr 1.0版本新的特点和功能,看看都为我们带来哪些惊喜?

-
-

22.1 体验新版本

-

New dplyr - 8 things to know:

-
    -
  1. Built in tidyselect
  2. -
  3. relocate()
  4. -
  5. Superpowered summarise()
  6. -
  7. colwise using across()
  8. -
  9. cur_data(), cur_group() and cur_column()
  10. -
  11. new rowwise() grammar
  12. -
  13. easy modeling inside dataframes
  14. -
  15. nest_by()
  16. -
-
library(dplyr, warn.conflicts = FALSE)
-library(tidyr)
-
-
-

22.2 简单回顾

-
mutate()
-select()
-filter()
-group_by()
-summarise()
-arrange()
-rename()
-left_join()
-
-
-

22.3 summarise()更强大了

-

在dplyr 1.0之前,summarise()会把统计结果整理成一行一列的数据框,现在可以根据函数返回的结果,可以有多种形式:

-
    -
  • 长度为 1 的向量,比如,min(x), n(), or sum(is.na(y))
  • -
  • 长度为 n 的向量,比如,quantile()
  • -
  • 数据框
  • -
-
df <- tibble(
-  grp = rep(c("a", "b"), each = 5),
-  x = c(rnorm(5, -0.25, 1), rnorm(5, 0, 1.5)),
-  y = c(rnorm(5, 0.25, 1), rnorm(5, 0, 0.5))
-)
-df
-
## # A tibble: 10 x 3
-##    grp        x       y
-##    <chr>  <dbl>   <dbl>
-##  1 a     -0.665 -0.387 
-##  2 a     -0.270 -0.839 
-##  3 a      0.791  0.0371
-##  4 a     -1.38  -0.144 
-##  5 a      0.903  0.148 
-##  6 b      1.55   0.143 
-##  7 b      1.10   0.0986
-##  8 b     -0.400 -1.11  
-##  9 b     -2.47  -0.670 
-## 10 b     -0.374 -0.440
-
df %>%
-  group_by(grp) %>%
-  summarise(rng = mean(x))
-
## # A tibble: 2 x 2
-##   grp      rng
-##   <chr>  <dbl>
-## 1 a     -0.124
-## 2 b     -0.117
-

当统计函数返回多个值的时候,比如range()返回是最小值和最大值,summarise()很贴心地将结果整理成多行,这样符合tidy的格式。

-
df %>%
-  group_by(grp) %>%
-  summarise(rng = range(x))
-
## # A tibble: 4 x 2
-## # Groups:   grp [2]
-##   grp      rng
-##   <chr>  <dbl>
-## 1 a     -1.38 
-## 2 a      0.903
-## 3 b     -2.47 
-## 4 b      1.55
-

类似的还有quantile()函数,也是返回多个值

-
df %>%
-  group_by(grp) %>%
-  summarise(
-    rng = quantile(x, probs = c(0.05, 0.5, 0.95))
-  )
-
## # A tibble: 6 x 2
-## # Groups:   grp [2]
-##   grp      rng
-##   <chr>  <dbl>
-## 1 a     -1.23 
-## 2 a     -0.270
-## 3 a      0.881
-## 4 b     -2.05 
-## 5 b     -0.374
-## 6 b      1.46
-
df %>%
-  group_by(grp) %>%
-  summarise(
-    x = quantile(x, c(0.25, 0.5, 0.75)),
-    q = c(0.25, 0.5, 0.75)
-  )
-
## # A tibble: 6 x 3
-## # Groups:   grp [2]
-##   grp        x     q
-##   <chr>  <dbl> <dbl>
-## 1 a     -0.665  0.25
-## 2 a     -0.270  0.5 
-## 3 a      0.791  0.75
-## 4 b     -0.400  0.25
-## 5 b     -0.374  0.5 
-## 6 b      1.10   0.75
-

summarise()可以输出数据框,比如

-
my_quantile <- function(x, probs) {
-  tibble(x = quantile(x, probs), probs = probs)
-}
-mtcars %>%
-  group_by(cyl) %>%
-  summarise(my_quantile(disp, c(0.25, 0.75)))
-
## # A tibble: 6 x 3
-## # Groups:   cyl [3]
-##     cyl     x probs
-##   <dbl> <dbl> <dbl>
-## 1     4  78.8  0.25
-## 2     4 121.   0.75
-## 3     6 160    0.25
-## 4     6 196.   0.75
-## 5     8 302.   0.25
-## 6     8 390    0.75
-

再比如:

-

dplyr 1.0 之前是需要group_modify()来实现数据框进,数据框出

-
mtcars %>%
-  group_by(cyl) %>%
-  group_modify(
-    ~ broom::tidy(lm(mpg ~ wt, data = .))
-  )
-
## # A tibble: 6 x 6
-## # Groups:   cyl [3]
-##     cyl term      estimate std.error statistic  p.value
-##   <dbl> <chr>        <dbl>     <dbl>     <dbl>    <dbl>
-## 1     4 (Interce~    39.6      4.35       9.10  7.77e-6
-## 2     4 wt           -5.65     1.85      -3.05  1.37e-2
-## 3     6 (Interce~    28.4      4.18       6.79  1.05e-3
-## 4     6 wt           -2.78     1.33      -2.08  9.18e-2
-## 5     8 (Interce~    23.9      3.01       7.94  4.05e-6
-## 6     8 wt           -2.19     0.739     -2.97  1.18e-2
-

dplyr 1.0 之后,有了新的方案

-
mtcars %>%
-  group_by(cyl) %>%
-  summarise(
-    broom::tidy(lm(mpg ~ wt))
-  )
-
## # A tibble: 6 x 6
-## # Groups:   cyl [3]
-##     cyl term      estimate std.error statistic  p.value
-##   <dbl> <chr>        <dbl>     <dbl>     <dbl>    <dbl>
-## 1     4 (Interce~    39.6      4.35       9.10  7.77e-6
-## 2     4 wt           -5.65     1.85      -3.05  1.37e-2
-## 3     6 (Interce~    28.4      4.18       6.79  1.05e-3
-## 4     6 wt           -2.78     1.33      -2.08  9.18e-2
-## 5     8 (Interce~    23.9      3.01       7.94  4.05e-6
-## 6     8 wt           -2.19     0.739     -2.97  1.18e-2
-
-
-

22.4 summarise()后的分组信息是去是留?

-

group_by()summarise()配合使用的时候,summarise()默认会抵消掉最近一次的分组信息,比如下面按照cylvs分组,但summarise()后,就只剩下cyl的分组信息了。

-
mtcars %>%
-  group_by(cyl, vs) %>%
-  summarise(cyl_n = n())
-
## # A tibble: 5 x 3
-## # Groups:   cyl [3]
-##     cyl    vs cyl_n
-##   <dbl> <dbl> <int>
-## 1     4     0     1
-## 2     4     1    10
-## 3     6     0     3
-## 4     6     1     4
-## 5     8     0    14
-
mtcars %>%
-  group_by(cyl, vs) %>%
-  summarise(cyl_n = n()) %>%
-  group_vars()
-
## [1] "cyl"
-

如果想保留vs的分组信息,就需要设置.groups = keep参数

-
mtcars %>%
-  group_by(cyl, vs) %>%
-  summarise(cyl_n = n(), .groups = "keep") %>%
-  group_vars()
-
## [1] "cyl" "vs"
-

当然summarise()可以控制输出的更多形式

-
    -
  • 丢弃所有的分组信息
  • -
-
mtcars %>%
-  group_by(cyl, vs) %>%
-  summarise(cyl_n = n(), .groups = "drop") %>%
-  group_vars()
-
## character(0)
-
    -
  • 变成行方向分组,即,每行是一个分组
  • -
-
mtcars %>%
-  group_by(cyl, vs) %>%
-  summarise(cyl_n = n(), .groups = "rowwise") %>%
-  group_vars()
-
## [1] "cyl" "vs"
-
-
-

22.5 选择某列

-
    -
  • 通过位置索引进行选取
  • -
-
df %>% select(1, 3)
-
## # A tibble: 10 x 2
-##    grp         y
-##    <chr>   <dbl>
-##  1 a     -0.387 
-##  2 a     -0.839 
-##  3 a      0.0371
-##  4 a     -0.144 
-##  5 a      0.148 
-##  6 b      0.143 
-##  7 b      0.0986
-##  8 b     -1.11  
-##  9 b     -0.670 
-## 10 b     -0.440
-
df %>% select(2:3)
-
## # A tibble: 10 x 2
-##         x       y
-##     <dbl>   <dbl>
-##  1 -0.665 -0.387 
-##  2 -0.270 -0.839 
-##  3  0.791  0.0371
-##  4 -1.38  -0.144 
-##  5  0.903  0.148 
-##  6  1.55   0.143 
-##  7  1.10   0.0986
-##  8 -0.400 -1.11  
-##  9 -2.47  -0.670 
-## 10 -0.374 -0.440
-
    -
  • 通过列名
  • -
-
df %>% select(grp, x, y)
-
## # A tibble: 10 x 3
-##    grp        x       y
-##    <chr>  <dbl>   <dbl>
-##  1 a     -0.665 -0.387 
-##  2 a     -0.270 -0.839 
-##  3 a      0.791  0.0371
-##  4 a     -1.38  -0.144 
-##  5 a      0.903  0.148 
-##  6 b      1.55   0.143 
-##  7 b      1.10   0.0986
-##  8 b     -0.400 -1.11  
-##  9 b     -2.47  -0.670 
-## 10 b     -0.374 -0.440
-
df %>% select(x:y)
-
## # A tibble: 10 x 2
-##         x       y
-##     <dbl>   <dbl>
-##  1 -0.665 -0.387 
-##  2 -0.270 -0.839 
-##  3  0.791  0.0371
-##  4 -1.38  -0.144 
-##  5  0.903  0.148 
-##  6  1.55   0.143 
-##  7  1.10   0.0986
-##  8 -0.400 -1.11  
-##  9 -2.47  -0.670 
-## 10 -0.374 -0.440
-
    -
  • 通过函数选取
  • -
-
df %>% select(starts_with("x"))
-
## # A tibble: 10 x 1
-##         x
-##     <dbl>
-##  1 -0.665
-##  2 -0.270
-##  3  0.791
-##  4 -1.38 
-##  5  0.903
-##  6  1.55 
-##  7  1.10 
-##  8 -0.400
-##  9 -2.47 
-## 10 -0.374
-
df %>% select(ends_with("p"))
-
## # A tibble: 10 x 1
-##    grp  
-##    <chr>
-##  1 a    
-##  2 a    
-##  3 a    
-##  4 a    
-##  5 a    
-##  6 b    
-##  7 b    
-##  8 b    
-##  9 b    
-## 10 b
-
df %>% select(contains("x"))
-
## # A tibble: 10 x 1
-##         x
-##     <dbl>
-##  1 -0.665
-##  2 -0.270
-##  3  0.791
-##  4 -1.38 
-##  5  0.903
-##  6  1.55 
-##  7  1.10 
-##  8 -0.400
-##  9 -2.47 
-## 10 -0.374
-
df %>% select(matches("x"))
-
## # A tibble: 10 x 1
-##         x
-##     <dbl>
-##  1 -0.665
-##  2 -0.270
-##  3  0.791
-##  4 -1.38 
-##  5  0.903
-##  6  1.55 
-##  7  1.10 
-##  8 -0.400
-##  9 -2.47 
-## 10 -0.374
-
    -
  • 通过类型
  • -
-
df %>% select(where(is.character))
-
## # A tibble: 10 x 1
-##    grp  
-##    <chr>
-##  1 a    
-##  2 a    
-##  3 a    
-##  4 a    
-##  5 a    
-##  6 b    
-##  7 b    
-##  8 b    
-##  9 b    
-## 10 b
-
df %>% select(where(is.numeric))
-
## # A tibble: 10 x 2
-##         x       y
-##     <dbl>   <dbl>
-##  1 -0.665 -0.387 
-##  2 -0.270 -0.839 
-##  3  0.791  0.0371
-##  4 -1.38  -0.144 
-##  5  0.903  0.148 
-##  6  1.55   0.143 
-##  7  1.10   0.0986
-##  8 -0.400 -1.11  
-##  9 -2.47  -0.670 
-## 10 -0.374 -0.440
-
    -
  • 通过各种组合
  • -
-
df %>% select(!where(is.character))
-
## # A tibble: 10 x 2
-##         x       y
-##     <dbl>   <dbl>
-##  1 -0.665 -0.387 
-##  2 -0.270 -0.839 
-##  3  0.791  0.0371
-##  4 -1.38  -0.144 
-##  5  0.903  0.148 
-##  6  1.55   0.143 
-##  7  1.10   0.0986
-##  8 -0.400 -1.11  
-##  9 -2.47  -0.670 
-## 10 -0.374 -0.440
-
df %>% select(where(is.numeric) & starts_with("x"))
-
## # A tibble: 10 x 1
-##         x
-##     <dbl>
-##  1 -0.665
-##  2 -0.270
-##  3  0.791
-##  4 -1.38 
-##  5  0.903
-##  6  1.55 
-##  7  1.10 
-##  8 -0.400
-##  9 -2.47 
-## 10 -0.374
-
df %>% select(starts_with("g") | ends_with("y"))
-
## # A tibble: 10 x 2
-##    grp         y
-##    <chr>   <dbl>
-##  1 a     -0.387 
-##  2 a     -0.839 
-##  3 a      0.0371
-##  4 a     -0.144 
-##  5 a      0.148 
-##  6 b      0.143 
-##  7 b      0.0986
-##  8 b     -1.11  
-##  9 b     -0.670 
-## 10 b     -0.440
-
# 注意any_of和all_of的区别
-
-vars <- c("x", "y", "z")
-df %>% select(all_of(vars))
-df %>% select(any_of(vars))
-
-
-

22.6 重命名某列

-
df %>% rename(group = grp)
-
## # A tibble: 10 x 3
-##    group      x       y
-##    <chr>  <dbl>   <dbl>
-##  1 a     -0.665 -0.387 
-##  2 a     -0.270 -0.839 
-##  3 a      0.791  0.0371
-##  4 a     -1.38  -0.144 
-##  5 a      0.903  0.148 
-##  6 b      1.55   0.143 
-##  7 b      1.10   0.0986
-##  8 b     -0.400 -1.11  
-##  9 b     -2.47  -0.670 
-## 10 b     -0.374 -0.440
-
df %>% rename_with(toupper)
-
## # A tibble: 10 x 3
-##    GRP        X       Y
-##    <chr>  <dbl>   <dbl>
-##  1 a     -0.665 -0.387 
-##  2 a     -0.270 -0.839 
-##  3 a      0.791  0.0371
-##  4 a     -1.38  -0.144 
-##  5 a      0.903  0.148 
-##  6 b      1.55   0.143 
-##  7 b      1.10   0.0986
-##  8 b     -0.400 -1.11  
-##  9 b     -2.47  -0.670 
-## 10 b     -0.374 -0.440
-
df %>% rename_with(toupper, is.numeric)
-
## # A tibble: 10 x 3
-##    grp        X       Y
-##    <chr>  <dbl>   <dbl>
-##  1 a     -0.665 -0.387 
-##  2 a     -0.270 -0.839 
-##  3 a      0.791  0.0371
-##  4 a     -1.38  -0.144 
-##  5 a      0.903  0.148 
-##  6 b      1.55   0.143 
-##  7 b      1.10   0.0986
-##  8 b     -0.400 -1.11  
-##  9 b     -2.47  -0.670 
-## 10 b     -0.374 -0.440
-
df %>% rename_with(toupper, starts_with("x"))
-
## # A tibble: 10 x 3
-##    grp        X       y
-##    <chr>  <dbl>   <dbl>
-##  1 a     -0.665 -0.387 
-##  2 a     -0.270 -0.839 
-##  3 a      0.791  0.0371
-##  4 a     -1.38  -0.144 
-##  5 a      0.903  0.148 
-##  6 b      1.55   0.143 
-##  7 b      1.10   0.0986
-##  8 b     -0.400 -1.11  
-##  9 b     -2.47  -0.670 
-## 10 b     -0.374 -0.440
-
-
-

22.7 调整列的位置

-

我们前面一章讲过arrange()排序,这是行方向的排序, 比如按照x变量绝对值的大小从高到低排序。

-
df %>% arrange(desc(abs(x)))
-
## # A tibble: 10 x 3
-##    grp        x       y
-##    <chr>  <dbl>   <dbl>
-##  1 b     -2.47  -0.670 
-##  2 b      1.55   0.143 
-##  3 a     -1.38  -0.144 
-##  4 b      1.10   0.0986
-##  5 a      0.903  0.148 
-##  6 a      0.791  0.0371
-##  7 a     -0.665 -0.387 
-##  8 b     -0.400 -1.11  
-##  9 b     -0.374 -0.440 
-## 10 a     -0.270 -0.839
-

我们现在想调整列的位置,比如,这里调整数据框三列的位置,让grp列放在x列的后面

-
df %>% select(x, grp, y)
-
## # A tibble: 10 x 3
-##         x grp         y
-##     <dbl> <chr>   <dbl>
-##  1 -0.665 a     -0.387 
-##  2 -0.270 a     -0.839 
-##  3  0.791 a      0.0371
-##  4 -1.38  a     -0.144 
-##  5  0.903 a      0.148 
-##  6  1.55  b      0.143 
-##  7  1.10  b      0.0986
-##  8 -0.400 b     -1.11  
-##  9 -2.47  b     -0.670 
-## 10 -0.374 b     -0.440
-

如果列变量很多的时候,上面的方法就不太好用,因此推荐大家使用relocate()

-
df %>% relocate(grp, .after = y)
-
## # A tibble: 10 x 3
-##         x       y grp  
-##     <dbl>   <dbl> <chr>
-##  1 -0.665 -0.387  a    
-##  2 -0.270 -0.839  a    
-##  3  0.791  0.0371 a    
-##  4 -1.38  -0.144  a    
-##  5  0.903  0.148  a    
-##  6  1.55   0.143  b    
-##  7  1.10   0.0986 b    
-##  8 -0.400 -1.11   b    
-##  9 -2.47  -0.670  b    
-## 10 -0.374 -0.440  b
-
df %>% relocate(x, .before = grp)
-
## # A tibble: 10 x 3
-##         x grp         y
-##     <dbl> <chr>   <dbl>
-##  1 -0.665 a     -0.387 
-##  2 -0.270 a     -0.839 
-##  3  0.791 a      0.0371
-##  4 -1.38  a     -0.144 
-##  5  0.903 a      0.148 
-##  6  1.55  b      0.143 
-##  7  1.10  b      0.0986
-##  8 -0.400 b     -1.11  
-##  9 -2.47  b     -0.670 
-## 10 -0.374 b     -0.440
-

还有

-
df %>% relocate(grp, .after = last_col())
-
## # A tibble: 10 x 3
-##         x       y grp  
-##     <dbl>   <dbl> <chr>
-##  1 -0.665 -0.387  a    
-##  2 -0.270 -0.839  a    
-##  3  0.791  0.0371 a    
-##  4 -1.38  -0.144  a    
-##  5  0.903  0.148  a    
-##  6  1.55   0.143  b    
-##  7  1.10   0.0986 b    
-##  8 -0.400 -1.11   b    
-##  9 -2.47  -0.670  b    
-## 10 -0.374 -0.440  b
-
-
-

22.8 强大的across函数

-

我们必须为这个函数点赞。大爱Hadley Wickham !!!

-

我们经常需要对数据框的多列执行相同的操作。比如

-
iris <- iris %>% as_tibble()
-iris
-
## # A tibble: 150 x 5
-##    Sepal.Length Sepal.Width Petal.Length Petal.Width
-##           <dbl>       <dbl>        <dbl>       <dbl>
-##  1          5.1         3.5          1.4         0.2
-##  2          4.9         3            1.4         0.2
-##  3          4.7         3.2          1.3         0.2
-##  4          4.6         3.1          1.5         0.2
-##  5          5           3.6          1.4         0.2
-##  6          5.4         3.9          1.7         0.4
-##  7          4.6         3.4          1.4         0.3
-##  8          5           3.4          1.5         0.2
-##  9          4.4         2.9          1.4         0.2
-## 10          4.9         3.1          1.5         0.1
-## # ... with 140 more rows, and 1 more variable:
-## #   Species <fct>
-
iris %>%
-  group_by(Species) %>%
-  summarise(
-    mean_Sepal_Length = mean(Sepal.Length),
-    mean_Sepal_Width = mean(Sepal.Width),
-    mean_Petal_Length = mean(Petal.Length),
-    mean_Petal_Width = mean(Petal.Width)
-  )
-
## # A tibble: 3 x 5
-##   Species mean_Sepal_Leng~ mean_Sepal_Width
-##   <fct>              <dbl>            <dbl>
-## 1 setosa              5.01             3.43
-## 2 versic~             5.94             2.77
-## 3 virgin~             6.59             2.97
-## # ... with 2 more variables: mean_Petal_Length <dbl>,
-## #   mean_Petal_Width <dbl>
-

dplyr 1.0之后,使用across()函数异常简练

-
iris %>%
-  group_by(Species) %>%
-  summarise(
-    across(everything(), mean)
-  )
-
## # A tibble: 3 x 5
-##   Species Sepal.Length Sepal.Width Petal.Length
-##   <fct>          <dbl>       <dbl>        <dbl>
-## 1 setosa          5.01        3.43         1.46
-## 2 versic~         5.94        2.77         4.26
-## 3 virgin~         6.59        2.97         5.55
-## # ... with 1 more variable: Petal.Width <dbl>
-

或者更科学的

-
iris %>%
-  group_by(Species) %>%
-  summarise(
-    across(is.numeric, mean)
-  )
-
## # A tibble: 3 x 5
-##   Species Sepal.Length Sepal.Width Petal.Length
-##   <fct>          <dbl>       <dbl>        <dbl>
-## 1 setosa          5.01        3.43         1.46
-## 2 versic~         5.94        2.77         4.26
-## 3 virgin~         6.59        2.97         5.55
-## # ... with 1 more variable: Petal.Width <dbl>
-

可以看到,以往是一列一列的处理,现在对多列同时操作,这主要得益于across()函数,它有两个主要的参数:

-
across(.cols = , .fns = )
-
    -
  • 第一个参数.cols,选取我们需要的若干列,选取多列的语法与select()的语法一致
  • -
  • 第二个参数.fns,我们要执行的函数(或者多个函数),函数的语法有三种形式可选: -
      -
    • A function, e.g. mean.
    • -
    • A purrr-style lambda, e.g. ~ mean(.x, na.rm = TRUE)
    • -
    • A list of functions/lambdas, e.g. list(mean = mean, n_miss = ~ sum(is.na(.x)))
    • -
  • -
-

再看看这个案例

-
std <- function(x) {
-  (x - mean(x)) / sd(x)
-}
-
-iris %>%
-  group_by(Species) %>%
-  summarise(
-    across(starts_with("Sepal"), std)
-  )
-
## # A tibble: 150 x 3
-## # Groups:   Species [3]
-##    Species Sepal.Length Sepal.Width
-##    <fct>          <dbl>       <dbl>
-##  1 setosa        0.267       0.190 
-##  2 setosa       -0.301      -1.13  
-##  3 setosa       -0.868      -0.601 
-##  4 setosa       -1.15       -0.865 
-##  5 setosa       -0.0170      0.454 
-##  6 setosa        1.12        1.25  
-##  7 setosa       -1.15       -0.0739
-##  8 setosa       -0.0170     -0.0739
-##  9 setosa       -1.72       -1.39  
-## 10 setosa       -0.301      -0.865 
-## # ... with 140 more rows
-
# purrr style
-iris %>%
-  group_by(Species) %>%
-  summarise(
-    across(starts_with("Sepal"), ~ (.x - mean(.x)) / sd(.x))
-  )
-
## # A tibble: 150 x 3
-## # Groups:   Species [3]
-##    Species Sepal.Length Sepal.Width
-##    <fct>          <dbl>       <dbl>
-##  1 setosa        0.267       0.190 
-##  2 setosa       -0.301      -1.13  
-##  3 setosa       -0.868      -0.601 
-##  4 setosa       -1.15       -0.865 
-##  5 setosa       -0.0170      0.454 
-##  6 setosa        1.12        1.25  
-##  7 setosa       -1.15       -0.0739
-##  8 setosa       -0.0170     -0.0739
-##  9 setosa       -1.72       -1.39  
-## 10 setosa       -0.301      -0.865 
-## # ... with 140 more rows
-
iris %>%
-  group_by(Species) %>%
-  summarise(
-    across(starts_with("Petal"), list(min = min, max = max))
-    # across(starts_with("Petal"), list(min = min, max = max), .names = "{fn}_{col}")
-  )
-
## # A tibble: 3 x 5
-##   Species Petal.Length_min Petal.Length_max
-##   <fct>              <dbl>            <dbl>
-## 1 setosa               1                1.9
-## 2 versic~              3                5.1
-## 3 virgin~              4.5              6.9
-## # ... with 2 more variables: Petal.Width_min <dbl>,
-## #   Petal.Width_max <dbl>
-
iris %>%
-  group_by(Species) %>%
-  summarise(
-    across(starts_with("Sepal"), mean),
-    Area = mean(Petal.Length * Petal.Width),
-    across(c(Petal.Width), min),
-    n = n()
-  )
-
## # A tibble: 3 x 6
-##   Species Sepal.Length Sepal.Width   Area Petal.Width
-##   <fct>          <dbl>       <dbl>  <dbl>       <dbl>
-## 1 setosa          5.01        3.43  0.366         0.1
-## 2 versic~         5.94        2.77  5.72          1  
-## 3 virgin~         6.59        2.97 11.3           1.4
-## # ... with 1 more variable: n <int>
-

除了在summarise()里可以使用外,在其它函数也是可以使用的

-
iris %>% mutate(across(is.numeric, mean))
-
## # A tibble: 150 x 5
-##    Sepal.Length Sepal.Width Petal.Length Petal.Width
-##           <dbl>       <dbl>        <dbl>       <dbl>
-##  1         5.84        3.06         3.76        1.20
-##  2         5.84        3.06         3.76        1.20
-##  3         5.84        3.06         3.76        1.20
-##  4         5.84        3.06         3.76        1.20
-##  5         5.84        3.06         3.76        1.20
-##  6         5.84        3.06         3.76        1.20
-##  7         5.84        3.06         3.76        1.20
-##  8         5.84        3.06         3.76        1.20
-##  9         5.84        3.06         3.76        1.20
-## 10         5.84        3.06         3.76        1.20
-## # ... with 140 more rows, and 1 more variable:
-## #   Species <fct>
-
iris %>% mutate(across(starts_with("Sepal"), mean))
-
## # A tibble: 150 x 5
-##    Sepal.Length Sepal.Width Petal.Length Petal.Width
-##           <dbl>       <dbl>        <dbl>       <dbl>
-##  1         5.84        3.06          1.4         0.2
-##  2         5.84        3.06          1.4         0.2
-##  3         5.84        3.06          1.3         0.2
-##  4         5.84        3.06          1.5         0.2
-##  5         5.84        3.06          1.4         0.2
-##  6         5.84        3.06          1.7         0.4
-##  7         5.84        3.06          1.4         0.3
-##  8         5.84        3.06          1.5         0.2
-##  9         5.84        3.06          1.4         0.2
-## 10         5.84        3.06          1.5         0.1
-## # ... with 140 more rows, and 1 more variable:
-## #   Species <fct>
-
iris %>% mutate(across(is.numeric, std)) # std function has been defined before
-
## # A tibble: 150 x 5
-##    Sepal.Length Sepal.Width Petal.Length Petal.Width
-##           <dbl>       <dbl>        <dbl>       <dbl>
-##  1       -0.898      1.02          -1.34       -1.31
-##  2       -1.14      -0.132         -1.34       -1.31
-##  3       -1.38       0.327         -1.39       -1.31
-##  4       -1.50       0.0979        -1.28       -1.31
-##  5       -1.02       1.25          -1.34       -1.31
-##  6       -0.535      1.93          -1.17       -1.05
-##  7       -1.50       0.786         -1.34       -1.18
-##  8       -1.02       0.786         -1.28       -1.31
-##  9       -1.74      -0.361         -1.34       -1.31
-## 10       -1.14       0.0979        -1.28       -1.44
-## # ... with 140 more rows, and 1 more variable:
-## #   Species <fct>
-
iris %>% mutate(
-  across(is.numeric, ~ .x / 2),
-  across(is.factor, stringr::str_to_upper)
-)
-
## # A tibble: 150 x 5
-##    Sepal.Length Sepal.Width Petal.Length Petal.Width
-##           <dbl>       <dbl>        <dbl>       <dbl>
-##  1         2.55        1.75         0.7         0.1 
-##  2         2.45        1.5          0.7         0.1 
-##  3         2.35        1.6          0.65        0.1 
-##  4         2.3         1.55         0.75        0.1 
-##  5         2.5         1.8          0.7         0.1 
-##  6         2.7         1.95         0.85        0.2 
-##  7         2.3         1.7          0.7         0.15
-##  8         2.5         1.7          0.75        0.1 
-##  9         2.2         1.45         0.7         0.1 
-## 10         2.45        1.55         0.75        0.05
-## # ... with 140 more rows, and 1 more variable:
-## #   Species <chr>
-
-
-

22.9 “current” group or “current” variable

-
    -
  • n(), 返回当前分组的行数
  • -
  • cur_data(), 返回当前分组的数据内容(不包含分组变量)
  • -
  • cur_group(), 返回当前分组的分组变量(一行一列的数据框)
  • -
  • cur_column(), 返回当前列的列名(只能在across()内部使用)
  • -
-

这些函数返回当前分组的信息,因此只能在特定函数内部使用,比如summarise()和mutate()

-
df <- tibble(
-  g = sample(rep(letters[1:3], 1:3)),
-  x = runif(6),
-  y = runif(6)
-)
-df
-
## # A tibble: 6 x 3
-##   g          x     y
-##   <chr>  <dbl> <dbl>
-## 1 b     0.603  0.415
-## 2 b     0.443  0.936
-## 3 c     0.0727 0.301
-## 4 c     0.749  0.888
-## 5 c     0.591  0.273
-## 6 a     0.278  0.259
-
df %>%
-  group_by(g) %>%
-  summarise(
-    n = n()
-  )
-
## # A tibble: 3 x 2
-##   g         n
-##   <chr> <int>
-## 1 a         1
-## 2 b         2
-## 3 c         3
-
df %>%
-  group_by(g) %>%
-  summarise(
-    data = list(cur_group())
-  )
-
## # A tibble: 3 x 2
-##   g     data            
-##   <chr> <list>          
-## 1 a     <tibble [1 x 1]>
-## 2 b     <tibble [1 x 1]>
-## 3 c     <tibble [1 x 1]>
-
df %>%
-  group_by(g) %>%
-  summarise(
-    data = list(cur_data())
-  )
-
## # A tibble: 3 x 2
-##   g     data            
-##   <chr> <list>          
-## 1 a     <tibble [1 x 2]>
-## 2 b     <tibble [2 x 2]>
-## 3 c     <tibble [3 x 2]>
-
mtcars %>%
-  group_by(cyl) %>%
-  summarise(
-    broom::tidy(lm(mpg ~ wt, data = cur_data()))
-  )
-
## # A tibble: 6 x 6
-## # Groups:   cyl [3]
-##     cyl term      estimate std.error statistic  p.value
-##   <dbl> <chr>        <dbl>     <dbl>     <dbl>    <dbl>
-## 1     4 (Interce~    39.6      4.35       9.10  7.77e-6
-## 2     4 wt           -5.65     1.85      -3.05  1.37e-2
-## 3     6 (Interce~    28.4      4.18       6.79  1.05e-3
-## 4     6 wt           -2.78     1.33      -2.08  9.18e-2
-## 5     8 (Interce~    23.9      3.01       7.94  4.05e-6
-## 6     8 wt           -2.19     0.739     -2.97  1.18e-2
-
df %>%
-  group_by(g) %>%
-  mutate(across(everything(), ~ paste(cur_column(), round(.x, 2))))
-
## # A tibble: 6 x 3
-## # Groups:   g [3]
-##   g     x      y     
-##   <chr> <chr>  <chr> 
-## 1 b     x 0.6  y 0.42
-## 2 b     x 0.44 y 0.94
-## 3 c     x 0.07 y 0.3 
-## 4 c     x 0.75 y 0.89
-## 5 c     x 0.59 y 0.27
-## 6 a     x 0.28 y 0.26
-
wt <- c(x = 0.2, y = 0.8)
-
-df %>%
-  mutate(
-    across(c(x, y), ~ .x * wt[cur_column()])
-  )
-
## # A tibble: 6 x 3
-##   g          x     y
-##   <chr>  <dbl> <dbl>
-## 1 b     0.121  0.332
-## 2 b     0.0885 0.749
-## 3 c     0.0145 0.241
-## 4 c     0.150  0.711
-## 5 c     0.118  0.219
-## 6 a     0.0555 0.207
-
-
-

22.10 行方向操作

-

数据框中向量的方向,事实上可以看做有两个方向,横着看是row-vector,竖着看是col-vector。 -

-

tidyverse遵循的tidy原则,一列表示一个变量,一行表示一次观察。 -这种数据的存储格式,对ggplot2很方便,但对行方向的操作或者运算不太友好。比如

-
-

22.10.1 行方向上的统计

-
df <- tibble(id = letters[1:6], w = 10:15, x = 20:25, y = 30:35, z = 40:45)
-df
-
## # A tibble: 6 x 5
-##   id        w     x     y     z
-##   <chr> <int> <int> <int> <int>
-## 1 a        10    20    30    40
-## 2 b        11    21    31    41
-## 3 c        12    22    32    42
-## 4 d        13    23    33    43
-## 5 e        14    24    34    44
-## 6 f        15    25    35    45
-

计算每行的均值,

-
df %>% mutate(avg = mean(c(w, x, y, z)))
-
## # A tibble: 6 x 6
-##   id        w     x     y     z   avg
-##   <chr> <int> <int> <int> <int> <dbl>
-## 1 a        10    20    30    40  27.5
-## 2 b        11    21    31    41  27.5
-## 3 c        12    22    32    42  27.5
-## 4 d        13    23    33    43  27.5
-## 5 e        14    24    34    44  27.5
-## 6 f        15    25    35    45  27.5
-

好像不对?为什么呢?

-
    -
  • 按照tidy的方法
  • -
-
df %>%
-  pivot_longer(
-    cols = -id,
-    names_to = "variable",
-    values_to = "value"
-  ) %>%
-  group_by(id) %>%
-  summarize(
-    r_mean = mean(value)
-  )
-
## # A tibble: 6 x 2
-##   id    r_mean
-##   <chr>  <dbl>
-## 1 a         25
-## 2 b         26
-## 3 c         27
-## 4 d         28
-## 5 e         29
-## 6 f         30
-

如果保留原始数据,就还需要再left_join()一次,虽然思路清晰,但还是挺周折的。

-
    -
  • 按照Jenny Bryan的方案,使用purrr宏包的pmap_dbl函数
  • -
-
library(purrr)
-df %>%
-  mutate(r_mean = pmap_dbl(select_if(., is.numeric), lift_vd(mean)))
-
## # A tibble: 6 x 6
-##   id        w     x     y     z r_mean
-##   <chr> <int> <int> <int> <int>  <dbl>
-## 1 a        10    20    30    40     25
-## 2 b        11    21    31    41     26
-## 3 c        12    22    32    42     27
-## 4 d        13    23    33    43     28
-## 5 e        14    24    34    44     29
-## 6 f        15    25    35    45     30
-

但需要学习新的语法,代价也很高。

-
    -
  • rowwise()
  • -
-
df %>%
-  rowwise() %>%
-  mutate(avg = mean(c(w, x, y, z)))
-
## # A tibble: 6 x 6
-## # Rowwise: 
-##   id        w     x     y     z   avg
-##   <chr> <int> <int> <int> <int> <dbl>
-## 1 a        10    20    30    40    25
-## 2 b        11    21    31    41    26
-## 3 c        12    22    32    42    27
-## 4 d        13    23    33    43    28
-## 5 e        14    24    34    44    29
-## 6 f        15    25    35    45    30
-

变量名要是很多的话,又变成体力活了,怎么才能变得轻巧一点呢?

-
    -
  • rowwise() + c_across(),现在dplyr 1.0终于给出了一个很好的解决方案
  • -
-
df %>%
-  rowwise() %>%
-  mutate(
-    avg = mean(c_across(w:z))
-  )
-
## # A tibble: 6 x 6
-## # Rowwise: 
-##   id        w     x     y     z   avg
-##   <chr> <int> <int> <int> <int> <dbl>
-## 1 a        10    20    30    40    25
-## 2 b        11    21    31    41    26
-## 3 c        12    22    32    42    27
-## 4 d        13    23    33    43    28
-## 5 e        14    24    34    44    29
-## 6 f        15    25    35    45    30
-

这个很好的解决方案中,rowwise()工作原理类似于group_by(),是按每一行进行分组,然后按行(行方向)统计

-
df %>%
-  rowwise(id) %>%
-  mutate(total = mean(c_across(w:z)))
-
## # A tibble: 6 x 6
-## # Rowwise:  id
-##   id        w     x     y     z total
-##   <chr> <int> <int> <int> <int> <dbl>
-## 1 a        10    20    30    40    25
-## 2 b        11    21    31    41    26
-## 3 c        12    22    32    42    27
-## 4 d        13    23    33    43    28
-## 5 e        14    24    34    44    29
-## 6 f        15    25    35    45    30
-
df %>%
-  rowwise(id) %>%
-  mutate(mean = mean(c_across(is.numeric)))
-
## # A tibble: 6 x 6
-## # Rowwise:  id
-##   id        w     x     y     z  mean
-##   <chr> <int> <int> <int> <int> <dbl>
-## 1 a        10    20    30    40    25
-## 2 b        11    21    31    41    26
-## 3 c        12    22    32    42    27
-## 4 d        13    23    33    43    28
-## 5 e        14    24    34    44    29
-## 6 f        15    25    35    45    30
-
df %>%
-  rowwise(id) %>%
-  summarise(
-    m = mean(c_across(is.numeric))
-  )
-
## # A tibble: 6 x 2
-## # Groups:   id [6]
-##   id        m
-##   <chr> <dbl>
-## 1 a        25
-## 2 b        26
-## 3 c        27
-## 4 d        28
-## 5 e        29
-## 6 f        30
-

因此,我们可以总结成下面这张图

-

-
-
-

22.10.2 行方向处理与列表列是天然一对

-

rowwise()不仅仅用于计算行方向均值这样的简单统计,而是当处理列表列时,方才显示出rowwise()与purrr::map一样的强大。那么,什么是列表列? -列表列指的是数据框的一列是一个列表, 比如

-
tb <- tibble(
-  x = list(1, 2:3, 4:6)
-)
-

如果想显示列表中每个元素的长度,用purrr包,可以这样写

-
tb %>% mutate(l = purrr::map_int(x, length))
-
## # A tibble: 3 x 2
-##   x             l
-##   <list>    <int>
-## 1 <dbl [1]>     1
-## 2 <int [2]>     2
-## 3 <int [3]>     3
-

如果从行方向的角度理解,其实很简练

-
tb %>%
-  rowwise() %>%
-  mutate(l = length(x))
-
## # A tibble: 3 x 2
-## # Rowwise: 
-##   x             l
-##   <list>    <int>
-## 1 <dbl [1]>     1
-## 2 <int [2]>     2
-## 3 <int [3]>     3
-
-
-

22.10.3 行方向上的建模

-
mtcars <- mtcars %>% as_tibble()
-mtcars
-
## # A tibble: 32 x 11
-##      mpg   cyl  disp    hp  drat    wt  qsec    vs
-##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
-##  1  21       6  160    110  3.9   2.62  16.5     0
-##  2  21       6  160    110  3.9   2.88  17.0     0
-##  3  22.8     4  108     93  3.85  2.32  18.6     1
-##  4  21.4     6  258    110  3.08  3.22  19.4     1
-##  5  18.7     8  360    175  3.15  3.44  17.0     0
-##  6  18.1     6  225    105  2.76  3.46  20.2     1
-##  7  14.3     8  360    245  3.21  3.57  15.8     0
-##  8  24.4     4  147.    62  3.69  3.19  20       1
-##  9  22.8     4  141.    95  3.92  3.15  22.9     1
-## 10  19.2     6  168.   123  3.92  3.44  18.3     1
-## # ... with 22 more rows, and 3 more variables:
-## #   am <dbl>, gear <dbl>, carb <dbl>
-

以cyl分组,计算每组中mpg ~ wt的线性模型的系数.

-
mtcars %>%
-  group_by(cyl) %>%
-  nest()
-
## # A tibble: 3 x 2
-## # Groups:   cyl [3]
-##     cyl data              
-##   <dbl> <list>            
-## 1     6 <tibble [7 x 10]> 
-## 2     4 <tibble [11 x 10]>
-## 3     8 <tibble [14 x 10]>
-
-

22.10.3.1 列方向的做法

-

分组建模后,形成列表列,此时列表中的每个元素对应一个模型,我们需要依次提取每个模型的系数,列方向的做法是,借用purrr::map完成列表中每个模型的迭代,

-
mtcars %>%
-  group_by(cyl) %>%
-  nest() %>%
-  mutate(model = purrr::map(data, ~ lm(mpg ~ wt, data = .))) %>%
-  mutate(result = purrr::map(model, ~ broom::tidy(.))) %>%
-  unnest(result)
-
## # A tibble: 6 x 8
-## # Groups:   cyl [3]
-##     cyl data  model term  estimate std.error statistic
-##   <dbl> <lis> <lis> <chr>    <dbl>     <dbl>     <dbl>
-## 1     6 <tib~ <lm>  (Int~    28.4      4.18       6.79
-## 2     6 <tib~ <lm>  wt       -2.78     1.33      -2.08
-## 3     4 <tib~ <lm>  (Int~    39.6      4.35       9.10
-## 4     4 <tib~ <lm>  wt       -5.65     1.85      -3.05
-## 5     8 <tib~ <lm>  (Int~    23.9      3.01       7.94
-## 6     8 <tib~ <lm>  wt       -2.19     0.739     -2.97
-## # ... with 1 more variable: p.value <dbl>
-

purrr::map实现列表元素一个一个的依次迭代,从数据框的角度来看(数据框是列表的一种特殊形式),因此实质上就是一行一行的处理。所以,尽管purrr很强大,但需要一定学习成本,从解决问题的路径上也比较周折。

-
-
-

22.10.3.2 行方向的做法

-

事实上,分组建模后,形成列表列,这种存储格式,天然地符合行处理的范式,因此一开始就使用行方向分组(这里nest_by() 类似于 group_by())

-
mtcars %>%
-  nest_by(cyl) %>%
-  mutate(model = list(lm(mpg ~ wt, data = data))) %>%
-  summarise(broom::tidy(model))
-
## # A tibble: 6 x 6
-## # Groups:   cyl [3]
-##     cyl term      estimate std.error statistic  p.value
-##   <dbl> <chr>        <dbl>     <dbl>     <dbl>    <dbl>
-## 1     4 (Interce~    39.6      4.35       9.10  7.77e-6
-## 2     4 wt           -5.65     1.85      -3.05  1.37e-2
-## 3     6 (Interce~    28.4      4.18       6.79  1.05e-3
-## 4     6 wt           -2.78     1.33      -2.08  9.18e-2
-## 5     8 (Interce~    23.9      3.01       7.94  4.05e-6
-## 6     8 wt           -2.19     0.739     -2.97  1.18e-2
-
# or
-mtcars %>%
-  nest_by(cyl) %>%
-  summarise(
-    broom::tidy(lm(mpg ~ wt, data = data))
-  )
-
## # A tibble: 6 x 6
-## # Groups:   cyl [3]
-##     cyl term      estimate std.error statistic  p.value
-##   <dbl> <chr>        <dbl>     <dbl>     <dbl>    <dbl>
-## 1     4 (Interce~    39.6      4.35       9.10  7.77e-6
-## 2     4 wt           -5.65     1.85      -3.05  1.37e-2
-## 3     6 (Interce~    28.4      4.18       6.79  1.05e-3
-## 4     6 wt           -2.78     1.33      -2.08  9.18e-2
-## 5     8 (Interce~    23.9      3.01       7.94  4.05e-6
-## 6     8 wt           -2.19     0.739     -2.97  1.18e-2
-

至此,在tidyverse框架下,实现分组统计中的数据框输入,数据框输出, 现在有四种方法了

-
mtcars %>%
-  group_nest(cyl) %>%
-  mutate(model = purrr::map(data, ~ lm(mpg ~ wt, data = .))) %>%
-  mutate(result = purrr::map(model, ~ broom::tidy(.))) %>%
-  tidyr::unnest(result)
-
-
-mtcars %>%
-  group_by(cyl) %>%
-  group_modify(
-    ~ broom::tidy(lm(mpg ~ wt, data = .))
-  )
-
-
-mtcars %>%
-  nest_by(cyl) %>%
-  summarise(
-    broom::tidy(lm(mpg ~ wt, data = data))
-  )
-
-
-mtcars %>%
-  group_by(cyl) %>%
-  summarise(
-    broom::tidy(lm(mpg ~ wt, data = cur_data()))
-  )
-
-# or
-mtcars %>%
-  group_by(cyl) %>%
-  summarise(broom::tidy(lm(mpg ~ wt)))
-
-
-
- -
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/css/psyteachr.css b/_book/css/psyteachr.css deleted file mode 100644 index 12493c4..0000000 --- a/_book/css/psyteachr.css +++ /dev/null @@ -1,208 +0,0 @@ -/* psyTeachR default styles. Do not edit this !! */ -/* (edit style.css for your own book styles) */ - -:root { - --pink: #983E82; /* hsl(315, 42%, 42%) */ - --orange: #E2A458; /* hsl( 33, 70%, 62%) */ - --yellow: #F5DC70; /* hsl( 49, 87%, 70%) */ - --green: #59935B; /* hsl(122, 25%, 46%) */ - --blue: #467AAC; /* hsl(209, 42%, 47%) */ - --purple: #61589C; /* hsl(248, 28%, 48%) */ -} - -.psyteachr_footer { - margin-top: 1em; -} - - -pre { - border: 1px solid grey !important; - border-radius: 5px; - white-space: pre-wrap; - word-wrap: break-word; -} - -.book .book-body .page-wrapper .page-inner section.normal code { - border: 1px solid #DEDEDE; - border: 1px solid hsla(0, 0%, 0%, .1); - border-radius: 5px; - padding: 1px; -} - -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code { - border-color: #333333; - border-color: hsla(0, 0%, 100%, .1); -} - -.book .book-body .page-wrapper .page-inner section.normal pre > code { - border: none; - border-radius: 0; - white-space: pre-wrap; - word-wrap: break-word; -} - -.book .book-body .page-wrapper .page-inner section.normal strong code { - color: black; - padding: 0; - background-color: inherit !important; - font-size: 105%; - border: none; -} - -div.color-theme-1 .book-body .page-wrapper .page-inner section.normal strong code { - color: hsl(30, 70%, 16%) !important; -} - -div.color-theme-2 .book-body .page-wrapper .page-inner section.normal strong code { - color: hsl(200, 29%, 85%) !important; -} - -.logo { - height: 150px; - width: 150px; - float: right; -} - -.join { - height: 150px; - width: 200px; - float: right; - clear: right; -} - -/* set styles for alert boxes */ - -.danger, .warning, .info , .alert, .try { - padding: 1em 1em 0.25em 3.5em; - margin-bottom: 10px; - background-color: hsla(0, 
0%, 100%, .1); - background-position: 0.5em 0.5em; - background-size: 2.5em; - background-repeat: no-repeat; - border-radius: 0.5em; -} - -.danger { - background-image: url("../images/alerts/danger.svg"); - background-color: hsla(315, 42%, 42%, .10); - border: 1px solid var(--pink); -} - -.warning { - background-image: url("../images/alerts/warning.svg"); - background-color: hsla(49, 87%, 70%, .1); - border: 1px solid var(--yellow); -} - -.info { - background-image: url("../images/alerts/info.svg"); - background-color: hsla(209, 42%, 47%, .1); - border: 1px solid var(--blue); -} - -.try { - background-image: url("../images/alerts/rainbow.svg"); - background-color: hsla(248, 28%, 48%, .1); - border: 1px solid var(--purple); - background-position: 0.5em -.25em; -} - - -/* webex style */ - -/* styles for solveme */ -.solveme { border: 2px dotted var(--pink); } -.solveme.correct { border: 2px solid var(--green); } -/* styles for hidden solutions */ -.solution { - height: 2.2em; - overflow-y: hidden; - padding: 0.5em; -} -.solution.open { - height: auto; - background-color: rgba(0, 0, 0, 0.1); - border-radius: 5px; -} -.solution button { - height: 1.5em; - margin-bottom: 0.5em; -} - -/* verbatim code chunks with ```{r ...} at top */ - -div.verbatim { - border: 1px solid grey; - padding: 0.25em 1em 0 1em; - border-radius: 5px; - margin-bottom: 1em; - font-family: Consolas,"Liberation Mono",Menlo,Courier,monospace; -} - -div.verbatim code { - border: none !important; - font-size: 100% !important; - display: block; -} - -div.verbatim pre.sourceCode.r { - border: none !important; -} - -div.verbatim { - background-color: rgb(247, 247, 247) !important; -} - -.color-theme-1 div.verbatim { - background-color: rgb(253, 246, 227) !important; -} - -.color-theme-2 div.verbatim { - background-color: rgb(45, 49, 67) !important; -} - -div.verbatim * { - background-color: transparent !important; -} - -div.verbatim div.sourceCode { - margin: 0 !important; - padding: 0 !important; -} 
-div.verbatim pre.sourceCode { - margin: 0 !important; - padding: 0 1em !important; -} - - -/* external link icons */ - -a[target="_blank"]::after { - content: url("../images/icons/link_external.svg"); - margin: 0 3px 0 5px; -} - -.color-theme-1 a[target="_blank"]::after { - content: url("../images/icons/link_external1.svg"); -} - - -.color-theme-2 a[target="_blank"]::after { - content: url("../images/icons/link_external2.svg"); -} - -a.glossary::after { - content: none !important; - margin: 0 !important; -} - -a.glossary, a:visited.glossary { - color: var(--purple) !important; -} - - -/* override table striping */ - -.book .book-body .page-wrapper .page-inner section.normal table tr:nth-child(2n) { - background-color: inherit; -} diff --git a/_book/css/style.css b/_book/css/style.css deleted file mode 100644 index 3e43394..0000000 --- a/_book/css/style.css +++ /dev/null @@ -1,23 +0,0 @@ -p.caption { - color: #777; - margin-top: 10px; -} -p code { - white-space: inherit; -} -pre { - word-break: normal; - word-wrap: normal; -} -pre code { - white-space: inherit; -} -p.flushright { - text-align: right; -} -blockquote > p:last-child { - text-align: right; -} -blockquote > p:first-child { - text-align: inherit; -} diff --git a/_book/dot.html b/_book/dot.html deleted file mode 100644 index 77f3e52..0000000 --- a/_book/dot.html +++ /dev/null @@ -1,1522 +0,0 @@ - - - - - - - 第 25 章 tidyverse中的dot | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 25 章 tidyverse中的dot

-

本章介绍tidyverse的语法中经常遇到的.,在不同的场景下,它的含义不同。因此很有必要弄清楚各自的含义。

-
library(tidyverse)
-
-

25.1 每一行的 . 各自代表什么意思呢?

-
read_csv("./data/wages.csv") %>%
-  mutate(letter = str_extract(race, "(?<=h)(.)")) %>%
-  select(., -letter) %>%
-  mutate_at(vars(race), ~ as.factor(.)) %>%
-  mutate_at(vars(sex), ~ if_else(. == "male", 1, 0)) %>%
-  filter_if(~ is.numeric(.), all_vars(. != 0)) %>%
-  split(.$sex) %>%
-  map(~ lm(earn ~ ., data = .)) %>%
-  map_dfr(~ broom::tidy(.), .id = "sex")
-

回答之前,我们先介绍一些相关知识点

-
-
-

25.2 占位符

-

管道符号%>% 主要功能是传递参数。

-
    -
  • y %>% f() is equivalent to f(y)

  • -
  • y %>% f(x, .) is equivalent to f(x, y)

  • -
  • z %>% f(x, y, arg = .) is equivalent to f(x, y, arg = z)

  • -
-

我们经常这样写

-
mtcars %>%
-  select(cyl, disp, hp) %>%
-  head(2)
-
## # A tibble: 2 x 3
-##     cyl  disp    hp
-##   <dbl> <dbl> <dbl>
-## 1     6   160   110
-## 2     6   160   110
-

实际上,这里是有占位符的

-
mtcars %>%
-  select(., cyl, disp, hp) %>%
-  head(., 2)
-
## # A tibble: 2 x 3
-##     cyl  disp    hp
-##   <dbl> <dbl> <dbl>
-## 1     6   160   110
-## 2     6   160   110
-
-
-

25.3 Lambda函数

-

.出现在函数.f的位置上, 就是 purrr 风格的Lambda函数~ fun(.)

-
mtcars %>%
-  select_at(vars(contains("ar")), ~ toupper(.)) %>%
-  head(3)
-
## # A tibble: 3 x 2
-##    GEAR  CARB
-##   <dbl> <dbl>
-## 1     4     4
-## 2     4     4
-## 3     4     1
-

有时候程序员会将~toupper(.)简写成 toupper

-
mtcars %>%
-  select_at(vars(contains("ar")), toupper) %>%
-  head(3)
-
## # A tibble: 3 x 2
-##    GEAR  CARB
-##   <dbl> <dbl>
-## 1     4     4
-## 2     4     4
-## 3     4     1
-
-
-

25.4 正则表达式

-
words <- "the fattest cat."
-
words %>% str_replace_all("t.", "-")
-
## [1] "-e fa-es-ca-"
-
words %>% str_replace_all("t\\.", "-")
-
## [1] "the fattest ca-"
-
-
-

25.5 Unary function (只带一个参数的函数)

- -
mean_rm <- . %>% mean(na.rm = T)
-
-c(1, 2, 3, NA) %>% mean_rm()
-
## [1] 2
-

等价于

-
# is equivalent to
-c(1, 2, 3, NA) %>% mean(., na.rm = T)
-
## [1] 2
-
-
-

25.6 more placeholder

-
iris %>% subset(1:nrow(.) %% 30 == 0)
-
## # A tibble: 5 x 5
-##   Sepal.Length Sepal.Width Petal.Length Petal.Width
-##          <dbl>       <dbl>        <dbl>       <dbl>
-## 1          4.7         3.2          1.6         0.2
-## 2          5.2         2.7          3.9         1.4
-## 3          5.5         2.5          4           1.3
-## 4          6           2.2          5           1.5
-## 5          5.9         3            5.1         1.8
-## # ... with 1 more variable: Species <fct>
-
1:10 %>% {
-  c(min(.), max(.))
-}
-
## [1]  1 10
-
-
-

25.7 当mutate遇到map

-

dplyr::mutate遇到purrr::map,情况就复杂很多了。然而,这种情况,tidyverse比比皆是。我就多说几句吧

-
iris %>%
-  head(3) %>%
-  mutate(., r_sum = pmap_dbl(select_if(., is.numeric), sum))
-
## # A tibble: 3 x 6
-##   Sepal.Length Sepal.Width Petal.Length Petal.Width
-##          <dbl>       <dbl>        <dbl>       <dbl>
-## 1          5.1         3.5          1.4         0.2
-## 2          4.9         3            1.4         0.2
-## 3          4.7         3.2          1.3         0.2
-## # ... with 2 more variables: Species <fct>,
-## #   r_sum <dbl>
-

这里mutate()行,有两个., 实际这两个.都是等待iris %>% head(3)传来的data.frame

-
df <- tibble(
-  mean = c(1, 2),
-  sd = c(2, 4)
-)
-df
-
## # A tibble: 2 x 2
-##    mean    sd
-##   <dbl> <dbl>
-## 1     1     2
-## 2     2     4
-
df %>%
-  dplyr::mutate(., rand = map(mean, ~ rnorm(5, .))) %>%
-  tidyr::unnest_wider(rand)
-
## # A tibble: 2 x 7
-##    mean    sd  ...1    ...2  ...3  ...4  ...5
-##   <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl>
-## 1     1     2 0.238 -0.0633 0.651  2.24 0.941
-## 2     2     4 2.63   2.59   1.68   2.00 0.196
-
    -
  • 第一个 ., 是df
  • -
  • 第二个 ., 是df中的mean
  • -
-
df %>%
-  dplyr::mutate(rand = map2(mean, sd, ~ rnorm(5, .x, .y))) %>%
-  tidyr::unnest_wider(rand)
-
## # A tibble: 2 x 7
-##    mean    sd   ...1  ...2  ...3  ...4   ...5
-##   <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>  <dbl>
-## 1     1     2 -0.141 -1.16  1.68  2.26 -0.419
-## 2     2     4 -1.28   1.07 -1.49  6.01 -2.63
-
    -
  • mean传给 .x
  • -
  • sd传给 .y
  • -
-

再来一个变态的。(我们不一定要这样写,但我们尽可能的要明白它的意思。)

-
df <- tribble(
-  ~a, ~b,
-  1, 10,
-  2, 11
-)
-
-
-df %>%
-  dplyr::mutate(., sum = purrr::pmap_dbl(., ~ sum(...)))
-
## # A tibble: 2 x 3
-##       a     b   sum
-##   <dbl> <dbl> <dbl>
-## 1     1    10    11
-## 2     2    11    13
-
-
-

25.8 Dot dot dot

-
commas <- function(...) {
-  stringr::str_c(..., collapse = ", ")
-}
-
-
-commas(letters[1:10])
-
## [1] "a, b, c, d, e, f, g, h, i, j"
-
-
-

25.9 Don’t confuse

- -

注意:有些函数的参数前缀是 .

-
mutate_all(.tbl, .funs, ...)
-
-mutate_if(.tbl, .predicate, .funs, ...)
-
-mutate_at(.tbl, .vars, .funs, ..., .cols = NULL)
-
-select_all(.tbl, .funs = list(), ...)
-
-rename_all(.tbl, .funs = list(), ...)
-
-
-

25.10 小结

-
    -
  • tidyverse中 -
      -
    • 占位符(经常和 %>% 一起使用)
    • -
    • Lambda函数
    • -
    • 一元函数(LHS)
    • -
  • -
  • 其他情形 -
      -
    • 回归公式
    • -
    • 正则表达式
    • -
  • -
  • 注意 -
      -
    • 有些函数参数以 . 前缀(不要混淆喔! )
    • -
  • -
-
-
-

25.11 回答问题

-

现在回答本章开始的问题

-
read_csv("./demo_data/wages.csv") %>%
-  dplyr::mutate(letter = str_extract(race, "(?<=h)(.)")) %>%
-  dplyr::select(., -letter) %>%
-  dplyr::mutate_at(vars(race), ~ as.factor(.)) %>%
-  dplyr::mutate_at(vars(sex), ~ if_else(. == "male", 1, 0)) %>%
-  dplyr::filter_if(~ is.numeric(.), all_vars(. != 0)) %>%
-  split(.$sex) %>%
-  purrr::map(~ lm(earn ~ ., data = .)) %>%
-  purrr::map_dfr(., ~ broom::tidy(.), .id = "sex")
-
## # A tibble: 8 x 6
-##   sex   term     estimate std.error statistic   p.value
-##   <chr> <chr>       <dbl>     <dbl>     <dbl>     <dbl>
-## 1 1     (Interc~ -121846.   37449.    -3.25    1.21e- 3
-## 2 1     height       977.     515.     1.90    5.84e- 2
-## 3 1     sex           NA       NA     NA      NA       
-## 4 1     racehis~     578.    7934.     0.0728  9.42e- 1
-## 5 1     raceoth~   -2035.   11514.    -0.177   8.60e- 1
-## 6 1     racewhi~   12823.    5284.     2.43    1.56e- 2
-## 7 1     ed          5234.     601.     8.71    4.30e-17
-## 8 1     age          406.      95.5    4.25    2.52e- 5
-
    -
  • 第1行:路径中.代表当前位置,如果是..表示上一级目录
  • -
  • 第2行:正则表达式,代表任何字符
  • -
  • 第3行:占位符,等待数据框的传入,也可以简写select(-letter)
  • -
  • 第4行: lambda函数,~ as.factor(.)也可以简写as.factor;~ 和 (.) 要么都写,要么都不写
  • -
  • 第5行:同上,lambda函数
  • -
  • 第6行:第一个.代表lambda函数; 第二个.也是lambda函数,但这里它是all_vars(expr)中expr的一种特有写法,代表所有数值型变量在行方向构成的向量, all_vars(. != 0)函数返回TRUE或FALSE,从而帮助filter()决定是否筛选该行
  • -
  • 第7行:占位符,代表上面传来的数据框
  • -
  • 第8行:回归模型lm中,第一个.代表除因变量earn之外所有的变量,第二个.占位符,留给上面的数据框
  • -
  • 第9行:第一个.是占位符,代表上面传来的list,第二个.是lambda函数,依次对list的元素迭代处理,第三个.是参数名,.id是特有的一个符号。
  • -
- -
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/dplyr.html b/_book/dplyr.html deleted file mode 100644 index 3061ea5..0000000 --- a/_book/dplyr.html +++ /dev/null @@ -1,1590 +0,0 @@ - - - - - - - 第 6 章 数据处理 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 6 章 数据处理

-

本章我们介绍tidyverse里数据处理的神器dplyr宏包。首先,我们加载该宏包

-
library(dplyr)
-

dplyr 定义了数据处理的规范语法,其中主要包含以下九个主要的函数。

-
    -
  • mutate(), select(), filter()
  • -
  • summarise(), group_by(), arrange()
  • -
  • left_join(), right_join() 和 full_join()
  • -
-

-

我们依次介绍

-
-

6.1 mutate()

-

假定我们有一数据框,包含三位学生的英语和数学

-
df <- data.frame(
-      name = c("Alice", "Alice", "Bob", "Bob", "Carol", "Carol"),
-      type = c("english", "math", "english", "math", "english", "math")
-)
-
-df
-
##    name    type
-## 1 Alice english
-## 2 Alice    math
-## 3   Bob english
-## 4   Bob    math
-## 5 Carol english
-## 6 Carol    math
-

这里有他们的最近的考试成绩,我们想增加到数据框里去

-
score2020 <- c(80.2, 90.5, 92.2, 90.8, 82.5, 84.6)
-score2020
-
## [1] 80.2 90.5 92.2 90.8 82.5 84.6
-

使用传统的方法

-
df$newscore <- score2020
-df
-
##    name    type newscore
-## 1 Alice english     80.2
-## 2 Alice    math     90.5
-## 3   Bob english     92.2
-## 4   Bob    math     90.8
-## 5 Carol english     82.5
-## 6 Carol    math     84.6
-

dplyr语法这样写

-
mutate(df, newscore = score2020)
-
##    name    type newscore
-## 1 Alice english     80.2
-## 2 Alice    math     90.5
-## 3   Bob english     92.2
-## 4   Bob    math     90.8
-## 5 Carol english     82.5
-## 6 Carol    math     84.6
-

mutate() 函数

-
mutate(.data = df, newscore = score2020)
-
    -
  • 第一参数是我们要处理的数据框,比如这里的df
  • -
  • 第二个参数是newscore = score2020,等号左边的newscore是我们打算创建一个新列,而取的列名; -等号右边是装着学生成绩的向量(注意,向量 的长度要与数据框的行数相等,比如这里长度都是6)
  • -
-

-
-
-

6.2 管道 %>%

-

这里有必要介绍下管道操作符%>%.

-
c(1:10)
-
##  [1]  1  2  3  4  5  6  7  8  9 10
-
sum(c(1:10))
-
## [1] 55
-

与下面的写法是等价的,

-
c(1:10) %>% sum()
-
## [1] 55
-

这条语句的意思,向量c(1:10) 通过管道操作符 %>% ,传递到函数sum()的第一个参数位置,即sum(c(1:10)), 这个%>%管道操作符还是很形象的,

-

-

当执行多个函数操作的时候,就显得格外方便,代码可读性更强。

-
sqrt(sum(abs(c(-10:10))))
-
## [1] 10.49
-
# sqrt(sum(abs(c(-10:10)))) 
-c(-10:10) %>% abs() %>% sum() %>% sqrt()
-
## [1] 10.49
-

那么,上面增加学生成绩的语句mutate(df, newscore = score2020)就可以使用管道

-

-
# 等价于
-df %>% mutate(newscore = score2020)
-
##    name    type newscore
-## 1 Alice english     80.2
-## 2 Alice    math     90.5
-## 3   Bob english     92.2
-## 4   Bob    math     90.8
-## 5 Carol english     82.5
-## 6 Carol    math     84.6
-

是不是很赞?

-

注意此时df没有变化喔。好比把df 传给了f() 执行了f(df), 但df本身没有变化。 -如果想保留f(df)结果,需要把f(df)赋值给新的对象,当然也可以赋值给df, 即替换.

-
df <- df %>% mutate(newscore = score2020)
-df
-
##    name    type newscore
-## 1 Alice english     80.2
-## 2 Alice    math     90.5
-## 3   Bob english     92.2
-## 4   Bob    math     90.8
-## 5 Carol english     82.5
-## 6 Carol    math     84.6
-
-
-

6.3 select()

-

select()顾名思义选择,就是选择数据框的某一列,或者某几列

-

-

我们还是以学生成绩的数据框为例

-

我们可以选择name列, 结果是只有一列的数据框(仍然是数据框喔)

-
    -
  • 使用传统的方法
  • -
-
df["name"]
-
##    name
-## 1 Alice
-## 2 Alice
-## 3   Bob
-## 4   Bob
-## 5 Carol
-## 6 Carol
-
    -
  • dplyr 的方法
  • -
-
df %>% select(name)
-
##    name
-## 1 Alice
-## 2 Alice
-## 3   Bob
-## 4   Bob
-## 5 Carol
-## 6 Carol
-
    -
  • 如果选取多列,用dplyr 就只是再写一个就行了
  • -
-
df %>% select(name, newscore)
-
##    name newscore
-## 1 Alice     80.2
-## 2 Alice     90.5
-## 3   Bob     92.2
-## 4   Bob     90.8
-## 5 Carol     82.5
-## 6 Carol     84.6
-

如果不想要某列, 可以在变量前面加-, 结果与上面的一样

-
df %>% select(-type)
-
##    name newscore
-## 1 Alice     80.2
-## 2 Alice     90.5
-## 3   Bob     92.2
-## 4   Bob     90.8
-## 5 Carol     82.5
-## 6 Carol     84.6
-
-
-

6.4 filter()

-

select是列方向的选择,我们还可以对数据行方向的选择和筛选,选出符合我们条件的某些行

-

-

比如这里把成绩高于90分的同学筛选出来

-
df %>% filter(newscore >= 90)
-
##    name    type newscore
-## 1 Alice    math     90.5
-## 2   Bob english     92.2
-## 3   Bob    math     90.8
-

也可以限定多个条件进行筛选, 英语成绩高于90分的筛选出来

-
df %>% filter(type == "english", newscore >= 90)
-
##   name    type newscore
-## 1  Bob english     92.2
-
-
-

6.5 summarise()统计

-

-

summarise()主要用于统计,往往与其他函数配合使用,比如计算所有同学考试成绩的均值

-
df %>% summarise( mean_score = mean(newscore))
-
##   mean_score
-## 1       86.8
-

比如,计算所有同学的考试成绩的标准差

-
df %>% summarise( mean_score = sd(newscore))
-
##   mean_score
-## 1      5.015
-

还可以同时完成多个统计

-
df %>% summarise(
-  mean_score = mean(newscore),
-  median_score = median(newscore),
-  n = n(),
-  sum = sum(newscore)
-)
-
##   mean_score median_score n   sum
-## 1       86.8        87.55 6 520.8
-

注意,mutate(), select() 和 filter()是在原数据框的基础上增减, 而summarise()返回的是一个新的数据框。

-
-
-

6.6 group_by()分组

-

-

事实上,summarise()往往配合group_by()一起使用,即,先分组再统计。比如,我们想统计每个学生的平均成绩,那么就需要先按学生name分组,然后求平均

-
df %>% 
-  group_by(name) %>% 
-  summarise( 
-    mean_score = mean(newscore),
-   sd_score = sd(newscore)
-  )
-
## # A tibble: 3 x 3
-##   name  mean_score sd_score
-##   <chr>      <dbl>    <dbl>
-## 1 Alice       85.4    7.28 
-## 2 Bob         91.5    0.990
-## 3 Carol       83.6    1.48
-
-
-

6.7 arrange()排序

-

-

这个很好理解的。比如我们按照考试成绩从低到高排序,然后输出

-
df %>% arrange(newscore)
-
##    name    type newscore
-## 1 Alice english     80.2
-## 2 Carol english     82.5
-## 3 Carol    math     84.6
-## 4 Alice    math     90.5
-## 5   Bob    math     90.8
-## 6   Bob english     92.2
-

如果从高到低排序呢,有两种方法:

-
df %>% arrange(-newscore)
-
##    name    type newscore
-## 1   Bob english     92.2
-## 2   Bob    math     90.8
-## 3 Alice    math     90.5
-## 4 Carol    math     84.6
-## 5 Carol english     82.5
-## 6 Alice english     80.2
-

写成下面这种形式也是降序排列,但可读性更强些

-
df %>% arrange(desc(newscore))
-
##    name    type newscore
-## 1   Bob english     92.2
-## 2   Bob    math     90.8
-## 3 Alice    math     90.5
-## 4 Carol    math     84.6
-## 5 Carol english     82.5
-## 6 Alice english     80.2
-

也可对多个变量先后排序。先按学科排,然后按照成绩从高到低排序

-
df %>% 
-  arrange(type, desc(newscore))
-
##    name    type newscore
-## 1   Bob english     92.2
-## 2 Carol english     82.5
-## 3 Alice english     80.2
-## 4   Bob    math     90.8
-## 5 Alice    math     90.5
-## 6 Carol    math     84.6
-
-
-

6.8 left_join()

-

数据框合并,假定我们已经统计了每个同学的平均成绩,存放在df1

-
df1 <- df %>% 
-  group_by(name) %>% 
-  summarise( mean_score = mean(newscore) )
-
-df1
-
## # A tibble: 3 x 2
-##   name  mean_score
-##   <chr>      <dbl>
-## 1 Alice       85.4
-## 2 Bob         91.5
-## 3 Carol       83.6
-

我们有一个新的数据框df2,包含同学们的年龄信息

-
df2 <- tibble(
-      name = c("Alice", "Bob"),
-      age =  c(12, 13)
-)
-
-df2
-
## # A tibble: 2 x 2
-##   name    age
-##   <chr> <dbl>
-## 1 Alice    12
-## 2 Bob      13
-

可以用 left_join把两个数据框df1和df2合并连接在一起, 两个数据框是通过姓名name连接的,因此需要指定by = "name"

-
left_join(df1, df2, by = "name")
-
## # A tibble: 3 x 3
-##   name  mean_score   age
-##   <chr>      <dbl> <dbl>
-## 1 Alice       85.4    12
-## 2 Bob         91.5    13
-## 3 Carol       83.6    NA
-
# 
-df1 %>% left_join(df2, by = "name")
-
## # A tibble: 3 x 3
-##   name  mean_score   age
-##   <chr>      <dbl> <dbl>
-## 1 Alice       85.4    12
-## 2 Bob         91.5    13
-## 3 Carol       83.6    NA
-

大家注意到最后一行Carol的年龄是NA, 大家想想为什么呢?

-
-
-

6.9 right_join()

-

我们再试试right_join()

-
df1 %>% right_join(df2, by = "name")
-
## # A tibble: 2 x 3
-##   name  mean_score   age
-##   <chr>      <dbl> <dbl>
-## 1 Alice       85.4    12
-## 2 Bob         91.5    13
-

Carol同学的信息没有了? 大家想想又为什么呢?

-

事实上,答案就在函数的名字上,left_join()是左合并,即以左边数据框df1中的学生姓名name为准,在右边数据框df2里,有Alice和Bob的年龄,那么就对应合并过来,没有Carol,就为缺失值NA

-

right_join()是右合并,即以右边数据框df2中的学生姓名name为准,只有Alice和Bob,因此df1只需要把Alice和Bob的信息粘过来。

-

-
-
-

6.10 延伸阅读

- -Download nycflights.Rmd -
    -
  • 统计每位同学成绩高于75分的科目数
  • -
- -
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/eda-ames-houseprice.html b/_book/eda-ames-houseprice.html deleted file mode 100644 index 36a33df..0000000 --- a/_book/eda-ames-houseprice.html +++ /dev/null @@ -1,2583 +0,0 @@ - - - - - - - 第 47 章 探索性数据分析-ames房屋价格 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 47 章 探索性数据分析-ames房屋价格

-
-

47.1 数据故事

-
-这是数据故事的地图 -

-图 47.1: 这是数据故事的地图 -

-
-

这是一份Ames房屋数据,您可以把它想象为房屋中介推出的成都市武侯区、锦江区以及高新区等各区县的房屋信息

-
library(tidyverse)
-ames <- read_csv("./demo_data/ames_houseprice.csv") %>% 
-        janitor::clean_names()
-
-glimpse(ames)
-
## Rows: 1,460
-## Columns: 81
-## $ id              <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9,...
-## $ ms_sub_class    <dbl> 60, 20, 60, 70, 60, 50, 20...
-## $ ms_zoning       <chr> "RL", "RL", "RL", "RL", "R...
-## $ lot_frontage    <dbl> 65, 80, 68, 60, 84, 85, 75...
-## $ lot_area        <dbl> 8450, 9600, 11250, 9550, 1...
-## $ street          <chr> "Pave", "Pave", "Pave", "P...
-## $ alley           <chr> NA, NA, NA, NA, NA, NA, NA...
-## $ lot_shape       <chr> "Reg", "Reg", "IR1", "IR1"...
-## $ land_contour    <chr> "Lvl", "Lvl", "Lvl", "Lvl"...
-## $ utilities       <chr> "AllPub", "AllPub", "AllPu...
-## $ lot_config      <chr> "Inside", "FR2", "Inside",...
-## $ land_slope      <chr> "Gtl", "Gtl", "Gtl", "Gtl"...
-## $ neighborhood    <chr> "CollgCr", "Veenker", "Col...
-## $ condition1      <chr> "Norm", "Feedr", "Norm", "...
-## $ condition2      <chr> "Norm", "Norm", "Norm", "N...
-## $ bldg_type       <chr> "1Fam", "1Fam", "1Fam", "1...
-## $ house_style     <chr> "2Story", "1Story", "2Stor...
-## $ overall_qual    <dbl> 7, 6, 7, 7, 8, 5, 8, 7, 7,...
-## $ overall_cond    <dbl> 5, 8, 5, 5, 5, 5, 5, 6, 5,...
-## $ year_built      <dbl> 2003, 1976, 2001, 1915, 20...
-## $ year_remod_add  <dbl> 2003, 1976, 2002, 1970, 20...
-## $ roof_style      <chr> "Gable", "Gable", "Gable",...
-## $ roof_matl       <chr> "CompShg", "CompShg", "Com...
-## $ exterior1st     <chr> "VinylSd", "MetalSd", "Vin...
-## $ exterior2nd     <chr> "VinylSd", "MetalSd", "Vin...
-## $ mas_vnr_type    <chr> "BrkFace", "None", "BrkFac...
-## $ mas_vnr_area    <dbl> 196, 0, 162, 0, 350, 0, 18...
-## $ exter_qual      <chr> "Gd", "TA", "Gd", "TA", "G...
-## $ exter_cond      <chr> "TA", "TA", "TA", "TA", "T...
-## $ foundation      <chr> "PConc", "CBlock", "PConc"...
-## $ bsmt_qual       <chr> "Gd", "Gd", "Gd", "TA", "G...
-## $ bsmt_cond       <chr> "TA", "TA", "TA", "Gd", "T...
-## $ bsmt_exposure   <chr> "No", "Gd", "Mn", "No", "A...
-## $ bsmt_fin_type1  <chr> "GLQ", "ALQ", "GLQ", "ALQ"...
-## $ bsmt_fin_sf1    <dbl> 706, 978, 486, 216, 655, 7...
-## $ bsmt_fin_type2  <chr> "Unf", "Unf", "Unf", "Unf"...
-## $ bsmt_fin_sf2    <dbl> 0, 0, 0, 0, 0, 0, 0, 32, 0...
-## $ bsmt_unf_sf     <dbl> 150, 284, 434, 540, 490, 6...
-## $ total_bsmt_sf   <dbl> 856, 1262, 920, 756, 1145,...
-## $ heating         <chr> "GasA", "GasA", "GasA", "G...
-## $ heating_qc      <chr> "Ex", "Ex", "Ex", "Gd", "E...
-## $ central_air     <chr> "Y", "Y", "Y", "Y", "Y", "...
-## $ electrical      <chr> "SBrkr", "SBrkr", "SBrkr",...
-## $ x1st_flr_sf     <dbl> 856, 1262, 920, 961, 1145,...
-## $ x2nd_flr_sf     <dbl> 854, 0, 866, 756, 1053, 56...
-## $ low_qual_fin_sf <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,...
-## $ gr_liv_area     <dbl> 1710, 1262, 1786, 1717, 21...
-## $ bsmt_full_bath  <dbl> 1, 0, 1, 1, 1, 1, 1, 1, 0,...
-## $ bsmt_half_bath  <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0,...
-## $ full_bath       <dbl> 2, 2, 2, 1, 2, 1, 2, 2, 2,...
-## $ half_bath       <dbl> 1, 0, 1, 0, 1, 1, 0, 1, 0,...
-## $ bedroom_abv_gr  <dbl> 3, 3, 3, 3, 4, 1, 3, 3, 2,...
-## $ kitchen_abv_gr  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 2,...
-## $ kitchen_qual    <chr> "Gd", "TA", "Gd", "Gd", "G...
-## $ tot_rms_abv_grd <dbl> 8, 6, 6, 7, 9, 5, 7, 7, 8,...
-## $ functional      <chr> "Typ", "Typ", "Typ", "Typ"...
-## $ fireplaces      <dbl> 0, 1, 1, 1, 1, 0, 1, 2, 2,...
-## $ fireplace_qu    <chr> NA, "TA", "TA", "Gd", "TA"...
-## $ garage_type     <chr> "Attchd", "Attchd", "Attch...
-## $ garage_yr_blt   <dbl> 2003, 1976, 2001, 1998, 20...
-## $ garage_finish   <chr> "RFn", "RFn", "RFn", "Unf"...
-## $ garage_cars     <dbl> 2, 2, 2, 3, 3, 2, 2, 2, 2,...
-## $ garage_area     <dbl> 548, 460, 608, 642, 836, 4...
-## $ garage_qual     <chr> "TA", "TA", "TA", "TA", "T...
-## $ garage_cond     <chr> "TA", "TA", "TA", "TA", "T...
-## $ paved_drive     <chr> "Y", "Y", "Y", "Y", "Y", "...
-## $ wood_deck_sf    <dbl> 0, 298, 0, 0, 192, 40, 255...
-## $ open_porch_sf   <dbl> 61, 0, 42, 35, 84, 30, 57,...
-## $ enclosed_porch  <dbl> 0, 0, 0, 272, 0, 0, 0, 228...
-## $ x3ssn_porch     <dbl> 0, 0, 0, 0, 0, 320, 0, 0, ...
-## $ screen_porch    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,...
-## $ pool_area       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,...
-## $ pool_qc         <chr> NA, NA, NA, NA, NA, NA, NA...
-## $ fence           <chr> NA, NA, NA, NA, NA, "MnPrv...
-## $ misc_feature    <chr> NA, NA, NA, NA, NA, "Shed"...
-## $ misc_val        <dbl> 0, 0, 0, 0, 0, 700, 0, 350...
-## $ mo_sold         <dbl> 2, 5, 9, 2, 12, 10, 8, 11,...
-## $ yr_sold         <dbl> 2008, 2007, 2008, 2006, 20...
-## $ sale_type       <chr> "WD", "WD", "WD", "WD", "W...
-## $ sale_condition  <chr> "Normal", "Normal", "Norma...
-## $ sale_price      <dbl> 208500, 181500, 223500, 14...
-

感谢曾倬同学提供的解释说明文档

-
explanation <- readxl::read_excel("./demo_data/ames_houseprice_explanation.xlsx")
-explanation %>% 
-  knitr::kable()

-列名 - -description - -解释 -
-MSSubClass - -Identifies the type of dwelling involved in the sale. - -住宅概况 -
-MSZoning - -Identifies the general zoning classification of the sale. - -建筑性质(农业、商业、高/低密度住宅) -
-LotFrontage - -Linear feet of street connected to property - -建筑离街道的距离 -
-LotArea - -Lot size in square feet - -占地面积 -
-Street - -Type of road access to property - -建筑附近的路面材质 -
-Alley - -Type of alley access to property - -建筑附近小巷的修建材质 -
-LotShape - -General shape of property - -建筑物的形状 -
-LandContour - -Flatness of the property - -地面平坦程度 -
-Utilities - -Type of utilities available - -可用公用设施类型 -
-LotConfig - -Lot configuration - -房屋哪里配置多 -
-LandSlope - -Slope of property - -建筑的斜率 -
-Neighborhood - -Physical locations within Ames city limits - -建筑在Ames城市的位置 -
-Condition1 - -Proximity to various conditions - -建筑附近的交通网络 -
-Condition2 - -Proximity to various conditions (if more than one is present) - -建筑附近的交通网络 -
-BldgType - -Type of dwelling - -住宅类别(联排别墅、独栋别墅…) -
-HouseStyle - -Style of dwelling - -建筑风格 -
-OverallQual - -Rates the overall material and finish of the house - -房屋装饰材质水平 -
-OverallCond - -Rates the overall condition of the house - -房屋整体状况评估 -
-YearBuilt - -Original construction date - -房屋修建日期 -
-YearRemodAdd - -Remodel date (same as construction date if no remodeling or additions) - -房屋改建日期 -
-RoofStyle - -Type of roof - -屋顶类型 -
-RoofMatl - -Roof material - -屋顶材质 -
-Exterior1st - -Exterior covering on house - -建筑外立面材质 -
-Exterior2nd - -Exterior covering on house (if more than one material) - -建筑外立面材质 -
-MasVnrType - -Masonry veneer type - -建筑表层砌体类型 -
-MasVnrArea - -Masonry veneer area in square feet - -每平方英尺的砌体面积 -
-ExterQual - -Evaluates the quality of the material on the exterior - -建筑表层砌体材料质量评估 -
-ExterCond - -Evaluates the present condition of the material on the exterior - -建筑表层砌体材料现状评估 -
-Foundation - -Type of foundation - -建筑基础的类型 -
-BsmtQual - -Evaluates the height of the basement - -地下室高度评估 -
-BsmtCond - -Evaluates the general condition of the basement - -地下室总体状况评估 -
-BsmtExposure - -Refers to walkout or garden level walls - -走廊/花园外墙的评估 -
-BsmtFinType1 - -Rating of basement finished area - -地下室完工区域的等级评价 -
-BsmtFinSF1 - -Type 1 finished square feet - -地下室完工区域的面积 -
-BsmtFinType2 - -Rating of basement finished area (if multiple types) - -其他地下室完工区域的等级评价 -
-BsmtFinSF2 - -Type 2 finished square feet - -其他地下室完工区域的面积 -
-BsmtUnfSF - -Unfinished square feet of basement area - -地下室未完工部分的面积 -
-TotalBsmtSF - -Total square feet of basement area - -地下室总面积 -
-Heating - -Type of heating - -房屋暖气类型(地暖、墙暖….) -
-HeatingQC - -Heating quality and condition - -暖气设施的质量和条件 -
-CentralAir - -Central air conditioning - -是否有中央空调 -
-Electrical - -Electrical system - -电器系统配置标准 -
-1stFlrSF - -First Floor square feet - -一楼面积 -
-2ndFlrSF - -Second floor square feet - -二楼面积 -
-LowQualFinSF - -Low quality finished square feet (all floors) - -所有楼层中低质量施工面积 -
-GrLivArea - -Above grade (ground) living area square feet - -地上居住面积 -
-BsmtFullBath - -Basement full bathrooms - -地下室标准卫生间个数 -
-BsmtHalfBath - -Basement half bathrooms - -地下室简易卫生间个数 -
-FullBath - -Full bathrooms above grade - -地上楼层标准卫生间个数 -
-HalfBath - -Half baths above grade - -地上楼层简易卫生间个数 -
-BedroomAbvGr - -Bedrooms above grade (does NOT include basement bedrooms) - -地上楼层卧室个数 -
-KitchenAbvGr - -Kitchens above grade - -地上楼层厨房个数 -
-KitchenQual - -Kitchen quality - -厨房质量评估 -
-TotRmsAbvGrd - Total rooms above grade (does not include bathrooms) - 地上楼层房间总数(除去卫生间) -
-Functional - -Home functionality (Assume typical unless deductions are warranted) - -房屋功能情况 -
-Fireplaces - -Number of fireplaces - -壁炉个数 -
-FireplaceQu - -Fireplace quality - -壁炉质量 -
-GarageType - -Garage location - -车库位置 -
-GarageYrBlt - -Year garage was built - -车库建成年份 -
-GarageFinish - -Interior finish of the garage - -车库内部装饰情况 -
-GarageCars - -Size of garage in car capacity - -车库容量 -
-GarageArea - -Size of garage in square feet - -车库占地面积 -
-GarageQual - -Garage quality - -车库质量 -
-GarageCond - -Garage condition - -车库条件 -
-PavedDrive - -Paved driveway - -车道施工方式 -
-WoodDeckSF - -Wood deck area in square feet - -木甲板面积 -
-OpenPorchSF - -Open porch area in square feet - -开放式门廊面积 -
-EnclosedPorch - -Enclosed porch area in square feet - -封闭式门廊面积 -
-3SsnPorch - -Three season porch area in square feet - -三季门廊面积 -
-ScreenPorch - -Screen porch area in square feet - -纱窗门廊面积 -
-PoolArea - -Pool area in square feet - -游泳池面积 -
-PoolQC - -Pool quality - -游泳池质量 -
-Fence - -Fence quality - -栅栏质量 -
-MiscFeature - -Miscellaneous feature not covered in other categories - -其他配套设施(网球场、电梯…) -
-MiscVal - -$Value of miscellaneous feature - -其他配套设施的费用 -
-MoSold - -Month Sold (MM) - -销售月份 -
-YrSold - -Year Sold (YYYY) - -销售年份 -
-SaleType - -Type of sale - -支付方式 -
-SaleCondition - -Condition of sale - -房屋出售的情况 -
-
-
-

47.2 探索设想

-
    -
  • 读懂数据描述,比如 -
      -
    • 房屋设施 (bedrooms, garage, fireplace, pool, porch, etc.),
    • -
    • 地理位置 (neighborhood),
    • -
    • 土地信息 (zoning, shape, size, etc.),
    • -
    • 品相等级
    • -
    • 出售价格
    • -
  • -
  • 探索影响房屋价格的因素 -
      -
    • 必要的预处理(缺失值处理、标准化、对数化等等)
    • -
    • 必要的可视化(比如价格分布图等)
    • -
    • 必要的统计(比如各地区房屋价格的均值)
    • -
    • 合理选取若干预测变量,建立多元线性模型,并对模型结果给出解释
    • -
    • 房屋价格与预测变量(房屋大小、在城市的位置、房屋类型、与街道的距离)
    • -
  • -
-
-
-

47.3 变量选取

-
d <- ames %>% 
-  select(sale_price, 
-         lot_frontage,   # 建筑离街道的距离
-         lot_area,       # 占地面积
-         neighborhood,   # 建筑在城市的位置
-         gr_liv_area,    # 地上居住面积
-         bldg_type,      # 住宅类别(联排别墅、独栋别墅...)
-         year_built      # 房屋修建日期
-         )
-d
-
## # A tibble: 1,460 x 7
-##    sale_price lot_frontage lot_area neighborhood
-##         <dbl>        <dbl>    <dbl> <chr>       
-##  1     208500           65     8450 CollgCr     
-##  2     181500           80     9600 Veenker     
-##  3     223500           68    11250 CollgCr     
-##  4     140000           60     9550 Crawfor     
-##  5     250000           84    14260 NoRidge     
-##  6     143000           85    14115 Mitchel     
-##  7     307000           75    10084 Somerst     
-##  8     200000           NA    10382 NWAmes      
-##  9     129900           51     6120 OldTown     
-## 10     118000           50     7420 BrkSide     
-## # ... with 1,450 more rows, and 3 more variables:
-## #   gr_liv_area <dbl>, bldg_type <chr>,
-## #   year_built <dbl>
-
-
-

47.4 缺失值处理

-
d %>% 
-  summarise(
-    across(everything(), function(x) sum(is.na(x)) )
-  )
-
## # A tibble: 1 x 7
-##   sale_price lot_frontage lot_area neighborhood
-##        <int>        <int>    <int>        <int>
-## 1          0          259        0            0
-## # ... with 3 more variables: gr_liv_area <int>,
-## #   bldg_type <int>, year_built <int>
-

找出来看看

-
d %>% 
-  filter_all(
-    any_vars(is.na(.))
-  )
-
## # A tibble: 259 x 7
-##    sale_price lot_frontage lot_area neighborhood
-##         <dbl>        <dbl>    <dbl> <chr>       
-##  1     200000           NA    10382 NWAmes      
-##  2     144000           NA    12968 Sawyer      
-##  3     157000           NA    10920 NAmes       
-##  4     149000           NA    11241 NAmes       
-##  5     154000           NA     8246 Sawyer      
-##  6     149350           NA     8544 Sawyer      
-##  7     144000           NA     9180 SawyerW     
-##  8     130250           NA     9200 CollgCr     
-##  9     177000           NA    13869 Gilbert     
-## 10     219500           NA     9375 CollgCr     
-## # ... with 249 more rows, and 3 more variables:
-## #   gr_liv_area <dbl>, bldg_type <chr>,
-## #   year_built <dbl>
-
library(visdat)
-
-d %>% vis_dat()
-

如果不选择lot_frontage 就不会有缺失值,如何选择,自己抉择

-
d %>% 
-  select(-lot_frontage) %>% 
-  visdat::vis_dat()
-

我个人觉得这个变量很重要,所以还是保留,牺牲一点样本量吧

-
d <- d %>% 
-  drop_na()
-
d %>% visdat::vis_dat()
-
-
-

47.5 预处理

-
    -
  • 标准化
  • -
-
standard <- function(x) {
-  (x - mean(x)) / sd(x)
-}
-
-d %>% 
-  mutate(
-    across(where(is.numeric), standard),
-    across(where(is.character), as.factor)
-  )
-
## # A tibble: 1,201 x 7
-##    sale_price lot_frontage lot_area neighborhood
-##         <dbl>        <dbl>    <dbl> <fct>       
-##  1    0.333       -0.208    -0.190  CollgCr     
-##  2    0.00875      0.410    -0.0444 Veenker     
-##  3    0.512       -0.0844    0.164  CollgCr     
-##  4   -0.489       -0.414    -0.0507 Crawfor     
-##  5    0.830        0.574     0.544  NoRidge     
-##  6   -0.453        0.616     0.525  Mitchel     
-##  7    1.51         0.204     0.0167 Somerst     
-##  8   -0.610       -0.784    -0.484  OldTown     
-##  9   -0.753       -0.826    -0.319  BrkSide     
-## 10   -0.615       -0.00206   0.158  Sawyer      
-## # ... with 1,191 more rows, and 3 more variables:
-## #   gr_liv_area <dbl>, bldg_type <fct>,
-## #   year_built <dbl>
-
    -
  • 对数化
  • -
-
d %>% 
-  mutate(
-    log_sale_price = log(sale_price)
-  )
-
## # A tibble: 1,201 x 8
-##    sale_price lot_frontage lot_area neighborhood
-##         <dbl>        <dbl>    <dbl> <chr>       
-##  1     208500           65     8450 CollgCr     
-##  2     181500           80     9600 Veenker     
-##  3     223500           68    11250 CollgCr     
-##  4     140000           60     9550 Crawfor     
-##  5     250000           84    14260 NoRidge     
-##  6     143000           85    14115 Mitchel     
-##  7     307000           75    10084 Somerst     
-##  8     129900           51     6120 OldTown     
-##  9     118000           50     7420 BrkSide     
-## 10     129500           70    11200 Sawyer      
-## # ... with 1,191 more rows, and 4 more variables:
-## #   gr_liv_area <dbl>, bldg_type <chr>,
-## #   year_built <dbl>, log_sale_price <dbl>
-
d %>% 
-  mutate(
-    across(where(is.numeric), log),
-    across(where(is.character), as.factor)
-  )
-
## # A tibble: 1,201 x 7
-##    sale_price lot_frontage lot_area neighborhood
-##         <dbl>        <dbl>    <dbl> <fct>       
-##  1       12.2         4.17     9.04 CollgCr     
-##  2       12.1         4.38     9.17 Veenker     
-##  3       12.3         4.22     9.33 CollgCr     
-##  4       11.8         4.09     9.16 Crawfor     
-##  5       12.4         4.43     9.57 NoRidge     
-##  6       11.9         4.44     9.55 Mitchel     
-##  7       12.6         4.32     9.22 Somerst     
-##  8       11.8         3.93     8.72 OldTown     
-##  9       11.7         3.91     8.91 BrkSide     
-## 10       11.8         4.25     9.32 Sawyer      
-## # ... with 1,191 more rows, and 3 more variables:
-## #   gr_liv_area <dbl>, bldg_type <fct>,
-## #   year_built <dbl>
-
    -
  • 标准化 vs 对数化
  • -
-

选择哪一种,我们看图说话

-
d %>% 
-  ggplot(aes(x = sale_price)) +
-  geom_density()
-

-
d %>% 
-  ggplot(aes(x = log(sale_price))) +
-  geom_density()
-

-

我们选择对数化,并保存结果

-
d <- d %>% 
-  mutate(
-    across(where(is.numeric), 
-           .fns = list(log = log), 
-           .names = "{.fn}_{.col}"
-           ),
-    across(where(is.character), as.factor)
-  )
-
-
-

47.6 有趣的探索

-
-

47.6.1 各区域的房屋价格均值

-
d %>% count(neighborhood)
-
## # A tibble: 25 x 2
-##    neighborhood     n
-##    <fct>        <int>
-##  1 Blmngtn         14
-##  2 Blueste          2
-##  3 BrDale          16
-##  4 BrkSide         51
-##  5 ClearCr         13
-##  6 CollgCr        126
-##  7 Crawfor         41
-##  8 Edwards         92
-##  9 Gilbert         49
-## 10 IDOTRR          34
-## # ... with 15 more rows
-
d %>% 
-  group_by(neighborhood) %>% 
-  summarise(
-    mean_sale = mean(sale_price)
-  ) %>% 
-  
-  ggplot(
-    aes(x = mean_sale, y = fct_reorder(neighborhood, mean_sale))
-  ) +
-  geom_col(aes(fill = mean_sale < 150000), show.legend = FALSE) +
-  geom_text(aes(label = round(mean_sale, 0)), hjust = 1) +
-  # scale_x_continuous(
-  #   expand = c(0, 0),
-  #   breaks = c(0, 100000, 200000, 300000),
-  #   labels = c(0, "1w", "2w", "3w")    
-  #   ) +
-  scale_x_continuous(
-    expand = c(0, 0),
-    labels = scales::dollar
-  ) +
-  scale_fill_viridis_d(option = "D") + 
-  theme_classic() +
-  labs(x = NULL, y = NULL)
-

-
-
-

47.6.2 房屋价格与占地面积

-
d %>%
-  ggplot(aes(x = log_lot_area, y = log_sale_price)) +
-  geom_point(colour = "blue") +
-  geom_smooth(method = lm, se = FALSE, formula = "y ~ x")
-

-
d %>%
-  ggplot(aes(x = log_lot_area, y = log_sale_price)) +
-  geom_point(aes(colour = neighborhood)) +
-  geom_smooth(method = lm, se = FALSE, formula = "y ~ x")
-

-
d %>%
-  ggplot(aes(x = log_lot_area, y = log_sale_price)) +
-  geom_point(colour = "blue") +
-  geom_smooth(method = lm, se = FALSE, formula = "y ~ x", fullrange = TRUE) +
-  facet_wrap(~neighborhood) +
-  theme(strip.background = element_blank())
-

-
-
-

47.6.3 房屋价格与房屋居住面积

-
d %>%
-  ggplot(aes(x = log_gr_liv_area, y = log_sale_price)) +
-  geom_point(aes(colour = neighborhood)) +
-  geom_smooth(method = lm, se = FALSE, formula = "y ~ x")
-

-
d %>%
-  ggplot(aes(x = log_gr_liv_area, y = log_sale_price)) +
-  geom_point() +
-  geom_smooth(method = lm, se = FALSE, formula = "y ~ x", fullrange = TRUE) +
-  facet_wrap(~neighborhood) +
-  theme(strip.background = element_blank())
-

-
-
-

47.6.4 车库与房屋价格

-

车库大小是否对销售价格有帮助?

-
ames %>% 
-  #select(garage_cars, garage_area, sale_price) %>% 
-  ggplot(aes(x = garage_area, y = sale_price)) +
-  geom_point(
-    data = select(ames, -garage_cars),
-    color = "gray50"
-  ) +
-  geom_point(aes(color = as_factor(garage_cars))) +
-  facet_wrap(vars(garage_cars)) +
-  theme(legend.position = "none") +
-  ggtitle("This is the influence of garage for sale price")
-

-
-
-
-

47.7 建模

-
lm(log_sale_price ~ 1 + log_gr_liv_area + neighborhood, data = d) %>% 
-  broom::tidy()
-
## # A tibble: 26 x 5
-##    term          estimate std.error statistic   p.value
-##    <chr>            <dbl>     <dbl>     <dbl>     <dbl>
-##  1 (Intercept)    7.53       0.154    48.7    2.21e-284
-##  2 log_gr_liv_a~  0.638      0.0200   31.9    3.76e-161
-##  3 neighborhood~ -0.314      0.149    -2.10   3.55e-  2
-##  4 neighborhood~ -0.466      0.0724   -6.43   1.80e- 10
-##  5 neighborhood~ -0.336      0.0597   -5.62   2.44e-  8
-##  6 neighborhood~ -0.103      0.0762   -1.35   1.76e-  1
-##  7 neighborhood~  0.00332    0.0556    0.0597 9.52e-  1
-##  8 neighborhood~ -0.0870     0.0612   -1.42   1.55e-  1
-##  9 neighborhood~ -0.365      0.0567   -6.44   1.79e- 10
-## 10 neighborhood~ -0.0621     0.0599   -1.04   3.00e-  1
-## # ... with 16 more rows
-
library(lme4)
-lmer(log_sale_price ~ 1 + log_gr_liv_area + (log_gr_liv_area | neighborhood), 
-    data = d) %>% 
-   broom.mixed::tidy()
-
## # A tibble: 6 x 6
-##   effect  group  term      estimate std.error statistic
-##   <chr>   <chr>  <chr>        <dbl>     <dbl>     <dbl>
-## 1 fixed   <NA>   (Interce~    6.88     0.334       20.6
-## 2 fixed   <NA>   log_gr_l~    0.705    0.0493      14.3
-## 3 ran_pa~ neigh~ sd__(Int~    1.34    NA           NA  
-## 4 ran_pa~ neigh~ cor__(In~   -0.993   NA           NA  
-## 5 ran_pa~ neigh~ sd__log_~    0.205   NA           NA  
-## 6 ran_pa~ Resid~ sd__Obse~    0.191   NA           NA
- -
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/eda-anscombe.html b/_book/eda-anscombe.html deleted file mode 100644 index dba7dcc..0000000 --- a/_book/eda-anscombe.html +++ /dev/null @@ -1,1570 +0,0 @@ - - - - - - - 第 42 章 探索性数据分析-anscombe数据集 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 42 章 探索性数据分析-anscombe数据集

-

在可视化章节,我们提到 Anscombe’s quartet这个数据集,

-
?datasets::anscombe
-

在其官方文档,我们可看到它是这样描述的:

-
-

Four x-y datasets which have the same traditional statistical properties (mean, variance, correlation, regression line, etc.), yet are quite different.

-
-
d <- datasets::anscombe
-head(d)
-
##   x1 x2 x3 x4   y1   y2    y3   y4
-## 1 10 10 10  8 8.04 9.14  7.46 6.58
-## 2  8  8  8  8 6.95 8.14  6.77 5.76
-## 3 13 13 13  8 7.58 8.74 12.74 7.71
-## 4  9  9  9  8 8.81 8.77  7.11 8.84
-## 5 11 11 11  8 8.33 9.26  7.81 8.47
-## 6 14 14 14  8 9.96 8.10  8.84 7.04
-
-

42.1 探索anscombe

-
library(tidyverse)
-

本节课的内容,就是用tidyverse的方法去探索下这个数据集:

-
    -
  • 规整数据
  • -
  • 分组统计
  • -
  • 建模
  • -
  • 可视化
  • -
-
-
-

42.2 规整数据

-

我们再看看数据

-
head(d)
-
##   x1 x2 x3 x4   y1   y2    y3   y4
-## 1 10 10 10  8 8.04 9.14  7.46 6.58
-## 2  8  8  8  8 6.95 8.14  6.77 5.76
-## 3 13 13 13  8 7.58 8.74 12.74 7.71
-## 4  9  9  9  8 8.81 8.77  7.11 8.84
-## 5 11 11 11  8 8.33 9.26  7.81 8.47
-## 6 14 14 14  8 9.96 8.10  8.84 7.04
-

实际上,这是四组数据:(x1, y1), (x2, y2), (x3, y3), (x4, y4)。那要怎么样规整数据,或者说怎么样把数据弄成tidy呢?这里有个技巧:凡是能直接用ggplot()可视化的数据,基本上就是tidy的。

-
d %>%
-  ggplot(aes(x = x, y = y)) +
-  geom_point() +
-  facet_wrap(~set)
-

那么,我们希望我们的数据是这样的格式

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
setxy
1108.04
186.95
2109.14
288.14
-
-

42.2.1 小小的回顾

-

我们之前讲过,在数据变形中,把宽表格变成长表格,需要用到tidyr::pivot_longer()函数。

-

比如

-
dt <- tibble(id = c("a", "b"), x_1 = 1:2, x_2 = 3:4, y_1 = 5:6, y_2 = 8:9)
-dt
-
## # A tibble: 2 x 5
-##   id      x_1   x_2   y_1   y_2
-##   <chr> <int> <int> <int> <int>
-## 1 a         1     3     5     8
-## 2 b         2     4     6     9
-
dt %>% pivot_longer(-id,
-  names_to = "name",
-  values_to = "vaules"
-)
-
## # A tibble: 8 x 3
-##   id    name  vaules
-##   <chr> <chr>  <int>
-## 1 a     x_1        1
-## 2 a     x_2        3
-## 3 a     y_1        5
-## 4 a     y_2        8
-## 5 b     x_1        2
-## 6 b     x_2        4
-## 7 b     y_1        6
-## 8 b     y_2        9
-

有时候,我们不想要下划线后面的编号,只想保留前面的第一个字母

-
dt %>% pivot_longer(
-  cols = -id,
-  names_to = "name",
-  names_pattern = "(.)_.",
-  values_to = "vaules"
-)
-
## # A tibble: 8 x 3
-##   id    name  vaules
-##   <chr> <chr>  <int>
-## 1 a     x          1
-## 2 a     x          3
-## 3 a     y          5
-## 4 a     y          8
-## 5 b     x          2
-## 6 b     x          4
-## 7 b     y          6
-## 8 b     y          9
-

有时候人的需求是多样的,比如不想要前面的第一个字母,只要下划线后面的编号

-
dt %>% pivot_longer(
-  cols = -id,
-  names_to = "name",
-  names_pattern = "._(.)",
-  values_to = "vaules"
-)
-
## # A tibble: 8 x 3
-##   id    name  vaules
-##   <chr> <chr>  <int>
-## 1 a     1          1
-## 2 a     2          3
-## 3 a     1          5
-## 4 a     2          8
-## 5 b     1          2
-## 6 b     2          4
-## 7 b     1          6
-## 8 b     2          9
-

有时候我们都想要呢?

-
dt %>% pivot_longer(
-  cols = -id,
-  names_to = c("name", "group"),
-  names_pattern = "(.)_(.)",
-  values_to = "vaules"
-)
-
## # A tibble: 8 x 4
-##   id    name  group vaules
-##   <chr> <chr> <chr>  <int>
-## 1 a     x     1          1
-## 2 a     x     2          3
-## 3 a     y     1          5
-## 4 a     y     2          8
-## 5 b     x     1          2
-## 6 b     x     2          4
-## 7 b     y     1          6
-## 8 b     y     2          9
-

有时候,我们希望"x", "y"保留在列名,那么匹配出来的第一个字母,就不能给"name",而是传给特殊的符号".value",它会收集匹配出来的字符,然后放在列名中

-
dt %>% pivot_longer(
-  cols = -id,
-  names_to = c(".value", "group"),
-  names_pattern = "(.)_(.)",
-  values_to = "vaules"
-)
-
## # A tibble: 4 x 4
-##   id    group     x     y
-##   <chr> <chr> <int> <int>
-## 1 a     1         1     5
-## 2 a     2         3     8
-## 3 b     1         2     6
-## 4 b     2         4     9
-

是不是觉得很强大?

-
-
-

42.2.2 回到案例

-

具体来说,我们希望把列名 x1 按照指定的正则表达式拆成 x 和 1 两个部分:其中 1 放在 set 列下,而 x 传给 .value,当作变形后的列名。

-
knitr::include_graphics("images/pivot_longer_values.jpg")
-

-

那么和上面的情况一样,使用tidyr::pivot_longer()函数

-
tidy_d <- d %>%
-  pivot_longer(
-    cols = everything(),
-    names_to = c(".value", "set"),
-    names_pattern = "(.)(.)"
-  )
-tidy_d
-
## # A tibble: 44 x 3
-##    set       x     y
-##    <chr> <dbl> <dbl>
-##  1 1        10  8.04
-##  2 2        10  9.14
-##  3 3        10  7.46
-##  4 4         8  6.58
-##  5 1         8  6.95
-##  6 2         8  8.14
-##  7 3         8  6.77
-##  8 4         8  5.76
-##  9 1        13  7.58
-## 10 2        13  8.74
-## # ... with 34 more rows
-

再啰嗦下参数的含义:

-
    -
  • cols = everything() 表示选择所有列
  • -
  • names_to = c(".value", "set") 希望变形后的列名是c(".value", "set"), 这里 ".value" 是个特殊的符号,代表着names_pattern匹配过来的值,一般情况下,是多个值,如果传给".value"的是"x, y, z",那么列名就会变成c("x", "y", "z", "set")
  • -
  • names_pattern = "(.)(.)" 将变换前的列名按照指定的正则表达式匹配,并且传递给names_to的对应的参数,比如这里第一个(.)传递给.value;第二个(.)传递给set.
  • -
-
-
-
-

42.3 统计

-

数据规整了,统计就很简单了

-
tidy_d_summary <- tidy_d %>%
-  group_by(set) %>%
-  summarise(across(
-    .cols = everything(),
-    .fns = lst(mean, sd, var),
-    .names = "{col}_{fn}"
-  ))
-tidy_d_summary
-
## # A tibble: 4 x 7
-##   set   x_mean  x_sd x_var y_mean  y_sd y_var
-##   <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl>
-## 1 1          9  3.32    11   7.50  2.03  4.13
-## 2 2          9  3.32    11   7.50  2.03  4.13
-## 3 3          9  3.32    11   7.5   2.03  4.12
-## 4 4          9  3.32    11   7.50  2.03  4.12
-
-
-

42.4 建模

-

具体参考第 22 章整理的四种方法

-
tidy_d %>%
-  group_nest(set) %>%
-  mutate(
-    fit = map(data, ~ lm(y ~ x, data = .x)),
-    tidy = map(fit, broom::tidy),
-    glance = map(fit, broom::glance)
-  ) %>%
-  unnest(tidy)
-

感觉大家更喜欢这种

-
tidy_d %>%
-  group_by(set) %>%
-  group_modify(
-    ~ broom::tidy(lm(y ~ x, data = .))
-  )
-
## # A tibble: 8 x 6
-## # Groups:   set [4]
-##   set   term       estimate std.error statistic p.value
-##   <chr> <chr>         <dbl>     <dbl>     <dbl>   <dbl>
-## 1 1     (Intercep~    3.00      1.12       2.67 0.0257 
-## 2 1     x             0.500     0.118      4.24 0.00217
-## 3 2     (Intercep~    3.00      1.13       2.67 0.0258 
-## 4 2     x             0.5       0.118      4.24 0.00218
-## 5 3     (Intercep~    3.00      1.12       2.67 0.0256 
-## 6 3     x             0.500     0.118      4.24 0.00218
-## 7 4     (Intercep~    3.00      1.12       2.67 0.0256 
-## 8 4     x             0.500     0.118      4.24 0.00216
-
tidy_d %>%
-  group_by(set) %>%
-  summarise(
-    broom::tidy(lm(y ~ x, data = cur_data()))
-  )
-
## # A tibble: 8 x 6
-## # Groups:   set [4]
-##   set   term       estimate std.error statistic p.value
-##   <chr> <chr>         <dbl>     <dbl>     <dbl>   <dbl>
-## 1 1     (Intercep~    3.00      1.12       2.67 0.0257 
-## 2 1     x             0.500     0.118      4.24 0.00217
-## 3 2     (Intercep~    3.00      1.13       2.67 0.0258 
-## 4 2     x             0.5       0.118      4.24 0.00218
-## 5 3     (Intercep~    3.00      1.12       2.67 0.0256 
-## 6 3     x             0.500     0.118      4.24 0.00218
-## 7 4     (Intercep~    3.00      1.12       2.67 0.0256 
-## 8 4     x             0.500     0.118      4.24 0.00216
-
-
-

42.5 可视化看看

-
tidy_d %>%
-  ggplot(aes(x = x, y = y, colour = set)) +
-  geom_point() +
-  geom_smooth(method = "lm", se = FALSE) +
-  theme(legend.position = "none") +
-  facet_wrap(~set)
-

- -
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/eda-career-decision.html b/_book/eda-career-decision.html deleted file mode 100644 index 22f9ac1..0000000 --- a/_book/eda-career-decision.html +++ /dev/null @@ -1,1745 +0,0 @@ - - - - - - - 第 46 章 探索性数据分析-大学生职业决策 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 46 章 探索性数据分析-大学生职业决策

-
-

46.1 预备知识

-
library(tidyverse)
-
-example <- 
- tibble::tribble(
-   ~name, ~english, ~chinese, ~math, ~sport, ~psy, ~edu,
-     "A",     133,    100,    102,     56,    89,   89,
-     "B",     120,    120,     86,     88,    45,   75,
-     "C",      98,    109,    114,     87,    NA,   84,
-     "D",     120,     78,    106,     68,    86,   69,
-     "E",     110,     99,    134,     98,    75,   70,
-     "F",      NA,    132,    130,     NA,    68,   88
-   )
-
-example
-
## # A tibble: 6 x 7
-##   name  english chinese  math sport   psy   edu
-##   <chr>   <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
-## 1 A         133     100   102    56    89    89
-## 2 B         120     120    86    88    45    75
-## 3 C          98     109   114    87    NA    84
-## 4 D         120      78   106    68    86    69
-## 5 E         110      99   134    98    75    70
-## 6 F          NA     132   130    NA    68    88
-
-

46.1.1 缺失值检查

-

我们需要判断每一列的缺失值

-
example %>% 
-  summarise(
-    na_in_english = sum(is.na(english)),
-    na_in_chinese = sum(is.na(chinese)),
-    na_in_math    = sum(is.na(math)),
-    na_in_sport   = sum(is.na(sport)),
-    na_in_psy     = sum(is.na(math)),   # tpyo here
-    na_in_edu     = sum(is.na(edu))
-  )
-
## # A tibble: 1 x 6
-##   na_in_english na_in_chinese na_in_math na_in_sport
-##           <int>         <int>      <int>       <int>
-## 1             1             0          0           1
-## # ... with 2 more variables: na_in_psy <int>,
-## #   na_in_edu <int>
-

我们发现,这种写法比较笨,而且容易出错,比如na_in_psy = sum(is.na(math)) 就写错了。那么有没有既偷懒又安全的方法呢?有的。但代价是需要学会across()函数,大家可以在Console中输入?dplyr::across查看帮助文档,或者看第 22 章。

-
example %>% 
-  summarise(
-    across(everything(), mean)
-  )
-
## # A tibble: 1 x 7
-##    name english chinese  math sport   psy   edu
-##   <dbl>   <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
-## 1    NA      NA    106.   112    NA    NA  79.2
-
example %>% 
-  summarise(
-    across(everything(), function(x) sum(is.na(x)) )
-  )
-
## # A tibble: 1 x 7
-##    name english chinese  math sport   psy   edu
-##   <int>   <int>   <int> <int> <int> <int> <int>
-## 1     0       1       0     0     1     1     0
-
-
-

46.1.2 数据预处理

-
    -
  • 直接丢弃缺失值所在的行
  • -
-
example %>% drop_na()
-
## # A tibble: 4 x 7
-##   name  english chinese  math sport   psy   edu
-##   <chr>   <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
-## 1 A         133     100   102    56    89    89
-## 2 B         120     120    86    88    45    75
-## 3 D         120      78   106    68    86    69
-## 4 E         110      99   134    98    75    70
-
    -
  • 均值代替缺失值
  • -
-
d <- example %>% 
-  mutate(
-    across(where(is.numeric), ~ if_else(is.na(.), mean(., na.rm = T), .))
-  )
-d
-
## # A tibble: 6 x 7
-##   name  english chinese  math sport   psy   edu
-##   <chr>   <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
-## 1 A        133      100   102  56    89      89
-## 2 B        120      120    86  88    45      75
-## 3 C         98      109   114  87    72.6    84
-## 4 D        120       78   106  68    86      69
-## 5 E        110       99   134  98    75      70
-## 6 F        116.     132   130  79.4  68      88
-
    -
  • 计算总分/均值
  • -
-
d %>% 
-  rowwise() %>% 
-  mutate(
-    total = sum(c_across(-name))
-  )
-
## # A tibble: 6 x 8
-## # Rowwise: 
-##   name  english chinese  math sport   psy   edu total
-##   <chr>   <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
-## 1 A        133      100   102  56    89      89  569 
-## 2 B        120      120    86  88    45      75  534 
-## 3 C         98      109   114  87    72.6    84  565.
-## 4 D        120       78   106  68    86      69  527 
-## 5 E        110       99   134  98    75      70  586 
-## 6 F        116.     132   130  79.4  68      88  614.
-
d %>% 
-  rowwise() %>% 
-  mutate(
-    mean = mean(c_across(-name))
-  )
-
## # A tibble: 6 x 8
-## # Rowwise: 
-##   name  english chinese  math sport   psy   edu  mean
-##   <chr>   <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
-## 1 A        133      100   102  56    89      89  94.8
-## 2 B        120      120    86  88    45      75  89  
-## 3 C         98      109   114  87    72.6    84  94.1
-## 4 D        120       78   106  68    86      69  87.8
-## 5 E        110       99   134  98    75      70  97.7
-## 6 F        116.     132   130  79.4  68      88 102.
-
    -
  • 数据标准化处理
  • -
-
standard <- function(x) {
-  (x - mean(x)) / sd(x)
-}
-
d %>% 
-  mutate(
-    across(where(is.numeric), standard)
-  )
-
## # A tibble: 6 x 7
-##   name  english chinese   math  sport    psy    edu
-##   <chr>   <dbl>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
-## 1 A       1.44   -0.339 -0.555 -1.54   1.04   1.10 
-## 2 B       0.326   0.731 -1.44   0.566 -1.75  -0.464
-## 3 C      -1.56    0.143  0.111  0.500  0      0.538
-## 4 D       0.326  -1.51  -0.333 -0.75   0.852 -1.13 
-## 5 E      -0.531  -0.392  1.22   1.22   0.153 -1.02 
-## 6 F       0       1.37   0.999  0     -0.292  0.984
-
-
-
-

46.2 开始

-
-

46.2.1 文件管理中需要注意的地方

-

感谢康钦虹同学提供的数据,但这里有几点需要注意的地方:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
事项问题解决办法
文件名excel的文件名是中文用英文,比如 data.xlsx
列名列名中有-号,大小写不统一规范列名,或用janitor::clean_names()偷懒
预处理直接在原始数据中新增不要在原始数据上改动,统计工作可以在R里实现
文件管理没有层级新建data文件夹装数据,与code.Rmd并列
-
data <- readxl::read_excel("demo_data/career-decision.xlsx", skip = 1) %>% 
-        janitor::clean_names()
-
-#glimpse(data)
-
d <- data %>% select(1:61)
-#glimpse(d)
-
-
-

46.2.2 缺失值检查

-
d %>% 
-  summarise(
-    across(everything(), ~sum(is.na(.)))
-  )
-
## # A tibble: 1 x 61
-##     sex majoy grade  from    z1    z2    z3    z4    z5
-##   <int> <int> <int> <int> <int> <int> <int> <int> <int>
-## 1     0     0     0     0     0     0     0     0     0
-## # ... with 52 more variables: z6 <int>, z7 <int>,
-## #   z8 <int>, z9 <int>, z10 <int>, z11 <int>,
-## #   z12 <int>, z13 <int>, z14 <int>, z15 <int>,
-## #   z16 <int>, z17 <int>, z18 <int>, j1 <int>,
-## #   j2 <int>, j3 <int>, j4 <int>, j5 <int>, j6 <int>,
-## #   j7 <int>, j8 <int>, j9 <int>, j10 <int>,
-## #   j11 <int>, j12 <int>, j13 <int>, j14 <int>,
-## #   j15 <int>, j16 <int>, j17 <int>, j18 <int>,
-## #   j19 <int>, j20 <int>, j21 <int>, j22 <int>,
-## #   j23 <int>, j24 <int>, j25 <int>, j26 <int>,
-## #   j27 <int>, j28 <int>, j29 <int>, j30 <int>,
-## #   j31 <int>, j32 <int>, j33 <int>, j34 <int>,
-## #   j35 <int>, j36 <int>, j37 <int>, j38 <int>,
-## #   j39 <int>
-

没有缺失值,挺好

-
-
-

46.2.3 数据预处理

-

采用利克特式 5 点计分… (这方面你们懂得比我多)

-
d <- d %>%
-  rowwise() %>%
-  mutate(
-    environment_exploration          = sum(c_across(z1:z5)),
-    self_exploration                 = sum(c_across(z6:z9)),
-    objective_system_exploration     = sum(c_across(z10:z15)),
-    info_quantity_exploration        = sum(c_across(z16:z18)),
-    
-    self_evaluation                  = sum(c_across(j1:j6)),
-    information_collection           = sum(c_across(j7:j15)),
-    target_select                    = sum(c_across(j16:j24)),
-    formulate                        = sum(c_across(j25:j32)),
-    problem_solving                  = sum(c_across(j33:j39)),
-
-    career_exploration               = sum(c_across(z1:z18)),
-    career_decision_making           = sum(c_across(j1:j39))
-  ) %>% 
-  select(-starts_with("z"), -starts_with("j")) %>% 
-  ungroup() %>% 
-  mutate(pid = 1:n(), .before = sex) %>%
-  mutate(
-    across(c(pid, sex, majoy, grade, from), as_factor)
-  )
-
-#glimpse(d)
-
-
-

46.2.4 标准化

-
standard <- function(x) {
-  (x - mean(x)) / sd(x)
-}
-
-d <- d %>% 
-  mutate(
-    across(where(is.numeric), standard)
-  )
-d
-
## # A tibble: 304 x 16
-##    pid   sex   majoy grade from  environment_exp~
-##    <fct> <fct> <fct> <fct> <fct>            <dbl>
-##  1 1     1     4     4     2              -1.63  
-##  2 2     1     4     4     1              -1.87  
-##  3 3     2     4     4     2               0.0802
-##  4 4     2     4     4     1              -1.87  
-##  5 5     2     4     4     1              -0.895 
-##  6 6     1     1     4     3              -0.651 
-##  7 7     1     4     4     3              -2.36  
-##  8 8     1     4     4     1              -0.407 
-##  9 9     1     4     4     3              -0.651 
-## 10 10    1     4     4     2               0.324 
-## # ... with 294 more rows, and 10 more variables:
-## #   self_exploration <dbl>,
-## #   objective_system_exploration <dbl>,
-## #   info_quantity_exploration <dbl>,
-## #   self_evaluation <dbl>,
-## #   information_collection <dbl>, target_select <dbl>,
-## #   formulate <dbl>, problem_solving <dbl>,
-## #   career_exploration <dbl>,
-## #   career_decision_making <dbl>
-
-
-
-

46.3 探索

-
-

46.3.1 想探索的问题

-
    -
  • 不同性别(或者年级,生源地,专业)下,各指标分值的差异性
  • -
  • 两个变量的相关分析和回归分析
  • -
  • 更多(欢迎大家提出来喔)
  • -
-
-
-

46.3.2 男生女生在职业探索上有所不同?

-

以性别为例。因为性别变量是男女,仅仅2组,所以检查男女在各自指标上的均值差异,可以用T检验。

-
d %>% 
-  group_by(sex) %>% 
-  summarise(
-   across(where(is.numeric), mean)
-)
-
## # A tibble: 2 x 12
-##   sex   environment_exp~ self_exploration
-##   <fct>            <dbl>            <dbl>
-## 1 1               -0.147          -0.0829
-## 2 2                0.165           0.0933
-## # ... with 9 more variables:
-## #   objective_system_exploration <dbl>,
-## #   info_quantity_exploration <dbl>,
-## #   self_evaluation <dbl>,
-## #   information_collection <dbl>, target_select <dbl>,
-## #   formulate <dbl>, problem_solving <dbl>,
-## #   career_exploration <dbl>,
-## #   career_decision_making <dbl>
-

你可以把这个图的颜色弄得更好看点吗?

-
library(ggridges)
-d %>% 
-  ggplot(aes(x = career_exploration, y = sex, fill = sex)) +
-  geom_density_ridges()
-

-
t_test_eq <- t.test(career_exploration ~ sex, data = d, var.equal = TRUE) %>% 
-  broom::tidy()
-t_test_eq
-
## # A tibble: 1 x 10
-##   estimate estimate1 estimate2 statistic p.value
-##      <dbl>     <dbl>     <dbl>     <dbl>   <dbl>
-## 1   -0.367    -0.173     0.194     -3.24 0.00132
-## # ... with 5 more variables: parameter <dbl>,
-## #   conf.low <dbl>, conf.high <dbl>, method <chr>,
-## #   alternative <chr>
-
t_test_uneq <- t.test(career_exploration ~ sex, data = d, var.equal = FALSE) %>% 
-  broom::tidy()
-t_test_uneq 
-
## # A tibble: 1 x 10
-##   estimate estimate1 estimate2 statistic p.value
-##      <dbl>     <dbl>     <dbl>     <dbl>   <dbl>
-## 1   -0.367    -0.173     0.194     -3.27 0.00121
-## # ... with 5 more variables: parameter <dbl>,
-## #   conf.low <dbl>, conf.high <dbl>, method <chr>,
-## #   alternative <chr>
-

当然,也可以用第 32 章介绍的统计推断的方法

-
library(infer)
-
-obs_diff <- d %>% 
-  specify(formula = career_exploration ~ sex) %>% 
-  calculate("diff in means", order = c("1", "2"))
-obs_diff
-
## # A tibble: 1 x 1
-##     stat
-##    <dbl>
-## 1 -0.367
-
null_dist <- d %>% 
-  specify(formula = career_exploration ~ sex) %>% 
-  hypothesize(null = "independence") %>% 
-  generate(reps = 5000, type = "permute") %>% 
-  calculate(stat = "diff in means", order = c("1", "2"))
-null_dist
-
## # A tibble: 5,000 x 2
-##    replicate     stat
-##        <int>    <dbl>
-##  1         1 -0.0114 
-##  2         2  0.0656 
-##  3         3  0.00208
-##  4         4 -0.0663 
-##  5         5  0.0155 
-##  6         6 -0.0736 
-##  7         7 -0.0798 
-##  8         8 -0.0443 
-##  9         9  0.0412 
-## 10        10  0.105  
-## # ... with 4,990 more rows
-
null_dist %>%  
-  visualize() +
-  shade_p_value(obs_stat = obs_diff, direction = "two_sided")
-

-
null_dist %>%  
-  get_p_value(obs_stat = obs_diff, direction = "two_sided") %>% 
-  #get_p_value(obs_stat = obs_diff, direction = "less") %>% 
-  mutate(p_value_clean = scales::pvalue(p_value))
-
## # A tibble: 1 x 2
-##   p_value p_value_clean
-##     <dbl> <chr>        
-## 1 0.00120 0.001
-

也可以用tidyverse的方法一次性地搞定所有指标

-
d %>%
-  pivot_longer(
-    cols = -c(pid, sex, majoy, grade, from),
-    names_to = "index",
-    values_to = "value"
-  ) %>% 
-  group_by(index) %>% 
-  summarise(
-    broom::tidy( t.test(value ~ sex, data = cur_data()))
-  ) %>% 
-  select(index, estimate, statistic, p.value) %>% 
-  arrange(p.value)
-
## # A tibble: 11 x 4
-##    index                   estimate statistic   p.value
-##    <chr>                      <dbl>     <dbl>     <dbl>
-##  1 career_decision_making    -0.494     -4.53   8.62e-6
-##  2 problem_solving           -0.470     -4.26   2.70e-5
-##  3 target_select             -0.449     -4.07   6.09e-5
-##  4 formulate                 -0.411     -3.72   2.35e-4
-##  5 information_collection    -0.411     -3.70   2.53e-4
-##  6 self_evaluation           -0.404     -3.65   3.15e-4
-##  7 objective_system_explo~   -0.382     -3.40   7.65e-4
-##  8 career_exploration        -0.367     -3.27   1.21e-3
-##  9 environment_exploration   -0.312     -2.75   6.29e-3
-## 10 info_quantity_explorat~   -0.274     -2.42   1.62e-2
-## 11 self_exploration          -0.176     -1.54   1.26e-1
-
-
-

46.3.3 来自不同地方的学生在职业探索上有所不同?

-

以生源地为例。因为生源地有3类,所以可以使用方差分析。

-
aov(career_exploration ~ from, data = d) %>%
-  TukeyHSD(which = "from") %>%
-  broom::tidy()
-
## # A tibble: 3 x 7
-##   term  contrast null.value estimate conf.low conf.high
-##   <chr> <chr>         <dbl>    <dbl>    <dbl>     <dbl>
-## 1 from  2-1               0   0.382    0.0623     0.701
-## 2 from  3-1               0   0.287   -0.0386     0.613
-## 3 from  3-2               0  -0.0943  -0.446      0.257
-## # ... with 1 more variable: adj.p.value <dbl>
-
library(ggridges)
-d %>% 
-  ggplot(aes(x = career_exploration, y = from, fill = from)) +
-  geom_density_ridges()
-

-

也可以一次性地搞定所有指标

-
d %>%
-  pivot_longer(
-    cols = -c(pid, sex, majoy, grade, from),
-    names_to = "index",
-    values_to = "value"
-  ) %>% 
-  group_by(index) %>% 
-  summarise(
-    broom::tidy( aov(value ~ from, data = cur_data()))
-  ) %>% 
-  select(index, term, statistic, p.value) %>% 
-  filter(term != "Residuals") %>% 
-  arrange(p.value)
-
## # A tibble: 11 x 4
-## # Groups:   index [11]
-##    index                     term  statistic    p.value
-##    <chr>                     <chr>     <dbl>      <dbl>
-##  1 problem_solving           from     14.6      9.18e-7
-##  2 career_decision_making    from     14.2      1.26e-6
-##  3 formulate                 from     12.2      7.81e-6
-##  4 information_collection    from     10.2      5.27e-5
-##  5 self_evaluation           from      8.91     1.74e-4
-##  6 target_select             from      8.45     2.70e-4
-##  7 info_quantity_exploration from      5.78     3.44e-3
-##  8 career_exploration        from      4.48     1.21e-2
-##  9 objective_system_explora~ from      4.06     1.81e-2
-## 10 environment_exploration   from      3.69     2.60e-2
-## 11 self_exploration          from      0.699    4.98e-1
-
-
-

46.3.4 职业探索和决策之间有关联?

-

可以用第 28 章线性模型来探索

-
lm(career_decision_making  ~ career_exploration, data = d)
-
## 
-## Call:
-## lm(formula = career_decision_making ~ career_exploration, data = d)
-## 
-## Coefficients:
-##        (Intercept)  career_exploration  
-##           2.15e-15            7.83e-01
-

不要因为我讲课讲的很垃圾,就错过了R的美,瑕不掩瑜啦。要相信自己,你们是川师研究生中最聪明的。

-

- -
-
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/eda-caribou.html b/_book/eda-caribou.html deleted file mode 100644 index 6cf8c8c..0000000 --- a/_book/eda-caribou.html +++ /dev/null @@ -1,1616 +0,0 @@ - - - - - - - 第 44 章 探索性数据分析-驯鹿迁移 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 44 章 探索性数据分析-驯鹿迁移

-

本章我们分析加拿大不列颠哥伦比亚省的林地驯鹿追踪数据,数据包含了从1988年到2016年期间260只驯鹿的近250000个位置标签。

-
-

44.1 驯鹿位置跟踪

-

-

大家可以在这里了解数据集的信息,它包含了两个数据集

-
# devtools::install_github("thebioengineer/tidytuesdayR")
-library(tidytuesdayR)
-
-tuesdata <- tidytuesdayR::tt_load("2020-06-23")
-# or
-# tuesdata <- tidytuesdayR::tt_load(2020, week = 26)
-
library(tidyverse)
-library(lubridate)
-library(gganimate)
-
-individuals <- readr::read_csv("./demo_data/caribou/individuals.csv")
-locations <- readr::read_csv("./demo_data/caribou/locations.csv")
-
-
-

44.2 驯鹿的身份信息

-
individuals %>% glimpse()
-
## Rows: 286
-## Columns: 14
-## $ animal_id            <chr> "HR_151.510", "GR_C04...
-## $ sex                  <chr> "f", "f", "f", "f", "...
-## $ life_stage           <chr> NA, NA, NA, NA, NA, N...
-## $ pregnant             <lgl> NA, NA, NA, NA, NA, N...
-## $ with_calf            <lgl> NA, NA, NA, NA, NA, N...
-## $ death_cause          <chr> NA, NA, NA, NA, NA, N...
-## $ study_site           <chr> "Hart Ranges", "Graha...
-## $ deploy_on_longitude  <dbl> NA, NA, NA, NA, NA, N...
-## $ deploy_on_latitude   <dbl> NA, NA, NA, NA, NA, N...
-## $ deploy_on_comments   <chr> NA, NA, NA, NA, NA, N...
-## $ deploy_off_longitude <dbl> NA, NA, NA, NA, NA, N...
-## $ deploy_off_latitude  <dbl> NA, NA, NA, NA, NA, N...
-## $ deploy_off_type      <chr> "unknown", "unknown",...
-## $ deploy_off_comments  <chr> NA, NA, NA, NA, NA, N...
-
individuals %>% count(animal_id)
-
## # A tibble: 260 x 2
-##    animal_id     n
-##    <chr>     <int>
-##  1 BP_car022     1
-##  2 BP_car023     1
-##  3 BP_car032     1
-##  4 BP_car043     1
-##  5 BP_car100     1
-##  6 BP_car101     1
-##  7 BP_car115     1
-##  8 BP_car144     1
-##  9 BP_car145     1
-## 10 GR_C01        2
-## # ... with 250 more rows
-

我们发现有重复id的,怎么办?

-
individuals %>% janitor::get_dupes(animal_id)
-
## # A tibble: 50 x 15
-##    animal_id dupe_count sex   life_stage pregnant
-##    <chr>          <int> <chr> <chr>      <lgl>   
-##  1 GR_C01             2 f     <NA>       NA      
-##  2 GR_C01             2 f     <NA>       NA      
-##  3 GR_C02             2 f     <NA>       NA      
-##  4 GR_C02             2 f     <NA>       NA      
-##  5 GR_C04             2 f     <NA>       NA      
-##  6 GR_C04             2 f     <NA>       NA      
-##  7 GR_C05             2 f     <NA>       NA      
-##  8 GR_C05             2 f     <NA>       NA      
-##  9 GR_C06             2 f     <NA>       NA      
-## 10 GR_C06             2 f     <NA>       NA      
-## # ... with 40 more rows, and 10 more variables:
-## #   with_calf <lgl>, death_cause <chr>,
-## #   study_site <chr>, deploy_on_longitude <dbl>,
-## #   deploy_on_latitude <dbl>,
-## #   deploy_on_comments <chr>,
-## #   deploy_off_longitude <dbl>,
-## #   deploy_off_latitude <dbl>, deploy_off_type <chr>,
-## #   deploy_off_comments <chr>
-
individuals %>%
-  filter(deploy_on_latitude > 50) %>%
-  ggplot(aes(x = deploy_on_longitude, y = deploy_on_latitude)) +
-  geom_point(aes(color = study_site)) #+
-

-
# borders("world", regions = "china")
-
-
-

44.3 性别比例

-
-
-

44.4 每个站点运动最频繁的前10的驯鹿

-
-
-

44.5 驯鹿的活动信息

-

简单点说,就是哪个驯鹿在什么时间出现在什么地方

-
locations %>%
-  ggplot(aes(x = longitude, y = latitude)) +
-  geom_point(aes(color = study_site))
-

-
-
-

44.6 被追踪最多次的驯鹿的轨迹

-
top_animal_ids <-
-  count(locations, animal_id, sort = TRUE) %>%
-  slice(1:10) %>%
-  pull(animal_id)
-
-
-locations %>%
-  filter(animal_id %in% top_animal_ids) %>%
-  arrange(animal_id, timestamp) %>%
-  group_by(animal_id) %>%
-  mutate(measurement_n = row_number()) %>%
-  ggplot(aes(
-    x = longitude,
-    y = latitude,
-    color = animal_id,
-    alpha = measurement_n
-  )) +
-  geom_point(show.legend = FALSE, size = 1) +
-  geom_path(show.legend = FALSE, size = 1) +
-  # scale_color_manual(values = ) +
-  theme_minimal() +
-  theme(
-    plot.title = element_text(size = 20, face = "bold"),
-    plot.subtitle = element_text(size = 10),
-    text = element_text(color = "White"),
-    panel.grid.minor = element_blank(),
-    panel.grid.major = element_line(color = "gray60", size = 0.05),
-    plot.background = element_rect(fill = "gray10"),
-    axis.text = element_text(color = "white")
-  ) +
-  labs(
-    x = "\nLongitude", y = "Latitude\n",
-    title = "Caribou movement tracking",
-    subtitle = "Latitude and longitude locations of the animals with the highest number of measurements\n",
-    caption = "Tidy Tuesday: Caribou Location Tracking"
-  )
-

-
-
-

44.7 某一只驯鹿的轨迹

-
locations %>%
-  dplyr::filter(animal_id %in% c("QU_car143")) %>%
-  dplyr::arrange(animal_id, timestamp) %>%
-  dplyr::group_by(animal_id) %>%
-  dplyr::mutate(measurement_n = row_number()) %>%
-  ggplot(aes(
-    x = longitude,
-    y = latitude,
-    color = measurement_n,
-    alpha = measurement_n
-  )) +
-  geom_point(show.legend = FALSE, size = 1) +
-  geom_path(show.legend = FALSE, size = 1) +
-  scale_color_gradient(low = "white", high = "firebrick3") +
-  theme_minimal() +
-  theme(
-    plot.title = element_text(size = 20, face = "bold"),
-    plot.subtitle = element_text(size = 10),
-    text = element_text(color = "White"),
-    panel.grid.minor = element_blank(),
-    panel.grid.major = element_line(color = "gray60", size = 0.05),
-    plot.background = element_rect(fill = "gray10"),
-    axis.text = element_text(color = "white")
-  ) +
-  labs(
-    x = "\nLongitude", y = "Latitude\n",
-    title = "QU_car143 movement tracking",
-    subtitle = "Latitude and longitude locations of the animals with the highest number of measurements\n Ligher colors indicate earlier measurements",
-    caption = "Tidy Tuesday: Caribou Location Tracking"
-  )
-

-
-
-

44.8 选择某个驯鹿,查看他的活动轨迹

-
example_animal <- locations %>%
-  dplyr::filter(animal_id == sample(animal_id, 1)) %>%
-  dplyr::arrange(timestamp)
-example_animal
-
## # A tibble: 2,039 x 7
-##    event_id animal_id study_site season
-##       <dbl> <chr>     <chr>      <chr> 
-##  1   2.27e9 QU_car110 Quintette  Winter
-##  2   2.27e9 QU_car110 Quintette  Winter
-##  3   2.27e9 QU_car110 Quintette  Winter
-##  4   2.27e9 QU_car110 Quintette  Winter
-##  5   2.27e9 QU_car110 Quintette  Winter
-##  6   2.27e9 QU_car110 Quintette  Winter
-##  7   2.27e9 QU_car110 Quintette  Winter
-##  8   2.27e9 QU_car110 Quintette  Winter
-##  9   2.27e9 QU_car110 Quintette  Winter
-## 10   2.27e9 QU_car110 Quintette  Winter
-## # ... with 2,029 more rows, and 3 more variables:
-## #   timestamp <dttm>, longitude <dbl>, latitude <dbl>
-
"2010-03-28 21:00:44" %>% lubridate::as_date()
-"2010-03-28 21:00:44" %>% lubridate::as_datetime()
-"2010-03-28 21:00:44" %>% lubridate::quarter()
-
example_animal %>%
-  dplyr::mutate(date = lubridate::as_date(timestamp)) %>%
-  ggplot(aes(x = longitude, y = latitude, color = date)) +
-  geom_path()
-

-
example_animal %>%
-  dplyr::mutate(quarter = lubridate::quarter(timestamp) %>% as.factor()) %>%
-  ggplot(aes(x = longitude, y = latitude, color = quarter)) +
-  geom_path() +
-  facet_wrap(vars(quarter)) +
-  labs(title = "一只小驯鹿到处啊跑")
-

-
-
-

44.9 季节模式

-

看看驯鹿夏季和冬季运动模式,这段代码来自gkaramanis

-
movement <- locations %>%
-  filter(study_site != "Hart Ranges") %>%
-  mutate(
-    season = fct_rev(season),
-    longitude = round(longitude, 2),
-    latitude = round(latitude, 2)
-  ) %>%
-  distinct(season, study_site, longitude, latitude)
-
-
-
-ggplot(movement) +
-  geom_point(aes(longitude, latitude,
-    group = study_site,
-    colour = study_site
-  ), size = 0.1) +
-  gghighlight::gghighlight(
-    unhighlighted_params = list(colour = "grey70"), use_direct_label = FALSE
-  ) +
-  scale_colour_manual(
-    values = c("#ffe119", "#4363d8", "#f58231", "#e6194B", "#800000", "#000075", "#f032e6", "#3cb44b"),
-    breaks = c("Graham", "Scott", "Moberly", "Burnt Pine", "Kennedy", "Quintette", "Narraway")
-  ) +
-  guides(colour = guide_legend(title = "Herd", override.aes = list(size = 3))) +
-  coord_fixed(ratio = 1.5) +
-  facet_wrap(vars(season), ncol = 2) +
-  # labs(
-  #   title = "Migration patterns of Northern Caribou\nin the South Peace of British Columbia",
-  #     subtitle = str_wrap("In summer, most caribou migrate towards the central core of the Rocky Mountains where they use alpine and subalpine habitat. The result of this movement to the central core of the Rocky Mountains is that some of the east side herds can overlap with west side herds during the summer.", 100),
-  #     caption = str_wrap("Source: Seip DR, Price E (2019) Data from: Science update for the South Peace Northern Caribou (Rangifer tarandus caribou pop. 15) in British Columbia. Movebank Data Repository. https://doi.org/10.5441/001/1.p5bn656k | Graphic: Georgios Karamanis", 70)
-  # ) +
-  theme_void() +
-  theme(
-    legend.position = c(0.5, 0.6),
-    legend.text = element_text(size = 11, colour = "#F9EED9"),
-    legend.title = element_text(size = 16, hjust = 0.5, colour = "#F9EED9"),
-    panel.spacing.x = unit(3, "lines"),
-    plot.margin = margin(20, 20, 20, 20),
-    plot.background = element_rect(fill = "#7A6A4F", colour = NA),
-    strip.text = element_text(colour = "#F9EED9", size = 18),
-    plot.title = element_text(colour = "white", size = 20, hjust = 0, lineheight = 1),
-    plot.subtitle = element_text(colour = "white", size = 12, hjust = 0, lineheight = 1, margin = margin(10, 0, 50, 0)),
-    plot.caption = element_text(colour = "grey80", size = 7, hjust = 1, margin = margin(30, 0, 10, 0))
-  )
-

-
-
-

44.10 迁移速度

-
location_with_speed <- locations %>%
-  dplyr::group_by(animal_id) %>%
-  dplyr::mutate(
-    last_longitude = lag(longitude),
-    last_latitude = lag(latitude),
-    hours = as.numeric(difftime(timestamp, lag(timestamp), units = "hours")),
-    km = geosphere::distHaversine(
-      cbind(longitude, latitude), cbind(last_longitude, last_latitude)
-    ) / 1000,
-    speed = km / hours
-  ) %>%
-  dplyr::ungroup()
-
-location_with_speed
-
## # A tibble: 249,450 x 12
-##    event_id animal_id study_site season
-##       <dbl> <chr>     <chr>      <chr> 
-##  1   2.26e9 GR_C01    Graham     Winter
-##  2   2.26e9 GR_C01    Graham     Winter
-##  3   2.26e9 GR_C01    Graham     Winter
-##  4   2.26e9 GR_C01    Graham     Winter
-##  5   2.26e9 GR_C01    Graham     Winter
-##  6   2.26e9 GR_C01    Graham     Winter
-##  7   2.26e9 GR_C01    Graham     Winter
-##  8   2.26e9 GR_C01    Graham     Winter
-##  9   2.26e9 GR_C01    Graham     Winter
-## 10   2.26e9 GR_C01    Graham     Winter
-## # ... with 249,440 more rows, and 8 more variables:
-## #   timestamp <dttm>, longitude <dbl>, latitude <dbl>,
-## #   last_longitude <dbl>, last_latitude <dbl>,
-## #   hours <dbl>, km <dbl>, speed <dbl>
-
location_with_speed %>%
-  ggplot(aes(x = speed)) +
-  geom_histogram() +
-  scale_x_log10()
-

-
-
-

44.11 动态展示

-
library(gganimate)
-
-example_animal %>%
-  ggplot(aes(x = longitude, y = latitude)) +
-  geom_point() +
-  transition_time(time = timestamp) +
-  shadow_mark(past = TRUE) +
-  labs(title = "date is {frame_time}")
-
-
-

44.12 更多

-
df <- locations %>%
-  dplyr::filter(
-    study_site == "Graham",
-    year(timestamp) == 2002
-  ) %>%
-  dplyr::group_by(animal_id) %>%
-  dplyr::filter(
-    as_date(min(timestamp)) == "2002-01-01",
-    as_date(max(timestamp)) == "2002-12-31"
-  ) %>%
-  dplyr::ungroup() %>%
-  dplyr::mutate(date = as_date(timestamp)) %>%
-  dplyr::group_by(animal_id, date) %>%
-  dplyr::summarise(
-    longitude_centroid = mean(longitude),
-    latitude_centroid = mean(latitude)
-  ) %>%
-  dplyr::ungroup() %>%
-  tidyr::complete(animal_id, date) %>%
-  dplyr::arrange(animal_id, date) %>%
-  tidyr::fill(longitude_centroid, latitude_centroid, .direction = "down")
-
p <- df %>%
-  ggplot(aes(longitude_centroid, latitude_centroid, colour = animal_id)) +
-  geom_point(size = 2) +
-  coord_map() +
-  theme_void() +
-  theme(legend.position = "none") +
-  transition_time(time = date) +
-  shadow_mark(alpha = 0.2, size = 0.8) +
-  ggtitle("Caribou location on {frame_time}")
-p
- -
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/eda-covid2019.html b/_book/eda-covid2019.html deleted file mode 100644 index f59432e..0000000 --- a/_book/eda-covid2019.html +++ /dev/null @@ -1,2182 +0,0 @@ - - - - - - - 第 41 章 探索性数据分析-新冠疫情 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 41 章 探索性数据分析-新冠疫情

-
library(tidyverse)
-library(lubridate)
-library(maps)
-library(viridis)
-library(ggrepel)
-library(paletteer)
-library(shadowtext)
-library(showtext)
-showtext_auto()
-

新型冠状病毒(俗称武汉肺炎)疫情在多国蔓延,本章通过分析疫情数据,了解疫情发展,祝愿人类早日战胜病毒!

-
-电影《传染病》,《流感》海报 -

-图 41.1: 电影《传染病》,《流感》海报 -

-
-
-电影《传染病》,《流感》海报 -

-图 41.2: 电影《传染病》,《流感》海报 -

-
-
-

41.1 数据来源

-

我们打开链接https://github.com/CSSEGISandData/COVID-19

-

-

找到疫情时间序列数据,你可以通过点击该网页Clone or download直接下载的方式获取数据。

-

-
-
-

41.2 读取数据

-

假定你已经下载了数据,比如time_series_covid19_confirmed_global.csv, 那么我们可以用readr::read_csv()函数直接读取, 关于在R语言里文件读取的方法可以参考第 5 章。

-
d <- read_csv("./demo_data/time_series_covid19_confirmed_global.csv")
-d
-
## # A tibble: 256 x 74
-##    `Province/State` `Country/Region`   Lat   Long
-##    <chr>            <chr>            <dbl>  <dbl>
-##  1 <NA>             Afghanistan       33    65   
-##  2 <NA>             Albania           41.2  20.2 
-##  3 <NA>             Algeria           28.0   1.66
-##  4 <NA>             Andorra           42.5   1.52
-##  5 <NA>             Angola           -11.2  17.9 
-##  6 <NA>             Antigua and Bar~  17.1 -61.8 
-##  7 <NA>             Argentina        -38.4 -63.6 
-##  8 <NA>             Armenia           40.1  45.0 
-##  9 Australian Capi~ Australia        -35.5 149.  
-## 10 New South Wales  Australia        -33.9 151.  
-## # ... with 246 more rows, and 70 more variables:
-## #   `1/22/20` <dbl>, `1/23/20` <dbl>, `1/24/20` <dbl>,
-## #   `1/25/20` <dbl>, `1/26/20` <dbl>, `1/27/20` <dbl>,
-## #   `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>,
-## #   `1/31/20` <dbl>, `2/1/20` <dbl>, `2/2/20` <dbl>,
-## #   `2/3/20` <dbl>, `2/4/20` <dbl>, `2/5/20` <dbl>,
-## #   `2/6/20` <dbl>, `2/7/20` <dbl>, `2/8/20` <dbl>,
-## #   `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>,
-## #   `2/12/20` <dbl>, `2/13/20` <dbl>, `2/14/20` <dbl>,
-## #   `2/15/20` <dbl>, `2/16/20` <dbl>, `2/17/20` <dbl>,
-## #   `2/18/20` <dbl>, `2/19/20` <dbl>, `2/20/20` <dbl>,
-## #   `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>,
-## #   `2/24/20` <dbl>, `2/25/20` <dbl>, `2/26/20` <dbl>,
-## #   `2/27/20` <dbl>, `2/28/20` <dbl>, `2/29/20` <dbl>,
-## #   `3/1/20` <dbl>, `3/2/20` <dbl>, `3/3/20` <dbl>,
-## #   `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>,
-## #   `3/7/20` <dbl>, `3/8/20` <dbl>, `3/9/20` <dbl>,
-## #   `3/10/20` <dbl>, `3/11/20` <dbl>, `3/12/20` <dbl>,
-## #   `3/13/20` <dbl>, `3/14/20` <dbl>, `3/15/20` <dbl>,
-## #   `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>,
-## #   `3/19/20` <dbl>, `3/20/20` <dbl>, `3/21/20` <dbl>,
-## #   `3/22/20` <dbl>, `3/23/20` <dbl>, `3/24/20` <dbl>,
-## #   `3/25/20` <dbl>, `3/26/20` <dbl>, `3/27/20` <dbl>,
-## #   `3/28/20` <dbl>, `3/29/20` <dbl>, `3/30/20` <dbl>,
-## #   `3/31/20` <dbl>
-
-
-

41.3 数据集结构

-

探索数据之前,我们一定要对数据存储结构、数据变量名及其含义非常清楚,重要的事情说三遍。

-
glimpse(d)
-
## Rows: 256
-## Columns: 74
-## $ `Province/State` <chr> NA, NA, NA, NA, NA, NA, N...
-## $ `Country/Region` <chr> "Afghanistan", "Albania",...
-## $ Lat              <dbl> 33.00, 41.15, 28.03, 42.5...
-## $ Long             <dbl> 65.000, 20.168, 1.660, 1....
-## $ `1/22/20`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `1/23/20`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `1/24/20`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `1/25/20`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `1/26/20`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `1/27/20`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `1/28/20`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `1/29/20`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `1/30/20`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `1/31/20`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `2/1/20`         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `2/2/20`         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `2/3/20`         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `2/4/20`         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `2/5/20`         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `2/6/20`         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `2/7/20`         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `2/8/20`         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `2/9/20`         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `2/10/20`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `2/11/20`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `2/12/20`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `2/13/20`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `2/14/20`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `2/15/20`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `2/16/20`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `2/17/20`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `2/18/20`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `2/19/20`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `2/20/20`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `2/21/20`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `2/22/20`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `2/23/20`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `2/24/20`        <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0...
-## $ `2/25/20`        <dbl> 1, 0, 1, 0, 0, 0, 0, 0, 0...
-## $ `2/26/20`        <dbl> 1, 0, 1, 0, 0, 0, 0, 0, 0...
-## $ `2/27/20`        <dbl> 1, 0, 1, 0, 0, 0, 0, 0, 0...
-## $ `2/28/20`        <dbl> 1, 0, 1, 0, 0, 0, 0, 0, 0...
-## $ `2/29/20`        <dbl> 1, 0, 1, 0, 0, 0, 0, 0, 0...
-## $ `3/1/20`         <dbl> 1, 0, 1, 0, 0, 0, 0, 1, 0...
-## $ `3/2/20`         <dbl> 1, 0, 3, 1, 0, 0, 0, 1, 0...
-## $ `3/3/20`         <dbl> 1, 0, 5, 1, 0, 0, 1, 1, 0...
-## $ `3/4/20`         <dbl> 1, 0, 12, 1, 0, 0, 1, 1, ...
-## $ `3/5/20`         <dbl> 1, 0, 12, 1, 0, 0, 1, 1, ...
-## $ `3/6/20`         <dbl> 1, 0, 17, 1, 0, 0, 2, 1, ...
-## $ `3/7/20`         <dbl> 1, 0, 17, 1, 0, 0, 8, 1, ...
-## $ `3/8/20`         <dbl> 4, 0, 19, 1, 0, 0, 12, 1,...
-## $ `3/9/20`         <dbl> 4, 2, 20, 1, 0, 0, 12, 1,...
-## $ `3/10/20`        <dbl> 5, 10, 20, 1, 0, 0, 17, 1...
-## $ `3/11/20`        <dbl> 7, 12, 20, 1, 0, 0, 19, 1...
-## $ `3/12/20`        <dbl> 7, 23, 24, 1, 0, 0, 19, 4...
-## $ `3/13/20`        <dbl> 7, 33, 26, 1, 0, 1, 31, 8...
-## $ `3/14/20`        <dbl> 11, 38, 37, 1, 0, 1, 34, ...
-## $ `3/15/20`        <dbl> 16, 42, 48, 1, 0, 1, 45, ...
-## $ `3/16/20`        <dbl> 21, 51, 54, 2, 0, 1, 56, ...
-## $ `3/17/20`        <dbl> 22, 55, 60, 39, 0, 1, 68,...
-## $ `3/18/20`        <dbl> 22, 59, 74, 39, 0, 1, 79,...
-## $ `3/19/20`        <dbl> 22, 64, 87, 53, 0, 1, 97,...
-## $ `3/20/20`        <dbl> 24, 70, 90, 75, 1, 1, 128...
-## $ `3/21/20`        <dbl> 24, 76, 139, 88, 2, 1, 15...
-## $ `3/22/20`        <dbl> 40, 89, 201, 113, 2, 1, 2...
-## $ `3/23/20`        <dbl> 40, 104, 230, 133, 3, 3, ...
-## $ `3/24/20`        <dbl> 74, 123, 264, 164, 3, 3, ...
-## $ `3/25/20`        <dbl> 84, 146, 302, 188, 3, 3, ...
-## $ `3/26/20`        <dbl> 94, 174, 367, 224, 4, 7, ...
-## $ `3/27/20`        <dbl> 110, 186, 409, 267, 4, 7,...
-## $ `3/28/20`        <dbl> 110, 197, 454, 308, 5, 7,...
-## $ `3/29/20`        <dbl> 120, 212, 511, 334, 7, 7,...
-## $ `3/30/20`        <dbl> 170, 223, 584, 370, 7, 7,...
-## $ `3/31/20`        <dbl> 174, 243, 716, 376, 7, 7,...
-
-
-

41.4 数据清洗规整

-
-

41.4.1 必要的预备知识之select()

-
d %>% select(-c(1:4))
-d %>% select(5:ncol(.))
-d %>% select(matches("/20"))
-d %>% select(ends_with("/20"))
-
-# 应该还有其他的方法
-
-
-

41.4.2 必要的预备知识之pivot_longer()

-

宽表格与长表格互相转换,需要用到pivot_longer()和pivot_wider(), 比如

-

-
table4a
-
## # A tibble: 3 x 3
-##   country     `1999` `2000`
-## * <chr>        <int>  <int>
-## 1 Afghanistan    745   2666
-## 2 Brazil       37737  80488
-## 3 China       212258 213766
-
longer <- table4a %>%
-  pivot_longer(
-    cols = `1999`:`2000`,
-    names_to = "year",
-    values_to = "cases"
-  )
-
-longer
-
## # A tibble: 6 x 3
-##   country     year   cases
-##   <chr>       <chr>  <int>
-## 1 Afghanistan 1999     745
-## 2 Afghanistan 2000    2666
-## 3 Brazil      1999   37737
-## 4 Brazil      2000   80488
-## 5 China       1999  212258
-## 6 China       2000  213766
-
-
-

41.4.3 必要的预备知识之pivot_wider()

-

有时候我们想折腾下,比如把长表格再变回宽表格

-
longer %>%
-  pivot_wider(
-    names_from = year,
-    values_from = cases
-  )
-
## # A tibble: 3 x 3
-##   country     `1999` `2000`
-##   <chr>        <int>  <int>
-## 1 Afghanistan    745   2666
-## 2 Brazil       37737  80488
-## 3 China       212258 213766
-
-
-

41.4.4 必要的预备知识之日期格式

-

有时候,我会遇到日期date这种数据类型,我推荐使用lubridate包来处理,比如

-
c("2020-3-25", "20200325", "20-03-25", "2020 03 25") %>% lubridate::ymd()
-
## [1] "2020-03-25" "2020-03-25" "2020-03-25" "2020-03-25"
-
c("3/25/20", "03-25-20", "3-25/2020") %>% lubridate::mdy()
-
## [1] "2020-03-25" "2020-03-25" "2020-03-25"
-

遇到这种010210日期的,请把输入数据的人扁一顿,他会告诉你的

-
lubridate::dmy(010210)
-lubridate::dym(010210)
-lubridate::mdy(010210)
-lubridate::myd(010210)
-lubridate::ymd(010210)
-lubridate::ydm(010210)
-
-
-

41.4.5 必要的预备知识之时间差

-
difftime(ymd("2020-03-24"),
-  ymd("2020-03-23"),
-  units = "days"
-)
-
## Time difference of 1 days
-

或者更直观的表述

-
ymd("2020-03-24") - ymd("2020-03-23")
-
## Time difference of 1 days
-

转换为天数

-
(ymd("2020-03-24") - ymd("2020-03-23")) %>% as.numeric()
-
## [1] 1
-
-
-

41.4.6 有时候需要log10_scale

-
tb <- tibble(
-  days_since_100 = 0:18,
-  cases = 100 * 1.33^days_since_100
-)
-
-
-p1 <- tb %>%
-  ggplot(aes(days_since_100, cases)) +
-  geom_line(size = 0.8) +
-  geom_point(pch = 21, size = 1)
-
-p2 <- tb %>%
-  ggplot(aes(days_since_100, log10(cases))) +
-  geom_line(size = 0.8) +
-  geom_point(pch = 21, size = 1)
-
-
-p3 <- tb %>%
-  ggplot(aes(days_since_100, cases)) +
-  geom_line(size = 0.8) +
-  geom_point(pch = 21, size = 1) +
-  scale_y_log10()
-
-library(patchwork)
-p1 + p2 + p3
-

-
-
-

41.4.7 数据清洗规整

-
d1 <- d %>%
-  pivot_longer(
-    cols = 5:ncol(.),
-    names_to = "date",
-    values_to = "cases"
-  ) %>%
-  mutate(date = lubridate::mdy(date)) %>%
-  janitor::clean_names() %>%
-  group_by(country_region, date) %>%
-  summarise(cases = sum(cases)) %>%
-  ungroup()
-
-d1
-
## # A tibble: 12,600 x 3
-##    country_region date       cases
-##    <chr>          <date>     <dbl>
-##  1 Afghanistan    2020-01-22     0
-##  2 Afghanistan    2020-01-23     0
-##  3 Afghanistan    2020-01-24     0
-##  4 Afghanistan    2020-01-25     0
-##  5 Afghanistan    2020-01-26     0
-##  6 Afghanistan    2020-01-27     0
-##  7 Afghanistan    2020-01-28     0
-##  8 Afghanistan    2020-01-29     0
-##  9 Afghanistan    2020-01-30     0
-## 10 Afghanistan    2020-01-31     0
-## # ... with 12,590 more rows
-
d1 %>%
-  group_by(date) %>%
-  summarise(confirmed = sum(cases))
-
## # A tibble: 70 x 2
-##    date       confirmed
-##    <date>         <dbl>
-##  1 2020-01-22       555
-##  2 2020-01-23       654
-##  3 2020-01-24       941
-##  4 2020-01-25      1434
-##  5 2020-01-26      2118
-##  6 2020-01-27      2927
-##  7 2020-01-28      5578
-##  8 2020-01-29      6166
-##  9 2020-01-30      8234
-## 10 2020-01-31      9927
-## # ... with 60 more rows
-

【WHO:2019冠状病毒全球大流行正在“加速”】世界卫生组织(WHO)昨日发出警告,指2019冠状病毒全球感染者已超过30万人,全球大流行正在“加速”。世卫组织指,从首例病例报告到感染者达到10万人用了67天;感染人数增至20万用了11天;从20万到突破30万则只用了4天。

-
d1 %>%
-  group_by(date) %>%
-  summarise(confirmed = sum(cases)) %>%
-  ggplot(aes(x = date, y = confirmed)) +
-  geom_point() +
-  scale_x_date(
-    date_labels = "%m-%d",
-    date_breaks = "1 week"
-  ) +
-  scale_y_continuous(
-    breaks = c(0, 50000, 100000, 200000, 300000, 500000, 900000),
-    labels = scales::comma
-  )
-

-
# d1 %>% distinct(country_region) %>% pull(country_region)
-d1 %>% distinct(country_region)
-
## # A tibble: 180 x 1
-##    country_region     
-##    <chr>              
-##  1 Afghanistan        
-##  2 Albania            
-##  3 Algeria            
-##  4 Andorra            
-##  5 Angola             
-##  6 Antigua and Barbuda
-##  7 Argentina          
-##  8 Armenia            
-##  9 Australia          
-## 10 Austria            
-## # ... with 170 more rows
-
d1 %>%
-  filter(country_region == "China")
-
## # A tibble: 70 x 3
-##    country_region date       cases
-##    <chr>          <date>     <dbl>
-##  1 China          2020-01-22   548
-##  2 China          2020-01-23   643
-##  3 China          2020-01-24   920
-##  4 China          2020-01-25  1406
-##  5 China          2020-01-26  2075
-##  6 China          2020-01-27  2877
-##  7 China          2020-01-28  5509
-##  8 China          2020-01-29  6087
-##  9 China          2020-01-30  8141
-## 10 China          2020-01-31  9802
-## # ... with 60 more rows
-
d1 %>%
-  filter(country_region == "China") %>%
-  ggplot(aes(x = date, y = cases)) +
-  geom_point() +
-  scale_x_date(date_breaks = "1 week", date_labels = "%m-%d") +
-  scale_y_log10(labels = scales::comma)
-

-
d1 %>%
-  group_by(country_region) %>%
-  filter(max(cases) >= 20000) %>%
-  ungroup() %>%
-  ggplot(aes(x = date, y = cases, color = country_region)) +
-  geom_point() +
-  scale_x_date(date_breaks = "1 week", date_labels = "%m-%d") +
-  scale_y_log10() +
-  facet_wrap(vars(country_region), ncol = 2) +
-  theme(
-    axis.text.x = element_text(angle = 45, hjust = 1)
-  ) +
-  theme(legend.position = "none")
-

-
-
-
-

41.5 可视化探索

-

网站https://www.ft.com/coronavirus-latest 这张图很受关注,于是打算重复

-
-图片来源www.ft.com -

-图 41.3: 图片来源www.ft.com -

-
-

这张图想表达的是,出现100个案例后,各国确诊人数的爆发趋势

-
    -
  • 横坐标是天数,即在出现100个案例后的第几天
  • -
  • 纵坐标是累积确诊人数
  • -
-

那么,我们需要对数据的时间轴做相应的变形

-
    -
  • 首先按照国家分组
  • -
  • 筛选,累积确诊人数超过100的国家
  • -
  • 找到所有cases >= 100的日期,即date[cases >= 100]
  • -
  • 最早的日期,就是我们要找的第 0 day,即min(date[cases >= 100])
  • -
  • 构建新的一列mutate( days_since_100 = date - min(date[cases >= 100]))
  • -
  • days_since_100转换成数值型as.numeric()
  • -
-
d2 <- d1 %>%
-  group_by(country_region) %>%
-  filter(max(cases) >= 100) %>%
-  mutate(
-    days_since_100 = date - min(date[cases >= 100])
-  ) %>%
-  mutate(days_since_100 = as.numeric(days_since_100)) %>%
-  filter(days_since_100 >= 0) %>%
-  ungroup()
-d2
-
## # A tibble: 1,710 x 4
-##    country_region date       cases days_since_100
-##    <chr>          <date>     <dbl>          <dbl>
-##  1 Afghanistan    2020-03-27   110              0
-##  2 Afghanistan    2020-03-28   110              1
-##  3 Afghanistan    2020-03-29   120              2
-##  4 Afghanistan    2020-03-30   170              3
-##  5 Afghanistan    2020-03-31   174              4
-##  6 Albania        2020-03-23   104              0
-##  7 Albania        2020-03-24   123              1
-##  8 Albania        2020-03-25   146              2
-##  9 Albania        2020-03-26   174              3
-## 10 Albania        2020-03-27   186              4
-## # ... with 1,700 more rows
-
-

-大家都谈过恋爱,也有可能失恋。大家失恋时间是不同的,若把失恋的当天作为第 0 day, 就可以比较失恋若干天后每个人精神波动情况。参照《失恋33天》 -

-
-
d2_most <- d2 %>%
-  group_by(country_region) %>%
-  top_n(1, days_since_100) %>%
-  filter(cases >= 10000) %>% 
-  ungroup() %>% 
-  arrange(desc(cases))
-d2_most
-
## # A tibble: 13 x 4
-##    country_region date        cases days_since_100
-##    <chr>          <date>      <dbl>          <dbl>
-##  1 US             2020-03-31 188172             28
-##  2 Italy          2020-03-31 105792             37
-##  3 Spain          2020-03-31  95923             29
-##  4 China          2020-03-31  82279             69
-##  5 Germany        2020-03-31  71808             30
-##  6 France         2020-03-31  52827             31
-##  7 Iran           2020-03-31  44605             34
-##  8 United Kingdom 2020-03-31  25481             26
-##  9 Switzerland    2020-03-31  16605             26
-## 10 Turkey         2020-03-31  13531             12
-## 11 Belgium        2020-03-31  12775             25
-## 12 Netherlands    2020-03-31  12667             25
-## 13 Austria        2020-03-31  10180             23
-
d2 %>%
-  bind_rows(
-    tibble(country = "33% daily rise", days_since_100 = 0:30) %>%
-      mutate(cases = 100 * 1.33^days_since_100)
-  ) %>%
-  
-  ggplot(aes(days_since_100, cases, color = country_region)) +
-  geom_hline(yintercept = 100) +
-  geom_vline(xintercept = 0) +
-  geom_line(size = 0.8) +
-  geom_point(pch = 21, size = 1) +
-#   scale_colour_manual(
-#    values = c(
-#     "US" = "#EB5E8D",
-#     "Italy" = "black", 
-#     "Spain" = "#c2b7af",
-#     "China" = "red",
-#     "Germany" = "#c2b7af",
-#     "France" = "#c2b7af",
-#     "Iran" = "#9dbf57",
-#     "United Kingdom" = "#ce3140",
-#     "Korea, South" = "#208fce",
-#     "Japan" = "#208fce",
-#     "Singapore" = "#1E8FCC",
-#      "33% daily rise" = "#D9CCC3",
-#     "Switzerland" = "#c2b7af",
-#     "Turkey" = "#208fce",
-#     "Belgium" = "#c2b7af",
-#     "Netherlands" = "#c2b7af",
-#     "Austria" = "#c2b7af",
-#     "Hong Kong" = "#1E8FCC",
-#     # gray
-#     "India" = "#c2b7af",
-#     "Switzerland" = "#c2b7af",
-#     "Belgium" = "#c2b7af",
-#     "Norway" = "#c2b7af",
-#      "Sweden" = "#c2b7af",
-#     "Austria" = "#c2b7af",
-#     "Australia" = "#c2b7af",
-#     "Denmark" = "#c2b7af",
-#     "Canada" = "#c2b7af",
-#     "Brazil" = "#c2b7af",
-#     "Portugal" = "#c2b7af"
-#   )
-# ) +
-  
-  geom_shadowtext(
-    data = d2_most, aes(label = paste0("  ", country_region)),
-    bg.color = "white"
-  ) +
-  scale_y_log10(
-    expand = expansion(mult = c(0, .1)),
-    breaks = c(100, 200, 500, 1000, 2000, 5000, 10000, 20000, 50000),
-    labels = scales::comma
-  ) +
-  scale_x_continuous(
-    expand = expansion(mult = c(0, .1)),
-    breaks = c(0, 5, 10, 15, 20, 25, 30)
-  ) +
-  theme_minimal() +
-  theme(
-    panel.grid.minor = element_blank(),
-    plot.background = element_rect(fill = "#FFF1E6"),
-    legend.position = "none",
-    panel.spacing = margin(3, 15, 3, 15, "mm")
-  ) +
-  labs(
-    x = "Number of days since 100th case",
-    y = "",
-    title = "Country by country: how coronavirus case trajectories compare",
-    subtitle = "Cumulative number of cases, by Number of days since 100th case",
-    caption = "data source from @www.ft.com"
-  )
-

-

有点乱,还有很多细节没有实现,后面再弄弄了

-
-

41.5.1 简便的方法

-
d2a <- d1 %>%
-  group_by(country_region) %>%
-  filter(cases >= 100) %>%
-  mutate(days_since_100 = 0:(n() - 1)) %>%
-  # same as
-  # mutate(edate = as.numeric(date - min(date)))
-  ungroup()
-d2a
-
## # A tibble: 1,710 x 4
-##    country_region date       cases days_since_100
-##    <chr>          <date>     <dbl>          <int>
-##  1 Afghanistan    2020-03-27   110              0
-##  2 Afghanistan    2020-03-28   110              1
-##  3 Afghanistan    2020-03-29   120              2
-##  4 Afghanistan    2020-03-30   170              3
-##  5 Afghanistan    2020-03-31   174              4
-##  6 Albania        2020-03-23   104              0
-##  7 Albania        2020-03-24   123              1
-##  8 Albania        2020-03-25   146              2
-##  9 Albania        2020-03-26   174              3
-## 10 Albania        2020-03-27   186              4
-## # ... with 1,700 more rows
-

这里的d2a和d2是一样的了,但方法简单很多。

-
-
-

41.5.2 疫情持续时间最久的国家

-
d3 <- d2a %>%
-  group_by(country_region) %>%
-  filter(days_since_100 == max(days_since_100)) %>%
-  # same as
-  # top_n(1, days_since_100) %>%
-  ungroup() %>%
-  arrange(desc(days_since_100))
-d3
-
## # A tibble: 110 x 4
-##    country_region   date        cases days_since_100
-##    <chr>            <date>      <dbl>          <int>
-##  1 China            2020-03-31  82279             69
-##  2 Diamond Princess 2020-03-31    712             50
-##  3 Korea, South     2020-03-31   9786             40
-##  4 Japan            2020-03-31   1953             39
-##  5 Italy            2020-03-31 105792             37
-##  6 Iran             2020-03-31  44605             34
-##  7 France           2020-03-31  52827             31
-##  8 Singapore        2020-03-31    926             31
-##  9 Germany          2020-03-31  71808             30
-## 10 Spain            2020-03-31  95923             29
-## # ... with 100 more rows
-
highlight <- d3 %>%
-  top_n(10, days_since_100) %>%
-  pull(country_region)
-highlight
-
##  [1] "China"            "Diamond Princess"
-##  [3] "Korea, South"     "Japan"           
-##  [5] "Italy"            "Iran"            
-##  [7] "France"           "Singapore"       
-##  [9] "Germany"          "Spain"
-
d2a %>%
-  bind_rows(
-    tibble(country = "33% daily rise", days_since_100 = 0:30) %>%
-      mutate(cases = 100 * 1.33^days_since_100)
-  ) %>%
-  ggplot(aes(days_since_100, cases, color = country_region)) +
-  geom_hline(yintercept = 100) +
-  geom_vline(xintercept = 0) +
-  geom_line(size = 0.8) +
-  geom_point(pch = 21, size = 1) +
-  scale_y_log10(
-    expand = expansion(mult = c(0, .1)),
-    breaks = c(100, 200, 500, 1000, 2000, 5000, 10000, 20000, 50000, 100000),
-    labels = scales::comma
-  ) +
-  scale_x_continuous(
-    expand = expansion(mult = c(0, .1)),
-    breaks = c(0, 5, 10, 15, 20, 25, 30, 40, 50, 60)
-  ) +
-  theme_minimal() +
-  theme(
-    panel.grid.minor = element_blank(),
-    plot.background = element_rect(fill = "#FFF1E6"),
-    legend.position = "none",
-    panel.spacing = margin(3, 15, 3, 15, "mm")
-  ) +
-  labs(
-    x = "Number of days since 100th case",
-    y = "",
-    title = "Country by country: how coronavirus case trajectories compare",
-    subtitle = "Cumulative number of cases, by Number of days since 100th case",
-    caption = "data source from @www.ft.com"
-  ) +
-  gghighlight::gghighlight(country_region %in% highlight,
-    label_key = country_region, use_direct_label = TRUE,
-    label_params = list(segment.color = NA, nudge_x = 1),
-    use_group_by = FALSE
-  )
-

-

灰色线条的国家名,有点不好弄,在想办法

-
-
-

41.5.3 笨办法吧

-

笨办法,实际上是4张表共同完成

-
highlight <- c(
-  "China", "Spain", "US", "United Kingdom", "Korea, South",
-  "Italy", "Japan", "Singapore", "Germany", "France", "Iran"
-)
-
-gray <- c(
-  "India", "Switzerland", "Belgium", "Netherlands",
-  "Sweden", "Austria", "Australia", "Denmark",
-  "Canada", "Brazil", "Portugal"
-)
-
-d3_highlight <- d2a %>% filter(country_region %in% highlight)
-
-d3_gray <- d2a %>% filter(country_region %in% gray)
-
d2a %>%
-  ggplot(aes(days_since_100, cases, group = country_region)) +
-  geom_hline(yintercept = 100) +
-  geom_vline(xintercept = 0) +
-  geom_line(size = 0.8, color = "gray70") +
-  geom_point(pch = 21, size = 1, color = "gray70") +
-
-  # highlight country
-  geom_line(data = d3_highlight, aes(color = country_region)) +
-  geom_point(data = d3_highlight, aes(color = country_region)) +
-  geom_text(
-    data = d3_highlight %>%
-      group_by(country_region) %>%
-      top_n(1, days_since_100) %>%
-      ungroup(),
-    aes(color = country_region, label = country_region),
-    hjust = 0,
-    vjust = 0,
-    nudge_x = 0.5
-  ) +
-
-
-  # gray country
-  geom_text(
-    data = d3_gray %>%
-      group_by(country_region) %>%
-      top_n(1, days_since_100) %>%
-      ungroup(),
-    aes(label = country_region),
-    color = "gray50",
-    hjust = 0,
-    vjust = 0,
-    nudge_x = 0.5
-  ) +
-  geom_point(
-    data = d3_gray %>%
-      group_by(country_region) %>%
-      top_n(1, days_since_100) %>%
-      ungroup(),
-    size = 2,
-    color = "gray50"
-  ) +
-  scale_y_log10(
-    expand = expansion(mult = c(0, .1)),
-    breaks = c(100, 200, 500, 2000, 5000, 10000, 20000, 50000, 100000, 150000),
-    labels = scales::comma
-  ) +
-  scale_x_continuous(
-    expand = expansion(mult = c(0, .1)),
-    breaks = c(0, 5, 10, 15, 20, 25, 30, 40, 50, 60)
-  ) +
-  theme_minimal() +
-  theme(
-    panel.grid.minor = element_blank(),
-    plot.background = element_rect(fill = "#FFF1E6"),
-    legend.position = "none",
-    panel.spacing = margin(3, 15, 3, 15, "mm")
-  ) +
-  labs(
-    x = "Number of days since 100th case",
-    y = "",
-    title = "Country by country: how coronavirus case trajectories compare",
-    subtitle = "Cumulative number of cases, by Number of days since 100th case",
-    caption = "data source from @www.ft.com"
-  )
-

-

差强人意,再想想有没有好的办法

-
-
-

41.5.4 比较tidy的方法

-

对数据框d2a增加两列属性(有无标签,有无颜色),然后手动改颜色

-
highlight_country <- d2a %>%
-  group_by(country_region) %>%
-  filter(days_since_100 == max(days_since_100)) %>%
-  ungroup() %>%
-  arrange(desc(days_since_100)) %>% 
-  top_n(10, days_since_100) %>%
-  pull(country_region)
-
-highlight_country
-
##  [1] "China"            "Diamond Princess"
-##  [3] "Korea, South"     "Japan"           
-##  [5] "Italy"            "Iran"            
-##  [7] "France"           "Singapore"       
-##  [9] "Germany"          "Spain"
-

吸取了Kieran Healy大神的配色方案

-
## Colors
-cgroup_cols <- c(prismatic::clr_darken(paletteer_d("ggsci::category20_d3"), 0.2)[1:length(highlight_country)], "gray70")
-scales::show_col(cgroup_cols)
-

-
d2a %>% 
-  group_by(country_region) %>% 
-  filter(max(days_since_100) > 9) %>%
-  mutate(
-    end_label = ifelse(days_since_100 == max(days_since_100), country_region, NA_character_)
-  ) %>% 
-  mutate(end_label = case_when(country_region %in% highlight_country ~ end_label,
-                               TRUE ~ NA_character_), 
-         cgroup = case_when(country_region %in% highlight_country ~ country_region, 
-                            TRUE ~ "ZZOTHER")) %>% # length(highlight_country) + gray
-
-  
-  ggplot(aes(x = days_since_100, y = cases, 
-         color = cgroup, label = end_label, 
-         group = country_region)) + 
-  geom_line(size = 0.8) + 
-  geom_text_repel(nudge_x = 1.1,
-                  nudge_y = 0.1, 
-                  segment.color = NA) + 
-  guides(color = FALSE) + 
-  scale_color_manual(values = cgroup_cols) +
-  scale_y_continuous(labels = scales::comma_format(accuracy = 1), 
-                     breaks = 10^seq(2, 8),
-                     trans = "log10"
-                     ) + 
-  labs(x = "Days Since 100 Confirmed Death", 
-       y = "Cumulative Number of Deaths (log10 scale)", 
-       title = "Cumulative Number of Reported Deaths from COVID-19, Selected Countries", 
-    subtitle = "Cumulative number of cases, by Number of days since 100th case",
-    caption = "data source from @www.ft.com") 
-

-

感觉这样是最好的方案。

-
-
-
-

41.6 每个国家的情况

-
d2 %>%
-  group_by(country_region) %>%
-  filter(max(cases) >= 1000) %>%
-  ungroup()
-
## # A tibble: 1,060 x 4
-##    country_region date       cases days_since_100
-##    <chr>          <date>     <dbl>          <dbl>
-##  1 Argentina      2020-03-20   128              0
-##  2 Argentina      2020-03-21   158              1
-##  3 Argentina      2020-03-22   266              2
-##  4 Argentina      2020-03-23   301              3
-##  5 Argentina      2020-03-24   387              4
-##  6 Argentina      2020-03-25   387              5
-##  7 Argentina      2020-03-26   502              6
-##  8 Argentina      2020-03-27   589              7
-##  9 Argentina      2020-03-28   690              8
-## 10 Argentina      2020-03-29   745              9
-## # ... with 1,050 more rows
-
d2 %>%
-  group_by(country_region) %>%
-  filter(max(cases) >= 1000) %>%
-  ungroup() %>%
-  ggplot(aes(days_since_100, cases)) +
-  geom_line(size = 0.8) +
-  geom_line(
-    data = d2 %>% rename(country = country_region),
-    aes(days_since_100, cases, group = country),
-    color = "grey"
-  ) +
-  geom_point(pch = 21, size = 1, color = "red") +
-  scale_y_log10(
-    expand = expansion(mult = c(0, .1)),
-    breaks = c(100, 1000, 10000, 50000)
-  ) +
-  scale_x_continuous(
-    expand = expansion(mult = c(0, 0)),
-    breaks = c(0, 5, 10, 20, 30, 50)
-  ) +
-  facet_wrap(vars(country_region), scales = "free_x") +
-  theme(
-    panel.background = element_rect(fill = "#FFF1E6"),
-    plot.background = element_rect(fill = "#FFF1E6")
-  ) +
-  labs(
-    x = "Number of days since 100th case",
-    y = "",
-    title = "Outbreak are now underway in dozens of other countries, with some on the same trajectory as Italy",
-    subtitle = "Cumulative number of cases, by Number of days since 100th case",
-    caption = "data source from @www.ft.com"
-  )
-

-
-
-

41.7 地图

-
library(countrycode)
-# countrycode('Albania', 'country.name', 'iso3c')
-
-d2_newest %>%
-  mutate(ISO3 = countrycode(country_region,
-    origin = "country.name", destination = "iso3c"
-  ))
-

我们选取最新的日期

-
d_newest <- d %>%
-  select(Long, Lat, last_col()) %>%
-  set_names("Long", "Lat", "newest_date")
-d_newest
-
## # A tibble: 256 x 3
-##      Long   Lat newest_date
-##     <dbl> <dbl>       <dbl>
-##  1  65     33           174
-##  2  20.2   41.2         243
-##  3   1.66  28.0         716
-##  4   1.52  42.5         376
-##  5  17.9  -11.2           7
-##  6 -61.8   17.1           7
-##  7 -63.6  -38.4        1054
-##  8  45.0   40.1         532
-##  9 149.   -35.5          80
-## 10 151.   -33.9        2032
-## # ... with 246 more rows
-
world <- map_data("world")
-
-
-ggplot() +
-  geom_polygon(
-    data = world,
-    aes(x = long, y = lat, group = group),
-    fill = "grey", alpha = 0.3
-  ) +
-  geom_point(
-    data = d_newest,
-    aes(x = Long, y = Lat, size = newest_date, color = newest_date),
-    stroke = F, alpha = 0.7
-  ) +
-  scale_size_continuous(
-    name = "Cases", trans = "log",
-    range = c(1, 7),
-    breaks = c(1, 20, 100, 1000, 50000),
-    labels = c("1-19", "20-99", "100-999", "1,000-49,999", "50,000+")
-  ) +
-  scale_color_viridis_c(
-    option = "inferno",
-    name = "Cases",
-    trans = "log",
-    breaks = c(1, 20, 100, 1000, 50000),
-    labels = c("1-19", "20-99", "100-999", "1,000-49,999", "50,000+")
-  ) +
-  theme_void() +
-  guides(colour = guide_legend()) +
-  labs(
-    title = "Mapping the coronavirus outbreak",
-    subtitle = "",
-    caption = "Source: JHU Unviersity, CSSE; FT research @www.FT.com"
-  ) +
-  theme(
-    legend.position = "bottom",
-    text = element_text(color = "#22211d"),
-    plot.background = element_rect(fill = "#ffffff", color = NA),
-    panel.background = element_rect(fill = "#ffffff", color = NA),
-    legend.background = element_rect(fill = "#ffffff", color = NA)
-  )
-

-
- -
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/eda-height.html b/_book/eda-height.html deleted file mode 100644 index bbb38cd..0000000 --- a/_book/eda-height.html +++ /dev/null @@ -1,1488 +0,0 @@ - - - - - - - 第 43 章 探索性数据分析-身高体重 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 43 章 探索性数据分析-身高体重

-
library(tidyverse)
-
-

43.1 案例分析

-

这是一份身高和体重的数据集

-
d <- read_csv("./demo_data/weight-height.csv")
-d
-
## # A tibble: 10,000 x 3
-##    Gender Height Weight
-##    <chr>   <dbl>  <dbl>
-##  1 Male     73.8   242.
-##  2 Male     68.8   162.
-##  3 Male     74.1   213.
-##  4 Male     71.7   220.
-##  5 Male     69.9   206.
-##  6 Male     67.3   152.
-##  7 Male     68.8   184.
-##  8 Male     68.3   168.
-##  9 Male     67.0   176.
-## 10 Male     63.5   156.
-## # ... with 9,990 more rows
-
d %>% summarise(
-  across(everything(), ~ sum(is.na(.)))
-)
-
## # A tibble: 1 x 3
-##   Gender Height Weight
-##    <int>  <int>  <int>
-## 1      0      0      0
-
-
-

43.2 可视化

-
-

43.2.1 画出不同性别的身高分布

-

常规答案

-
d %>%
-  ggplot(aes(x = Height, fill = Gender)) +
-  geom_density(alpha = 0.5)
-

-
d %>%
-  ggplot(aes(x = Height, fill = Gender)) +
-  geom_density(alpha = 0.5) +
-  facet_wrap(vars(Gender))
-

-
-
-
-

43.3 来点高级的

-

刚才我们看到了分面的操作,全局数据按照某个变量分组后,形成的若干个子集在不同的面板中分别展示出来。

-

这种方法很适合子集之间对比。事实上,我们看到每个子集的情况后,还很想知道全局的情况,以及子集在全局中的分布、状态或者位置。也就是说,想对比子集和全局的情况。

-

所以我们期望(子集之间对比,子集与全局对比)。

-

具体方法:用分面的方法高亮展示子集,同时在每个分面上添加全局(灰色背景)

-
    -
  • 第一步,先把子集用分面的方法,分别画出来
  • -
-
d %>%
-  ggplot(aes(x = Height)) +
-  geom_density() +
-  facet_wrap(vars(Gender))
-
    -
  • 第二步,添加整体的情况作为背景图层。因为第一步用到了分面,也就是说会分组,但我们希望整体的背景图层不受分面信息影响,或者叫背景图层不需要分组,而是显示全部。也就是说,要保证每个分面面板中的背景图都是一样的,因此,在这个geom_density()图层中,构建不受facet_wrap()影响的数据,即删掉data的分组列。
  • -
-
d %>%
-  ggplot(aes(x = Height)) +
-  geom_density(
-    data = d %>% select(-Gender)
-  ) +
-  geom_density() +
-  facet_wrap(vars(Gender))
-
    -
  • 第三步,y轴的调整,我们希望保持密度的形状,同时希望y轴不用比例值而是用具体的count个数,这样整体和局部能放在一个标度下。
  • -
-
d %>%
-  ggplot(aes(x = Height, y = after_stat(count))) +
-  geom_density(
-    data = d %>% select(-Gender)
-  ) +
-  geom_density() +
-  facet_wrap(vars(Gender))
- -

“Male”和“Female”是Gender中已经存在的分组。另外,我们在背景图层,新增了一个组“all people”,这样,整个图就有三个分组(三个color组),那么,我们可以在scale_fill_manual中统一设置和指定。

-
density_colors <- c(
-  "Male" = "#247BA0",
-  "Female" = "#F25F5C",
-  "all people" = "grey85"
-)
-
d %>%
-  ggplot(aes(x = Height, y = after_stat(count))) +
-  geom_density(
-    data = df %>% select(-Gender),
-    aes(fill = "all people", color = "all people")
-  ) +
-  geom_density(aes(color = Gender, fill = Gender)) +
-  facet_wrap(vars(Gender)) +
-  scale_fill_manual(name = NULL, values = density_colors) +
-  scale_color_manual(name = NULL, values = density_colors) +
-  theme_minimal() +
-  theme(legend.position = "bottom")
-
-

43.3.1 完整代码

-
density_colors <- c(
-  "Male" = "#247BA0",
-  "Female" = "#F25F5C",
-  "all people" = "grey80"
-)
-
-scales::show_col(density_colors)
-

-
d %>%
-  ggplot(aes(x = Height, y = after_stat(count))) +
-  geom_density(
-    data = d %>% dplyr::select(-Gender),
-    aes(fill = "all people", color = "all people")
-  ) +
-  geom_density(aes(color = Gender, fill = Gender)) +
-  facet_wrap(vars(Gender)) +
-  scale_fill_manual(name = NULL, values = density_colors) +
-  scale_color_manual(name = NULL, values = density_colors) +
-  theme_minimal() +
-  theme(legend.position = "bottom")
-

-

或者,用不同的主题风格

-
density_colors <- c(
-  "Male" = "#56B4E9",
-  "Female" = "#EF8A17",
-  "all participants" = "grey85"
-)
-
-d %>%
-  ggplot(aes(x = Height, y = after_stat(count))) +
-  geom_density(
-    data = function(x) dplyr::select(x, -Gender),
-    aes(fill = "all participants", color = "all participants")
-  ) +
-  geom_density(aes(fill = Gender, color = Gender)) +
-  facet_wrap(vars(Gender)) +
-  scale_color_manual(name = NULL, values = density_colors) +
-  scale_fill_manual(name = NULL, values = density_colors) +
-  cowplot::theme_minimal_hgrid(16) +
-  theme(legend.position = "bottom", legend.justification = "center")
-

-
-
-

43.3.2 画出不同性别的体重分布

-
d %>%
-  ggplot(aes(x = Weight, fill = Gender)) +
-  geom_density(alpha = 0.5)
-

-
-
-
-

43.4 建模

-
-

43.4.1 身高与体重的散点图

-
d %>%
-  ggplot(aes(x = Height, y = Weight, color = Gender)) +
-  geom_point()
-

-
-
-

43.4.2 建立身高与体重的线性模型

-
fit <- lm(Weight ~ 1 + Height, data = d)
-summary(fit)
-
## 
-## Call:
-## lm(formula = Weight ~ 1 + Height, data = d)
-## 
-## Residuals:
-##    Min     1Q Median     3Q    Max 
-## -51.93  -8.24  -0.12   8.26  46.84 
-## 
-## Coefficients:
-##              Estimate Std. Error t value Pr(>|t|)    
-## (Intercept) -350.7372     2.1115    -166   <2e-16 ***
-## Height         7.7173     0.0318     243   <2e-16 ***
-## ---
-## Signif. codes:  
-## 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-## 
-## Residual standard error: 12.2 on 9998 degrees of freedom
-## Multiple R-squared:  0.855,  Adjusted R-squared:  0.855 
-## F-statistic: 5.9e+04 on 1 and 9998 DF,  p-value: <2e-16
-
broom::tidy(fit)
-
## # A tibble: 2 x 5
-##   term        estimate std.error statistic p.value
-##   <chr>          <dbl>     <dbl>     <dbl>   <dbl>
-## 1 (Intercept)  -351.      2.11       -166.       0
-## 2 Height          7.72    0.0318      243.       0
-
-
-

43.4.3 建立不同性别下的身高与体重的线性模型

-
d %>%
-  group_by(Gender) %>%
-  group_modify(
-    ~ broom::tidy(lm(Weight ~ 1 + Height, data = .))
-  )
-
## # A tibble: 4 x 6
-## # Groups:   Gender [2]
-##   Gender term      estimate std.error statistic p.value
-##   <chr>  <chr>        <dbl>     <dbl>     <dbl>   <dbl>
-## 1 Female (Interce~  -246.      3.36       -73.3       0
-## 2 Female Height        5.99    0.0526     114.        0
-## 3 Male   (Interce~  -224.      3.41       -65.8       0
-## 4 Male   Height        5.96    0.0494     121.        0
-
d %>%
-  ggplot(aes(x = Height, y = Weight, group = Gender)) +
-  geom_point(aes(color = Gender)) +
-  geom_smooth(method = lm)
-

- -
-
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/eda-nobel.html b/_book/eda-nobel.html deleted file mode 100644 index d9242b3..0000000 --- a/_book/eda-nobel.html +++ /dev/null @@ -1,2127 +0,0 @@ - - - - - - - 第 39 章 探索性数据分析-诺奖获得者 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 39 章 探索性数据分析-诺奖获得者

-

探索性数据分析(exploratory data analysis)是各种知识的综合运用。本章通过一个案例,讲解探索性数据分析的基本思路,也算是对前面几章内容的一次总结复习。

-
-

39.1 探索性

-
    -
  • 数据准备(对数据要做到心中有数)

    -
      -
    • 描述变量
    • -
    • 数据结构
    • -
    • 缺失值及其处理
    • -
  • -
  • 数据探索(围绕探索的目标)

    -
      -
    • 数据规整
    • -
    • 可视化
    • -
    • 建模
    • -
  • -
-
-
-

39.2 数据集

-

这是一个诺贝尔奖获得者的数据集,

-

-
-
-

39.3 导入数据

-
library(tidyverse)
-library(lubridate)
-
df <- read_csv("./demo_data/nobel_winners.csv")
-df
-
## # A tibble: 969 x 18
-##    prize_year category prize motivation prize_share
-##         <dbl> <chr>    <chr> <chr>      <chr>      
-##  1       1901 Chemist~ The ~ "\"in rec~ 1/1        
-##  2       1901 Literat~ The ~ "\"in spe~ 1/1        
-##  3       1901 Medicine The ~ "\"for hi~ 1/1        
-##  4       1901 Peace    The ~  <NA>      1/2        
-##  5       1901 Peace    The ~  <NA>      1/2        
-##  6       1901 Physics  The ~ "\"in rec~ 1/1        
-##  7       1902 Chemist~ The ~ "\"in rec~ 1/1        
-##  8       1902 Literat~ The ~ "\"the gr~ 1/1        
-##  9       1902 Medicine The ~ "\"for hi~ 1/1        
-## 10       1902 Peace    The ~  <NA>      1/2        
-## # ... with 959 more rows, and 13 more variables:
-## #   laureate_id <dbl>, laureate_type <chr>,
-## #   full_name <chr>, birth_date <date>,
-## #   birth_city <chr>, birth_country <chr>,
-## #   gender <chr>, organization_name <chr>,
-## #   organization_city <chr>,
-## #   organization_country <chr>, death_date <date>,
-## #   death_city <chr>, death_country <chr>
-
# 如果是xlsx格式
-readxl::read_excel("myfile.xlsx")
-
-# 如果是csv格式
-readr::read_csv("myfile.csv")
-
-

-这里有个小小的提示: -

-
    -
  • -路径(包括文件名), 不要用中文和空格 -
  • -
  • -数据框中变量,也不要有中文和空格(可用下划线代替空格) -
  • -
-
-
-
-

39.4 数据结构

-

一行就是一个诺奖获得者的记录? 确定?

-

缺失值及其处理

-
df %>% map_df(~ sum(is.na(.)))
-
## # A tibble: 1 x 18
-##   prize_year category prize motivation prize_share
-##        <int>    <int> <int>      <int>       <int>
-## 1          0        0     0         88           0
-## # ... with 13 more variables: laureate_id <int>,
-## #   laureate_type <int>, full_name <int>,
-## #   birth_date <int>, birth_city <int>,
-## #   birth_country <int>, gender <int>,
-## #   organization_name <int>, organization_city <int>,
-## #   organization_country <int>, death_date <int>,
-## #   death_city <int>, death_country <int>
-

性别缺失怎么造成的?

-
df %>% count(laureate_type)
-
## # A tibble: 2 x 2
-##   laureate_type     n
-##   <chr>         <int>
-## 1 Individual      939
-## 2 Organization     30
-
-
-

39.5 我们想探索哪些问题?

-

你想关心哪些问题,可能是

-
    -
  • 每个学科颁过多少次奖?
  • -
  • 这些大神都是哪个年代的人?
  • -
  • 性别比例
  • -
  • 平均年龄和获奖数量
  • -
  • 最年轻的诺奖获得者是谁?
  • -
  • 中国诺奖获得者有哪些?
  • -
  • 得奖的时候多大年龄?
  • -
  • 获奖者所在国家的经济情况?
  • -
  • 有大神多次获得诺贝尔奖,而且在不同科学领域获奖?
  • -
  • 出生地分布?工作地分布?迁移模式?
  • -
  • GDP经济与诺奖模型?
  • -
  • 诺奖分享情况?
  • -
-
-
-

39.6 每个学科颁过多少次奖

-
df %>% count(category)
-
## # A tibble: 6 x 2
-##   category       n
-##   <chr>      <int>
-## 1 Chemistry    194
-## 2 Economics     83
-## 3 Literature   113
-## 4 Medicine     227
-## 5 Peace        130
-## 6 Physics      222
-
df %>%
-  count(category) %>%
-  ggplot(aes(x = category, y = n, fill = category)) +
-  geom_col() +
-  geom_text(aes(label = n), vjust = -0.25) +
-  labs(title = "不同学科诺贝奖获奖次数对比", x = "学科", y = "数量") +
-  theme(legend.position = "none")
-

-
df %>%
-  count(category) %>%
-  ggplot(aes(x = fct_reorder(category, n), y = n, fill = category)) +
-  geom_col() +
-  geom_text(aes(label = n), vjust = -0.25) +
-  labs(title = "不同学科诺贝奖获奖次数对比", x = "学科", y = "数量") +
-  theme(legend.position = "none")
-

-

也可以使用别人定义好的配色方案

-
library(ggthemr) # install.packages("devtools")
-# devtools::install_github('cttobin/ggthemr')
-ggthemr("dust")
-
-df %>%
-  count(category) %>%
-  ggplot(aes(x = fct_reorder(category, n), y = n, fill = category)) +
-  geom_col() +
-  labs(title = "不同学科诺贝奖获奖次数对比", x = "学科", y = "数量") +
-  theme(legend.position = "none")
-

-

这个配色方案感觉挺好看的呢,比较适合我这种又挑剔又懒惰的人。

-

当然,也可以自己DIY,或者使用配色网站的主题方案(https://learnui.design/tools/data-color-picker.html#palette)

-
df %>%
-  count(category) %>%
-  ggplot(aes(x = fct_reorder(category, n), y = n)) +
-  geom_col(fill = c("#003f5c", "#444e86", "#955196", "#dd5182", "#ff6e54", "#ffa600")) +
-  labs(title = "不同学科诺贝奖获奖次数对比", x = "学科", y = "数量") +
-  theme(legend.position = "none")
-

-

让图骚动起来吧

-
library(gganimate) # install.packages("gganimate", dependencies = T)
-
-df %>%
-  count(category) %>%
-  mutate(category = fct_reorder(category, n)) %>%
-  ggplot(aes(x = category, y = n)) +
-  geom_text(aes(label = n), vjust = -0.25) +
-  geom_col(fill = c("#003f5c", "#444e86", "#955196", "#dd5182", "#ff6e54", "#ffa600")) +
-  labs(title = "不同学科诺贝奖获奖次数对比", x = "学科", y = "数量") +
-  theme(legend.position = "none") +
-  transition_states(category) +
-  shadow_mark(past = TRUE)
-

和ggplot2的分面一样,动态图可以增加数据展示的维度。

-
-
-

39.7 看看我们伟大的祖国

-
df %>%
-  dplyr::filter(birth_country == "China") %>%
-  dplyr::select(full_name, prize_year, category)
-
## # A tibble: 12 x 3
-##    full_name              prize_year category  
-##    <chr>                       <dbl> <chr>     
-##  1 Walter Houser Brattain       1956 Physics   
-##  2 Chen Ning Yang               1957 Physics   
-##  3 Tsung-Dao (T.D.) Lee         1957 Physics   
-##  4 Edmond H. Fischer            1992 Medicine  
-##  5 Daniel C. Tsui               1998 Physics   
-##  6 Gao Xingjian                 2000 Literature
-##  7 Charles Kuen Kao             2009 Physics   
-##  8 Charles Kuen Kao             2009 Physics   
-##  9 Ei-ichi Negishi              2010 Chemistry 
-## 10 Liu Xiaobo                   2010 Peace     
-## 11 Mo Yan                       2012 Literature
-## 12 Youyou Tu                    2015 Medicine
-

我们发现获奖者有多个地址,就会有重复的情况,比如 Charles Kuen Kao在2009年Physics有两次,为什么重复计数了呢?

-

下面我们去重吧, 去重可以用distinct()函数

-
dt <- tibble::tribble(
-  ~x, ~y, ~z,
-  1, 1, "a",
-  1, 1, "b",
-  1, 2, "c",
-  1, 2, "d"
-)
-
-dt
-
## # A tibble: 4 x 3
-##       x     y z    
-##   <dbl> <dbl> <chr>
-## 1     1     1 a    
-## 2     1     1 b    
-## 3     1     2 c    
-## 4     1     2 d
-
dt %>% distinct_at(vars(x), .keep_all = T)
-
## # A tibble: 1 x 3
-##       x     y z    
-##   <dbl> <dbl> <chr>
-## 1     1     1 a
-
dt %>% distinct_at(vars(x, y), .keep_all = T)
-
## # A tibble: 2 x 3
-##       x     y z    
-##   <dbl> <dbl> <chr>
-## 1     1     1 a    
-## 2     1     2 c
-
nobel_winners <- df %>%
-  mutate_if(is.character, tolower) %>%
-  distinct_at(vars(full_name, prize_year, category), .keep_all = TRUE) %>%
-  mutate(
-    decade = 10 * (prize_year %/% 10),
-    prize_age = prize_year - year(birth_date)
-  )
-
-nobel_winners
-
## # A tibble: 911 x 20
-##    prize_year category prize motivation prize_share
-##         <dbl> <chr>    <chr> <chr>      <chr>      
-##  1       1901 chemist~ the ~ "\"in rec~ 1/1        
-##  2       1901 literat~ the ~ "\"in spe~ 1/1        
-##  3       1901 medicine the ~ "\"for hi~ 1/1        
-##  4       1901 peace    the ~  <NA>      1/2        
-##  5       1901 peace    the ~  <NA>      1/2        
-##  6       1901 physics  the ~ "\"in rec~ 1/1        
-##  7       1902 chemist~ the ~ "\"in rec~ 1/1        
-##  8       1902 literat~ the ~ "\"the gr~ 1/1        
-##  9       1902 medicine the ~ "\"for hi~ 1/1        
-## 10       1902 peace    the ~  <NA>      1/2        
-## # ... with 901 more rows, and 15 more variables:
-## #   laureate_id <dbl>, laureate_type <chr>,
-## #   full_name <chr>, birth_date <date>,
-## #   birth_city <chr>, birth_country <chr>,
-## #   gender <chr>, organization_name <chr>,
-## #   organization_city <chr>,
-## #   organization_country <chr>, death_date <date>,
-## #   death_city <chr>, death_country <chr>,
-## #   decade <dbl>, prize_age <dbl>
-
-

-这时候,我们才对数据有了一个初步的了解 -

-
-

再来看看我的祖国

-
nobel_winners %>%
-  dplyr::filter(birth_country == "china") %>%
-  dplyr::select(full_name, prize_year, category)
-
## # A tibble: 11 x 3
-##    full_name              prize_year category  
-##    <chr>                       <dbl> <chr>     
-##  1 walter houser brattain       1956 physics   
-##  2 chen ning yang               1957 physics   
-##  3 tsung-dao (t.d.) lee         1957 physics   
-##  4 edmond h. fischer            1992 medicine  
-##  5 daniel c. tsui               1998 physics   
-##  6 gao xingjian                 2000 literature
-##  7 charles kuen kao             2009 physics   
-##  8 ei-ichi negishi              2010 chemistry 
-##  9 liu xiaobo                   2010 peace     
-## 10 mo yan                       2012 literature
-## 11 youyou tu                    2015 medicine
-
-
-

39.8 哪些大神多次获得诺贝尔奖

-
nobel_winners %>% count(full_name, sort = T)
-
## # A tibble: 904 x 2
-##    full_name                                          n
-##    <chr>                                          <int>
-##  1 "comité international de la croix rouge (inte~     3
-##  2 "frederick sanger"                                 2
-##  3 "john bardeen"                                     2
-##  4 "linus carl pauling"                               2
-##  5 "marie curie, née sklodowska"                      2
-##  6 "office of the united nations high commission~     2
-##  7 " lie ducommun"                                    1
-##  8 "a. michael spence"                                1
-##  9 "aage niels bohr"                                  1
-## 10 "aaron ciechanover"                                1
-## # ... with 894 more rows
-
nobel_winners %>%
-  group_by(full_name) %>%
-  mutate(
-    number_prize = n(),
-    number_cateory = n_distinct(category)
-  ) %>%
-  arrange(desc(number_prize), full_name) %>%
-  dplyr::filter(number_cateory == 2)
-
## # A tibble: 4 x 22
-## # Groups:   full_name [2]
-##   prize_year category prize motivation prize_share
-##        <dbl> <chr>    <chr> <chr>      <chr>      
-## 1       1954 chemist~ the ~ "\"for hi~ 1/1        
-## 2       1962 peace    the ~  <NA>      1/1        
-## 3       1903 physics  the ~ "\"in rec~ 1/4        
-## 4       1911 chemist~ the ~ "\"in rec~ 1/1        
-## # ... with 17 more variables: laureate_id <dbl>,
-## #   laureate_type <chr>, full_name <chr>,
-## #   birth_date <date>, birth_city <chr>,
-## #   birth_country <chr>, gender <chr>,
-## #   organization_name <chr>, organization_city <chr>,
-## #   organization_country <chr>, death_date <date>,
-## #   death_city <chr>, death_country <chr>,
-## #   decade <dbl>, prize_age <dbl>, number_prize <int>,
-## #   number_cateory <int>
-
-
-

39.9 大神在得奖的时候是多大年龄?

-
nobel_winners %>%
-  count(prize_age) %>%
-  ggplot(aes(x = prize_age, y = n)) +
-  geom_col()
-

-
nobel_winners %>%
-  group_by(category) %>%
-  summarise(mean_prize_age = mean(prize_age, na.rm = T))
-
## # A tibble: 6 x 2
-##   category   mean_prize_age
-##   <chr>               <dbl>
-## 1 chemistry            58.0
-## 2 economics            67.2
-## 3 literature           64.7
-## 4 medicine             58.0
-## 5 peace                61.4
-## 6 physics              55.4
-
nobel_winners %>%
-  mutate(category = fct_reorder(category, prize_age, median, na.rm = TRUE)) %>%
-  ggplot(aes(category, prize_age)) +
-  geom_point() +
-  geom_boxplot() +
-  coord_flip()
-

-
nobel_winners %>%
-  dplyr::filter(!is.na(prize_age)) %>%
-  group_by(decade, category) %>%
-  summarize(
-    average_age = mean(prize_age),
-    median_age = median(prize_age)
-  ) %>%
-  ggplot(aes(decade, average_age, color = category)) +
-  geom_line()
-

-
library(ggridges)
-
-nobel_winners %>%
-  ggplot(aes(
-    x = prize_age,
-    y = category,
-    fill = category
-  )) +
-  geom_density_ridges()
-

-

他们60多岁才得诺奖,大家才23或24岁,还年轻,不用焦虑喔。

-
nobel_winners %>%
-
-  ggplot(aes(x = prize_age, fill = category, color = category)) +
-  geom_density() +
-  facet_wrap(vars(category)) +
-  theme(legend.position = "none")
-

-

有同学说要一个个的画,至于group_split()函数,下次课再讲

-
nobel_winners %>%
-  group_split(category) %>%
-  map(
-    ~ ggplot(data = .x, aes(x = prize_age)) +
-      geom_density() +
-      ggtitle(.x$category)
-  )
-
## [[1]]
-

-
## 
-## [[2]]
-

-
## 
-## [[3]]
-

-
## 
-## [[4]]
-

-
## 
-## [[5]]
-

-
## 
-## [[6]]
-

-

也可以用强大的group_by() + group_map()组合,我们会在第 21 章讲到

-
nobel_winners %>%
-  group_by(category) %>%
-  group_map(
-    ~ ggplot(data = .x, aes(x = prize_age)) +
-      geom_density() +
-      ggtitle(.y)
-  )
-
-
-

39.10 性别比例

-
nobel_winners %>%
-  dplyr::filter(laureate_type == "individual") %>%
-  count(category, gender) %>%
-  group_by(category) %>%
-  mutate(prop = n / sum(n))
-
## # A tibble: 12 x 4
-## # Groups:   category [6]
-##    category   gender     n    prop
-##    <chr>      <chr>  <int>   <dbl>
-##  1 chemistry  female     4 0.0229 
-##  2 chemistry  male     171 0.977  
-##  3 economics  female     1 0.0128 
-##  4 economics  male      77 0.987  
-##  5 literature female    14 0.124  
-##  6 literature male      99 0.876  
-##  7 medicine   female    12 0.0569 
-##  8 medicine   male     199 0.943  
-##  9 peace      female    14 0.14   
-## 10 peace      male      86 0.86   
-## 11 physics    female     2 0.00980
-## 12 physics    male     202 0.990
-

各年代性别比例

-
nobel_winners %>%
-  dplyr::filter(laureate_type == "individual") %>%
-  # mutate(decade = glue::glue("{round(prize_year - 1, -1)}s")) %>%
-  count(decade, category, gender) %>%
-  group_by(decade, category) %>%
-  mutate(prop = n / sum(n)) %>%
-  ggplot(aes(decade, category, fill = prop)) +
-  geom_tile(size = 0.7) +
-  # geom_text(aes(label = scales::percent(prop, accuracy = .01))) +
-  geom_text(aes(label = scales::number(prop, accuracy = .01))) +
-  facet_grid(vars(gender)) +
-  scale_fill_gradient(low = "#FDF4E9", high = "#834C0D")
-

-
library(ggbeeswarm) # install.packages("ggbeeswarm")
-
-nobel_winners %>%
-  ggplot(aes(
-    x = category,
-    y = prize_age,
-    colour = gender,
-    alpha = gender
-  )) +
-  ggbeeswarm::geom_beeswarm() +
-  coord_flip() +
-  scale_color_manual(values = c("#BB1288", "#5867A6")) +
-  scale_alpha_manual(values = c(1, .4)) +
-  theme_minimal() +
-  theme(legend.position = "top") +
-  labs(
-    title = "诺奖获得者性别不平衡",
-    subtitle = "1901年-2016年数据",
-    colour = "Gender",
-    alpha = "Gender",
-    x = "学科",
-    y = "获奖年龄"
-  )
-

-
nobel_winners %>%
-  count(decade,
-    category,
-    gender = coalesce(gender, laureate_type)
-  ) %>%
-  group_by(decade, category) %>%
-  mutate(percent = n / sum(n)) %>%
-  ggplot(aes(decade, n, fill = gender)) +
-  geom_col() +
-  facet_wrap(~category) +
-  labs(
-    x = "Decade",
-    y = "# of nobel prize winners",
-    fill = "Gender",
-    title = "Nobel Prize gender distribution over time"
-  )
-

-
-
-

39.11 这些大神都是哪个年代出生的人?

-
nobel_winners %>%
-  select(category, birth_date) %>%
-  mutate(year = floor(year(birth_date) / 10) * 10) %>%
-  count(category, year) %>%
-  dplyr::filter(!is.na(year)) %>%
-  ggplot(aes(x = year, y = n)) +
-  geom_col() +
-  scale_x_continuous(breaks = seq(1810, 1990, 20)) +
-  geom_text(aes(label = n), vjust = -0.25) +
-  facet_wrap(vars(category))
-

-

课堂练习,哪位同学能把图弄得好看些?

-
-
-

39.12 最年轻的诺奖获得者?

-
nobel_winners %>%
-  dplyr::filter(prize_age == min(prize_age, na.rm = T))
-
## # A tibble: 1 x 20
-##   prize_year category prize motivation prize_share
-##        <dbl> <chr>    <chr> <chr>      <chr>      
-## 1       2014 peace    the ~ "\"for th~ 1/2        
-## # ... with 15 more variables: laureate_id <dbl>,
-## #   laureate_type <chr>, full_name <chr>,
-## #   birth_date <date>, birth_city <chr>,
-## #   birth_country <chr>, gender <chr>,
-## #   organization_name <chr>, organization_city <chr>,
-## #   organization_country <chr>, death_date <date>,
-## #   death_city <chr>, death_country <chr>,
-## #   decade <dbl>, prize_age <dbl>
-
nobel_winners %>%
-  dplyr::filter(
-    rank(prize_year - year(birth_date)) == 1
-  )
-
## # A tibble: 1 x 20
-##   prize_year category prize motivation prize_share
-##        <dbl> <chr>    <chr> <chr>      <chr>      
-## 1       2014 peace    the ~ "\"for th~ 1/2        
-## # ... with 15 more variables: laureate_id <dbl>,
-## #   laureate_type <chr>, full_name <chr>,
-## #   birth_date <date>, birth_city <chr>,
-## #   birth_country <chr>, gender <chr>,
-## #   organization_name <chr>, organization_city <chr>,
-## #   organization_country <chr>, death_date <date>,
-## #   death_city <chr>, death_country <chr>,
-## #   decade <dbl>, prize_age <dbl>
-
nobel_winners %>%
-  arrange(
-    prize_year - year(birth_date)
-  )
-
## # A tibble: 911 x 20
-##    prize_year category prize motivation prize_share
-##         <dbl> <chr>    <chr> <chr>      <chr>      
-##  1       2014 peace    the ~ "\"for th~ 1/2        
-##  2       1915 physics  the ~ "\"for th~ 1/2        
-##  3       1932 physics  the ~ "\"for th~ 1/1        
-##  4       1933 physics  the ~ "\"for th~ 1/2        
-##  5       1936 physics  the ~ "\"for hi~ 1/2        
-##  6       1957 physics  the ~ "\"for th~ 1/2        
-##  7       1923 medicine the ~ "\"for th~ 1/2        
-##  8       1961 physics  the ~ "\"for hi~ 1/2        
-##  9       1976 peace    the ~  <NA>      1/2        
-## 10       2011 peace    the ~ "\"for th~ 1/3        
-## # ... with 901 more rows, and 15 more variables:
-## #   laureate_id <dbl>, laureate_type <chr>,
-## #   full_name <chr>, birth_date <date>,
-## #   birth_city <chr>, birth_country <chr>,
-## #   gender <chr>, organization_name <chr>,
-## #   organization_city <chr>,
-## #   organization_country <chr>, death_date <date>,
-## #   death_city <chr>, death_country <chr>,
-## #   decade <dbl>, prize_age <dbl>
-
nobel_winners %>%
-  top_n(1, year(birth_date) - prize_year)
-
## # A tibble: 1 x 20
-##   prize_year category prize motivation prize_share
-##        <dbl> <chr>    <chr> <chr>      <chr>      
-## 1       2014 peace    the ~ "\"for th~ 1/2        
-## # ... with 15 more variables: laureate_id <dbl>,
-## #   laureate_type <chr>, full_name <chr>,
-## #   birth_date <date>, birth_city <chr>,
-## #   birth_country <chr>, gender <chr>,
-## #   organization_name <chr>, organization_city <chr>,
-## #   organization_country <chr>, death_date <date>,
-## #   death_city <chr>, death_country <chr>,
-## #   decade <dbl>, prize_age <dbl>
-
-
-

39.13 平均年龄和获奖数量

-
df1 <- nobel_winners %>%
-  group_by(category) %>%
-  summarise(
-    mean_prise_age = mean(prize_age, na.rm = T),
-    total_num = n()
-  )
-df1
-
## # A tibble: 6 x 3
-##   category   mean_prise_age total_num
-##   <chr>               <dbl>     <int>
-## 1 chemistry            58.0       175
-## 2 economics            67.2        78
-## 3 literature           64.7       113
-## 4 medicine             58.0       211
-## 5 peace                61.4       130
-## 6 physics              55.4       204
-
df1 %>%
-  ggplot(aes(mean_prise_age, total_num)) +
-  geom_point(aes(color = category)) +
-  geom_smooth(method = lm, se = FALSE)
-

-
-
-

39.14 出生地与工作地分布

-
nobel_winners_clean <- nobel_winners %>%
-  mutate_at(
-    vars(birth_country, death_country),
-    ~ ifelse(str_detect(., "\\("), str_extract(., "(?<=\\().*?(?=\\))"), .)
-  ) %>%
-  mutate_at(
-    vars(birth_country, death_country),
-    ~ case_when(
-      . == "scotland" ~ "united kingdom",
-      . == "northern ireland" ~ "united kingdom",
-      str_detect(., "czech") ~ "czechia",
-      str_detect(., "germany") ~ "germany",
-      TRUE ~ .
-    )
-  ) %>%
-  select(full_name, prize_year, category, birth_date, birth_country, gender, organization_name, organization_country, death_country)
-
nobel_winners_clean %>% count(death_country, sort = TRUE)
-
## # A tibble: 45 x 2
-##    death_country                n
-##    <chr>                    <int>
-##  1 <NA>                       329
-##  2 united states of america   203
-##  3 united kingdom              79
-##  4 germany                     56
-##  5 france                      51
-##  6 sweden                      28
-##  7 switzerland                 26
-##  8 italy                       14
-##  9 russia                      11
-## 10 spain                       10
-## # ... with 35 more rows
-
-
-

39.15 迁移模式

-
nobel_winners_clean %>%
-  mutate(
-    colour = case_when(
-      death_country == "united states of america" ~ "#FF2B4F",
-      death_country == "germany" ~ "#fcab27",
-      death_country == "united kingdom" ~ "#3686d3",
-      death_country == "france" ~ "#88398a",
-      death_country == "switzerland" ~ "#20d4bc",
-      TRUE ~ "gray60"
-    )
-  ) %>%
-  ggplot(aes(
-    x = 0,
-    y = fct_rev(factor(birth_country)),
-    xend = death_country,
-    yend = 1,
-    colour = colour,
-    alpha = (colour != "gray60")
-  )) +
-  geom_curve(
-    curvature = -0.5,
-    arrow = arrow(length = unit(0.01, "npc"))
-  ) +
-  scale_x_discrete() +
-  scale_y_discrete() +
-  scale_color_identity() +
-  scale_alpha_manual(values = c(0.1, 0.2), guide = F) +
-  scale_size_manual(values = c(0.1, 0.4), guide = F) +
-  theme_minimal() +
-  theme(
-    panel.grid = element_blank(),
-    plot.background = element_rect(fill = "#F0EFF1", colour = "#F0EFF1"),
-    legend.position = "none",
-    axis.text.x = element_text(angle = 40, hjust = 1)
-  )
-

-
-
-

39.16 地图

-
library(here)
-library(sf)
-library(countrycode)
-
-# countrycode('Albania', 'country.name', 'iso3c')
-
-nobel_winners_birth_country <- nobel_winners_clean %>%
-  count(birth_country) %>%
-  filter(!is.na(birth_country)) %>%
-  mutate(ISO3 = countrycode(birth_country,
-    origin = "country.name", destination = "iso3c"
-  ))
-
-
-global <-
-  sf::st_read("./demo_data/worldmap/TM_WORLD_BORDERS_SIMPL-0.3.shp") %>%
-  st_transform(4326)
-
## Reading layer `TM_WORLD_BORDERS_SIMPL-0.3' from data source `G:\R_for_Data_Science\demo_data\worldmap\TM_WORLD_BORDERS_SIMPL-0.3.shp' using driver `ESRI Shapefile'
-## Simple feature collection with 246 features and 11 fields
-## geometry type:  MULTIPOLYGON
-## dimension:      XY
-## bbox:           xmin: -180 ymin: -90 xmax: 180 ymax: 83.57
-## geographic CRS: WGS 84
-
global %>%
-  full_join(nobel_winners_birth_country, by = "ISO3") %>%
-  ggplot() +
-  geom_sf(aes(fill = n),
-    color = "white",
-    size = 0.1
-  ) +
-  labs(
-    x = NULL, y = NULL,
-    title = "Nobel Winners by country",
-    subtitle = "color of map indicates number of Nobel lauretes",
-    fill = "num of Nobel lauretes",
-    caption = "Made: wang_minjie"
-  ) +
-  scale_fill_gradientn(colors = c("royalblue1", "magenta", "orange", "gold"), na.value = "white") +
-  # scale_fill_gradient(low = "wheat1", high = "red") +
-  theme_void() +
-  theme(
-    legend.position = c(0.1, 0.3),
-    plot.background = element_rect(fill = "gray")
-  )
-

-
# Determine the top 8 countries
-topCountries <- nobel_winners_clean %>%
-  count(birth_country, sort = TRUE) %>%
-  na.omit() %>%
-  top_n(8)
-
-topCountries
-
## # A tibble: 8 x 2
-##   birth_country                n
-##   <chr>                    <int>
-## 1 united states of america   259
-## 2 united kingdom              99
-## 3 germany                     80
-## 4 france                      54
-## 5 sweden                      29
-## 6 poland                      26
-## 7 russia                      26
-## 8 japan                       24
-
df4 <- nobel_winners_clean %>%
-  filter(birth_country %in% topCountries$birth_country) %>%
-  group_by(birth_country, category, prize_year) %>%
-  summarise(prizes = n()) %>%
-  mutate(cumPrizes = cumsum(prizes))
-
-df4
-
## # A tibble: 489 x 5
-## # Groups:   birth_country, category [47]
-##    birth_country category  prize_year prizes cumPrizes
-##    <chr>         <chr>          <dbl>  <int>     <int>
-##  1 france        chemistry       1906      1         1
-##  2 france        chemistry       1912      2         3
-##  3 france        chemistry       1913      1         4
-##  4 france        chemistry       1935      2         6
-##  5 france        chemistry       1970      1         7
-##  6 france        chemistry       1987      1         8
-##  7 france        chemistry       2016      1         9
-##  8 france        economics       1983      1         1
-##  9 france        economics       1988      1         2
-## 10 france        economics       2014      1         3
-## # ... with 479 more rows
-
library(gganimate)
-df4 %>%
-  mutate(prize_year = as.integer(prize_year)) %>%
-  ggplot(aes(x = birth_country, y = category, color = birth_country)) +
-  geom_point(aes(size = cumPrizes), alpha = 0.6) +
-  # geom_text(aes(label = cumPrizes)) +
-  scale_size_continuous(range = c(2, 30)) +
-  transition_reveal(prize_year) +
-  labs(
-    title = "诺奖获得者最多的10个国家",
-    subtitle = "Year: {frame_along}",
-    y = "Category"
-  ) +
-  theme_minimal() +
-  theme(
-    plot.title = element_text(size = 22),
-    axis.title = element_blank()
-  ) +
-  scale_color_brewer(palette = "RdYlBu") +
-  theme(legend.position = "none") +
-  theme(plot.margin = margin(5.5, 5.5, 5.5, 5.5))
-

-
-
-

39.17 出生地和去世地不一样的占比

-
nobel_winners_clean %>%
-  select(category, birth_country, death_country) %>%
-  mutate(immigration = if_else(birth_country == death_country, 0, 1))
-
## # A tibble: 911 x 4
-##    category   birth_country death_country  immigration
-##    <chr>      <chr>         <chr>                <dbl>
-##  1 chemistry  netherlands   germany                  1
-##  2 literature france        france                   0
-##  3 medicine   poland        germany                  1
-##  4 peace      switzerland   switzerland              0
-##  5 peace      france        france                   0
-##  6 physics    germany       germany                  0
-##  7 chemistry  germany       germany                  0
-##  8 literature germany       germany                  0
-##  9 medicine   india         united kingdom           1
-## 10 peace      switzerland   switzerland              0
-## # ... with 901 more rows
-
-
-

39.18 诺奖分享者

- -
nobel_winners %>%
-  separate(prize_share, into = c("num", "deno"), sep = "/", remove = FALSE)
-
nobel_winners %>%
-  filter(category == "medicine") %>%
-  mutate(
-    num_a = as.numeric(str_sub(prize_share, 1, 1)),
-    num_b = as.numeric(str_sub(prize_share, -1)),
-    share = num_a / num_b,
-    year = prize_year %% 10,
-    decade = 10 * (prize_year %/% 10)
-  ) %>%
-  group_by(prize_year) %>%
-  mutate(n = row_number()) %>%
-  ggplot() +
-  geom_col(aes(x = "", y = share, fill = as.factor(n)),
-    show.legend = FALSE
-  ) +
-  coord_polar("y") +
-  facet_grid(decade ~ year, switch = "both") +
-  labs(title = "每年诺贝尔奖分享情况") +
-  theme_void() +
-  theme(
-    plot.title = element_text(face = "bold", vjust = 8),
-    strip.text.x = element_text(
-      size = 7,
-      margin = margin(t = 5)
-    ),
-    strip.text.y = element_text(
-      size = 7,
-      angle = 180, hjust = 1, margin = margin(r = 10)
-    )
-  )
-

-
-
-

39.19 其它

-

没有回答的问题,大家自己花时间探索下。

-
-
-

39.20 延伸阅读

-
    -
  • 有些图可以再美化下
  • -
- -
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/eda-olympics.html b/_book/eda-olympics.html deleted file mode 100644 index 1433718..0000000 --- a/_book/eda-olympics.html +++ /dev/null @@ -1,1604 +0,0 @@ - - - - - - - 第 40 章 探索性数据分析-奥林匹克 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 40 章 探索性数据分析-奥林匹克

-

这是Nature期刊上的一篇文章Nature. 2004 September 30; 431(7008)

-

-

虽然觉得这个结论不太严谨,但我却无力反驳。

-

于是在文章补充材料里,我找到了文章使用的数据,现在的任务是,重复这张图和文章的分析过程。

-
-

40.1 导入数据

-
library(tidyverse)
-library(readxl)
-
d <- read_excel("./demo_data/olympics.xlsx")
-d
-
## # A tibble: 27 x 3
-##    Olympic_year Men_score Women_score
-##           <dbl>     <dbl>       <dbl>
-##  1         1900      11          NA  
-##  2         1904      11          NA  
-##  3         1908      10.8        NA  
-##  4         1912      10.8        NA  
-##  5         1916      NA          NA  
-##  6         1920      10.8        NA  
-##  7         1924      10.6        NA  
-##  8         1928      10.8        12.2
-##  9         1932      10.3        11.9
-## 10         1936      10.3        11.5
-## # ... with 17 more rows
-
-
-

40.2 可视化

-

我们先画图看看

-
d %>%
-  ggplot() +
-  geom_point(aes(x = Olympic_year, y = Men_score), color = "blue") +
-  geom_point(aes(x = Olympic_year, y = Women_score), color = "red")
-

-这样写也是可以的,只不过最好先tidy数据

-

-
d1 <- d %>%
-  pivot_longer(
-    cols = -Olympic_year,
-    names_to = "sex",
-    values_to = "winning_time"
-  )
-
-d1
-
## # A tibble: 54 x 3
-##    Olympic_year sex         winning_time
-##           <dbl> <chr>              <dbl>
-##  1         1900 Men_score           11  
-##  2         1900 Women_score         NA  
-##  3         1904 Men_score           11  
-##  4         1904 Women_score         NA  
-##  5         1908 Men_score           10.8
-##  6         1908 Women_score         NA  
-##  7         1912 Men_score           10.8
-##  8         1912 Women_score         NA  
-##  9         1916 Men_score           NA  
-## 10         1916 Women_score         NA  
-## # ... with 44 more rows
-

然后再画图

-
d1 %>%
-  ggplot(aes(x = Olympic_year, y = winning_time, color = sex)) +
-  geom_point() +
-  # geom_smooth(method = "lm") +
-  scale_color_manual(
-    values = c("Men_score" = "blue", "Women_score" = "red")
-  ) +
-  scale_x_continuous(
-    breaks = seq(1900, 2004, by = 4),
-    labels = seq(1900, 2004, by = 4)
-  ) +
-  theme(axis.text.x = element_text(
-    size = 10, angle = 45, colour = "black",
-    vjust = 1, hjust = 1
-  ))
-

-
-
-

40.3 回归分析

-

建立年份与成绩的线性关系 -\[ -\text{score}_i = \alpha + \beta \times \text{year}_i + \epsilon_i; \qquad \epsilon_i \sim \text{Normal}(0, \sigma) -\]

-

我们需要求出其中系数\(\alpha\)和\(\beta\),写R语言代码如下 -(lm(y ~ 1 + x,data = d), 要求得 \(\alpha\)和\(\beta\),就是对应 1 和 x 前的系数)

-
fit_1 <- lm(Men_score ~ 1 + Olympic_year, data = d)
-
-summary(fit_1)
-
## 
-## Call:
-## lm(formula = Men_score ~ 1 + Olympic_year, data = d)
-## 
-## Residuals:
-##      Min       1Q   Median       3Q      Max 
-## -0.26371 -0.05270  0.00738  0.08005  0.21456 
-## 
-## Coefficients:
-##               Estimate Std. Error t value Pr(>|t|)    
-## (Intercept)  31.826453   1.679643    18.9  4.1e-15 ***
-## Olympic_year -0.011006   0.000859   -12.8  1.1e-11 ***
-## ---
-## Signif. codes:  
-## 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-## 
-## Residual standard error: 0.135 on 22 degrees of freedom
-##   (3 observations deleted due to missingness)
-## Multiple R-squared:  0.882,  Adjusted R-squared:  0.876 
-## F-statistic:  164 on 1 and 22 DF,  p-value: 1.13e-11
-
fit_2 <- lm(Women_score ~ 1 + Olympic_year, data = d)
-
-summary(fit_2)
-
## 
-## Call:
-## lm(formula = Women_score ~ 1 + Olympic_year, data = d)
-## 
-## Residuals:
-##     Min      1Q  Median      3Q     Max 
-## -0.3758 -0.0846  0.0093  0.0829  0.3223 
-## 
-## Coefficients:
-##              Estimate Std. Error t value Pr(>|t|)    
-## (Intercept)  44.34705    4.28425   10.35  1.7e-08 ***
-## Olympic_year -0.01682    0.00218   -7.73  8.6e-07 ***
-## ---
-## Signif. codes:  
-## 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-## 
-## Residual standard error: 0.21 on 16 degrees of freedom
-##   (9 observations deleted due to missingness)
-## Multiple R-squared:  0.789,  Adjusted R-squared:  0.776 
-## F-statistic: 59.8 on 1 and 16 DF,  p-value: 8.63e-07
-
-
-

40.4 预测

-

使用predict()完成预测

-
df <- data.frame(Olympic_year = 2020)
-
-predict(fit_1, newdata = df)
-
##     1 
-## 9.595
-

为了图片中的一致,我们使用1900年到2252年(seq(1900, 2252, by = 4))建立预测项,并整理到数据框里

-
grid <- tibble(
-  Olympic_year = as.numeric(seq(1900, 2252, by = 4))
-)
-grid
-
## # A tibble: 89 x 1
-##    Olympic_year
-##           <dbl>
-##  1         1900
-##  2         1904
-##  3         1908
-##  4         1912
-##  5         1916
-##  6         1920
-##  7         1924
-##  8         1928
-##  9         1932
-## 10         1936
-## # ... with 79 more rows
-
tb <- grid %>%
-  mutate(
-    Predict_Men = predict(fit_1, newdata = grid),
-    Predict_Women = predict(fit_2, newdata = grid)
-  )
-tb
-
## # A tibble: 89 x 3
-##    Olympic_year Predict_Men Predict_Women
-##           <dbl>       <dbl>         <dbl>
-##  1         1900        10.9          12.4
-##  2         1904        10.9          12.3
-##  3         1908        10.8          12.3
-##  4         1912        10.8          12.2
-##  5         1916        10.7          12.1
-##  6         1920        10.7          12.0
-##  7         1924        10.7          12.0
-##  8         1928        10.6          11.9
-##  9         1932        10.6          11.8
-## 10         1936        10.5          11.8
-## # ... with 79 more rows
-

有时候我喜欢用modelr::add_predictions()函数实现相同的功能

-
library(modelr)
-grid %>%
-  add_predictions(fit_1, var = "Predict_Men") %>%
-  add_predictions(fit_2, var = "Predict_Women")
-
## # A tibble: 89 x 3
-##    Olympic_year Predict_Men Predict_Women
-##           <dbl>       <dbl>         <dbl>
-##  1         1900        10.9          12.4
-##  2         1904        10.9          12.3
-##  3         1908        10.8          12.3
-##  4         1912        10.8          12.2
-##  5         1916        10.7          12.1
-##  6         1920        10.7          12.0
-##  7         1924        10.7          12.0
-##  8         1928        10.6          11.9
-##  9         1932        10.6          11.8
-## 10         1936        10.5          11.8
-## # ... with 79 more rows
-
-
-

40.5 再次可视化

-
tb1 <- tb %>%
-  pivot_longer(
-    cols = -Olympic_year,
-    names_to = "sex",
-    values_to = "winning_time"
-  )
-tb1
-
## # A tibble: 178 x 3
-##    Olympic_year sex           winning_time
-##           <dbl> <chr>                <dbl>
-##  1         1900 Predict_Men           10.9
-##  2         1900 Predict_Women         12.4
-##  3         1904 Predict_Men           10.9
-##  4         1904 Predict_Women         12.3
-##  5         1908 Predict_Men           10.8
-##  6         1908 Predict_Women         12.3
-##  7         1912 Predict_Men           10.8
-##  8         1912 Predict_Women         12.2
-##  9         1916 Predict_Men           10.7
-## 10         1916 Predict_Women         12.1
-## # ... with 168 more rows
-
tb1 %>%
-  ggplot(aes(
-    x = Olympic_year,
-    y = winning_time,
-    color = sex
-  )) +
-  geom_line(size = 2) +
-  geom_point(data = d1) +
-  scale_color_manual(
-    name = "标记",
-    values = c(
-      "Men_score" = "blue",
-      "Women_score" = "red",
-      "Predict_Men" = "#588B8B",
-      "Predict_Women" = "#C8553D"
-    ),
-    labels = c(
-      "Men_score" = "男性历史成绩",
-      "Women_score" = "女性历史成绩",
-      "Predict_Men" = "男性预测成绩",
-      "Predict_Women" = "女性预测成绩"
-    )
-  ) +
-  scale_x_continuous(
-    breaks = seq(1900, 2252, by = 16),
-    labels = as.character(seq(1900, 2252, by = 16))
-  ) +
-  theme(axis.text.x = element_text(
-    size = 10, angle = 45, colour = "black",
-    vjust = 1, hjust = 1
-  ))
-

-早知道nature文章这么简单,10年前我也可以写啊!

-
-
-

40.6 list_column

-

这里是另外的一种方法

-
library(modelr)
-
d1 <- d %>%
-  pivot_longer(
-    cols = -Olympic_year,
-    names_to = "sex",
-    values_to = "winning_time"
-  )
-
-fit_model <- function(df) lm(winning_time ~ Olympic_year, data = df)
-
-d2 <- d1 %>%
-  group_nest(sex) %>%
-  mutate(
-    mod = map(data, fit_model)
-  )
-d2
-
## # A tibble: 2 x 3
-##   sex                       data mod   
-##   <chr>       <list<tbl_df[,2]>> <list>
-## 1 Men_score             [27 x 2] <lm>  
-## 2 Women_score           [27 x 2] <lm>
-
# d2 %>% mutate(p = list(grid, grid))
-# d3 <- d2 %>% mutate(p = list(grid, grid))
-# d3
-# d3 %>%
-#   mutate(
-#     predictions = map2(p, mod, add_predictions),
-#   )
-
-# or
-tb4 <- d2 %>%
-  mutate(
-    predictions = map(mod, ~ add_predictions(grid, .))
-  ) %>%
-  select(sex, predictions) %>%
-  unnest(predictions)
-
-tb4 %>%
-  ggplot(aes(
-    x = Olympic_year,
-    y = pred,
-    group = sex,
-    color = sex
-  )) +
-  geom_point() +
-  geom_line(size = 2) +
-  geom_point(
-    data = d1,
-    aes(
-      x = Olympic_year,
-      y = winning_time,
-      group = sex,
-      color = sex
-    )
-  ) +
-  scale_x_continuous(
-    breaks = seq(1900, 2252, by = 16),
-    labels = as.character(seq(1900, 2252, by = 16))
-  ) +
-  theme(axis.text.x = element_text(
-    size = 10, angle = 45, colour = "black",
-    vjust = 1, hjust = 1
-  ))
-

-
-
-

40.7 课后作业

-
    -
  • 探索数据,建立身高体重的线性模型
  • -
- -
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/eda-penguins.html b/_book/eda-penguins.html deleted file mode 100644 index be0b84a..0000000 --- a/_book/eda-penguins.html +++ /dev/null @@ -1,1883 +0,0 @@ - - - - - - - 第 45 章 探索性数据分析-企鹅的故事 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 45 章 探索性数据分析-企鹅的故事

-

今天讲一个关于企鹅的数据故事。这个故事来源于科考人员记录的大量企鹅体征数据,图片来源这里.

-

-
-

45.1 数据

-
-

45.1.1 导入数据

-

可通过宏包palmerpenguins::penguins获取数据,也可以读取本地penguins.csv文件, -我们采取后面一种方法:

-
library(tidyverse)
-penguins <- read_csv("./demo_data/penguins.csv") %>%
-  janitor::clean_names()
-
-penguins %>%
-  head()
-
## # A tibble: 6 x 8
-##   species island bill_length_mm bill_depth_mm
-##   <chr>   <chr>           <dbl>         <dbl>
-## 1 Adelie  Torge~           39.1          18.7
-## 2 Adelie  Torge~           39.5          17.4
-## 3 Adelie  Torge~           40.3          18  
-## 4 Adelie  Torge~           NA            NA  
-## 5 Adelie  Torge~           36.7          19.3
-## 6 Adelie  Torge~           39.3          20.6
-## # ... with 4 more variables: flipper_length_mm <dbl>,
-## #   body_mass_g <dbl>, sex <chr>, year <dbl>
-
-
-

45.1.2 变量含义

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
variable | class | description
species | integer | 企鹅种类 (Adelie, Gentoo, Chinstrap)
island | integer | 所在岛屿 (Biscoe, Dream, Torgersen)
bill_length_mm | double | 嘴峰长度 (单位毫米)
bill_depth_mm | double | 嘴峰深度 (单位毫米)
flipper_length_mm | integer | 鰭肢长度 (单位毫米)
body_mass_g | integer | 体重 (单位克)
sex | integer | 性别
year | integer | 记录年份
-

-
-
-

45.1.3 数据清洗

-

检查缺失值(NA)这个很重要!

-
penguins %>% summarise(
-  across(everything(), ~ sum(is.na(.)))
-)
-
## # A tibble: 1 x 8
-##   species island bill_length_mm bill_depth_mm
-##     <int>  <int>          <int>         <int>
-## 1       0      0              2             2
-## # ... with 4 more variables: flipper_length_mm <int>,
-## #   body_mass_g <int>, sex <int>, year <int>
-

有缺失值的地方找出来看看

-
penguins %>% filter_all(
-  any_vars(is.na(.))
-)
-
## # A tibble: 11 x 8
-##    species island bill_length_mm bill_depth_mm
-##    <chr>   <chr>           <dbl>         <dbl>
-##  1 Adelie  Torge~           NA            NA  
-##  2 Adelie  Torge~           34.1          18.1
-##  3 Adelie  Torge~           42            20.2
-##  4 Adelie  Torge~           37.8          17.1
-##  5 Adelie  Torge~           37.8          17.3
-##  6 Adelie  Dream            37.5          18.9
-##  7 Gentoo  Biscoe           44.5          14.3
-##  8 Gentoo  Biscoe           46.2          14.4
-##  9 Gentoo  Biscoe           47.3          13.8
-## 10 Gentoo  Biscoe           44.5          15.7
-## 11 Gentoo  Biscoe           NA            NA  
-## # ... with 4 more variables: flipper_length_mm <dbl>,
-## #   body_mass_g <dbl>, sex <chr>, year <dbl>
-

发现共有11行至少有一处有缺失值,于是我们就删除这些行

-
penguins <- penguins %>% drop_na()
-penguins
-
## # A tibble: 333 x 8
-##    species island bill_length_mm bill_depth_mm
-##    <chr>   <chr>           <dbl>         <dbl>
-##  1 Adelie  Torge~           39.1          18.7
-##  2 Adelie  Torge~           39.5          17.4
-##  3 Adelie  Torge~           40.3          18  
-##  4 Adelie  Torge~           36.7          19.3
-##  5 Adelie  Torge~           39.3          20.6
-##  6 Adelie  Torge~           38.9          17.8
-##  7 Adelie  Torge~           39.2          19.6
-##  8 Adelie  Torge~           41.1          17.6
-##  9 Adelie  Torge~           38.6          21.2
-## 10 Adelie  Torge~           34.6          21.1
-## # ... with 323 more rows, and 4 more variables:
-## #   flipper_length_mm <dbl>, body_mass_g <dbl>,
-## #   sex <chr>, year <dbl>
-
-
-
-

45.2 探索性分析

-

大家可以提出自己想探索的内容:

-
    -
  • 每种类型企鹅有多少只?
  • -
  • 每种类型企鹅各种属性的均值和分布?
  • -
  • 嘴峰长度和深度的关联?
  • -
  • 体重与翅膀长度的关联?
  • -
  • 嘴峰长度与嘴峰深度的比例?
  • -
  • 不同种类的宝宝,体重具有显著性差异?
  • -
  • 这些体征中哪个因素对性别影响最大?
  • -
  • -
-
-

45.2.1 每种类型企鹅有多少只

-
penguins %>%
-  count(species, sort = T)
-
## # A tibble: 3 x 2
-##   species       n
-##   <chr>     <int>
-## 1 Adelie      146
-## 2 Gentoo      119
-## 3 Chinstrap    68
-
-
-

45.2.2 每个岛屿有多少企鹅?

-
penguins %>%
-  count(island, sort = T)
-
## # A tibble: 3 x 2
-##   island        n
-##   <chr>     <int>
-## 1 Biscoe      163
-## 2 Dream       123
-## 3 Torgersen    47
-
-
-

45.2.3 每种类型企鹅各种体征属性的均值和分布

-
penguins %>%
-  group_by(species) %>%
-  summarize(across(where(is.numeric), mean, na.rm = TRUE))
-
## # A tibble: 3 x 6
-##   species bill_length_mm bill_depth_mm flipper_length_~
-##   <chr>            <dbl>         <dbl>            <dbl>
-## 1 Adelie            38.8          18.3             190.
-## 2 Chinst~           48.8          18.4             196.
-## 3 Gentoo            47.6          15.0             217.
-## # ... with 2 more variables: body_mass_g <dbl>,
-## #   year <dbl>
-
-
-

45.2.4 每种类型企鹅的嘴峰长度的分布

-
penguins %>%
-  ggplot(aes(x = bill_length_mm)) +
-  geom_density() +
-  facet_wrap(vars(species), scales = "free")
-

-
-
-

45.2.5 每种类型企鹅的嘴峰长度的分布(分性别)

-
penguins %>%
-  ggplot(aes(x = bill_length_mm)) +
-  geom_density(aes(fill = sex)) +
-  facet_wrap(vars(species), scales = "free")
-

-男宝宝的嘴巴要长些,哈哈。

-

来张更好看点的

-
penguins %>%
-  ggplot(aes(x = bill_length_mm, fill = sex)) +
-  geom_histogram(
-    position = "identity",
-    alpha = 0.7,
-    bins = 25
-  ) +
-  scale_fill_manual(values = c("#66b3ff", "#8c8c8c")) +
-  ylab("number of penguins") +
-  xlab("length (mm)") +
-  theme_minimal() +
-  theme(
-    legend.position = "bottom",
-    legend.text = element_text(size = 11),
-    legend.title = element_blank(),
-    panel.grid.minor = element_blank(),
-    axis.title = element_text(color = "white", size = 10),
-    plot.title = element_text(size = 20),
-    plot.subtitle = element_text(size = 12, hjust = 1)
-  ) +
-  facet_wrap(vars(species), scales = "free")
-

-

同理,可以画出其他属性的分布。当然,我更喜欢用山峦图来呈现不同分组的分布,因为竖直方向可以更方便比较

-
library(ggridges)
-penguins %>%
-  ggplot(aes(x = bill_length_mm, y = species, fill = species)) +
-  ggridges::geom_density_ridges()
-

-

同样,我们也用颜色区分下性别,这样不同种类、不同性别企鹅的嘴峰长度分布一目了然

-
penguins %>%
-  ggplot(aes(x = bill_length_mm, y = species, fill = sex)) +
-  geom_density_ridges(alpha = 0.5)
-

-

同样的代码,类似地画个其他体征的分布,

-
penguins %>%
-  ggplot(aes(x = bill_depth_mm, fill = species)) +
-  ggridges::geom_density_ridges(aes(y = species))
-

-
penguins %>%
-  ggplot(aes(x = bill_depth_mm, fill = sex)) +
-  ggridges::geom_density_ridges(aes(y = species))
-

-
penguins %>%
-  ggplot(aes(x = body_mass_g, y = species, fill = sex)) +
-  ggridges::geom_density_ridges(alpha = 0.5)
-

-

但这样一个特征一个特征的画,好麻烦。你知道程序员都是偷懒的,于是我们还有更骚的操作

-
penguins %>%
-  dplyr::select(species, bill_length_mm:body_mass_g) %>%
-  pivot_longer(-species, names_to = "measurement", values_to = "value") %>%
-  ggplot(aes(x = value)) +
-  geom_density(aes(color = species, fill = species), size = 1.2, alpha = 0.2) +
-  facet_wrap(vars(measurement), ncol = 2, scales = "free")
-

-
penguins %>%
-  dplyr::select(species, bill_length_mm:body_mass_g) %>%
-  pivot_longer(-species, names_to = "measurement", values_to = "value") %>%
-  ggplot(aes(x = species, y = value)) +
-  geom_boxplot(aes(color = species, fill = species), size = 1.2, alpha = 0.2) +
-  facet_wrap(vars(measurement), ncol = 2, scales = "free")
-

-
penguins %>%
-  dplyr::select(species, bill_length_mm:body_mass_g) %>%
-  pivot_longer(-species, names_to = "measurement", values_to = "value") %>%
-  ggplot(aes(x = value, y = species, fill = species)) +
-  ggridges::geom_density_ridges() +
-  facet_wrap(vars(measurement), scales = "free")
-

-
penguins %>%
-  dplyr::select(species,sex, bill_length_mm:body_mass_g) %>%
-  pivot_longer(-c(species, sex), names_to = "measurement", values_to = "value") %>%
-  ggplot(aes(x = value, y = species, fill = sex)) +
-  ggridges::geom_density_ridges() +
-  facet_wrap(vars(measurement), scales = "free")
-

-

我若有所思的看着这张图,似乎看到了一些特征(pattern)了。

-
-
-

45.2.6 嘴峰长度和深度的关联

-

嘴巴越长,嘴巴也会越厚?

-
penguins %>%
-  ggplot(aes(
-    x = bill_length_mm, y = bill_depth_mm,
-    shape = species, color = species
-  )) +
-  geom_point()
-

-

我们把不同的种类,用不同的颜色区分看看

-
penguins %>%
-  ggplot(aes(
-    x = bill_length_mm, y = bill_depth_mm,
-    shape = species, color = species
-  )) +
-  geom_point(aes(size = body_mass_g))
-

-

感觉这是一个辛普森佯谬, 我们画图看看

-
penguins %>%
-  ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
-  geom_point(aes(color = species, shape = species)) +
-  geom_smooth(method = lm) +
-  geom_smooth(method = lm, aes(color = species))
-

-
-
-

45.2.7 体重与翅膀长度的关联

-

翅膀越长,体重越大?

-
penguins %>%
-  group_by(species, island, sex) %>%
-  ggplot(aes(
-    x = body_mass_g, y = reorder(species, -body_mass_g),
-    color = species
-  )) +
-  geom_jitter(position = position_jitter(seed = 2020, width = 0.2), alpha = 0.4, size = 2) +
-  stat_summary(fun = mean, geom = "point", size = 5, alpha = 1)
-

-
library(ggtext)
-penguins %>%
-  ggplot(aes(flipper_length_mm, body_mass_g, group = species)) +
-  geom_point(aes(colour = species, shape = species), alpha = 0.7) +
-  scale_color_manual(values = c("darkorange", "purple", "cyan4")) +
-  labs(
-    title = "Penguin Size, Palmer Station LTER",
-    subtitle = "Flipper length and body mass for <span style = 'color:darkorange;'>Adelie</span>, <span style = 'color:purple;'>Chinstrap</span> and <span style = 'color:cyan4;'>Gentoo</span> Penguins",
-    x = "flipper length (mm)",
-    y = "body mass (g)"
-  ) +
-  theme_minimal() +
-  theme(
-    legend.position = "none",
-    # text = element_text(family = "Futura"),
-    # (I only have 'Light' )
-    plot.title = element_text(size = 16),
-    plot.subtitle = element_markdown(), # element_markdown from `ggtext` to parse the css in the subtitle
-    plot.title.position = "plot",
-    plot.caption = element_text(size = 8, colour = "grey50"),
-    plot.caption.position = "plot"
-  )
-

-
-
-

45.2.8 不同种类的宝宝,体重具有显著性差异?

-

先分组计算体重的均值和标准差

-
penguins %>%
-  group_by(species) %>%
-  summarise(
-    count = n(),
-    mean_body_mass = mean(body_mass_g),
-    sd_body_mass = sd(body_mass_g)
-  )
-
## # A tibble: 3 x 4
-##   species   count mean_body_mass sd_body_mass
-##   <chr>     <int>          <dbl>        <dbl>
-## 1 Adelie      146          3706.         459.
-## 2 Chinstrap    68          3733.         384.
-## 3 Gentoo      119          5092.         501.
-
penguins %>%
-  ggplot(aes(x = species, y = body_mass_g)) +
-  geom_boxplot() +
-  geom_jitter()
-

-

用统计方法验证下我们的猜测吧。记住,我们是有科学精神的人!

-
-

45.2.8.1 参数检验

-
    -
  • one-way ANOVA(要求等方差)
  • -
-
stats::aov(formula = body_mass_g ~ species, data = penguins) %>%
-  summary()
-
##              Df   Sum Sq  Mean Sq F value Pr(>F)    
-## species       2 1.45e+08 72595110     342 <2e-16 ***
-## Residuals   330 7.01e+07   212332                   
-## ---
-## Signif. codes:  
-## 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-

p-value 很小,说明不同种类企鹅之间体重是有显著差异的,但aov只给出了species在整体上引起了体重差异(只要有任意两组之间有显著差异,aov给出的p-value都很小),如果想知道不同种类两两之间是否有显著差异,这就需要用到TukeyHSD().

-
    -
  • one-way ANOVA(不要求等方差),相关介绍看here
  • -
-
oneway.test(body_mass_g ~ species, data = penguins)
-
## 
-##  One-way analysis of means (not assuming equal
-##  variances)
-## 
-## data:  body_mass_g and species
-## F = 317, num df = 2, denom df = 188, p-value
-## <2e-16
-
stats::aov(formula = body_mass_g ~ species, data = penguins) %>%
-  TukeyHSD(which = "species") %>%
-  broom::tidy()
-
## # A tibble: 3 x 7
-##   term  contrast null.value estimate conf.low conf.high
-##   <chr> <chr>         <dbl>    <dbl>    <dbl>     <dbl>
-## 1 spec~ Chinstr~          0     26.9    -132.      186.
-## 2 spec~ Gentoo-~          0   1386.     1252.     1520.
-## 3 spec~ Gentoo-~          0   1359.     1194.     1524.
-## # ... with 1 more variable: adj.p.value <dbl>
-

表格第一行 Chinstrap-Adelie 的 p-value = 0.916,没通过显著性检验;而Gentoo-Adelie 和 Gentoo-Chinstrap 他们的p-value都接近0,通过显著性检验,这和图中的结果是一致的。

-

作为统计出身的R语言,有很多宏包可以帮助我们验证我们的结论,我这里推荐可视化学统计的宏包ggstatsplot。该宏包将统计分析的结果写在图片里,统计结果和图形融合在一起,让统计结果更容易懂了。(使用这个宏包辅助我们学习统计)

-
library(ggstatsplot)
-
-penguins %>%
-  ggstatsplot::ggbetweenstats(
-    x = species, # > 2 groups
-    y = body_mass_g,
-    type = "parametric",
-    pairwise.comparisons = TRUE, 
-    pairwise.display = "all",
-    messages = FALSE,
-    var.equal = FALSE
-  )
-
-
-

45.2.8.2 非参数检验

-

相关介绍看here

-
kruskal.test(body_mass_g ~ species, data = penguins)
-
## 
-##  Kruskal-Wallis rank sum test
-## 
-## data:  body_mass_g by species
-## Kruskal-Wallis chi-squared = 212, df = 2,
-## p-value <2e-16
-
penguins %>%
-  ggstatsplot::ggbetweenstats(
-    x = species,
-    y = body_mass_g,
-    type = "nonparametric",
-    mean.ci = TRUE,
-    pairwise.comparisons = TRUE, # <<
-    pairwise.display = "all",    # ns = only non-significant
-    p.adjust.method = "fdr",     # <<
-    messages = FALSE
-  )
-

哇,原来统计可以这样学!

-
-
-
-

45.2.9 嘴峰长度与嘴峰深度的比例

-
penguins %>%
-  mutate(ratio = bill_length_mm / bill_depth_mm) %>%
-  group_by(species) %>%
-  summarise(mean = mean(ratio))
-
## # A tibble: 3 x 2
-##   species    mean
-##   <chr>     <dbl>
-## 1 Adelie     2.12
-## 2 Chinstrap  2.65
-## 3 Gentoo     3.18
-
penguins %>%
-  mutate(ratio = bill_length_mm / bill_depth_mm) %>%
-  ggplot(aes(x = ratio, fill = species)) +
-  ggridges::geom_density_ridges(aes(y = species))
-

-

男宝宝和女宝宝颜色区分下,代码只需要修改一个地方,留给大家自己实践下吧。

-
-
-

45.2.10 建立模型

-

建模需要标准化数据,并对分类变量(比如sex)编码为 1 和 0; (这是第二个好习惯)

-
scale_fun <- function(x) {  # 标准化的子函数
-  (x - mean(x)) / sd(x)
-}
-
-d <- penguins %>%
-  select(sex, species, bill_length_mm:body_mass_g) %>%
-  mutate(
-    across(where(is.numeric), scale_fun)
-  ) %>%
-  mutate(male = if_else(sex == "male", 1, 0))
-d
-
## # A tibble: 333 x 7
-##    sex   species bill_length_mm bill_depth_mm
-##    <chr> <chr>            <dbl>         <dbl>
-##  1 male  Adelie          -0.895         0.780
-##  2 fema~ Adelie          -0.822         0.119
-##  3 fema~ Adelie          -0.675         0.424
-##  4 fema~ Adelie          -1.33          1.08 
-##  5 male  Adelie          -0.858         1.74 
-##  6 fema~ Adelie          -0.931         0.323
-##  7 male  Adelie          -0.876         1.24 
-##  8 fema~ Adelie          -0.529         0.221
-##  9 male  Adelie          -0.986         2.05 
-## 10 male  Adelie          -1.72          2.00 
-## # ... with 323 more rows, and 3 more variables:
-## #   flipper_length_mm <dbl>, body_mass_g <dbl>,
-## #   male <dbl>
-

按照species分组后,对flipper_length_mm标准化?这样数据会聚拢到一起了喔, 还是不要了

-
penguins %>%
-  select(sex, species, bill_length_mm:body_mass_g) %>%
-  group_by(species) %>%
-  mutate(
-    across(where(is.numeric), scale_fun)
-  ) %>%
-  ungroup()
-
-

45.2.10.1 model_01

-

我们将性别sex视为响应变量,其他变量为预测变量。这里性别变量是二元的(0 或者 1),所以我们用logistic回归

-
logit_mod1 <- glm(
-  male ~ 1 + species + bill_length_mm + bill_depth_mm +
-    flipper_length_mm + body_mass_g,
-  data = d,
-  family = binomial(link = "logit")
-)
-
-summary(logit_mod1)
-
## 
-## Call:
-## glm(formula = male ~ 1 + species + bill_length_mm + bill_depth_mm + 
-##     flipper_length_mm + body_mass_g, family = binomial(link = "logit"), 
-##     data = d)
-## 
-## Deviance Residuals: 
-##    Min      1Q  Median      3Q     Max  
-## -3.382  -0.215   0.002   0.155   2.809  
-## 
-## Coefficients:
-##                   Estimate Std. Error z value Pr(>|z|)
-## (Intercept)          4.684      1.187    3.95  7.9e-05
-## speciesChinstrap    -6.980      1.574   -4.43  9.3e-06
-## speciesGentoo       -8.354      2.524   -3.31  0.00093
-## bill_length_mm       3.357      0.716    4.69  2.8e-06
-## bill_depth_mm        3.196      0.655    4.88  1.0e-06
-## flipper_length_mm    0.291      0.670    0.43  0.66405
-## body_mass_g          4.723      0.872    5.41  6.2e-08
-##                      
-## (Intercept)       ***
-## speciesChinstrap  ***
-## speciesGentoo     ***
-## bill_length_mm    ***
-## bill_depth_mm     ***
-## flipper_length_mm    
-## body_mass_g       ***
-## ---
-## Signif. codes:  
-## 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-## 
-## (Dispersion parameter for binomial family taken to be 1)
-## 
-##     Null deviance: 461.61  on 332  degrees of freedom
-## Residual deviance: 127.11  on 326  degrees of freedom
-## AIC: 141.1
-## 
-## Number of Fisher Scoring iterations: 7
-

计算每个变量的平均边际效应

-
library(margins)
-
-logit_mod1_m <- logit_mod1 %>% 
- margins() %>% 
- summary() %>% 
- as_tibble()
-
-logit_mod1_m
-
## # A tibble: 6 x 7
-##   factor     AME     SE      z        p   lower   upper
-##   <chr>    <dbl>  <dbl>  <dbl>    <dbl>   <dbl>   <dbl>
-## 1 bill_~  0.185  0.0290  6.38  1.82e-10  0.128   0.242 
-## 2 bill_~  0.194  0.0339  5.72  1.04e- 8  0.128   0.261 
-## 3 body_~  0.273  0.0378  7.22  5.08e-13  0.199   0.347 
-## 4 flipp~  0.0169 0.0388  0.434 6.64e- 1 -0.0592  0.0929
-## 5 speci~ -0.373  0.0513 -7.27  3.67e-13 -0.473  -0.272 
-## 6 speci~ -0.434  0.0740 -5.86  4.66e- 9 -0.579  -0.289
-
logit_mod1_m %>%
-  ggplot(aes(
-    x = reorder(factor, AME),
-    y = AME, ymin = lower, ymax = upper
-  )) +
-  geom_hline(yintercept = 0, color = "gray80") +
-  geom_pointrange() +
-  coord_flip() +
-  labs(x = NULL, y = "Average Marginal Effect")
-

-
library(ggeffects)
-ggpredict(logit_mod1, terms = "bill_length_mm") 
-
-
-

45.2.10.2 model_02

-
library(brms)
-
-brms_mod2 <- brm(
-  male ~ 1 + bill_length_mm + bill_depth_mm + flipper_length_mm + body_mass_g + (1 | species),
-  data = d,
-  family = binomial(link = "logit")
-)
-
summary(brms_mod2)
-
library(ggeffects)
-ggpredict(brms_mod2, "bill_depth_mm [all]") %>%
-  plot()
-
-
-

45.2.10.3 model_03

-
penguins %>%
-  ggplot(aes(x = flipper_length_mm, y = bill_length_mm, color = species)) +
-  geom_point()
-
brms_mod3 <- brm(bill_length_mm ~ flipper_length_mm + (1|species),
-  data = penguins
-)
-
penguins %>%
-  group_by(species) %>%
-  modelr::data_grid(flipper_length_mm) %>%
-  tidybayes::add_fitted_draws(brms_mod3, n = 100) %>%
-  ggplot() +
-  geom_point(
-    data = penguins,
-    aes(flipper_length_mm, bill_length_mm, color = species, shape = species)
-  ) +
-  geom_line(aes(flipper_length_mm, .value, group = interaction(.draw, species), color = species), alpha = 0.1)
- -
-
-
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/eda-vaccine.html b/_book/eda-vaccine.html deleted file mode 100644 index 986da1e..0000000 --- a/_book/eda-vaccine.html +++ /dev/null @@ -1,1449 +0,0 @@ - - - - - - - 第 48 章 探索性数据分析-新冠疫苗有效率的计算 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 48 章 探索性数据分析-新冠疫苗有效率的计算

-
-

48.1 引言

-

-

纽约时报报道说,

-
-

美国制药公司辉瑞(Pfizer)和德国生物科技公司(BioNTech)11月9日率先宣布 -,根据在数国临床试验初步结果,其研发的新冠疫苗有效率达到90%以上,星期三,完整结果显示,参加疫苗实验的44000个志愿者中,共有170人确诊感染,其中安慰剂组162人,接种疫苗组仅8人,这证明了辉瑞开发的新冠疫苗有效率高达95%。

-
- - - - - - - - - - - - - - - - - - - - -
-group - -volunteers - -got_covid -
-placebo - -22000 - -162 -
-vaccinated - -22000 - -8 -
-

新冠疫苗是有效的,且有效率高达95%。 那么,这个95%是怎么计算出来的呢?它的概率是多少以及不确定性是多少呢? -回到这个问题,我们首先需要了解,辉瑞公司是如何定义疫苗有效率的

-

\[ -\text{VE} = 1 - \frac{p_{t}}{p_{c}} -\]

-

其中\(p_t\)是疫苗组(vaccinated)的感染率,\(p_c\)是安慰剂组(placebo)的感染率。

-
-
-

48.2 模型

-
library(tidyverse)
-library(tidybayes)
-library(rstan)
-rstan_options(auto_write = TRUE)
-options(mc.cores = parallel::detectCores())
-

然后,我们建立如下数学模型:

-

\[ -\begin{align} -y_{c} \sim \textsf{binomial}(n_{c},p_{c}) \\ -y_{t} \sim \textsf{binomial}(n_{t},p_{t}) \\ -p_{c} \sim \textsf{beta}(1, 1) \\ -p_{t} \sim \textsf{beta}(1, 1) -\end{align} -\]

-

通过模型可以直接计算干预效果\(\textsf{effect}\)和疫苗有效性\(VE\)

-

\[ -\begin{align} -\text{effect} = p_{t} - p_{c} \\ -\text{VE} = 1 - \frac{p_{t}}{p_{c}} -\end{align} -\]

-
-
-

48.3 计算

-

具体Stan代码如下

-
stan_program <- "
-data {
-  int<lower=1> event_c;        // num events, control
-  int<lower=1> event_t;        // num events, treatment
-  int<lower=1> n_c;            // num of person trial, control
-  int<lower=1> n_t;            // num of person trial, treatment
-}
-parameters {
-  real<lower=0,upper=1> p_c;    
-  real<lower=0,upper=1> p_t;    
-}
-model {
-  event_c ~ binomial(n_c, p_c);
-  event_t ~ binomial(n_t, p_t);
-  p_c ~ beta(1, 1);
-  p_t ~ beta(1, 1);
-}
-generated quantities {
-  real effect   = p_t - p_c;
-  real VE       = 1- p_t /p_c;
-  real log_odds = log(p_t / (1- p_t)) - log(p_c / (1- p_c));
-}
-"
-
-
-stan_data <- list(
-  event_c = 162,
-  event_t = 8,
-  n_c     = 4.4e4 / 2,
-  n_t     = 4.4e4 / 2
-)
-
-mod_vaccine <- stan(model_code = stan_program, data = stan_data)
-
-
-

48.4 结果

-

最后,我们从后验分布中抽样

-
draws <- mod_vaccine %>%
-  tidybayes::spread_draws(effect, VE, log_odds)
-
-draws %>% 
-  head()
-
## # A tibble: 6 x 6
-##   .chain .iteration .draw   effect    VE log_odds
-##    <int>      <int> <int>    <dbl> <dbl>    <dbl>
-## 1      1          1     1 -0.00915 0.986    -4.30
-## 2      1          2     2 -0.00604 0.959    -3.19
-## 3      1          3     3 -0.00637 0.936    -2.75
-## 4      1          4     4 -0.00761 0.956    -3.14
-## 5      1          5     5 -0.00646 0.947    -2.94
-## 6      1          6     6 -0.00723 0.949    -2.98
-
-

48.4.1 干预效果

-

从结果中看到effect中很多负数。事实上,effect中越多的负值,即被感染的可能性越低,说明疫苗干预效果越好

-
mean(draws$effect < 0) %>% round(2)
-
## [1] 1
-

结果告诉我们,疫苗有明显的干预效果。比如,我们假定10000个人接受了疫苗,那么与安慰剂组相比,感染人数的变化量(负值表示感染人数减少)及其后验分布,如下图

-
draws %>%
-  ggplot(aes(x = effect * 1e4)) +
-  geom_density(fill = "blue", alpha = .2) +
-  expand_limits(y = 0) +
-  theme_minimal() +
-  xlab("效应大小") +
-  ggtitle("每10000个接种疫苗的人中被感染新冠的数量")
-

-
-
-

48.4.2 疫苗有效率

-

我们再看看疫苗有效率 VE 的结果

-
draws %>%
-  select(VE) %>%
-  ggdist::median_qi(.width = c(0.90))
-
## # A tibble: 1 x 6
-##      VE .lower .upper .width .point .interval
-##   <dbl>  <dbl>  <dbl>  <dbl> <chr>  <chr>    
-## 1 0.947  0.909  0.972    0.9 median qi
-

通过数据看出,疫苗有效率的后验中位数为0.95,90%可信区间为[0.91, 0.97]。

-

当然,通过图可能理解的更清晰。

-
label_txt <- paste("median =", round(median(draws$VE), 2))
-
-draws %>%
-  ggplot(aes(x = VE)) +
-  geom_density(fill = "blue", alpha = .2) +
-  expand_limits(y = 0) +
-  theme_minimal() +
-  geom_vline(xintercept = median(draws$VE), size = 0.2) +
-  annotate("text", x = 0.958, y = 10, label = label_txt, size = 3) +
-  xlab("疫苗有效率") +
-  ggtitle("辉瑞公司定义疫苗有效率为 VE = 1 - Pt/Pc")
-

- -
-
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/exams.html b/_book/exams.html deleted file mode 100644 index dbb4686..0000000 --- a/_book/exams.html +++ /dev/null @@ -1,1308 +0,0 @@ - - - - - - - A 期末考试 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

A 期末考试

-

研究生生涯的主要工作就是学习,而学以致用是最好的学习路径。考虑同学们不同的学科背景,同时也参考国内其它高校的做法,本学期《数据科学中的 R 语言》期末考试安排如下:

-
-

A.1 方式

-

结合所在学科,找一篇与自己研究方向相关的文献,用课堂上学到的R统计编程技能,重复文献的数据分析过程。

-
-
-

A.2 要求

-

在2020年06月15日前,将以下资料打包并提交到38552109@qq.com邮箱

-
    -
  • 所重复的文献(并在文献中高亮你重复的部分)
  • -
  • 数据
  • -
  • Rmarkdown源代码
  • -
  • 分析结果(生成的pdf或者html文件)
  • -
  • 注明学号和姓名
  • -
-
- -
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/forcats.html b/_book/forcats.html deleted file mode 100644 index 15ec7b5..0000000 --- a/_book/forcats.html +++ /dev/null @@ -1,1409 +0,0 @@ - - - - - - - 第 10 章 因子型变量 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 10 章 因子型变量

-

本章介绍R语言中的因子类型数据。因子型变量常用于数据处理和可视化中,尤其在希望不以字母顺序排序的时候,因子就格外有用。

-
-

10.1 什么是因子

-

因子是把数据进行分类并标记为不同层级(level,有时候也翻译成因子水平, 我个人觉得翻译为层级,更接近它的特性,因此,我都会用层级来描述)的数据对象,它们可以存储字符串和整数。因子类型有三个属性:

-
    -
  • 存储类别的数据类型
  • -
  • 离散变量
  • -
  • 因子的层级是有限的,只能取因子层级中的值或缺失(NA)
  • -
-
-
-

10.2 创建因子

-
library(tidyverse)
-
income <- c("low", "high", "medium", "medium", "low", "high",  "high")
-factor(income)
-
## [1] low    high   medium medium low    high   high  
-## Levels: high low medium
-

因子层级会自动按照字符串的字母顺序排序,比如high low medium。也可以指定顺序,

-
factor(income, levels = c("low", "high", "medium") )
-
## [1] low    high   medium medium low    high   high  
-## Levels: low high medium
-

对于不属于因子层级中的值, 比如这里因子层级只有c("low", "high"),那么income中的“medium”会被当作缺失值NA

-
factor(income, levels = c("low", "high") )
-
## [1] low  high <NA> <NA> low  high high
-## Levels: low high
-

相比较字符串而言,因子类型更容易处理,很多函数会自动地将字符串转换为因子来处理,但事实上,这也会造成不想当做因子的却又被当做了因子的情形。最典型的是在R 4.0之前,data.frame()的stringsAsFactors选项默认将字符串类型转换为因子类型,但这个默认也带来一些不方便,因此在R 4.0之后取消了这个默认。在tidyverse集合里,有专门处理因子的宏包forcats,因此,本章将围绕forcats宏包讲解如何处理因子类型变量,更多内容可以参考这里。

-
library(forcats)
-
-
-

10.3 调整因子顺序

-

前面看到因子层级是按照字母顺序排序

-
x <- factor(income)
-x
-
## [1] low    high   medium medium low    high   high  
-## Levels: high low medium
-

也可以指定顺序

-
x %>% fct_relevel(levels = c("high", "medium", "low"))
-
## [1] low    high   medium medium low    high   high  
-## Levels: high medium low
-

或者让“medium” 移动到最前面

-
x %>% fct_relevel(levels = c("medium"))
-
## [1] low    high   medium medium low    high   high  
-## Levels: medium high low
-

或者让“medium” 移动到最后面

-
x %>% fct_relevel("medium", after = Inf)
-
## [1] low    high   medium medium low    high   high  
-## Levels: high low medium
-

可以按照字符串第一次出现的次序

-
x %>% fct_inorder()
-
## [1] low    high   medium medium low    high   high  
-## Levels: low high medium
-

按照其他变量的中位数的升序排序

-
x %>% fct_reorder(c(1:7), .fun = median)  
-
## [1] low    high   medium medium low    high   high  
-## Levels: low medium high
-
-
-

10.4 应用

-

调整因子层级有什么用呢?

-

这个功能在ggplot可视化中调整分类变量的顺序非常方便。这里为了方便演示,我们假定有数据框

-
d <- tibble(
-  x = c("a","a", "b", "b", "c", "c"),
-  y = c(2, 2, 1, 5,  0, 3)
-  
-)
-d
-
## # A tibble: 6 x 2
-##   x         y
-##   <chr> <dbl>
-## 1 a         2
-## 2 a         2
-## 3 b         1
-## 4 b         5
-## 5 c         0
-## 6 c         3
-

先画个散点图看看吧

-
d %>% 
-  ggplot(aes(x = x, y = y)) +
-  geom_point()
-

-

我们看到,横坐标上是a-b-c的顺序。

-
-

10.4.1 fct_reorder()

-

fct_reorder()可以让x的顺序按照x中每个分类变量对应y值的中位数升序排序,具体为

-
    -
  • a对应的y值c(2, 2) 中位数是median(c(2, 2)) = 2
  • -
  • b对应的y值c(1, 5) 中位数是median(c(1, 5)) = 3
  • -
  • c对应的y值c(0, 3) 中位数是median(c(0, 3)) = 1.5
  • -
-

因此,x的因子层级的顺序调整为c-a-b

-
d %>% 
-  ggplot(aes(x = fct_reorder(x, y, .fun = median), y = y)) +
-  geom_point()
-

-

当然,我们可以加一个参数.desc = TRUE让因子层级变为降序排列b-a-c

-
d %>% 
-  ggplot(aes(x = fct_reorder(x, y, .fun = median, .desc = TRUE), y = y)) +
-  geom_point()
-

-

但这样会造成x坐标标签一大串,因此建议写在mutate()函数里

-
d %>% 
-  mutate(x = fct_reorder(x, y, .fun = median, .desc = TRUE)) %>% 
-  ggplot(aes(x = x, y = y)) +
-  geom_point()
-

-

我们还可以按照y值中最小值的大小降序排列

-
d %>% 
-  mutate(x = fct_reorder(x, y, .fun = min, .desc = TRUE)) %>% 
-  ggplot(aes(x = x, y = y)) +
-  geom_point()
-

-
-
-

10.4.2 fct_rev()

-

按照因子层级的逆序排序

-
d %>% 
-  mutate(x = fct_rev(x)) %>% 
-  ggplot(aes(x = x, y = y)) +
-  geom_point()
-

-
-
-

10.4.3 fct_relevel()

-
d %>% 
-  mutate(
-    x = fct_relevel(x, c("c", "a", "b"))
-  ) %>% 
-
-  ggplot(aes(x = x, y = y)) +
-  geom_point()
-

- -
-
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/ggplot2-academic.html b/_book/ggplot2-academic.html deleted file mode 100644 index 9c50dd7..0000000 --- a/_book/ggplot2-academic.html +++ /dev/null @@ -1,1613 +0,0 @@ - - - - - - - 第 55 章 科研数据可视化 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 55 章 科研数据可视化

-
-

55.1 统计分布图

-

在学术中,很多情形我们都需要画出统计分布图。比如,围绕天气温度数据(美国内布拉斯加州东部,林肯市, 2016年),我们想看每个月份里气温的分布情况

-
lincoln_df <- ggridges::lincoln_weather %>%
-  mutate(
-    month_short = fct_recode(
-      Month,
-      Jan = "January",
-      Feb = "February",
-      Mar = "March",
-      Apr = "April",
-      May = "May",
-      Jun = "June",
-      Jul = "July",
-      Aug = "August",
-      Sep = "September",
-      Oct = "October",
-      Nov = "November",
-      Dec = "December"
-    )
-  ) %>%
-  mutate(month_short = fct_rev(month_short)) %>%
-  select(Month, month_short, `Mean Temperature [F]`)
-
-lincoln_df %>%
-  head(5)
-
## # A tibble: 5 x 3
-##   Month   month_short `Mean Temperature [F]`
-##   <fct>   <fct>                        <int>
-## 1 January Jan                             24
-## 2 January Jan                             23
-## 3 January Jan                             23
-## 4 January Jan                             17
-## 5 January Jan                             29
-

统计分布图的方法很多,我们下面比较各种方法的优劣。

-
-

55.1.1 points-errorbars

-

画分布图的最简单的方法,就是计算每个月的气温均值或者中位数,并在均值或者中位数位置标出误差棒(error bars),比如图 55.1

-
lincoln_errbar <- lincoln_df %>%
-  ggplot(aes(x = month_short, y = `Mean Temperature [F]`)) +
-  stat_summary(
-    fun.y = mean, fun.ymax = function(x) {
-      mean(x) + 2 * sd(x)
-    },
-    fun.ymin = function(x) {
-      mean(x) - 2 * sd(x)
-    }, geom = "pointrange",
-    fatten = 5
-  ) +
-  xlab("month") +
-  ylab("mean temperature (°F)") +
-  theme_classic(base_size = 14) +
-  theme(
-    axis.text = element_text(color = "black", size = 12),
-    plot.margin = margin(3, 7, 3, 1.5)
-  )
-
-lincoln_errbar
-
-林肯市2016年气温变化图 -

-图 55.1: 林肯市2016年气温变化图 -

-
-

但这个图有很多问题,或者说是错误的

-
    -
  1. 图中只用了一个点和两个误差棒,丢失了很多分布信息。

  2. -
  3. 读者不能很直观的读出这个点的含义(是均值还是中位数?)

  4. -
  5. 误差棒代表的含义不明确(标准差?标准误?还是其他?)

  6. -
-
-
-

通过看代码,知道这里用的是均值加减2倍的标准差,其目的是想表达这个范围涵盖了95%的数据。 事实上,误差棒一般用于标准误(或者加减2倍的标准误来代表估计均值的95%置信区间),所以这里使用标准差就造成了混淆。

-
-
-
-
-

( 标准误:对样本均值估计的不确定性; 标准差:数据偏离均值的分散程度 )

-
-
-
    -
  1. 现实的数据往往是偏态的,但这个图的误差棒几乎是对称的,会让人产生怀疑。
  2. -
-
-
-

55.1.2 箱线图

-

为了解决以上问题,可以使用箱线图(boxplot),箱线图将数据分成若干段,如图 55.2.

-
-箱线图示意图 -

-图 55.2: 箱线图示意图 -

-
-
    -
  • 盒子中间的横线是中位数(50th percentile),底部的横线代表第一分位数(25th -percentile),顶部的横线代表第三分位数(75th percentile)
  • -
  • 盒子的范围覆盖了50%的数据,每个小盒子是25%的数据,盒子高度越短, -说明数据越集中,盒子高度越长,数据越分散。
  • -
  • 上面的这条竖线的长度 = 从盒子上边缘开始,延伸到1.5倍盒子高度的范围中最远的点
  • -
  • 下面的这条竖线的长度 = 从盒子下边缘开始,延伸到1.5倍盒子高度的范围中最远的点
  • -
  • 在线条之外的点就是 outliers(离群值)
  • -
-

那么气温分布用箱线图画出来 (图 55.3)。 我们可以看到12月份数据 -偏态(绝大部分时候中等的冷,少部分是极度寒冷),其他月份,比如7月份,数据分布的比较正态

-
lincoln_box <- lincoln_df %>%
-  ggplot(aes(x = month_short, y = `Mean Temperature [F]`)) +
-  geom_boxplot(fill = "grey90") +
-  xlab("month") +
-  ylab("mean temperature (°F)") +
-  theme_classic(base_size = 14) +
-  theme(
-    axis.text = element_text(color = "black", size = 12),
-    plot.margin = margin(3, 7, 3, 1.5)
-  )
-
-lincoln_box
-
-林肯市2016年气温分布箱线图 -

-图 55.3: 林肯市2016年气温分布箱线图 -

-
-
-
-

55.1.3 小提琴图

-

箱线图是1970年代统计学家发明的一种可视化方法,这种图可以很方便地用手工画出,所以当时很流行,现在计算机性能大大提升了,所以大家喜欢用视觉上更直观的小提琴图取代箱线图

-
-小提琴图示意图 -

-图 55.4: 小提琴图示意图 -

-
-
    -
  • 小提琴图相当于密度分布图旋转90度,然后再做个对称的镜像
  • -
  • 最宽或者最厚的地方,对应着数据密度最大的地方
  • -
  • 箱线图能用的地方小提琴图都能用,而且小提琴图可以很好的展示bimodal data的情况(箱线图做不到)
  • -
-
-图片来源:nature methods, VOL.11, NO.2, FEBRUARY 2014 -

-图 55.5: 图片来源:nature methods, VOL.11, NO.2, FEBRUARY 2014 -

-
-

在图 55.6, 我们使用小提琴图画出气温分布,可以看到,11月份的时候,有两个高密度区间(两个峰,50 degrees 和 35 degrees Fahrenheit),注意,这个信息在前面两个图中是没有的。

-
lincoln_violin <- lincoln_df %>%
-  ggplot(aes(x = month_short, y = `Mean Temperature [F]`)) +
-  geom_violin(fill = "grey90") +
-  xlab("month") +
-  ylab("mean temperature (°F)") +
-  theme_classic(base_size = 14) +
-  theme(
-    axis.text = element_text(color = "black", size = 12),
-    plot.margin = margin(3, 7, 3, 1.5)
-  )
-
-lincoln_violin
-
-林肯市2016年气温分布小提琴图 -

-图 55.6: 林肯市2016年气温分布小提琴图 -

-
-

事实上,小提琴图也是不完美的,用的是密度分布图,会造成没有数据点的地方,也会有分布。怎么解决呢?

-
-
-

55.1.4 sina 图

-

解决办法就是,把原始数据点打上去,

-
lincoln_points <- lincoln_df %>%
-  ggplot(aes(x = month_short, y = `Mean Temperature [F]`)) +
-  geom_point(size = 0.75) +
-  xlab("month") +
-  ylab("mean temperature (°F)") +
-  theme_classic(base_size = 14) +
-  theme(
-    axis.text = element_text(color = "black", size = 12),
-    plot.margin = margin(3, 7, 3, 1.5)
-  )
-
-lincoln_points
-
-林肯市2016年气温分布散点图 -

-图 55.7: 林肯市2016年气温分布散点图 -

-
-

但问题又来了,这样会有大量重叠的点。有时候会采用透明度的办法,即给每个点设置透明度,某个位置颜色越深,说明这个位置重叠的越多。当然,最好的办法是,给每个点增加一个随机的很小的“偏移”,即抖散图。

-
lincoln_jitter <- lincoln_df %>%
-  ggplot(aes(x = month_short, y = `Mean Temperature [F]`)) +
-  geom_point(position = position_jitter(width = .15, height = 0, seed = 320), size = 0.75) +
-  xlab("month") +
-  ylab("mean temperature (°F)") +
-  theme_classic(base_size = 14) +
-  theme(
-    axis.text = element_text(
-      color = "black",
-      size = 12
-    ),
-    plot.margin = margin(3, 7, 3, 1.5)
-  )
-
-lincoln_jitter
-
-林肯市2016年气温分布抖散图 -

-图 55.8: 林肯市2016年气温分布抖散图 -

-
-

于是,(小提琴图 + 抖散图)= sina 图,这样既可以看到原始的点,又可以看到统计分布,见图 55.9.

-
lincoln_sina <- lincoln_df %>%
-  ggplot(aes(x = month_short, y = `Mean Temperature [F]`)) +
-  geom_violin(color = "transparent", fill = "gray90") +
-  # dviz.supp::stat_sina(size = 0.85) +
-  geom_jitter(width = 0.25, size = 0.85) +
-  xlab("month") +
-  ylab("mean temperature (°F)") +
-  theme_classic(base_size = 14) +
-  theme(
-    axis.text = element_text(
-      color = "black",
-      size = 12
-    ),
-    plot.margin = margin(3, 7, 3, 1.5)
-  )
-
-lincoln_sina
-
-林肯市2016年气温分布 sina 图 -

-图 55.9: 林肯市2016年气温分布 sina 图 -

-
-
-
-

55.1.5 山峦图

-

前面的图,分组变量(月份)是顺着x轴,这里介绍的山峦图(重山叠叠的感觉)分组变量是顺着y轴,这种图,在画不同时间的分布图的时候,效果非常不错。 比如图 55.10, 展示气温分布的山峦图。同样,图中很直观地展示了11月份的气温分布有两个峰值。

-
bandwidth <- 3.4
-
-lincoln_df %>%
-  ggplot(aes(x = `Mean Temperature [F]`, y = `Month`)) +
-  geom_density_ridges(
-    scale = 3, rel_min_height = 0.01,
-    bandwidth = bandwidth, fill = colorspace::lighten("#56B4E9", .3), color = "white"
-  ) +
-  scale_x_continuous(
-    name = "mean temperature (°F)",
-    expand = c(0, 0), breaks = c(0, 25, 50, 75)
-  ) +
-  scale_y_discrete(name = NULL, expand = c(0, .2, 0, 2.6)) +
-  theme_minimal(base_size = 14) +
-  theme(
-    axis.text = element_text(color = "black", size = 12),
-    axis.text.y = element_text(vjust = 0),
-    plot.margin = margin(3, 7, 3, 1.5)
-  )
-
-林肯市2016年气温分布山峦图 -

-图 55.10: 林肯市2016年气温分布山峦图 -

-
-

但这种图,也有一个问题,y轴是分组变量,x轴是数据的密度分布,缺少了密度分布的标度(即,缺少了密度图的高度,事实上,小提琴图也有这个毛病),所以这种图不适合比较精确的密度分布展示,但在探索性分析中,比较不同分组的密度分布,可以很方便获取直观的认知感受。

-
-
-

55.1.6 有颜色山峦图

-

我们看到

-
    -
  • 温度值越高,x轴坐标越靠右;
  • -
  • 温度值越高,颜色越亮;
  • -
-

因此,可以将气温变量映射到位置属性和颜色属性,见图 55.11

-
bandwidth <- 3.4
-
-lincoln_base <- lincoln_weather %>%
-  ggplot(aes(x = `Mean Temperature [F]`, y = `Month`, fill = ..x..)) +
-  geom_density_ridges_gradient(
-    scale = 3, rel_min_height = 0.01, bandwidth = bandwidth,
-    color = "black", size = 0.25
-  ) +
-  scale_x_continuous(
-    name = "mean temperature (°F)",
-    expand = c(0, 0), breaks = c(0, 25, 50, 75), labels = NULL
-  ) +
-  scale_y_discrete(name = NULL, expand = c(0, .2, 0, 2.6)) +
-  colorspace::scale_fill_continuous_sequential(
-    palette = "Heat",
-    l1 = 20, l2 = 100, c2 = 0,
-    rev = FALSE
-  ) +
-  guides(fill = "none") +
-  theme_minimal(base_size = 14) +
-  theme(
-    axis.text = element_text(color = "black", size = 12),
-    axis.text.y = element_text(vjust = 0),
-    plot.margin = margin(3, 7, 3, 1.5)
-  )
-
-
-# x axis labels
-temps <- data.frame(temp = c(0, 25, 50, 75))
-
-# calculate corrected color ranges
-# stat_joy uses the +/- 3*bandwidth calculation internally
-tmin <- min(lincoln_weather$`Mean Temperature [F]`) - 3 * bandwidth
-tmax <- max(lincoln_weather$`Mean Temperature [F]`) + 3 * bandwidth
-
-xax <- axis_canvas(lincoln_base, axis = "x", ylim = c(0, 2)) +
-  geom_ridgeline_gradient(
-    data = data.frame(temp = seq(tmin, tmax, length.out = 100)),
-    aes(x = temp, y = 1.1, height = .9, fill = temp),
-    color = "transparent"
-  ) +
-  geom_text(
-    data = temps, aes(x = temp, label = temp),
-    color = "black", 
-    y = 0.9, hjust = 0.5, vjust = 1, size = 14 / .pt
-  ) +
-  colorspace::scale_fill_continuous_sequential(
-    palette = "Heat",
-    l1 = 20, l2 = 100, c2 = 0,
-    rev = FALSE
-  )
-
-lincoln_final <- cowplot::insert_xaxis_grob(lincoln_base, xax, position = "bottom", height = unit(0.1, "null"))
-
-ggdraw(lincoln_final)
-
-林肯市2016年气温分布山峦图(颜色越亮,温度越高) -

-图 55.11: 林肯市2016年气温分布山峦图(颜色越亮,温度越高) -

-
-
-
-
-

55.2 说明

-

本章的数据和代码来源于《Fundamentals of Data Visualization》的第9章和第20章。感谢Claus O. Wilke为大家写了这本非常好的书。

- -
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/ggplot2-aes.html b/_book/ggplot2-aes.html deleted file mode 100644 index 47523fc..0000000 --- a/_book/ggplot2-aes.html +++ /dev/null @@ -1,1616 +0,0 @@ - - - - - - - 第 7 章 数据可视化 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 7 章 数据可视化

-

上节课介绍了R语言的基本数据结构,可能大家有种看美剧的感觉,有些懵。这很正常,我在开始学习R的时候,感觉和大家一样,所以不要惊慌,我们后面会慢慢填补这些知识点。

-

这节课,我们介绍R语言最强大的可视化,看看都有哪些炫酷的操作。

-
library(tidyverse) # install.packages("tidyverse")
-library(patchwork) # install.packages("patchwork")
-
-

7.1 为什么要可视化

-

我们先从一个故事开始,1854年伦敦爆发严重霍乱,当时流行的观点是霍乱是通过空气传播的,而John Snow医生(不是《权力的游戏》里的 Jon Snow)研究发现,霍乱是通过饮用水传播的。研究过程中,John Snow医生统计每户病亡人数,每死亡一人标注一条横线,分析发现,大多数病例的住所都围绕在Broad Street水泵附近,结合其他证据得出饮用水传播的结论,于是移掉了Broad Street水泵的把手,霍乱最终得到控制。

-

-

另一个有趣的例子就是辛普森悖论(Simpson’s Paradox)。比如我们想研究下,学习时间和考试成绩的关联。结果发现两者呈负相关性,即学习时间越长,考试成绩反而越差(下图横坐标是学习时间,纵坐标是考试成绩),很明显这个结果有违生活常识。

-

-

事实上,当我们把学生按照不同年级分成五组,再来观察学习时间和考试成绩之间的关联,发现相关性完全逆转了! 我们可以看到学习时间和考试成绩强烈正相关。

-

辛普森悖论在日常生活中层出不穷。 那么如何避免辛普森悖论呢?我们能做的,就是仔细地研究分析各种影响因素,不要笼统概括地、浅尝辄止地看问题。其中,可视化分析为我们提供了一个好的方法。

-
-
-

7.2 什么是数据可视化

-
-

7.2.1 图形属性

-

我们在图中画一个点,那么这个就有(形状,大小,颜色,位置,透明度)等属性, -这些属性就是图形属性(有时也称之为图形元素或者视觉元素),下图 7.1列出了常用的图形属性。

-
-常用的图形元素 -

-图 7.1: 常用的图形元素 -

-
-

数据可视化的过程,就是我们的数据通过这些视觉上的元素表示出来,即,数值到图形属性的转换(映射)过程。

-
-
-
-

7.3 宏包ggplot2

-

ggplot2是RStudio首席科学家Hadley Wickham在2005年读博士期间的作品。很多人学习R语言,就是因为ggplot2宏包。目前, -ggplot2已经发展成为最受欢迎的R宏包,没有之一。

-

我们可以看看它2019年cran的下载量

-
library(cranlogs)
-
-d <- cran_downloads(package = "ggplot2", from = "2019-01-01", to = "2019-12-31")
-
-sum(d$count)
-
## [1] 9889742
-
-
-

7.4 ggplot2 的图形语法

-

ggplot2有一套优雅的绘图语法,包名中“gg”是grammar of graphics的简称。

- - - -

-

ggplot()函数包括9个部件:

-
    -
  • 数据 (data) ( 数据框)
  • -
  • 映射 (mapping)
  • -
  • 几何对象 (geom)
  • -
  • 统计变换 (stats)
  • -
  • 标度 (scale)
  • -
  • 坐标系 (coord)
  • -
  • 分面 (facet)
  • -
  • 主题 (theme)
  • -
  • 存储和输出 (output)
  • -
-

其中前三个是必需的。

-

Hadley Wickham将这套可视化语法诠释为:

-

一张统计图形就是从数据到几何对象(geometric object,缩写geom)的图形属性(aesthetic attribute,缩写aes)的一个映射。

-

-

此外,图形中还可能包含数据的统计变换(statistical transformation,缩写stats),最后绘制在某个特定的坐标系(coordinate -system,缩写coord)中,而分面(facet)则可以用来生成数据不同子集的图形。

-
-

7.4.1 语法模板

-

-

先看一个简单的案例(1880-2014年温度变化和二氧化碳排放量)

-
library(tidyverse)
-d <- read_csv("./demo_data/temp_carbon.csv")
-d
-
## # A tibble: 135 x 5
-##     year temp_anomaly land_anomaly ocean_anomaly
-##    <dbl>        <dbl>        <dbl>         <dbl>
-##  1  1880        -0.11        -0.48         -0.01
-##  2  1881        -0.08        -0.4           0.01
-##  3  1882        -0.1         -0.48          0   
-##  4  1883        -0.18        -0.66         -0.04
-##  5  1884        -0.26        -0.69         -0.14
-##  6  1885        -0.25        -0.56         -0.17
-##  7  1886        -0.24        -0.51         -0.17
-##  8  1887        -0.28        -0.47         -0.23
-##  9  1888        -0.13        -0.41         -0.05
-## 10  1889        -0.09        -0.31         -0.02
-## # ... with 125 more rows, and 1 more variable:
-## #   carbon_emissions <dbl>
-
library(ggplot2)
-ggplot(data = d, mapping = aes(x = year, y = carbon_emissions)) +
-  geom_line() +
-  xlab("Year") +
-  ylab("Carbon emissions (metric tons)") +
-  ggtitle("Annual global carbon emissions, 1880-2014")
-

-

是不是很简单?

-
-
-
-

7.5 映射

-

我们这里用科考人员收集的企鹅体征数据来演示。

-

-
library(tidyverse)
-penguins <- read_csv("./demo_data/penguins.csv") %>%
-  janitor::clean_names() %>% 
-  drop_na()
-
-penguins %>%
-  head()
-
## # A tibble: 6 x 8
-##   species island bill_length_mm bill_depth_mm
-##   <chr>   <chr>           <dbl>         <dbl>
-## 1 Adelie  Torge~           39.1          18.7
-## 2 Adelie  Torge~           39.5          17.4
-## 3 Adelie  Torge~           40.3          18  
-## 4 Adelie  Torge~           36.7          19.3
-## 5 Adelie  Torge~           39.3          20.6
-## 6 Adelie  Torge~           38.9          17.8
-## # ... with 4 more variables: flipper_length_mm <dbl>,
-## #   body_mass_g <dbl>, sex <chr>, year <dbl>
-
-

7.5.1 变量含义

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
variable | class | description
species | integer | 企鹅种类 (Adelie, Gentoo, Chinstrap)
island | integer | 所在岛屿 (Biscoe, Dream, Torgersen)
bill_length_mm | double | 嘴峰长度 (单位毫米)
bill_depth_mm | double | 嘴峰深度 (单位毫米)
flipper_length_mm | integer | 鰭肢长度 (单位毫米)
body_mass_g | integer | 体重 (单位克)
sex | integer | 性别
year | integer | 记录年份
-
-
-

7.5.2 嘴巴越长,嘴巴也会越厚?

-

这里提出一个问题,嘴巴越长,嘴巴也会越厚?

-

-

回答这个问题,我们用到penguins数据集其中的四个变量

-
penguins %>%
-  select(species, sex, bill_length_mm, bill_depth_mm) %>%
-  head(4)
-

为考察嘴峰长度(bill_length_mm)与嘴峰深度(bill_depth_mm)之间的关联,先绘制这两个变量的散点图,

-

-
    -
  • ggplot()表示调用该函数画图,data = penguins 表示使用penguins这个数据框来画图。

  • -
  • aes()表示数值和视觉属性之间的映射。

  • -
-

aes(x = bill_length_mm, y = bill_depth_mm),意思是变量bill_length_mm作为(映射为)x轴方向的位置,变量bill_depth_mm作为(映射为)y轴方向的位置

-
    -
  • aes()除了位置上映射,还可以实现色彩、形状或透明度等视觉属性的映射。

  • -
  • geom_point()表示绘制散点图。

  • -
  • +表示添加图层。

  • -
-

运行脚本后生成图片:

-

-

刚才看到的是位置上的映射,ggplot()还包含了颜色、形状以及透明度等图形属性的映射,

-

比如我们在aes()里增加一个颜色映射color = species, 这样做就是希望,不同的企鹅类型, 用不同的颜色来表现。这里,企鹅类型有三组,那么就用三种不同的颜色来表示

-
ggplot(penguins, 
-       aes(x = bill_length_mm, y = bill_depth_mm, color = species)) +
-  geom_point()
-

-

此图绘制不同类型的企鹅,嘴峰长度与嘴峰深度散点图,并用颜色来实现了分组。

-

大家试试下面代码呢,

-
ggplot(penguins, 
-       aes(x = bill_length_mm, y = bill_depth_mm, size = species)) +
-  geom_point()
-
ggplot(penguins, 
-       aes(x = bill_length_mm, y = bill_depth_mm, shape = species)) +
-  geom_point()
-
ggplot(penguins, 
-       aes(x = bill_length_mm, y = bill_depth_mm, alpha = species)) +
-  geom_point()
-

为什么图中是这样的颜色呢?那是因为ggplot()内部有一套默认的设置

-

-

不喜欢默认的颜色,可以自己定义喔。请往下看

-
-
-
-

7.6 映射 vs.设置

-

想把图中的点指定为某一种颜色,可以使用设置语句,比如

-
ggplot(penguins, 
-       aes(x = bill_length_mm, y = bill_depth_mm)) +
-  geom_point(color = "blue")
-

-

大家也可以试试下面

-
ggplot(penguins, 
-       aes(x = bill_length_mm, y = bill_depth_mm)) +
-  geom_point(size = 5)
-
ggplot(penguins, 
-       aes(x = bill_length_mm, y = bill_depth_mm)) +
-  geom_point(shape = 2)
-
ggplot(penguins, 
-       aes(x = bill_length_mm, y = bill_depth_mm)) +
-  geom_point(alpha = 0.5)
-
-

7.6.1 提问

-

-

思考下左图中aes(color = "blue")为什么会变成了红色的点?

-
-
-
-

7.7 几何对象

-

geom_point() 可以画散点图,也可以使用geom_smooth()绘制平滑曲线,

-
p1 <- 
-  ggplot(penguins, aes(x = bill_length_mm, y = bill_depth_mm)) +
-  geom_point()
-p1
-

-
p2 <- 
-  ggplot(penguins, aes(x = bill_length_mm, y = bill_depth_mm)) +
-  geom_smooth()
-p2
-

-
-
-

7.8 图层叠加

-
p3 <- 
-  ggplot(penguins, aes(x = bill_length_mm, y = bill_depth_mm)) +
-  geom_point() +
-  geom_smooth()
-p3
-

-
library(patchwork)
-(p1 / p2) | p3
-

-
-
-

7.9 Global vs. Local

-
ggplot(penguins, aes(x = bill_length_mm, y = bill_depth_mm, color = species)) +
-  geom_point()
-

-
ggplot(penguins) +
-  geom_point(aes(x = bill_length_mm, y = bill_depth_mm, color = species))
-

-

大家可以看到,以上两段代码出来的图是一样的。但背后的含义却不同。

-

事实上,如果映射关系aes() 写在ggplot()里,

-
ggplot(penguins, aes(x = bill_length_mm, y = bill_depth_mm, color = species)) +
-  geom_point()
-

那么映射关系x = bill_length_mm, y = bill_depth_mm, color = species 为全局变量。因此,当geom_point()画图时,发现缺少绘图所需要的映射关系(点的位置、点的大小、点的颜色等等),就会从ggplot()全局变量中继承映射关系。

-

如果映射关系 aes() 写在几何对象geom_point()里, 那么此处的映射关系就为局部变量,比如:

-
ggplot(penguins) +
-  geom_point(aes(x = bill_length_mm, y = bill_depth_mm, color = species))
-

此时geom_point()绘图所需要的映射关系aes(x = bill_length_mm, y = bill_depth_mm, color = species) 已经存在,就不会继承全局变量的映射关系。

-

再看下面这个例子,

-
ggplot(penguins, aes(x = bill_length_mm, y = bill_depth_mm)) +
-  geom_point(aes(color = species)) +
-  geom_smooth()
-

这里的 geom_point() 和 geom_smooth() 都会从全局变量中继承位置映射关系。

-

再看下面这个例子,

-
ggplot(penguins,aes(x = bill_length_mm, y = bill_depth_mm, color = species)) +
-  geom_point(aes(color = sex))
-

-

局部变量中的映射关系 -aes(color = )已经存在,因此不会从全局变量中继承,沿用当前的映射关系。

-

大家细细体会下,下面两段代码的区别

-
ggplot(penguins, aes(x = bill_length_mm, y = bill_depth_mm, color = species)) +
-  geom_smooth(method = lm) +
-  geom_point()
-

-
ggplot(penguins, aes(x = bill_length_mm, y = bill_depth_mm)) +
-  geom_smooth(method = lm) +
-  geom_point(aes(color = species))
-

-
-
-

7.10 保存图片

-

可以使用ggsave()函数,将图片保存为所需要的格式,如“.pdf”、“.png”等

-
p <- penguins %>% 
-  ggplot(aes(x = bill_length_mm, y = bill_depth_mm)) +
-  geom_smooth(method = lm) +
-  geom_point(aes(color = species)) +
-  ggtitle("This is my first plot")
-
-
-ggsave(
-  filename = "myfirst_plot.pdf",
-  plot = p,
-  width = 8,
-  height = 6,
-  dpi = 300
-)
-
-
-

7.11 课堂作业

-

补充代码,要求在一张图中画出

-
    -
  • 企鹅嘴巴长度和嘴巴厚度的散点图
  • -
  • 不同企鹅种类用不同的颜色
  • -
  • 整体的线性拟合
  • -
  • 不同种类分别线性拟合
  • -
-
ggplot(penguins, aes(x = ___, y = ___)) +
-  geom_point() +
-  geom_smooth() +
-  geom_smooth() 
-
-
-

7.12 延伸阅读

-

在第 13 章到第 17 章会再讲ggplot2

- -
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/ggplot2-colors.html b/_book/ggplot2-colors.html deleted file mode 100644 index b1b45f0..0000000 --- a/_book/ggplot2-colors.html +++ /dev/null @@ -1,1322 +0,0 @@ - - - - - - - 第 56 章 数据可视化中的配色 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 56 章 数据可视化中的配色

-
library(tidyverse)
-library(palmerpenguins)
-

为了让图更好看,需要在画图中使用配色,但如果从颜色的色相、色度、明亮度三个属性(Hue-Chroma-Luminance )开始学,感觉这样要学的东西太多了 😞. 事实上,大神们已经为我们准备好了很多好看的模板,我们可以偷懒直接拿来用🎵.

-

我个人比较喜欢colorspace中的配色,今天我们就讲讲如何使用这个宏包!

-
library(colorspace)    
-

colorspace 宏包提供了三种类型的配色模板:

-

-
    -
  • Qualitative: 分类,用于呈现分类信息,比如不同种类用不同的颜色,颜色之间一般对比鲜明。
  • -
  • Sequential: 序列,用于呈现有序/连续的数值信息,比如为了展示某地区黑人比例,比例越高颜色越深,比例越低颜色越浅。
  • -
  • Diverging: 分歧,用于呈现有序/连续的数值信息,这些数值围绕着一个中心值,比中心值越大的方向用一种渐变色,比中心值越小用另一种渐变色。
  • -
-

三种类型对应着三个函数 qualitative_hcl(), sequential_hcl(), 和 diverging_hcl().

-
-

56.1 配色模板

-
hcl_palettes(plot = TRUE)
-

-
-
-

56.2 使用案例

-

ggplot2默认

-
penguins %>% 
-   ggplot(aes(bill_length_mm, fill = species)) +
-   geom_density(alpha = 0.6)
-

-

手动修改

-
penguins %>% 
-   ggplot(aes(bill_length_mm, fill = species)) +
-   geom_density(alpha = 0.6) +
-   scale_fill_manual(
-     breaks = c("Adelie", "Chinstrap", "Gentoo"), 
-     values = c("darkorange", "purple", "cyan4")
-  )
-

-

模板配色

-
penguins %>% 
-   ggplot(aes(bill_length_mm, fill = species)) +
-   geom_density(alpha = 0.6) +
-   scale_fill_discrete_qualitative(palette = "cold")
-

-
-
-

56.3 color-wheel

-

Adobe Color -scheme Color

- -
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/ggplot2-customize.html b/_book/ggplot2-customize.html deleted file mode 100644 index 23abafb..0000000 --- a/_book/ggplot2-customize.html +++ /dev/null @@ -1,1808 +0,0 @@ - - - - - - - 第 17 章 ggplot2之扩展内容 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 17 章 ggplot2之扩展内容

-

ggplot2的强大,还在于它的扩展包。本章在介绍ggplot2新的内容的同时还会引入一些新的宏包,需要提前安装

-
install.packages(c("sf", "cowplot", "patchwork", "gghighlight", "ggforce"))
-

如果安装不成功,请先update宏包,再执行上面安装命令

-

-
library(tidyverse)
-library(gghighlight)
-library(cowplot)
-library(patchwork)
-library(ggforce)
-library(ggridges)
-
-

17.1 你喜欢哪个图

-
p1 <- ggplot(mpg, aes(x = cty, y = hwy)) +
-  geom_point() +
-  geom_smooth() +
-  labs(title = "1: geom_point() + geom_smooth()") +
-  theme(plot.title = element_text(face = "bold"))
-
-p2 <- ggplot(mpg, aes(x = cty, y = hwy)) +
-  geom_hex() +
-  labs(title = "2: geom_hex()") +
-  guides(fill = FALSE) +
-  theme(plot.title = element_text(face = "bold"))
-
-p3 <- ggplot(mpg, aes(x = drv, fill = drv)) +
-  geom_bar() +
-  labs(title = "3: geom_bar()") +
-  guides(fill = FALSE) +
-  theme(plot.title = element_text(face = "bold"))
-
-p4 <- ggplot(mpg, aes(x = cty)) +
-  geom_histogram(binwidth = 2, color = "white") +
-  labs(title = "4: geom_histogram()") +
-  theme(plot.title = element_text(face = "bold"))
-
-p5 <- ggplot(mpg, aes(x = cty, y = drv, fill = drv)) +
-  geom_violin() +
-  guides(fill = FALSE) +
-  labs(title = "5: geom_violin()") +
-  theme(plot.title = element_text(face = "bold"))
-
-p6 <- ggplot(mpg, aes(x = cty, y = drv, fill = drv)) +
-  geom_boxplot() +
-  guides(fill = FALSE) +
-  labs(title = "6: geom_boxplot()") +
-  theme(plot.title = element_text(face = "bold"))
-
-p7 <- ggplot(mpg, aes(x = cty, fill = drv)) +
-  geom_density(alpha = 0.7) +
-  guides(fill = FALSE) +
-  labs(title = "7: geom_density()") +
-  theme(plot.title = element_text(face = "bold"))
-
-p8 <- ggplot(mpg, aes(x = cty, y = drv, fill = drv)) +
-  geom_density_ridges() +
-  guides(fill = FALSE) +
-  labs(title = "8: ggridges::geom_density_ridges()") +
-  theme(plot.title = element_text(face = "bold"))
-
-p9 <- ggplot(mpg, aes(x = cty, y = hwy)) +
-  geom_density_2d() +
-  labs(title = "9: geom_density_2d()") +
-  theme(plot.title = element_text(face = "bold"))
-
-p1 + p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9 +
-  plot_layout(nrow = 3)
-

-
-
-

17.2 定制

-
-

17.2.1 标签

-
gapdata <- read_csv("./demo_data/gapminder.csv")
-gapdata
-
## # A tibble: 1,704 x 6
-##    country    continent  year lifeExp     pop gdpPercap
-##    <chr>      <chr>     <dbl>   <dbl>   <dbl>     <dbl>
-##  1 Afghanist~ Asia       1952    28.8  8.43e6      779.
-##  2 Afghanist~ Asia       1957    30.3  9.24e6      821.
-##  3 Afghanist~ Asia       1962    32.0  1.03e7      853.
-##  4 Afghanist~ Asia       1967    34.0  1.15e7      836.
-##  5 Afghanist~ Asia       1972    36.1  1.31e7      740.
-##  6 Afghanist~ Asia       1977    38.4  1.49e7      786.
-##  7 Afghanist~ Asia       1982    39.9  1.29e7      978.
-##  8 Afghanist~ Asia       1987    40.8  1.39e7      852.
-##  9 Afghanist~ Asia       1992    41.7  1.63e7      649.
-## 10 Afghanist~ Asia       1997    41.8  2.22e7      635.
-## # ... with 1,694 more rows
-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
-  geom_point() +
-  scale_x_log10() +
-  ggtitle("My Plot Title") +
-  xlab("The X Variable") +
-  ylab("The Y Variable")
-

-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
-  geom_point() +
-  scale_x_log10() +
-  labs(
-    title = "My Plot Title",
-    subtitle = "My Plot subtitle",
-    x = "The X Variable",
-    y = "The Y Variable"
-  )
-

-
-
-

17.2.2 定制颜色

-

我喜欢用这两个函数定制喜欢的绘图色彩: scale_colour_manual() 和 scale_fill_manual()。更多方法可以参考 Colours chapter in Cookbook for R

-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
-  geom_point() +
-  scale_x_log10() +
-  scale_color_manual(
-    values = c("#195744", "#008148", "#C6C013", "#EF8A17", "#EF2917")
-  )
-

-
-
-
-

17.3 组合图片

-

我们有时候想把多张图组合到一起

-
-

17.3.1 cowplot

-

可以使用 cowplot 宏包的plot_grid()函数完成多张图片的组合,使用方法很简单。

-
p1 <- gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp)) +
-  geom_point(aes(color = lifeExp > mean(lifeExp))) +
-  scale_x_log10() +
-  theme(legend.position = "none") +
-  scale_color_manual(values = c("orange", "pink")) +
-  labs(
-    title = "My Plot Title",
-    x = "The X Variable",
-    y = "The Y Variable"
-  )
-
p2 <- gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
-  geom_point() +
-  scale_x_log10() +
-  scale_color_manual(
-    values = c("#195744", "#008148", "#C6C013", "#EF8A17", "#EF2917")
-  ) +
-  theme(legend.position = "none") +
-  labs(
-    title = "My Plot Title",
-    x = "The X Variable",
-    y = "The Y Variable"
-  )
-
cowplot::plot_grid(
-  p1,
-  p2,
-  labels = c("A", "B")
-)
-

-

也可以使用patchwork宏包,更简单的方法

-
library(patchwork)
-p1 + p2
-

-
p1 / p2
-

-
p1 + p2 +
-  plot_annotation(
-    tag_levels = "A",
-    title = "The surprising truth about mtcars",
-    subtitle = "These 3 plots will reveal yet-untold secrets about our beloved data-set",
-    caption = "Disclaimer: None of these plots are insightful"
-  )
-

-

再来一个

-
library(palmerpenguins)
-
-g1 <- penguins %>% 
-  ggplot(aes(bill_length_mm, body_mass_g, color = species)) +
-  geom_point() + 
-  theme_bw(base_size = 14) +
-  labs(tag = "(A)", x = "Bill length (mm)", y = "Body mass (g)", color = "Species")
-       
-g2 <- penguins %>% 
-  ggplot(aes(bill_length_mm, bill_depth_mm, color = species)) +
-  geom_point() + 
-  theme_bw(base_size = 14) +
-  labs(tag = "(B)", x = "Bill length (mm)", y = "Bill depth (mm)",  color = "Species")
-         
-g1 + g2 + patchwork::plot_layout(guides = "collect")
-

-

patchwork 使用方法很简单,根本不需要记

-

-
-
-

17.3.2 保存图片

-

使用ggsave()函数,将图片保存为所需要的格式,如“.pdf”、“.png”等, 还可以指定图片的高度和宽度,默认units是英寸,也可以使用“cm”或“mm”。

-
pp <- gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
-  geom_point() +
-  scale_x_log10() +
-  scale_color_manual(
-    values = c("#195744", "#008148", "#C6C013", "#EF8A17", "#EF2917")
-  ) +
-  theme(legend.position = "none") +
-  labs(
-    title = "My Plot Title",
-    x = "The X Variable",
-    y = "The Y Variable"
-  )
-
-# ggsave("demo_plot.pdf", plot = pp, width = 8, height = 6)
-
-
-
-

17.4 中文字体

-
library(showtext)
-showtext_auto()
-
-gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
-  geom_point() +
-  scale_x_log10() +
-  scale_color_manual(
-    values = c("#195744", "#008148", "#C6C013", "#EF8A17", "#EF2917")
-  ) +
-  theme(legend.position = "none") +
-  labs(
-    title = "这是我的标题美美哒",
-    x = "这是我的x坐标",
-    y = "这是我的y坐标"
-  )
-

-
# ggsave("myfirst.pdf", width = 8, height = 6)
-
-
-

17.5 高亮某一组

-

画图很容易,然而画一张好图,不容易。图片质量好不好,其原则就是不增加看图者的心智负担,有些图片的色彩很丰富,然而需要看图人配合文字和图注等信息才能看懂作者想表达的意思,这样就失去了图片“一图胜千言”的价值。

-

分析数据过程中,我们可以高亮某组数据,突出我们想表达的信息,这是一种非常好的可视化探索手段。

-
-

17.5.1 ggplot2方法

-

这种方法是将背景部分和高亮部分分两步来画

-
drop_facet <- function(x) select(x, -continent)
-
-gapdata %>%
-  ggplot() +
-  geom_line(
-    data = drop_facet,
-    aes(x = year, y = lifeExp, group = country), color = "grey",
-  ) +
-  geom_line(aes(x = year, y = lifeExp, color = country, group = country)) +
-  facet_wrap(vars(continent)) +
-  theme(legend.position = "none")
-

-

再来一个

-
gapdata %>%
-  mutate(group = country) %>%
-  filter(continent == "Asia") %>%
-  ggplot() +
-  geom_line(
-    data = function(d) select(d, -country),
-    aes(x = year, y = lifeExp, group = group), color = "grey",
-  ) +
-  geom_line(aes(x = year, y = lifeExp, group = country), color = "red") +
-  facet_wrap(vars(country)) +
-  theme(legend.position = "none")
-

-
-
-

17.5.2 gghighlight方法

-

这里推荐gghighlight宏包

-
    -
  • dplyr has filter()
  • -
  • ggplot has Highlighting
  • -
-
gapdata %>% filter(country == "China")
-
## # A tibble: 12 x 6
-##    country continent  year lifeExp        pop gdpPercap
-##    <chr>   <chr>     <dbl>   <dbl>      <dbl>     <dbl>
-##  1 China   Asia       1952    44    556263527      400.
-##  2 China   Asia       1957    50.5  637408000      576.
-##  3 China   Asia       1962    44.5  665770000      488.
-##  4 China   Asia       1967    58.4  754550000      613.
-##  5 China   Asia       1972    63.1  862030000      677.
-##  6 China   Asia       1977    64.0  943455000      741.
-##  7 China   Asia       1982    65.5 1000281000      962.
-##  8 China   Asia       1987    67.3 1084035000     1379.
-##  9 China   Asia       1992    68.7 1164970000     1656.
-## 10 China   Asia       1997    70.4 1230075000     2289.
-## 11 China   Asia       2002    72.0 1280400000     3119.
-## 12 China   Asia       2007    73.0 1318683096     4959.
-
gapdata %>%
-  ggplot(
-    aes(x = year, y = lifeExp, color = continent, group = country)
-  ) +
-  geom_line() +
-  gghighlight(
-    country == "China", # which is passed to dplyr::filter().
-    label_key = country
-  )
-

-
gapdata %>% filter(continent == "Asia")
-
## # A tibble: 396 x 6
-##    country    continent  year lifeExp     pop gdpPercap
-##    <chr>      <chr>     <dbl>   <dbl>   <dbl>     <dbl>
-##  1 Afghanist~ Asia       1952    28.8  8.43e6      779.
-##  2 Afghanist~ Asia       1957    30.3  9.24e6      821.
-##  3 Afghanist~ Asia       1962    32.0  1.03e7      853.
-##  4 Afghanist~ Asia       1967    34.0  1.15e7      836.
-##  5 Afghanist~ Asia       1972    36.1  1.31e7      740.
-##  6 Afghanist~ Asia       1977    38.4  1.49e7      786.
-##  7 Afghanist~ Asia       1982    39.9  1.29e7      978.
-##  8 Afghanist~ Asia       1987    40.8  1.39e7      852.
-##  9 Afghanist~ Asia       1992    41.7  1.63e7      649.
-## 10 Afghanist~ Asia       1997    41.8  2.22e7      635.
-## # ... with 386 more rows
-
gapdata %>%
-  filter(continent == "Asia") %>%
-  ggplot(aes(year, lifeExp, color = country, group = country)) +
-  geom_line(size = 1.2, alpha = .9, color = "#E58C23") +
-  theme_minimal(base_size = 14) +
-  theme(
-    legend.position = "none",
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor = element_blank()
-  ) +
-  gghighlight(
-    country %in% c("China", "India", "Japan", "Korea, Rep."),
-    use_group_by = FALSE,
-    use_direct_label = FALSE,
-    unhighlighted_params = list(color = "grey90")
-  ) +
-  facet_wrap(vars(country))
-

-
-
-
-

17.6 函数图

-

有时候我们想画一个函数图,比如正态分布的函数,可能会想到先产生数据,然后画图,比如下面的代码

-
tibble(x = seq(from = -3, to = 3, by = .01)) %>%
-  mutate(y = dnorm(x, mean = 0, sd = 1)) %>%
-  ggplot(aes(x = x, y = y)) +
-  geom_line(color = "grey33")
-

-

事实上,stat_function()可以简化这个过程

-
ggplot(data = data.frame(x = c(-3, 3)), aes(x = x)) +
-  stat_function(fun = dnorm)
-

-

当然我们也可以绘制自定义函数

-
myfun <- function(x) {
-  (x - 1)**2
-}
-
-ggplot(data = data.frame(x = c(-1, 3)), aes(x = x)) +
-  stat_function(fun = myfun, geom = "line", colour = "red")
-

-

下面这是一个很不错的例子,细细体会下

-
d <- tibble(x = rnorm(2000, mean = 2, sd = 4))
-
-ggplot(data = d, aes(x = x)) +
-  geom_histogram(aes(y = stat(density))) +
-  geom_density() +
-  stat_function(fun = dnorm, args = list(mean = 2, sd = 4), colour = "red")
-

-
-
-

17.7 地图

-
-

小时候画地图很容易,长大了画地图却不容易了。

-
-

这是一个公园🏞地图和公园里松鼠🐿数量的数据集

-
nyc_squirrels <- read_csv("./demo_data/nyc_squirrels.csv")
-central_park <- sf::read_sf("./demo_data/central_park")
-

先来一个地图,

-
ggplot() +
-  geom_sf(data = central_park)
-

-

一个geom_sf就搞定了🥂,貌似没那么难呢? 好吧,换个姿势,在地图上标注松鼠出现的位置

-
nyc_squirrels %>%
-  drop_na(primary_fur_color) %>%
-  ggplot() +
-  geom_sf(data = central_park, color = "grey85") +
-  geom_point(
-    aes(x = long, y = lat, color = primary_fur_color),
-    size = .8
-  )
-

-

分开画呢

-
nyc_squirrels %>%
-  drop_na(primary_fur_color) %>%
-  ggplot() +
-  geom_sf(data = central_park, color = "grey85") +
-  geom_point(
-    aes(x = long, y = lat, color = primary_fur_color),
-    size = .8
-  ) +
-  facet_wrap(vars(primary_fur_color)) +
-  theme(legend.position = "none")
-

-
label_colors <-
-  c("all squirrels" = "grey75", "highlighted group" = "#0072B2")
-
-nyc_squirrels %>%
-  drop_na(primary_fur_color) %>%
-  ggplot() +
-  geom_sf(data = central_park, color = "grey85") +
-  geom_point(
-    data = function(x) select(x, -primary_fur_color),
-    aes(x = long, y = lat, color = "all squirrels"),
-    size = .8
-  ) +
-  geom_point(
-    aes(x = long, y = lat, color = "highlighted group"),
-    size = .8
-  ) +
-  cowplot::theme_map(16) +
-  theme(
-    legend.position = "bottom",
-    legend.justification = "center"
-  ) +
-  facet_wrap(vars(primary_fur_color)) +
-  scale_color_manual(name = NULL, values = label_colors) +
-  guides(color = guide_legend(override.aes = list(size = 2)))
-

-
# ggsave("Squirrels.pdf", width = 9, height = 6)
-

当然,也可以用gghighlight的方法

-
nyc_squirrels %>%
-  drop_na(primary_fur_color) %>%
-  ggplot() +
-  geom_sf(data = central_park, color = "grey85") +
-  geom_point(
-    aes(x = long, y = lat, color = primary_fur_color),
-    size = .8
-  ) +
-  gghighlight(
-    label_key = primary_fur_color,
-    use_direct_label = FALSE
-  ) +
-  facet_wrap(vars(primary_fur_color)) +
-  cowplot::theme_map(16) +
-  theme(legend.position = "none")
-

- - - - - - - - - - - -
library(ggplot2)
-library(showtext)
-showtext_auto()
-
-font_families()
-
## [1] "sans"         "serif"        "mono"        
-## [4] "wqy-microhei"
-
font_paths()
-
## [1] "C:\\Windows\\Fonts"
-
# font_files()
-
-## Add fonts that are available on Windows(默认路径"C:\\Windows\\Fonts")
-font_add("heiti", "simhei.ttf")
-font_add("constan", "constan.ttf", italic = "constani.ttf")
-font_add("kaishu", "simkai.ttf")
-# font_add("Noto", "NotoSansCJKsc-Regular.otf")
-font_add("Yahei", "Yahei.ttf")
-
-# 也可放在指定的目录(尽量英文)
-# https://github.com/yixuan/showtext/issues/18
-font_add("fzfsj", here::here("myfont", "fzfsj.ttf"))
-font_add("fzxbsj", here::here("myfont", "FZXBSJW.ttf"))
-font_add("maoti", here::here("myfont", "maoti.ttf"))
-font_add("fzshuliu", here::here("myfont", "fzshuliu.ttf"))
-font_families()
-
##  [1] "sans"         "serif"        "mono"        
-##  [4] "wqy-microhei" "heiti"        "constan"     
-##  [7] "kaishu"       "Yahei"        "fzfsj"       
-## [10] "fzxbsj"       "maoti"        "fzshuliu"
-
## maybe, 保存为pdf图,才能看到有效字体
-ggplot(data = mpg) +
-  geom_point(mapping = aes(x = displ, y = hwy)) +
-  ggtitle("这是我的小标宋简体") +
-  theme(
-    plot.title = element_text(family = "fzxbsj")
-  ) +
-  geom_text(aes(x = 5, y = 40),
-    label = "方正仿宋简体",
-    family = "fzfsj"
-  ) +
-  geom_text(aes(x = 5, y = 38),
-    label = "这是我的雅黑",
-    family = "Yahei"
-  ) +
-  geom_text(aes(x = 5, y = 35),
-    label = "方正楷书简体",
-    family = "kaishu"
-  ) +
-  geom_text(aes(x = 5, y = 30),
-    label = "草檀斋毛泽东字体",
-    family = "maoti"
-  ) +
-  geom_text(aes(x = 5, y = 28),
-    label = "方正苏新诗柳楷简体",
-    family = "fzshuliu"
-  )
-

-
# ggsave("showtext-example-9.pdf", width = 7, height = 4, dpi = 200)
-

根据往年大家提交的作业,有同学用rmarkdown生成pdf,图片标题使用了中文字体,但中文字体无法显示 -。解决方案是R code chunks加上fig.showtext=TRUE

-
```{r, fig.showtext=TRUE}
-

详细资料可参考这里

-
-

17.7.1 latex公式

-
library(ggplot2)
-library(latex2exp)
-
-ggplot(mpg, aes(x = displ, y = hwy)) +
-  geom_point() +
-  annotate("text",
-    x = 4, y = 40,
-    label = TeX("$\\alpha^2 + \\theta^2 = \\omega^2 $"),
-    size = 9
-  ) +
-  labs(
-    title = TeX("The ratio of 1 and 2 is $\\,\\, \\frac{1}{2}$"),
-    x = TeX("$\\alpha$"),
-    y = TeX("$\\alpha^2$")
-  )
-

- -
-
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/ggplot2-geom.html b/_book/ggplot2-geom.html deleted file mode 100644 index 176af55..0000000 --- a/_book/ggplot2-geom.html +++ /dev/null @@ -1,2085 +0,0 @@ - - - - - - - 第 13 章 ggplot2之几何对象 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 13 章 ggplot2之几何对象

-
-

采菊东篱下,悠然见南山。

-
-

根据大家投票,觉得ggplot2是最想掌握的技能,我想这就是R语言中最有质感的部分吧。所以,这里专门拿出一节课讲ggplot2,也算是补上之前第 7 章数据可视化没讲的内容。

-
library(tidyverse)
-
-

13.1 一个有趣的案例

-

先看一组数据

-
df <- read_csv("./demo_data/datasaurus.csv")
-df
-
## # A tibble: 1,846 x 3
-##    dataset     x     y
-##    <chr>   <dbl> <dbl>
-##  1 dino     55.4  97.2
-##  2 dino     51.5  96.0
-##  3 dino     46.2  94.5
-##  4 dino     42.8  91.4
-##  5 dino     40.8  88.3
-##  6 dino     38.7  84.9
-##  7 dino     35.6  79.9
-##  8 dino     33.1  77.6
-##  9 dino     29.0  74.5
-## 10 dino     26.2  71.4
-## # ... with 1,836 more rows
-

先按dataset分组,然后计算每组下x的均值和标准差、y的均值和标准差,以及x、y两者的相关系数,我们发现每组数据下它们几乎都是相等的

-
df %>%
-  group_by(dataset) %>%
-  summarise(
-    across(everything(), list(mean = mean, sd = sd), .names = "{fn}_{col}")
-  ) %>%
-  mutate(
-    across(is.numeric, round, 3)
-  )
-
## # A tibble: 13 x 5
-##    dataset    mean_x  sd_x mean_y  sd_y
-##    <chr>       <dbl> <dbl>  <dbl> <dbl>
-##  1 away         54.3  16.8   47.8  26.9
-##  2 bullseye     54.3  16.8   47.8  26.9
-##  3 circle       54.3  16.8   47.8  26.9
-##  4 dino         54.3  16.8   47.8  26.9
-##  5 dots         54.3  16.8   47.8  26.9
-##  6 h_lines      54.3  16.8   47.8  26.9
-##  7 high_lines   54.3  16.8   47.8  26.9
-##  8 slant_down   54.3  16.8   47.8  26.9
-##  9 slant_up     54.3  16.8   47.8  26.9
-## 10 star         54.3  16.8   47.8  26.9
-## 11 v_lines      54.3  16.8   47.8  26.9
-## 12 wide_lines   54.3  16.8   47.8  26.9
-## 13 x_shape      54.3  16.8   47.8  26.9
-

如果上面代码不熟悉,可以用第 6 章的代码重新表达,也是一样的

-
df %>%
-  group_by(dataset) %>%
-  summarize(
-    mean_x = mean(x),
-    mean_y = mean(y),
-    std_dev_x = sd(x),
-    std_dev_y = sd(y),
-    corr_x_y = cor(x, y)
-  )
-
## # A tibble: 13 x 6
-##    dataset   mean_x mean_y std_dev_x std_dev_y corr_x_y
-##    <chr>      <dbl>  <dbl>     <dbl>     <dbl>    <dbl>
-##  1 away        54.3   47.8      16.8      26.9  -0.0641
-##  2 bullseye    54.3   47.8      16.8      26.9  -0.0686
-##  3 circle      54.3   47.8      16.8      26.9  -0.0683
-##  4 dino        54.3   47.8      16.8      26.9  -0.0645
-##  5 dots        54.3   47.8      16.8      26.9  -0.0603
-##  6 h_lines     54.3   47.8      16.8      26.9  -0.0617
-##  7 high_lin~   54.3   47.8      16.8      26.9  -0.0685
-##  8 slant_do~   54.3   47.8      16.8      26.9  -0.0690
-##  9 slant_up    54.3   47.8      16.8      26.9  -0.0686
-## 10 star        54.3   47.8      16.8      26.9  -0.0630
-## 11 v_lines     54.3   47.8      16.8      26.9  -0.0694
-## 12 wide_lin~   54.3   47.8      16.8      26.9  -0.0666
-## 13 x_shape     54.3   47.8      16.8      26.9  -0.0656
-

那么,我们是否能得出结论,每组的数据长得差不多呢?然而,我们画图发现

-
ggplot(df, aes(x = x, y = y, colour = dataset)) +
-  geom_point() +
-  # geom_smooth(method = lm) +
-  theme(legend.position = "none") +
-  facet_wrap(~dataset, ncol = 3)
-

-

事实上,每张图都相差很大。所以,这里想说明的是,眼见为实。换句话说,可视化是数据探索中非常重要的部分。本章的目的就是带领大家学习ggplot2基本的绘图技能。

-
-
-

13.2 学习目标

-
-

13.2.1 图形语法

-

-

-
-
-

13.2.2 图形部件

-
    -
  1. data: 数据框data.frame (注意,不支持向量vector和列表list类型)

  2. -
  3. aes: 数据框中的数据变量映射到图形属性。什么叫图形属性?就是图中点的位置、形状、大小、颜色等眼睛能看到的东西。什么叫映射?就是一种对应关系,比如数学中的函数b = f(a)就是a和b之间的一种映射关系, a的值决定或者控制了b的值,在ggplot2语法里,a就是我们输入的数据变量,b就是图形属性, 这些图形属性包括:

    -
      -
    • x(x轴方向的位置)
    • -
    • y(y轴方向的位置)
    • -
    • color(点或者线等元素的颜色)
    • -
    • size(点或者线等元素的大小)
    • -
    • shape(点或者线等元素的形状)
    • -
    • alpha(点或者线等元素的透明度)
    • -
  4. -
  5. geoms: 几何对象,确定我们想画什么样的图,一个geom_***确定一种图形。更多几何对象推荐阅读这里

    -
      -
    • geom_bar()
    • -
    • geom_density()
    • -
    • geom_freqpoly()
    • -
    • geom_histogram()
    • -
    • geom_violin()
    • -
    • geom_boxplot()
    • -
    • geom_col()
    • -
    • geom_point()
    • -
    • geom_smooth()
    • -
    • geom_tile()
    • -
    • geom_density2d()
    • -
    • geom_bin2d()
    • -
    • geom_hex()
    • -
    • geom_count()
    • -
    • geom_text()
    • -
    • geom_sf()
    • -
  6. -
-

-
    -
  1. stats: 统计变换
  2. -
  3. scales: 标度
  4. -
  5. coord: 坐标系统
  6. -
  7. facet: 分面
  8. -
  9. layer: 增加图层
  10. -
  11. theme: 主题风格
  12. -
  13. save: 保存图片
  14. -
-

ggplot2图层语法框架

-

-
-
-
-

13.3 开始

-
-

-前面讲到R语言数据类型有字符串型、数值型、因子型、逻辑型、日期型等,ggplot2会将字符串型、因子型、逻辑型、日期型默认为离散变量,而数值型默认为连续变量。我们在呈现数据的时候,可能会同时用到多种类型的数据,比如 -

-
    -
  • -

    -一个离散 -

    -
  • -
  • -

    -一个连续 -

    -
  • -
  • -

    -两个离散 -

    -
  • -
  • -

    -两个连续 -

    -
  • -
  • -

    -一个离散, 一个连续 -

    -
  • -
  • -

    -三个连续 -

    -
  • -
-
-
-

13.3.1 导入数据

-
gapdata <- read_csv("./demo_data/gapminder.csv")
-gapdata
-
## # A tibble: 1,704 x 6
-##    country    continent  year lifeExp     pop gdpPercap
-##    <chr>      <chr>     <dbl>   <dbl>   <dbl>     <dbl>
-##  1 Afghanist~ Asia       1952    28.8  8.43e6      779.
-##  2 Afghanist~ Asia       1957    30.3  9.24e6      821.
-##  3 Afghanist~ Asia       1962    32.0  1.03e7      853.
-##  4 Afghanist~ Asia       1967    34.0  1.15e7      836.
-##  5 Afghanist~ Asia       1972    36.1  1.31e7      740.
-##  6 Afghanist~ Asia       1977    38.4  1.49e7      786.
-##  7 Afghanist~ Asia       1982    39.9  1.29e7      978.
-##  8 Afghanist~ Asia       1987    40.8  1.39e7      852.
-##  9 Afghanist~ Asia       1992    41.7  1.63e7      649.
-## 10 Afghanist~ Asia       1997    41.8  2.22e7      635.
-## # ... with 1,694 more rows
-
-
-

13.3.2 检查数据

-
# 是否有缺失值
-
-gapdata %>%
-  summarise(
-    across(everything(), ~ sum(is.na(.)))
-  )
-
## # A tibble: 1 x 6
-##   country continent  year lifeExp   pop gdpPercap
-##     <int>     <int> <int>   <int> <int>     <int>
-## 1       0         0     0       0     0         0
-
    -
  • country 代表国家
  • -
  • continent 表示所在的洲
  • -
  • year 时间
  • -
  • lifeExp 平均寿命
  • -
  • pop 人口数量
  • -
  • gdpPercap 人均GDP
  • -
-
-

-接下来,我们需要思考我们应该选择什么样的图,呈现这些不同类型的数据,探索数据背后的故事 -

-
-
-
-
-

13.4 基本绘图

-
-

13.4.1 柱状图

-

常用于一个离散变量

-
gapdata %>%
-  ggplot(aes(x = continent)) +
-  geom_bar()
-

-
gapdata %>%
-  ggplot(aes(x = reorder(continent, continent, length))) +
-  geom_bar()
-

-
gapdata %>%
-  ggplot(aes(x = reorder(continent, continent, length))) +
-  geom_bar() +
-  coord_flip()
-

-
# geom_bar vs stat_count
-gapdata %>%
-  ggplot(aes(x = continent)) +
-  stat_count()
-

-
gapdata %>% count(continent)
-
## # A tibble: 5 x 2
-##   continent     n
-##   <chr>     <int>
-## 1 Africa      624
-## 2 Americas    300
-## 3 Asia        396
-## 4 Europe      360
-## 5 Oceania      24
-

可见,geom_bar() 自动完成了这个统计,更多geom与stat对应关系见这里

-
gapdata %>%
-  distinct(continent, country) %>%
-  ggplot(aes(x = continent)) +
-  geom_bar()
-

-

我个人比较喜欢先统计,然后画图

-
gapdata %>%
-  distinct(continent, country) %>%
-  group_by(continent) %>%
-  summarise(num = n()) %>%
-  ggplot(aes(x = continent, y = num)) +
-  geom_col()
-

-
-
-

13.4.2 直方图

-

常用于一个连续变量

-
gapdata %>%
-  ggplot(aes(x = lifeExp)) +
-  geom_histogram() # 对应的stat_bin()
-

-
gapdata %>%
-  ggplot(aes(x = lifeExp)) +
-  geom_histogram(binwidth = 1)
-

-
#' histograms, 默认使用 `position = "stack"`
-gapdata %>%
-  ggplot(aes(x = lifeExp, fill = continent)) +
-  geom_histogram()
-

-
#' 使用`position = "identity"`
-gapdata %>%
-  ggplot(aes(x = lifeExp, fill = continent)) +
-  geom_histogram(position = "identity")
-

-
gapdata %>%
-  ggplot(aes(x = lifeExp, color = continent)) +
-  geom_freqpoly()
-

-
#' smooth histogram = densityplot
-gapdata %>%
-  ggplot(aes(x = lifeExp)) +
-  geom_density()
-

-

如果不喜欢下面那条线,可以这样

-
gapdata %>%
-  ggplot(aes(x = lifeExp)) +
-  geom_line(stat = "density")
-

-
# adjust 调节bandwidth,
-# adjust = 1/2 means use half of the default bandwidth.
-gapdata %>%
-  ggplot(aes(x = lifeExp)) +
-  geom_density(adjust = 1)
-

-
gapdata %>%
-  ggplot(aes(x = lifeExp)) +
-  geom_density(adjust = 0.2)
-

-
gapdata %>%
-  ggplot(aes(x = lifeExp, color = continent)) +
-  geom_density()
-

-
gapdata %>%
-  ggplot(aes(x = lifeExp, fill = continent)) +
-  geom_density(alpha = 0.2)
-

-
gapdata %>%
-  filter(continent != "Oceania") %>%
-  ggplot(aes(x = lifeExp, fill = continent)) +
-  geom_density(alpha = 0.2)
-

-
gapdata %>%
-  ggplot(aes(x = lifeExp)) +
-  geom_density() +
-  # facet_wrap(vars(continent))
-  facet_grid(. ~ continent)
-

-
gapdata %>%
-  filter(continent != "Oceania") %>%
-  ggplot(aes(x = lifeExp, fill = continent)) +
-  geom_histogram() +
-  facet_grid(continent ~ .)
-

-

直方图和密度图画在一起。注意y = stat(density)表示y是由x新生成的变量,这是一种固定写法,类似的还有stat(count), stat(level)

-
gapdata %>%
-  filter(continent != "Oceania") %>%
-  ggplot(aes(x = lifeExp, y = stat(density))) +
-  geom_histogram(aes(fill = continent)) +
-  geom_density() +
-  facet_grid(continent ~ .)
-

-
-
-

13.4.3 箱线图

-

一个离散变量 + 一个连续变量

-
#' 思考下结果为什么是这样?
-gapdata %>%
-  ggplot(aes(x = year, y = lifeExp)) +
-  geom_boxplot()
-

-
# 数据框中的year变量是数值型,需要先转换成因子型,弄成离散型变量
-gapdata %>%
-  ggplot(aes(x = as.factor(year), y = lifeExp)) +
-  geom_boxplot()
-

-
# 明确指定分组变量
-gapdata %>%
-  ggplot(aes(x = year, y = lifeExp)) +
-  geom_boxplot(aes(group = year))
-

-
gapdata %>%
-  ggplot(aes(x = year, y = lifeExp)) +
-  geom_violin(aes(group = year)) +
-  geom_jitter(alpha = 1 / 4) +
-  geom_smooth(se = FALSE)
-

-
-
-

13.4.4 抖散图

-

点重叠的处理方案

-
gapdata %>% ggplot(aes(x = continent, y = lifeExp)) +
-  geom_point()
-

-
gapdata %>% ggplot(aes(x = continent, y = lifeExp)) +
-  geom_jitter()
-

-
gapdata %>% ggplot(aes(x = continent, y = lifeExp)) +
-  geom_boxplot()
-

-
gapdata %>% ggplot(aes(x = continent, y = lifeExp)) +
-  geom_boxplot() +
-  geom_jitter()
-

-
gapdata %>%
-  ggplot(aes(x = continent, y = lifeExp)) +
-  geom_jitter() +
-  stat_summary(fun.y = median, colour = "red", geom = "point", size = 5)
-

-
gapdata %>%
-  ggplot(aes(reorder(x = continent, lifeExp), y = lifeExp)) +
-  geom_jitter() +
-  stat_summary(fun.y = median, colour = "red", geom = "point", size = 5)
-

-

注意到我们已经提到过 stat_count / stat_bin / stat_summary

-
gapdata %>%
-  ggplot(aes(x = continent, y = lifeExp)) +
-  geom_violin(
-    trim = FALSE,
-    alpha = 0.5
-  ) +
-  stat_summary(
-    fun.y = mean,
-    fun.ymax = function(x) {
-      mean(x) + sd(x)
-    },
-    fun.ymin = function(x) {
-      mean(x) - sd(x)
-    },
-    geom = "pointrange"
-  )
-

-
-
-

13.4.5 山峦图

-

常用于一个离散变量 + 一个连续变量

-
gapdata %>%
-  ggplot(aes(
-    x = lifeExp,
-    y = continent,
-    fill = continent
-  )) +
-  ggridges::geom_density_ridges()
-

-
# https://learnui.design/tools/data-color-picker.html#palette
-gapdata %>%
-  ggplot(aes(
-    x = lifeExp,
-    y = continent,
-    fill = continent
-  )) +
-  ggridges::geom_density_ridges() +
-  scale_fill_manual(
-    values = c("#003f5c", "#58508d", "#bc5090", "#ff6361", "#ffa600")
-  )
-

-
gapdata %>%
-  ggplot(aes(
-    x = lifeExp,
-    y = continent,
-    fill = continent
-  )) +
-  ggridges::geom_density_ridges() +
-  scale_fill_manual(
-    values = colorspace::sequential_hcl(5, palette = "Peach")
-  )
-

-
-
-

13.4.6 散点图

-

常用于两个连续变量

-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp)) +
-  geom_point()
-

-
gapdata %>%
-  ggplot(aes(x = log(gdpPercap), y = lifeExp)) +
-  geom_point()
-

-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp)) +
-  geom_point() +
-  scale_x_log10() # A better way to log transform
-

-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp)) +
-  geom_point(aes(color = continent))
-

-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp)) +
-  geom_point(alpha = (1 / 3), size = 2)
-

-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp)) +
-  geom_point() +
-  geom_smooth()
-

-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp)) +
-  geom_point() +
-  geom_smooth(lwd = 3, se = FALSE)
-

-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp)) +
-  geom_point() +
-  geom_smooth(lwd = 3, se = FALSE, method = "lm")
-

-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
-  geom_point() +
-  geom_smooth(lwd = 3, se = FALSE, method = "lm")
-

-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
-  geom_point(show.legend = FALSE) +
-  facet_wrap(~continent)
-

-
jCountries <- c("Canada", "Rwanda", "Cambodia", "Mexico")
-
-gapdata %>%
-  filter(country %in% jCountries) %>%
-  ggplot(aes(x = year, y = lifeExp, color = country)) +
-  geom_line() +
-  geom_point()
-

-
gapdata %>%
-  filter(country %in% jCountries) %>%
-  ggplot(aes(
-    x = year, y = lifeExp,
-    color = reorder(country, -1 * lifeExp, max)
-  )) +
-  geom_line() +
-  geom_point()
-

-

这是一种技巧,但我更推荐以下方法

-
d1 <- gapdata %>%
-  filter(country %in% jCountries) %>%
-  group_by(country) %>%
-  mutate(end_label = if_else(year == max(year), country, NA_character_))
-
-d1
-
## # A tibble: 48 x 7
-## # Groups:   country [4]
-##    country continent  year lifeExp    pop gdpPercap
-##    <chr>   <chr>     <dbl>   <dbl>  <dbl>     <dbl>
-##  1 Cambod~ Asia       1952    39.4 4.69e6      368.
-##  2 Cambod~ Asia       1957    41.4 5.32e6      434.
-##  3 Cambod~ Asia       1962    43.4 6.08e6      497.
-##  4 Cambod~ Asia       1967    45.4 6.96e6      523.
-##  5 Cambod~ Asia       1972    40.3 7.45e6      422.
-##  6 Cambod~ Asia       1977    31.2 6.98e6      525.
-##  7 Cambod~ Asia       1982    51.0 7.27e6      624.
-##  8 Cambod~ Asia       1987    53.9 8.37e6      684.
-##  9 Cambod~ Asia       1992    55.8 1.02e7      682.
-## 10 Cambod~ Asia       1997    56.5 1.18e7      734.
-## # ... with 38 more rows, and 1 more variable:
-## #   end_label <chr>
-
d1 %>% ggplot(aes(
-  x = year, y = lifeExp, color = country
-)) +
-  geom_line() +
-  geom_point() +
-  geom_label(aes(label = end_label)) +
-  theme(legend.position = "none")
-

-

如果觉得麻烦,就用gghighlight宏包吧

-
gapdata %>%
-  filter(country %in% jCountries) %>%
-  ggplot(aes(
-    x = year, y = lifeExp, color = country
-  )) +
-  geom_line() +
-  geom_point() +
-  gghighlight::gghighlight()
-

-
-
-

13.4.7 点线图

-
gapdata %>%
-  filter(continent == "Asia" & year == 2007) %>%
-  ggplot(aes(x = lifeExp, y = country)) +
-  geom_point()
-

-
gapdata %>%
-  filter(continent == "Asia" & year == 2007) %>%
-  ggplot(aes(
-    x = lifeExp,
-    y = reorder(country, lifeExp)
-  )) +
-  geom_point(color = "blue", size = 2) +
-  geom_segment(aes(
-    x = 40,
-    xend = lifeExp,
-    y = reorder(country, lifeExp),
-    yend = reorder(country, lifeExp)
-  ),
-  color = "lightgrey"
-  ) +
-  labs(
-    x = "Life Expectancy (years)",
-    y = "",
-    title = "Life Expectancy by Country",
-    subtitle = "GapMinder data for Asia - 2007"
-  ) +
-  theme_minimal() +
-  theme(
-    panel.grid.major = element_blank(),
-    panel.grid.minor = element_blank()
-  )
-

-
-
-

13.4.8 文本标注

-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp)) +
-  geom_point() +
-  ggforce::geom_mark_ellipse(aes(
-    filter = gdpPercap > 70000,
-    label = "有钱的国家",
-    description = "他们是什么国家?"
-  ))
-

-
ten_countries <- gapdata %>%
-  distinct(country) %>%
-  pull() %>%
-  sample(10)
-
library(ggrepel)
-gapdata %>%
-  filter(year == 2007) %>%
-  mutate(
-    label = ifelse(country %in% ten_countries, as.character(country), "")
-  ) %>%
-  ggplot(aes(log(gdpPercap), lifeExp)) +
-  geom_point(
-    size = 3.5,
-    alpha = .9,
-    shape = 21,
-    col = "white",
-    fill = "#0162B2"
-  ) +
-  geom_text_repel(
-    aes(label = label),
-    size = 4.5,
-    point.padding = .2,
-    box.padding = .3,
-    force = 1,
-    min.segment.length = 0
-  ) +
-  theme_minimal(14) +
-  theme(
-    legend.position = "none",
-    panel.grid.minor = element_blank()
-  ) +
-  labs(
-    x = "log(GDP per capita)",
-    y = "life expectancy"
-  )
-

-
-
-

13.4.9 errorbar图

-
avg_gapdata <- gapdata %>%
-  group_by(continent) %>%
-  summarise(
-    mean = mean(lifeExp),
-    sd = sd(lifeExp)
-  )
-avg_gapdata
-
## # A tibble: 5 x 3
-##   continent  mean    sd
-##   <chr>     <dbl> <dbl>
-## 1 Africa     48.9  9.15
-## 2 Americas   64.7  9.35
-## 3 Asia       60.1 11.9 
-## 4 Europe     71.9  5.43
-## 5 Oceania    74.3  3.80
-
avg_gapdata %>%
-  ggplot(aes(continent, mean, fill = continent)) +
-  # geom_col(alpha = 0.5) +
-  geom_point() +
-  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = 0.25)
-

-
-
-

13.4.10 椭圆图

-
gapdata %>%
-  ggplot(aes(x = log(gdpPercap), y = lifeExp)) +
-  geom_point() +
-  stat_ellipse(type = "norm", level = 0.95)
-

-
-
-

13.4.11 2D 密度图

-

与一维的情形geom_density()类似, -geom_density_2d(), geom_bin2d(), geom_hex()常用于刻画两个变量构成的二维区间的密度

-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp)) +
-  geom_bin2d()
-

-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp)) +
-  geom_hex()
-

-
-
-

13.4.12 马赛克图

-

geom_tile()geom_contour()geom_raster()常用于3个变量

-
gapdata %>%
-  group_by(continent, year) %>%
-  summarise(mean_lifeExp = mean(lifeExp)) %>%
-  ggplot(aes(x = year, y = continent, fill = mean_lifeExp)) +
-  geom_tile() +
-  scale_fill_viridis_c()
-

-

事实上可以有更好的呈现方式

-
gapdata %>%
-  group_by(continent, year) %>%
-  summarise(mean_lifeExp = mean(lifeExp)) %>%
-  ggplot(aes(x = year, y = continent, size = mean_lifeExp)) +
-  geom_point()
-

-
gapdata %>%
-  group_by(continent, year) %>%
-  summarise(mean_lifeExp = mean(lifeExp)) %>%
-  ggplot(aes(x = year, y = continent, size = mean_lifeExp)) +
-  geom_point(shape = 21, color = "red", fill = "white") +
-  scale_size_continuous(range = c(7, 15)) +
-  geom_text(aes(label = round(mean_lifeExp, 2)), size = 3, color = "black") +
-  theme(legend.position = "none")
-

-
-
-
-

13.5 主题风格

-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
-  geom_point() +
-  geom_smooth(lwd = 3, se = FALSE, method = "lm")
-

-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
-  geom_point() +
-  geom_smooth(lwd = 3, se = FALSE, method = "lm") +
-  ggtitle("Life expectancy over time by continent")
-

-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
-  geom_point() +
-  geom_smooth(lwd = 3, se = FALSE, method = "lm") +
-  theme_grey() # the default
-

-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
-  geom_point() +
-  geom_smooth(lwd = 3, se = FALSE, method = "lm") +
-  theme_bw()
-

-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
-  geom_point() +
-  geom_smooth(lwd = 3, se = FALSE, method = "lm") +
-  ggthemes::theme_calc() +
-  ggtitle("ggthemes::theme_calc()")
-

-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
-  geom_point() +
-  geom_smooth(lwd = 3, se = FALSE, method = "lm") +
-  ggthemes::theme_economist() +
-  ggtitle("ggthemes::theme_economist()")
-

-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
-  geom_point() +
-  geom_smooth(lwd = 3, se = FALSE, method = "lm") +
-  ggthemes::theme_economist_white() +
-  ggtitle("ggthemes::theme_economist_white()")
-

-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
-  geom_point() +
-  geom_smooth(lwd = 3, se = FALSE, method = "lm") +
-  ggthemes::theme_few() +
-  ggtitle("ggthemes::theme_few()")
-

-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
-  geom_point() +
-  geom_smooth(lwd = 3, se = FALSE, method = "lm") +
-  ggthemes::theme_gdocs() +
-  ggtitle("ggthemes::theme_gdocs()")
-

-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
-  geom_point() +
-  geom_smooth(lwd = 3, se = FALSE, method = "lm") +
-  ggthemes::theme_tufte() +
-  ggtitle("ggthemes::theme_tufte()")
-

-
gapdata %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
-  geom_point() +
-  geom_smooth(lwd = 3, se = FALSE, method = "lm") +
-  ggthemes::theme_wsj() +
-  ggtitle("ggthemes::theme_wsj()")
-

-
-
-

13.6 参考资料

- - -
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/ggplot2-gganimate.html b/_book/ggplot2-gganimate.html deleted file mode 100644 index 392e406..0000000 --- a/_book/ggplot2-gganimate.html +++ /dev/null @@ -1,1891 +0,0 @@ - - - - - - - 第 57 章 ggplot2之让你的数据骚动起来 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 57 章 ggplot2之让你的数据骚动起来

-

这节课,我们讲如何让我们的图动起来。(因为渲染需要花费很长时间,所以文档中的动图代码都没有执行。)

-
-

57.1 为什么要使用动图

-
    -
  • 改进了图形在时间上和空间上的重新定位
  • -
  • 传递更多信息
  • -
  • 引人注意
  • -
-
-
-

57.2 gganimate宏包

-

动图可以理解为多张静态图堆在一起,当然不是随意的堆放,而是按照一定的规则,比如按照时间的顺序,或者类别的顺序。一般而言,动图制作包括两个步骤: 静态图制作及图形组装。静态图制作,前面几章我们讲过主要用ggplot2宏包实现;对于图形组装,需要用到今天我们要讲的、由Thomas Lin Pedersen开发的gganimate宏包,来自同一工厂的产品,用起来自然是无缝衔接啦。

-
install.packages("gganimate")
-
-

57.2.1 先来一张静态图

-
library(tidyverse)
-library(covdata) # remotes::install_github("kjhealy/covdata")
-library(gganimate)
-
covdata::covnat %>%
-  dplyr::filter(iso3 == "USA") %>%
-  dplyr::filter(cu_cases > 0) %>%
-  ggplot(aes(x = date, y = cases)) +
-  geom_path() +
-  labs(
-    title = "美国新冠肺炎累积确诊病例",
-    subtitle = "数据来源https://kjhealy.github.io/covdata/"
-  )
-

让它动起来,我们只需要增加一行代码!

-
covdata::covnat %>%
-  dplyr::filter(iso3 == "USA") %>%
-  dplyr::filter(cu_cases > 0) %>%
-  ggplot(aes(x = date, y = cases)) +
-  geom_path() +
-  labs(
-    title = "美国新冠肺炎累积确诊病例 {frame_along}",
-    subtitle = "数据来源https://kjhealy.github.io/covdata/"
-  ) +
-  transition_reveal(along = date)
-
-
-

57.2.2 相对复杂点的例子

-
library(datasauRus)
-ggplot(datasaurus_dozen) +
-  aes(x, y, color = dataset) +
-  geom_point()
-

用分面展示

-
ggplot(datasaurus_dozen) +
-  aes(x, y, color = dataset) +
-  geom_point() +
-  facet_wrap(~dataset)
-

可以用动图展示

-
ggplot(datasaurus_dozen) +
-  aes(x, y, color = dataset) +
-  geom_point() +
-  transition_states(dataset, 3, 1) + # <<
-  labs(title = "Dataset: {closest_state}")
-

是不是很炫酷,下面我们就一个个讲解其中的函数。

-
-
-
-

57.3 The grammar of animation

-

使用gganimate做动画,只需要掌握以下五类函数:

-
    -
  • transition_*(): 定义动画是根据哪个变量进行”动”,以及如何”动”
  • -
  • view_*(): 定义坐标轴随数据变化.
  • -
  • shadow_*(): 影子(旧数据的历史记忆)?定义点相继出现的方式.
  • -
  • enter_*()/exit_*(): 定义新数据出现和旧数据退去的方式.
  • -
  • ease_aes(): 美观定义,控制变化的节奏(如何让整个动画看起来更舒适).
  • -
-

下面通过案例依次讲解这些函数功能。

-
-
-

57.4 希望动画随哪个变量动起来

-

变量如何选择,这需要从变量类型和变量代表的信息来确定。

-
-

57.4.1 transition_states

-
    -
  • transition_states(states = ), 这里的参数states往往带有分组信息,可以等价于静态图中的分面。
  • -
-
diamonds %>%
-  ggplot(aes(carat, price)) +
-  geom_point()
-
diamonds %>%
-  ggplot(aes(carat, price)) +
-  geom_point() +
-  facet_wrap(vars(color))
-
diamonds %>%
-  ggplot(aes(carat, price)) +
-  geom_point() +
-  transition_states(states = color, transition_length = 3, state_length = 1)
-
-
-

57.4.2 transition_time

-
    -
  • transition_time(time = ), 这里的time一般认为是连续的值,相比于transition_states,没有了transition_length这个选项,是因为transition_length默认为time. 事实上,transition_time是transition_states的一种特例,但其实也有分组的要求
  • -
-
p <- gapminder::gapminder %>%
-  ggplot(aes(x = gdpPercap, y = lifeExp, size = pop, colour = country)) +
-  geom_point(alpha = 0.7, show.legend = FALSE) +
-  scale_size(range = c(2, 12)) +
-  scale_x_log10() +
-  labs(
-    x = "GDP per capita",
-    y = "life expectancy"
-  )
-p
-
anim <- p +
-  transition_time(time = year) +
-  labs(title = "year: {frame_time}")
-anim
-
-
-

57.4.3 transition_reveal

-
    -
  • transition_reveal(along = ), along 这个词可以看出,它是按照某个变量依次显示的意思,比如顺着x轴显示
  • -
-
ggplot(data = economics) +
-  aes(x = date, y = unemploy) +
-  geom_line()
-
ggplot(economics) +
-  aes(x = date, y = unemploy) +
-  geom_line() +
-  transition_reveal(along = date) +
-  labs(title = "now is {frame_along}")
-
-
-

57.4.4 transition_filter

-
    -
  • transition_filter( 至少2个筛选条件,transition_length = , filter_length =), 动图将会在这些筛选条件对应的子图之间转换
  • -
-
diamonds %>%
-  ggplot(aes(carat, price)) +
-  geom_point() +
-  transition_filter(
-    transition_length = 3,
-    filter_length = 1,
-
-    cut == "Ideal",
-    Deep = depth >= 60
-  )
-
-
-

57.4.5 transition_layers

-
    -
  • transition_layers(): 依次显示每个图层
  • -
-
mtcars %>%
-  ggplot(aes(mpg, disp)) +
-  geom_point() +
-  geom_smooth(colour = "grey", se = FALSE) +
-  geom_smooth(aes(colour = factor(gear))) +
-  transition_layers(
-    layer_length = 1, transition_length = 2,
-    from_blank = FALSE, keep_layers = c(Inf, 0, 0)
-  ) +
-  enter_fade() +
-  exit_fade()
-
-
-

57.4.6 其他

-
    -
  • transition_manual()
  • -
  • transition_components()
  • -
  • transition_events()
  • -
-
-
-
-

57.5 希望坐标轴随数据动起来

-

动画过程中,绘图窗口怎么变化呢?

-
-

57.5.1 view_follow

-
ggplot(iris, aes(Sepal.Length, Sepal.Width)) +
-  geom_point() +
-  labs(title = "{closest_state}") +
-  transition_states(Species, transition_length = 4, state_length = 1) +
-  view_follow()
-
-
-

57.5.2 其它

-
    -
  • view_step()
  • -
  • view_step_manual()
  • -
  • view_zoom()
  • -
  • view_zoom_manual()
  • -
-
-
-
-

57.6 希望动画有个记忆

-
    -
  • shadow_wake(wake_length =, ) 旧数据消退时,制造点小小的尾迹的效果(wake除了叫醒,还有尾迹的意思,合起来就是记忆_尾迹)
  • -
  • shadow_trail(distance = 0.05) 旧数据消退时,制造面包屑一样的残留痕迹(记忆_零星残留)
  • -
  • shadow_mark(past = TRUE, future = FALSE) 将旧数据和新数据当作背景(记忆_标记)
  • -
-
-

57.6.1 shadow_wake()

-
p +
-  transition_time(time = year) +
-  labs(title = "year: {frame_time}") +
-  shadow_wake(wake_length = 0.1, alpha = FALSE)
-
ggplot(iris, aes(Petal.Length, Sepal.Length)) +
-  geom_point(size = 2) +
-  labs(title = "{closest_state}") +
-  transition_states(Species, transition_length = 4, state_length = 1) +
-  shadow_wake(wake_length = 0.1)
-
-
-

57.6.2 shadow_trail()

-
p +
-  transition_time(time = year) +
-  labs(title = "year: {frame_time}") +
-  shadow_trail(distance = 0.1)
-
ggplot(iris, aes(Petal.Length, Sepal.Length)) +
-  geom_point(size = 2) +
-  labs(title = "{closest_state}") +
-  transition_states(Species, transition_length = 4, state_length = 1) +
-  shadow_trail(distance = 0.1)
-
-
-

57.6.3 shadow_mark()

-
p +
-  transition_time(time = year) +
-  labs(title = "year: {frame_time}") +
-  shadow_mark(alpha = 0.3, size = 0.5)
-
ggplot(airquality, aes(Day, Temp)) +
-  geom_line(color = "red", size = 1) +
-  transition_time(Month) +
-  shadow_mark(colour = "black", size = 0.75)
-
-
-
-

57.7 定义新数据出现和旧数据退去的方式

-

出现和退去的函数是成对的

-
-

57.7.1 enter/exit_fade()

-

透明度上的变化,我这里用柱状图展示,效果要明显一点。

-
tibble(
-  x = month.name,
-  y = sample.int(12)
-) %>%
-  ggplot(aes(x = x, y = y)) +
-  geom_col() +
-  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
-  transition_states(states = month.name)
-
tibble(
-  x = month.name,
-  y = sample.int(12)
-) %>%
-  ggplot(aes(x = x, y = y)) +
-  geom_col() +
-  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
-  transition_states(states = month.name) +
-  shadow_mark(past = TRUE) +
-  enter_fade()
-
p +
-  transition_time(time = year) +
-  labs(title = "year: {frame_time}") +
-  enter_fade()
-
-
-

57.7.2 enter_grow()/exit_shrink()

-

大小上的变化

-
tibble(
-  x = month.name,
-  y = sample.int(12)
-) %>%
-  ggplot(aes(x = x, y = y)) +
-  geom_col() +
-  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
-  transition_states(states = month.name) +
-  shadow_mark(past = TRUE) +
-  enter_grow()
-
p +
-  transition_time(time = year) +
-  labs(title = "year: {frame_time}") +
-  enter_grow() +
-  enter_fade()
-
-
-
-

57.8 控制变化的节奏

-

控制数据点变化的快慢

-
p + ease_aes({aesthetic} = {ease})
-p + ease_aes(x = "cubic")
-
knitr::include_graphics("images/ease.png")
-

-

Source: https://easings.net/

-

看下面的案例:

-
diamonds %>%
-  ggplot(aes(carat, price)) +
-  geom_point() +
-  transition_states(color, transition_length = 3, state_length = 1) +
-  ease_aes("cubic-in") # Change easing of all aesthetics
-
diamonds %>%
-  ggplot(aes(carat, price)) +
-  geom_point() +
-  transition_states(color, transition_length = 3, state_length = 1) +
-  ease_aes(x = "elastic-in") # Only change `x` (others remain “linear”)
-
-
-

57.9 标签

-

我们可能需要在标题中加入每张动画的信息,常用罗列如下

-
transition_states(states = ) +
-  labs(title = "previous is {previous_state}, 
-                current is {closest_state}, 
-                next is {next_state}")
-
-transition_layers() +
-  labs(title = "previous is {previous_layers}, 
-                current is {closest_layers}, 
-                next is {next_layers}")
-
-transition_time(time = ) +
-  labs(title = "now is {frame_time}")
-
-
-transition_reveal(along = ) +
-  labs(title = "now is {frame_along}")
-
-
-

57.10 保存

-
-

57.10.1 Renderer options

-
## # A tibble: 6 x 2
-##   Function      Description                            
-##   <chr>         <chr>                                  
-## 1 gifski_rende~ Default, super fast gif renderer.      
-## 2 magick_rende~ Somewhat slower gif renderer.          
-## 3 ffmpeg_rende~ Uses ffmpeg to create a video from the~
-## 4 av_renderer   Uses the av package to create a video ~
-## 5 file_renderer Dumps a list of image frames from the ~
-## 6 sprite_rende~ Creates a spritesheet from frames of t~
-
-
-

57.10.2 常用方法

-

一般用anim_save()保存为 gif 格式,方法类似ggsave()

-
animation_to_save <- diamonds %>%
-  ggplot(aes(carat, price)) +
-  geom_point() +
-  transition_states(color, transition_length = 3, state_length = 1) +
-  ease_aes("cubic-in")
-
-anim_save("first_saved_animation.gif", animation = animation_to_save)
-
-
-
-

57.11 案例演示一

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

这是网上有段时间比较火的racing_bar图

-
ranked_by_date <- covdata::covnat %>%
-  group_by(date) %>%
-  arrange(date, desc(cu_cases)) %>%
-  mutate(rank = 1:n()) %>%
-  filter(rank <= 10) %>%
-  ungroup()
-
ranked_by_date %>%
-  filter(date >= "2020-05-01") %>%
-  ggplot(
-    aes(x = rank, y = cname, group = cname, fill = cname)
-  ) +
-  geom_tile(
-    aes(
-      y = cu_cases / 2,
-      height = cu_cases,
-      width = 0.9
-    ),
-    alpha = 0.8,
-    show.legend = F
-  ) +
-  geom_text(aes(
-    y = cu_cases,
-    label = cname
-  ),
-  show.legend = FALSE
-  ) +
-  scale_x_reverse(
-    breaks = c(1:10),
-    label = c(1:10)
-  ) +
-  theme_minimal() +
-  coord_flip(clip = "off", expand = FALSE) +
-  labs(
-    title = "日期: {closest_state}",
-    x = "",
-    caption = "Source: github/kjhealy/covdata"
-  ) +
-  transition_states(date,
-    transition_length = 4,
-    state_length = 1,
-    wrap = TRUE
-  ) +
-  ease_aes("cubic-in-out")
-
-
-

57.12 案例演示二

-
bats <- readr::read_csv("./demo_data/bats-subset.csv") %>%
-  dplyr::mutate(id = factor(id))
-
bats %>%
-  ggplot(aes(
-    x = longitude,
-    y = latitude,
-    group = id,
-    color = id
-  )) +
-  geom_point()
-
-

57.12.1 常规的方法

-
bats %>%
-  ggplot(aes(
-    x = longitude,
-    y = latitude,
-    group = id,
-    color = id
-  )) +
-  geom_point() +
-  transition_time(time) +
-  shadow_mark(past = TRUE)
-
    -
  • geom_path()是按照数据点出现的先后顺序
  • -
  • geom_line()是按照数据点在x轴的顺序
  • -
-
bats %>%
-  ggplot(aes(
-    x = longitude,
-    y = latitude,
-    group = id,
-    color = id
-  )) +
-  geom_path() +
-  transition_time(time) +
-  shadow_mark(past = TRUE)
-
-
-

57.12.2 炫酷点的

- -
bats %>%
-  dplyr::mutate(
-    image = "images/bat-cartoon.png"
-  ) %>%
-  ggplot(aes(
-    x = longitude,
-    y = latitude,
-    group = id,
-    color = id
-  )) +
-  geom_path() +
-  ggimage::geom_image(aes(image = image), size = 0.1) +
-  transition_reveal(time)
-
-
-
-

57.13 案例演示三

-

全球R-Ladies组织,会议活动的情况,我们在地图上用动图展示

-
rladies <- read_csv("./demo_data/rladies.csv")
-rladies
-

这里需要一个地图,可以这样

-
ggplot() +
-  ggplot2::borders("world", colour = "gray85", fill = "gray80") +
-  ggthemes::theme_map()
-

当然,最好是这样

-
library(maps)
-world <- map_data("world") 
-
-world_map <- ggplot() +
-  geom_polygon(data = world, 
-               aes(x = long, y = lat, group = group),
-               color = "white", fill = "gray80"
-               ) +
-  ggthemes::theme_map()
-
-world_map 
-

然后把点打上去

-
world_map +
-  geom_point(
-    data = rladies,
-    aes(x = lon, y = lat, size = followers),
-    colour = "purple", alpha = .5
-  ) +
-  scale_size_continuous(
-    range = c(1, 8),
-    breaks = c(250, 500, 750, 1000)
-  ) +
-  labs(size = "Followers")
-

用动图展示(这种方法常用在流行病传播的展示上)

-
world_map +
-  geom_point(aes(x = lon, y = lat, size = followers),
-    data = rladies,
-    colour = "purple", alpha = .5
-  ) +
-  scale_size_continuous(
-    range = c(1, 8),
-    breaks = c(250, 500, 750, 1000)
-  ) +
-  transition_states(created_at) +
-  shadow_mark(past = TRUE) +
-  labs(title = "Day: {closest_state}")
-
-
-

57.14 课后作业

-
-

57.14.1 作业1

-

把下图弄成你喜欢的样子

-
library(gapminder)
-theme_set(theme_bw())
-
-ggplot(gapminder) +
-  aes(
-    x = gdpPercap, y = lifeExp,
-    size = pop, colour = country
-  ) +
-  geom_point(show.legend = FALSE) +
-  scale_x_log10() +
-  scale_color_viridis_d() +
-  scale_size(range = c(2, 12)) +
-  labs(x = "GDP per capita", y = "Life expectancy") +
-  transition_time(year) +
-  labs(title = "Year: {frame_time}")
-
-
-

57.14.2 作业2

-

那请说说以下三者的区别?

-
bats %>%
-  dplyr::filter(id == 1) %>%
-  ggplot(
-    aes(
-      x = longitude,
-      y = latitude
-    )
-  ) +
-  geom_point() +
-  transition_reveal(time) # <<
-
-
-
-bats %>%
-  dplyr::filter(id == 1) %>%
-  ggplot(
-    aes(
-      x = longitude,
-      y = latitude
-    )
-  ) +
-  geom_point() +
-  transition_states(time) # <<
-
-
-
-bats %>%
-  dplyr::filter(id == 1) %>%
-  ggplot(
-    aes(
-      x = longitude,
-      y = latitude
-    )
-  ) +
-  geom_point() +
-  transition_time(time) # <<
- -
-
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/ggplot2-guides.html b/_book/ggplot2-guides.html deleted file mode 100644 index 729d5eb..0000000 --- a/_book/ggplot2-guides.html +++ /dev/null @@ -1,1387 +0,0 @@ - - - - - - - 第 16 章 ggplot2之图例系统 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 16 章 ggplot2之图例系统

-

这一章,我们一起学习ggplot2中的图例系统,内容相对简单,但还是推荐大家阅读ggplot2官方文档

-
-

16.1 图例系统

-

为了方便演示,我们还是用熟悉的配方ggplot2::mpg

-
library(tidyverse)
-
-mpg %>%
-  ggplot(aes(x = displ, y = hwy, color = factor(cyl))) +
-  geom_point()  
-

-

如果想调整图例的样式,可以使用guides()函数,用法类似上节课中的theme函数, 具体参数为:

-
    -
  • 要么是字符串 (i.e. color = "colorbar" or color = "legend"),
  • -
  • 要么是特定的函数 (i.e. color = guide_colourbar() or color = guide_legend())
  • -
-

-
-
-

16.2 案例详解

-
mpg %>%
-  ggplot(aes(x = displ, y = hwy, color = factor(cyl))) +
-  geom_point() +
-  ggtitle("这是我的标题") +
-  labs(x = "x_displ", y = "y_hwy") +
-  guides(color = "legend")
-

-
mpg %>%
-  ggplot(aes(x = displ, y = hwy, color = factor(cyl))) +
-  geom_point() +
-  ggtitle("这是我的标题") +
-  labs(x = "x_displ", y = "y_hwy") +
-  guides(color = guide_bins(
-                 title = "my title",
-                 label.hjust = 1
-                 )
-         )
-

-
mpg %>%
-  ggplot(aes(x = displ, y = hwy, color = factor(cyl))) +
-  geom_point() +
-  ggtitle("这是我的标题") +
-  labs(x = "x_displ", y = "y_hwy") +
-  guides(color = guide_legend(
-                   ncol = 4
-                 )
-  )
-

-
mpg %>%
-  ggplot(aes(x = displ, y = hwy, color = factor(cyl))) +
-  geom_point() +
-  ggtitle("这是我的标题") +
-  labs(x = "x_displ", y = "y_hwy") +
-  guides(color = guide_legend(
-    title = "标题好像有点高",
-    title.position = "top",
-    title.vjust = 5,
-    label.position = "left",
-    label.hjust = 1,
-    label.theme = element_text(size = 15,
-                               face = "italic",
-                               colour = "red",
-                               angle = 0),
-    keywidth = 5,
-    reverse = TRUE
-   )
-  )
-

-
-
-

16.3 删除其中一个图例

-
mpg %>%
-  ggplot(aes(x = displ, y = hwy, color = class, size = cyl)) +
-  geom_point()
-

-

比如,我们想删除size这个图例,那么需要这样做

-
mpg %>%
-  ggplot(aes(x = displ, y = hwy, color = class, size = cyl)) +
-  geom_point() +
-  guides(color = guide_legend("汽车类型"),  # keep
-         size = FALSE                       # remove
-         )
-

-
-
-

16.4 小结

-

到了这里,ggplot2的内容差不多介绍完了,最后做下自我测试,能读懂下面代码(来源 Emi Tanaka)的意思吗?

-
mtcars %>%
-  as_tibble() %>%
-  ggplot(aes(x = wt, y = mpg, shape = factor(vs), color = hp)) +
-  geom_point(size = 3) +
-  colorspace::scale_color_continuous_sequential(palette = "Dark Mint") +
-  scale_shape_discrete(labels = c("V-shaped", "Straight")) +
-  labs(
-    x = "Weight (1000 lbs)", y = "Miles per gallon",
-    title = "Motor Trend Car Road Tests",
-    shape = "Engine", color = "Horsepower"
-  ) +
-  theme(
-    text = element_text(size = 18, color = "white"),
-    rect = element_rect(fill = "black"),
-    panel.background = element_rect(fill = "black"),
-    legend.key = element_rect(fill = "black"),
-    axis.text = element_text(color = "white"),
-    plot.title.position = "plot",
-    plot.margin = margin(10, 10, 10, 10)
-  ) +
-  guides(
-    shape =
-      guide_legend(override.aes = list(color = "white"))
-  )
- -
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/ggplot2-scales.html b/_book/ggplot2-scales.html deleted file mode 100644 index 4764cf0..0000000 --- a/_book/ggplot2-scales.html +++ /dev/null @@ -1,1444 +0,0 @@ - - - - - - - 第 15 章 ggplot2之标度 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 15 章 ggplot2之标度

-

这一章我们一起学习ggplot2中的scales语法,推荐大家阅读Hadley Wickham最新版的《ggplot2: Elegant Graphics for Data Analysis》,但如果需要详细了解标度参数体系,还是要看ggplot2官方文档

-
-

15.1 标度

-

13章,我们了解到ggplot2中,映射是数据转化到图形属性,这里的图形属性是指视觉可以感知的东西,比如大小,形状,颜色和位置等。我们今天讨论的标度(scale)是控制着数据到图形属性映射的函数,每一种标度都是从数据空间的某个区域(标度的定义域)到图形属性空间的某个区域(标度的值域)的一个函数。

-

简单点来说,标度是用于调整数据映射的图形属性。 -在ggplot2中,每一种图形属性都拥有一个默认的标度,也许你对这个默认的标度不满意,那就需要学习如何修改默认的标度。比如, -系统默认"a"对应红色,"b"对应蓝色,我们想让"a"对应紫色,"b"对应橙色。

-
-
-

15.2 图形属性和变量类型

-

还是用我们熟悉的ggplot2::mpg,可能有同学说,我画图没接触到scale啊,比如

-
library(tidyverse)
-mpg %>% 
-  ggplot(aes(x = displ, y = hwy)) + 
-  geom_point(aes(colour = class)) 
-

-

能画个很漂亮的图,那是因为ggplot2默认缺省条件下,已经很美观了。(据说Hadley Wickham很后悔使用了这么漂亮的缺省值,因为很漂亮了大家都不认真学画图了。马云好像也说后悔创立了阿里巴巴?)

-

事实上,根据映射关系和变量名,我们将标度写完整,应该是这样的

-
ggplot(mpg, aes(x = displ, y = hwy)) + 
-  geom_point(aes(colour = class)) +
-  
-  scale_x_continuous() + 
-  scale_y_continuous() + 
-  scale_colour_discrete()
-

-

如果每次都要手动设置一次标度函数,那将是比较繁琐的事情。因此ggplot2使用了默认设置,如果不满意ggplot2的默认值,可以手动调整或者改写标度,比如

-
ggplot(mpg, aes(x = displ, y = hwy)) + 
-  geom_point(aes(colour = class)) +
-  
-  scale_x_continuous(name = "这是我的x坐标") + 
-  scale_y_continuous(name = "这是我的y坐标") + 
-  scale_colour_brewer()
-

-
-
-

15.3 坐标轴和图例是同样的东西

-

-
-
-

15.4 丰富的标度体系

-

注意到,标度函数是由"_"分割的三个部分构成的 -- scale -- 视觉属性名 (e.g., colour, shape or x) -- 标度名 (e.g., continuous, discrete, brewer).

-

-

每个标度函数内部都有丰富的参数系统

-
scale_colour_manual(
-  palette = function(), 
-  limits = NULL,
-  name = waiver(),
-  labels = waiver(),
-  breaks = waiver(),
-  minor_breaks = waiver(),
-  values = waiver(),
-  ...
-)
-
    -
  • 参数name,坐标和图例的名字,如果不想要图例的名字,就可以 name = NULL

  • -
  • 参数limits, 坐标或图例的范围区间。连续性c(n, m),离散型c("a", "b", "c")

  • -
  • 参数breaks, 控制显示在坐标轴或者图例上的值(元素)

  • -
  • 参数labels, 坐标和图例的间隔标签

    -
      -
    • 一般情况下,内置函数会自动完成
    • -
    • 也可人工指定一个字符型向量,与breaks提供的字符型向量一一对应
    • -
    • 也可以是函数,把breaks提供的字符型向量当做函数的输入
    • -
    • NULL,就是去掉标签
    • -
  • -
  • 参数values 指的是(颜色、形状等)视觉属性值,

    -
      -
    • 要么,与数值的顺序一致;
    • -
    • 要么,与breaks提供的字符型向量长度一致
    • -
    • 要么,用命名向量c("数据标签" = "视觉属性")提供
    • -
  • -
  • 参数expand, 控制参数溢出量

  • -
  • 参数range, 设置尺寸大小范围,比如针对点的相对大小

  • -
- -

下面,我们通过具体的案例讲解如何使用参数,把图形变成我们想要的模样。

-
-
-

15.5 案例详解

-

先导入一个数据

-
gapdata <- read_csv("./demo_data/gapminder.csv")
-
newgapdata <- gapdata %>% 
-  group_by(continent, country) %>% 
-  summarise(
-    across(c(lifeExp, gdpPercap, pop), mean)
-  )
-newgapdata
-
## # A tibble: 142 x 5
-## # Groups:   continent [5]
-##    continent country          lifeExp gdpPercap     pop
-##    <chr>     <chr>              <dbl>     <dbl>   <dbl>
-##  1 Africa    Algeria             59.0     4426.  1.99e7
-##  2 Africa    Angola              37.9     3607.  7.31e6
-##  3 Africa    Benin               48.8     1155.  4.02e6
-##  4 Africa    Botswana            54.6     5032.  9.71e5
-##  5 Africa    Burkina Faso        44.7      844.  7.55e6
-##  6 Africa    Burundi             44.8      472.  4.65e6
-##  7 Africa    Cameroon            48.1     1775.  9.82e6
-##  8 Africa    Central African~    43.9      959.  2.56e6
-##  9 Africa    Chad                46.8     1165.  5.33e6
-## 10 Africa    Comoros             52.4     1314.  3.62e5
-## # ... with 132 more rows
-
newgapdata %>% 
-  ggplot(aes(x = gdpPercap, y = lifeExp)) +
-    geom_point(aes(color = continent, size = pop)) +
-    scale_x_continuous()
-

-
newgapdata %>% 
-  ggplot(aes(x = gdpPercap, y = lifeExp)) +
-    geom_point(aes(color = continent, size = pop)) +
-    scale_x_log10()
-

-
newgapdata %>% 
-  ggplot(aes(x = gdpPercap, y = lifeExp)) +
-    geom_point(aes(color = continent, size = pop)) +
-    scale_x_log10(breaks = c(500, 1000, 3000, 10000, 30000),
-                  labels = scales::dollar)
-

-
newgapdata %>% 
-  ggplot(aes(x = gdpPercap, y = lifeExp)) +
-    geom_point(aes(color = continent, size = pop)) +
-    scale_x_log10(
-      name = "人均GDP",
-      breaks = c(500, 1000, 3000, 10000, 30000),
-      labels = scales::unit_format(unit = "美元"))
-

-
newgapdata %>% 
-  ggplot(aes(x = gdpPercap, y = lifeExp)) +
-    geom_point(aes(color = continent, size = pop)) +
-    scale_x_log10() +
-    scale_color_viridis_d()
-

-

离散变量映射到色彩的情形,可以使用ColorBrewer色彩。

-
newgapdata %>% 
-  ggplot(aes(x = gdpPercap, y = lifeExp)) +
-    geom_point(aes(color = continent, size = pop)) +
-    scale_x_log10() +
-    scale_color_brewer(type = "qual", palette = "Set1")
-

-
newgapdata %>% 
-  ggplot(aes(x = gdpPercap, y = lifeExp)) +
-    geom_point(aes(color = continent, size = pop)) +
-    scale_x_log10() +
-    scale_color_manual(
-      name = "五大洲",
-      values = c("Africa" = "red", "Americas" = "blue", "Asia" = "orange",
-                 "Europe" = "black", "Oceania" = "gray"),
-      breaks = c("Africa", "Americas", "Asia", "Europe", "Oceania"),
-      labels = c("非洲", "美洲", "亚洲", "欧洲", "大洋洲")
-    ) +
-   scale_size(
-     name = "人口数量",
-     breaks = c(2e8, 5e8, 7e8),
-     labels = c("2亿", "5亿", "7亿")
-   )
-

-
-
-

15.6 用标度还是主题?

-

那什么时候用标度,什么时候用主题?这里有个原则:主题风格不会增加标签,也不会改变变量的范围,主题只会改变字体、大小、颜色等等。

-
-
-

15.7 小测试

-

用 ggplot2 重复这张lego图

-

- -
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/ggplot2-stat-layer.html b/_book/ggplot2-stat-layer.html deleted file mode 100644 index 95b18c2..0000000 --- a/_book/ggplot2-stat-layer.html +++ /dev/null @@ -1,1672 +0,0 @@ - - - - - - - 第 18 章 ggplot2之统计图层 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 18 章 ggplot2之统计图层

-
-

18.1 导言

-

美学映射是图形语法中非常重要的一个概念,变量映射到视觉元素,然后通过几何对象GEOM画出图形。(下图是每个几何对象所对应的视觉元素)

-
-ggplot2中的几何对象与美学映射 -

-图 18.1: ggplot2中的几何对象与美学映射 -

-
-

比如geom_point(mapping = aes(x = mass, y = height)) 将会画出散点图,这里的x轴代表mass变量,而y轴代表height变量.

-

因为geom_*()很强大而且也很容易理解,所以一般我们不会去思考我们的数据在喂给ggplot()后发生了什么,只希望能出图就行了。比如下面的直方图例子

-
library(tidyverse)
-library(palmerpenguins)
-
-ggplot(data = penguins, mapping = aes(x = body_mass_g)) +
-  geom_histogram()
-

-

这里发生了什么呢?你可能看到body_mass_g这个变量代表了x轴,这个没错,但想弄清楚这个直方图,需要回答下面的问题

-
    -
  • 映射到x轴的变量被分成了若干离散的小区间(bins)
  • -
  • 需要计算每个小区间中有多少观测值落入其中
  • -
  • 用于y轴上是一个新的变量
  • -
  • 最终,用户提供的x变量和经过计算处理后的y变量,共同确定了柱状图中每个柱子的位置和高度
  • -
-

我并不是说,不能给出geom_histogram()详细说明就是一个傻子。相反,我这里的本意是强调数据->视觉元素的映射并不是理所当然的,尽管看上去往往非常自然、直观和客观。

-

我们这里是提醒下,我们是否想过,修改上面中间过程,比如第1步和第2步,然后看看输出的图形是否还是直方图。

-

这个想法非常重要,但我们很少想到。某种程度是因为在我们最初学习ggplot画图的时候,ggplot已经影响了我们的思维方式。比如,初学者可能经历过拿到数据却画不出图形的受挫感,举个例子来说,这里有个数据

-
d <- tibble::tribble(
-     ~variable, ~subject1, ~subject2, ~subject3,
-  "mass",         75,     70,    55,
-  "height",       154,    172,   144
-  )
-d
-
## # A tibble: 2 x 4
-##   variable subject1 subject2 subject3
-##   <chr>       <dbl>    <dbl>    <dbl>
-## 1 mass           75       70       55
-## 2 height        154      172      144
-

geom_point(aes(x = mass, y = height)) 画图,却报错了。初学者可能苦苦搜索答案,然后被告知,ggplot画图需要先弄成tidy格式

-
d %>% pivot_longer(
-  cols = subject1:subject3,
-  names_to = "subject",
-  names_pattern = "subject(\\d)",
-  values_to = "value"
-) %>% 
-  pivot_wider(names_from = variable,
-              values_from = value)
-
## # A tibble: 3 x 3
-##   subject  mass height
-##   <chr>   <dbl>  <dbl>
-## 1 1          75    154
-## 2 2          70    172
-## 3 3          55    144
-

现在数据tidy了,你可以使用ggplot(),问题得以解决。于是我们得出了一个结论:想要ggplot工作就需要tidy data。 如果这样想,那么今天的内容ggplot2统计图层就更加有必要了。

-
-
-

18.2 为何及何时使用统计图层

-

你可能每天都在用ggplot,却用不到stat_*()函数,这样也可以胜任很多工作。事实上,因为我们仅仅只使用geom_*()函数,你会发现stat_*()是开发者才使用的深奥和神秘的部分,如果这样想,你可能怀疑你是否有必要了解这些stat_*()函数。

-

好吧,学习 STAT 最主要的原因

-
-

“Even though the data is tidy, it may not represent the values you want to display”

-
-

我们这里再用一个例子说明,假定我们有一数据框simple_data

-
simple_data <- tibble(group = factor(rep(c("A", "B"), each = 15)),
-                      subject = 1:30,
-                      score = c(rnorm(15, 40, 20), rnorm(15, 60, 10)))
-simple_data
-
## # A tibble: 30 x 3
-##    group subject score
-##    <fct>   <int> <dbl>
-##  1 A           1  76.9
-##  2 A           2  66.2
-##  3 A           3  28.5
-##  4 A           4  31.4
-##  5 A           5  18.3
-##  6 A           6  46.0
-##  7 A           7  64.9
-##  8 A           8  46.8
-##  9 A           9  17.9
-## 10 A          10  51.6
-## # ... with 20 more rows
-

假定我们现在想画一个柱状图,一个柱子代表每一组group,柱子的高度代表的score的均值。

-

好比,按照我们的想法,我们首先规整(tidy)数据,并且确保数据包含每个geom所需的美学映射,最后传递给ggplot()

-
simple_data %>%
-  group_by(group) %>% 
-  summarize(
-    mean_score = mean(score),
-    .groups = 'drop' 
-  ) %>% 
-  ggplot(aes(x = group, y = mean_score)) +
-  geom_col()
-

-

那么,传递给ggplot()的数据是

-
simple_data %>%
-  group_by(group) %>% 
-  summarize(
-    mean_score = mean(score),
-    .groups = 'drop' 
-  ) 
-
## # A tibble: 2 x 2
-##   group mean_score
-##   <fct>      <dbl>
-## 1 A           41.8
-## 2 B           61.5
-

需求很简单,很容易搞定。但如果我们想加误差棒(standard error)呢? 那我们需要再对数据整理统计,然后再传给ggplot().

-

于是,我们再计算误差棒,这里变型的数据是这个样子的

-
simple_data %>% 
-  group_by(group) %>% 
-  summarize(
-    mean_score = mean(score),
-    se = sqrt(var(score)/length(score)),
-    .groups = 'drop'
-  ) %>% 
-  mutate(
-    lower = mean_score - se,
-    upper = mean_score + se
-  )
-
## # A tibble: 2 x 5
-##   group mean_score    se lower upper
-##   <fct>      <dbl> <dbl> <dbl> <dbl>
-## 1 A           41.8  4.82  37.0  46.7
-## 2 B           61.5  2.83  58.6  64.3
-

然后把变型的数据传递给ggplot()

-
simple_data %>% 
-  group_by(group) %>% 
-  summarize(
-    mean_score = mean(score),
-    se = sqrt(var(score)/length(score)),
-    .groups = 'drop'
-  ) %>% 
-  mutate(
-    lower = mean_score - se,
-    upper = mean_score + se
-  ) %>% 
-  ggplot(aes(x = group, y = mean_score, ymin = lower, ymax = upper)) +
-  geom_errorbar()
-

-

最后,我们把两个数据框组合到一起,一个用于柱状图,一个用于画误差棒。

-
simple_data_bar <- simple_data %>%
-  group_by(group) %>% 
-  summarize(
-    mean_score = mean(score),
-    .groups = 'drop'
-  )
-  
-simple_data_errorbar <- simple_data %>% 
-  group_by(group) %>% 
-  summarize(
-    mean_score = mean(score),
-    se = sqrt(var(score)/length(score)),
-    .groups = 'drop'
-  ) %>% 
-  mutate(
-    lower = mean_score - se,
-    upper = mean_score + se
-  )
-
-ggplot() +
-  geom_col(
-    aes(x = group, y = mean_score),
-    data = simple_data_bar
-  ) +
-  geom_errorbar(
-    aes(x = group, y = mean_score, ymin = lower, ymax = upper),
-    data = simple_data_errorbar
-  )
-

-

OMG, 为了画一个简单的图,我们需要写这么长的一段代码。究其原因就是,我们认为,一定要准备好一个tidy的数据,并且把想画的几何对象所需要的美学映射,都整理到这个tidy的数据框中

-

事实上,理论上讲,simple_data_bar 和 simple_data_errorbar 并不是真正的tidy格式。因为按照Hadley Wickham对tidy的定义,一行代表一次观察。 -而这里的柱子的高度以及误差棒的两端不是观察出来的,而是统计计算出来的。

-
-

-所以我们的观点是,辛辛苦苦创建一个(包含每个几何对象所需的美学映射)的数据框,太低效了,而且这种方法也不支持tidy原则。 -

-
-

既然 simple_data_bar 和 simple_data_errorbar 都来源于simple_data,那为何不直接传递simple_data给ggplot(),让数据在内部转换,得到每个几何对象所需的美学映射呢?

-

或许,你想要的是这样?

-
simple_data %>% 
-  ggplot(aes(group, score)) +
-  stat_summary(geom = "bar") +
-  stat_summary(geom = "errorbar")
-

-

Bingo

-
-

18.2.1 小结

-

这一节,我们用一个很长的数据整理的代码,借助geom_*()画了一张含有误差棒的柱状图,而用stat_summary()不需要数据整理,只需要两行代码就实现相同效果。 -感受到了stat_summary()的强大了?

-

不忙,好戏才慢慢开始…

-
-
-
-

18.3 用 stat_summary() 理解统计图层

-

前面讲到的 stat_summary() 是学习和理解 stat_*() 很好的例子,理解了stat_summary()的工作原理,其它的stat_*()也就都明白了, -事实上,stat_summary()也是在数据可视化中最常用的,因此我们接着讲它。

-

那么,我们现在模拟一个测试数据height_df

-
height_df <- tibble(group = "A",
-                    height = rnorm(30, 170, 10))
-

用我们熟悉的geom_point()

-
height_df %>% 
-  ggplot(aes(x = group, y = height)) +
-  geom_point()
-

-

然后用stat_summary()代替geom_point(),然后看看发生了什么

-
height_df %>% 
-  ggplot(aes(x = group, y = height)) +
-  stat_summary()
-

-

看到了一个点和经过这个点的一条线,实际上,它也是一个几何对象pointrange. -那么geom_pointrange() 是怎么数据转换的呢?回答这个问题,我们需要了解下geom_pointrange()需要哪些美学映射(参见图 18.1):

-
    -
  • x or y
  • -
  • ymin or xmin
  • -
  • ymax or xmax
  • -
-

所以,我们回去看看ggplot(aes(x = group, y = height))中aes()里的参数,group 映射到 x, height映射到了y, 但我们没有发现有ymin / xmin或者ymax / xmax的踪迹。问题来了,我们没有给出geom_pointrange()需要的美学映射,那stat_summary()是怎么画出pointrange的呢?

-

我们先猜测一下,stat_summary()先计算出必要的数据值,然后传递给pointrange? -是不是呢?我们先看上图过程中有个提示

-
No summary function supplied, defaulting to `mean_se()`
-

看到了吧,summary function,说明我们猜对了,这就是stat_*()神秘的地方。

-
    -
  • 首先,对于stat_summary()中的fun.data参数,它的默认值是mean_se()
  • -
  • 其次,我们看看这个函数
  • -
-
mean_se
-
function (x, mult = 1) 
-{
-    x <- stats::na.omit(x)
-    se <- mult * sqrt(stats::var(x)/length(x))
-    mean <- mean(x)
-    new_data_frame(list(y = mean, ymin = mean - se, ymax = mean + 
-        se), n = 1)
-}
-<bytecode: 0x0000021aef28aa10>
-<environment: namespace:ggplot2>
-

这个mean_se()函数有两个参数,一个是x,一个是mult(默认为1), 那么这个函数的功能,一步步来说

-
    -
  • 删除缺失值NA
  • -
  • 计算出se, 公式为\(SE = \text{mult} \times \sqrt{\frac{1}{N(N-1)}\sum_{i=1}^N(x_i-\bar{x})^2}\)
  • -
  • 计算x的均值
  • -
  • 创建一个数据框(一行三列),y = mean, ymin = mean - se, ymax = mean + se
  • -
-

很酷的一件事情是,mean_se()看上去是在ggplot()内部使用,实际上加载ggplot2宏包后,在全局环境变量里就可以访问到,不妨试试看, 注意到stat_summary()是对向量(单维度)做统计,因此要传height_df$height给它

-
mean_se(height_df$height)
-
##       y  ymin  ymax
-## 1 170.2 168.6 171.9
-

数据看上去和我们前面 stat_summary() 画的点线图一样。当然为了保险起见,我们还是核对下,这里用到ggplot2包中的一个神奇的函数layer_data(), 它可以拉取在图层中使用的数据,第二个参数是指定拉取哪个图层的数据,这里只有唯一的一个图层,因此指定为1。

-
pointrange_plot <- height_df %>% 
-  ggplot(aes(x = group, y = height)) +
-  stat_summary()
-
-layer_data(pointrange_plot, 1)
-
##   x group     y  ymin  ymax PANEL flipped_aes colour
-## 1 1     1 170.2 168.6 171.9     1       FALSE  black
-##   size linetype shape fill alpha stroke
-## 1  0.5        1    19   NA    NA      1
-

喔喔,结果很丰富,我们注意到y, ymin, and ymax 的值与 mean_se() 计算的结果一致。

-
-

18.3.1 小结

-

我们揭开了stat_summary()统计图层的神秘面纱的一角:

-
    -
  • 函数stat_summary()里若没有指定数据,那就会从ggplot(data = .)里继承
  • -
  • 参数fun.data 会调用函数将数据变形,这个函数默认是mean_se()
  • -
  • fun.data 返回的是数据框,这个数据框将用于geom参数画图,这里缺省的geom是pointrange
  • -
  • 如果fun.data 返回的数据框包含了所需要的美学映射,图形就会显示出来。
  • -
-

为了让大家看得更明白,我们在stat_summary()中显式地给出fun.data和geom两个参数

-
height_df %>% 
-  ggplot(aes(x = group, y = height)) +
-  stat_summary(
-    geom = "pointrange",
-    fun.data = mean_se
-  )
-

-

Look, it’s the same plot!

-
-
-
-

18.4 使用统计图层

-

现在我们进入了stat_summary()有趣的环节: 调整其中的参数画出各种图

-
-

18.4.1 包含95%置信区间的误差棒

-

我们用企鹅数据画出不同性别sex下的企鹅体重均值,同时误差棒要给出95%的置信区间( -即均值加减 1.96倍的标准误)

-
my_penguins <- na.omit(penguins)
-
-my_penguins %>% 
-  ggplot(aes(sex, body_mass_g)) +
-  stat_summary(
-    fun.data = ~mean_se(., mult = 1.96), # Increase `mult` value for bigger interval!
-    geom = "errorbar",
-  )
-

-

那么这里在stat_summary()函数内部发生了什么呢?

-

分组分别计算各自的mean_se()

-
female_mean_se <- my_penguins %>% 
-  filter(sex == "female") %>% 
-  pull(body_mass_g) %>% 
-  mean_se(., mult = 1.96)
-
-male_mean_se <- my_penguins %>% 
-  filter(sex == "male") %>% 
-  pull(body_mass_g) %>% 
-  mean_se(., mult = 1.96)
-
-bind_rows(female_mean_se, male_mean_se)
-
##      y ymin ymax
-## 1 3862 3761 3964
-## 2 4546 4427 4665
-

ggplot()中提供了分组变量(比如这里的sex),stat_summary()会分组计算, -再次感受到ggplot2的强大气息!

-
-
-

18.4.2 带有彩色填充色的柱状图

-

不同的企鹅种类,画出bill_length_mm长度的中位数(不再是均值),同时,让中位数小于40的用粉红色标出。这里需要自定义fun.data函数

-
calc_median_and_color <- function(x, threshold = 40) {
-  tibble(y = median(x)) %>% 
-    mutate(fill = ifelse(y < threshold, "pink", "grey35"))
-}
-
-my_penguins %>% 
-  ggplot(aes(species, bill_length_mm)) +
-  stat_summary(
-    fun.data = calc_median_and_color,
-    geom = "bar"
-  )
-

-

我们再来看看,stat_summary()内部发生了什么?

-
my_penguins %>% 
-  group_split(species) %>% 
-  map(~ pull(., bill_length_mm)) %>% 
-  map_dfr(calc_median_and_color)
-
## # A tibble: 3 x 2
-##       y fill  
-##   <dbl> <chr> 
-## 1  38.8 pink  
-## 2  49.6 grey35
-## 3  47.4 grey35
-

注意到,fun.data中的定制函数还可以计算fill美学映射,最后一起传递给geom画图,强大!

-
-
-

18.4.3 大小变化的点线图

-

我们现在想画不同企鹅种类species的bill_depth_mm均值,要求点线图中点的大小随观测数量(该种类企鹅的数量)变化

-
my_penguins %>% 
-  ggplot(aes(species, bill_depth_mm)) +
-  stat_summary(
-    fun.data = function(x) {
-      
-      scaled_size <- length(x)/nrow(my_penguins)
-      
-      mean_se(x) %>% 
-        mutate(size = scaled_size)
-    }
-  )
-

-

这张图其实挺酷的,每个企鹅种类的观测数量越少(也就是说样本量越小),pointrange的不确定性就越大(图中的误差棒范围就越长)。我们再看看,这里的stat_summary()内部发生了什么,或者说数据是怎么转换的。

-
my_penguins %>% 
-  group_split(species) %>%
-  map(~ pull(., bill_depth_mm)) %>% 
-  map_dfr(
-    function(x) {
-      
-      scaled_size <- length(x)/nrow(my_penguins)
-      
-      mean_se(x) %>% 
-        mutate(size = scaled_size)
-    }
-  )
-
##       y  ymin  ymax   size
-## 1 18.35 18.25 18.45 0.4384
-## 2 18.42 18.28 18.56 0.2042
-## 3 15.00 14.91 15.09 0.3574
-
-
-
-

18.5 总结

-
-

18.5.1 主要结论

-
    -
  • 尽管数据是tidy的,但它未必能代表你想展示的值

  • -
  • 解决办法不是去规整数据以符合几何对象的要求,而是将原初tidy数据传递给ggplot(), -让stat_*()函数在内部实现变型

  • -
  • stat_*()函数可以定制geom以及相应的变形函数。当然,定制自己的函数,需要核对stat_*()所需要的变量和数据类型

  • -
  • 如果想用不同的geom,确保变换函数能计算出(几何对象所需要的)美学映射

  • -
-
-
-

18.5.2 STAT vs. GEOM or STAT and GEOM?

-

尽管我们在谈论geom_*()的局限性,从而衬托出stat_*()的强大,但并不意味着后者可以取代前者,因为这不是一个非此即彼的问题,事实上,他们彼此依赖– 我们看到stat_summary()有 geom 参数, geom_*() 也有 stat 参数。 -在更高的层级上讲,stat_*() 和 geom_*() 都只是ggplot里构建图层的layer()函数的一个便利的方法,用曹植的《七步诗》来说, 本是同根生,相煎何太急。

-

把layer()分成stat_*() 和 geom_*()两块,或许是一个失误,最后我们用Hadley的原话来结束本章内容

-
-

Unfortunately, due to an early design mistake I called these either stat_() or geom_(). A better decision would have been to call them layer_() functions: that’s a more accurate description because every layer involves a stat and a geom

-
-

本文档翻译自Demystifying stat_ layers in ggplot2

- -
-
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/ggplot2-theme.html b/_book/ggplot2-theme.html deleted file mode 100644 index 8bf1073..0000000 --- a/_book/ggplot2-theme.html +++ /dev/null @@ -1,1704 +0,0 @@ - - - - - - - 第 14 章 ggplot2之主题设置 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 14 章 ggplot2之主题设置

-

这一章我们一起学习ggplot2中的theme elements -语法,感谢Henry Wang提供了很好的思路。如果需要详细了解,可以参考Hadley Wickham最新版的《ggplot2: Elegant Graphics for Data Analysis》,最推荐的是ggplot2官方文档

-
theme(element_name = element_function())
-

这里element_function()有四个

-
element_text()
-element_line()
-element_rect()
-element_blank()
-

望文生义吧,内置元素函数有四个基础类型:

-
    -
  • element_text(), 文本,一般用于控制标签和标题的字体风格
  • -
  • element_line(), 线条,一般用于控制线条或线段的颜色或线条类型
  • -
  • element_rect(), 矩形区域,一般用于控制背景矩形的颜色或者边界线条类型
  • -
  • element_blank() , 空白,就是不分配相应的绘图空间,即删去这个地方的绘图元素。
  • -
-

每个元素函数都有一系列控制外观的参数,下面我们通过具体的案例来一一介绍吧。

-
library(tidyverse)
-

还是用让人生厌的ggplot2::mpg数据包吧,具体介绍请见?? 章。

-
glimpse(mpg)
-
## Rows: 234
-## Columns: 11
-## $ manufacturer <chr> "audi", "audi", "audi", "audi...
-## $ model        <chr> "a4", "a4", "a4", "a4", "a4",...
-## $ displ        <dbl> 1.8, 1.8, 2.0, 2.0, 2.8, 2.8,...
-## $ year         <int> 1999, 1999, 2008, 2008, 1999,...
-## $ cyl          <int> 4, 4, 4, 4, 6, 6, 6, 4, 4, 4,...
-## $ trans        <chr> "auto(l5)", "manual(m5)", "ma...
-## $ drv          <chr> "f", "f", "f", "f", "f", "f",...
-## $ cty          <int> 18, 21, 20, 21, 16, 18, 18, 1...
-## $ hwy          <int> 29, 29, 31, 30, 26, 26, 27, 2...
-## $ fl           <chr> "p", "p", "p", "p", "p", "p",...
-## $ class        <chr> "compact", "compact", "compac...
-

稍微做点数据整理

-
df <- mpg %>% 
-  as_tibble() %>% 
-  filter(class != "2seater", manufacturer %in% c("toyota", "volkswagen"))
-df
-
## # A tibble: 61 x 11
-##    manufacturer model displ  year   cyl trans drv  
-##    <chr>        <chr> <dbl> <int> <int> <chr> <chr>
-##  1 toyota       4run~   2.7  1999     4 manu~ 4    
-##  2 toyota       4run~   2.7  1999     4 auto~ 4    
-##  3 toyota       4run~   3.4  1999     6 auto~ 4    
-##  4 toyota       4run~   3.4  1999     6 manu~ 4    
-##  5 toyota       4run~   4    2008     6 auto~ 4    
-##  6 toyota       4run~   4.7  2008     8 auto~ 4    
-##  7 toyota       camry   2.2  1999     4 manu~ f    
-##  8 toyota       camry   2.2  1999     4 auto~ f    
-##  9 toyota       camry   2.4  2008     4 manu~ f    
-## 10 toyota       camry   2.4  2008     4 auto~ f    
-## # ... with 51 more rows, and 4 more variables:
-## #   cty <int>, hwy <int>, fl <chr>, class <chr>
-

我相信这种图你们已经会画了吧

-
df %>%
-  ggplot(aes(x = displ, y = hwy, color = factor(cyl))) +
-  geom_point() +
-  facet_grid(vars(manufacturer), vars(class)) +
-  ggtitle("这是我的标题") +
-  labs(x = "x_displ", y = "y_hwy")
-

-

想让这张图,符合你的想法?如何控制呢?come on

-
-

14.1 图表整体元素

-

图表整体元素包括:

- - - - - - - - - - - - - - - - - - - - - - - - - -
描述主题元素类型
整个图形背景plot.backgroundelement_rect()
图形标题plot.titleelement_text()
图形边距plot.marginmargin()
-
df %>%
-  ggplot(aes(x = displ, y = hwy, color = factor(cyl))) +
-  geom_point() +
-  facet_grid(vars(manufacturer), vars(class)) +
-  ggtitle("这是我的标题") +
-  labs(x = "x_displ", y = "y_hwy") +
-  theme(
-    plot.background = element_rect(fill = "orange", color = "black", size = 10),
-    plot.title = element_text(hjust = 1, color = "red", face = "italic"),
-    plot.margin = margin(t = 20, r = 20, b = 20, l = 20, unit = "pt")
-  )
-

-
-
-

14.2 坐标轴元素

-

坐标轴元素包括:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
描述主题元素类型
坐标轴刻度axis.tickselement_line()
坐标轴标题axis.titleelement_text()
坐标轴标签axis.textelement_text()
直线和坐标轴axis.lineelement_line()
-
df %>%
-  ggplot(aes(x = displ, y = hwy, color = factor(cyl))) +
-  geom_point() +
-  facet_grid(vars(manufacturer), vars(class)) +
-  ggtitle("这是我的标题") +
-  labs(x = "x_displ", y = "y_hwy") +
-  theme(
-    axis.line = element_line(color = "orange", size = 2),
-    axis.title = element_text(color = "red", face = "italic"),
-    axis.ticks = element_line(color = "purple", size = 3),
-    axis.text = element_text(color = "blue"),
-    axis.text.x = element_text(angle = 45, hjust = 1)
-  )
-

-
-
-

14.3 面板元素

-

面板元素包括:

- - - - - - - - - - - - - - - - - - - - - - - - - -
描述主题元素类型
面板背景panel.backgroundelement_rect()
面板网格线panel.gridelement_line()
面板边界panel.borderelement_rect()
-
df %>%
-  ggplot(aes(x = displ, y = hwy, color = factor(cyl))) +
-  geom_point() +
-  facet_grid(vars(manufacturer), vars(class)) +
-  ggtitle("这是我的标题") +
-  labs(x = "x_displ", y = "y_hwy") +
-  theme(
-    panel.background = element_rect(fill = "orange", color = "red"),
-    panel.grid = element_line(color = "grey80", size = 0.5)
-  )
-

-

或者

-
df %>%
-  ggplot(aes(x = displ, y = hwy, color = factor(cyl))) +
-  geom_point() +
-  facet_grid(vars(manufacturer), vars(class)) +
-  ggtitle("这是我的标题") +
-  labs(x = "x_displ", y = "y_hwy") +
-  theme(
-    panel.background = element_rect(fill = "orange"),
-    panel.grid = element_line(color = "grey80", size = 0.5),
-    panel.border = element_rect(color = "red", fill = NA)
-  )
-

-
-
-

14.4 图例元素

-

图例元素包括:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
描述主题元素类型
图例背景legend.backgroundelement_rect()
图例符号legend.keyelement_rect()
图例标签legend.textelement_text()
图例标题legend.titleelement_text()
图例边距legend.marginmargin
图例位置legend.position“top,” “bottom,” “left,” “right”
-
df %>%
-  ggplot(aes(x = displ, y = hwy, color = factor(cyl))) +
-  geom_point() +
-  facet_grid(vars(manufacturer), vars(class)) +
-  ggtitle("这是我的标题") +
-  labs(x = "x_displ", y = "y_hwy") +
-  theme(
-    legend.background = element_rect(fill = "orange"),
-    legend.title = element_text(color = "blue", size = 10),
-    legend.key = element_rect(fill = "grey80"),
-    legend.text = element_text(color = "red"),
-    legend.margin = margin(t = 20, r = 20, b = 20, l = 20, unit = "pt"),
-    legend.position = "bottom"
-  )
-

-
-
-

14.5 分面元素

-

分面元素包括:

- - - - - - - - - - - - - - - - - - - - - - - - - -
描述主题元素类型
分面标签背景strip.backgroundelement_rect()
条状文本strip.textelement_text()
分面间隔panel.spacingunit
-
df %>%
-  ggplot(aes(x = displ, y = hwy, color = factor(cyl))) +
-  geom_point() +
-  facet_grid(vars(manufacturer), vars(class)) +
-  ggtitle("这是我的标题") +
-  labs(x = "x_displ", y = "y_hwy") +
-  theme(
-    strip.background = element_rect(fill = "orange"),
-    strip.text = element_text(color = "red"),
-    panel.spacing = unit(0.3, "inch") # ,
-    # strip.switch.pad.grid =
-  )
-

-
-
-

14.6 案例

-
diamonds %>%
-  ggplot(aes(carat, price)) +
-  geom_hex() +
-  labs(title = "Diamond") +
-  theme(
-    axis.title.x = element_text(
-      size = 30,
-      color = "red",
-      face = "bold",
-      angle = 10
-    ),
-    legend.title = element_text(
-      size = 25,
-      color = "#ff6361",
-      margin = margin(b = 5)
-    ),
-    plot.title = element_text(
-      size = 35,
-      face = "bold",
-      color = "blue"
-    )
-  )
-

-你肯定不会觉得这图好看。

-
library(palmerpenguins)
-penguins %>%
-  ggplot(aes(bill_length_mm, bill_depth_mm)) +
-  geom_point() +
-  theme(
-    axis.line.y = element_line(
-      color = "black",
-      size = 1.2,
-      arrow = grid::arrow()
-    ),
-    axis.line.x = element_line(
-      linetype = "dashed",
-      color = "brown",
-      size = 1.2
-    ),
-    axis.ticks = element_line(color = "red", size = 1.1),
-    axis.ticks.length = unit(3, "mm"),
-    panel.grid.major = element_line(
-      color = "blue",
-      size = 1.2
-    ),
-    panel.grid.minor = element_line(
-      color = "#58508d",
-      size = 1.2,
-      linetype = "dotted"
-    )
-  )
-

-
penguins %>%
-  ggplot(aes(bill_length_mm, bill_depth_mm)) +
-  geom_point(aes(color = species)) +
-  theme(
-    legend.background = element_rect(
-      fill = "#fff6c2",
-      color = "black",
-      linetype = "dashed"
-    ),
-    legend.key = element_rect(fill = "grey", color = "brown"),
-    panel.background = element_rect(
-      fill = "#005F59",
-      color = "red", size = 3
-    ),
-    panel.border = element_rect(
-      color = "black",
-      fill = "transparent",
-      linetype = "dashed", size = 3
-    ),
-    plot.background = element_rect(
-      fill = "#a1dce9",
-      color = "black",
-      size = 1.3
-    ),
-    legend.position = "bottom"
-  )
-

-
-
-

14.7 小结

-

-
-
-

14.8 提问

-
    -
  • ggplot2中 plot 与 panel 有区别?

  • -
  • 假定数据是这样

  • -
-
library(tidyverse)
-set.seed(12)
-
-d1 <- data.frame(x = rnorm(50, 10, 2), type = "Island #1")
-d2 <- data.frame(x = rnorm(50, 18, 1.2), type = "Island #2")
-
-dd <- bind_rows(d1, d2) %>%
-  set_names(c("Height", "Location"))
-
-head(dd)
-
##   Height  Location
-## 1  7.039 Island #1
-## 2 13.154 Island #1
-## 3  8.087 Island #1
-## 4  8.160 Island #1
-## 5  6.005 Island #1
-## 6  9.455 Island #1
-

你画图后,交给老板看

-
dd %>%
-  ggplot(aes(x = Height, fill = Location)) +
-  geom_histogram(binwidth = 1, color = "white") +
-  scale_fill_manual(values = c("green3", "turquoise3"))
-

-

然而,老板有点不满意,希望你要这样改 -

-

请用前后两章学到的内容让老板满意吧

- -
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/images/Cholera1.png b/_book/images/Cholera1.png deleted file mode 100644 index 7cc5ef2..0000000 Binary files a/_book/images/Cholera1.png and /dev/null differ diff --git a/_book/images/Cholera2.png b/_book/images/Cholera2.png deleted file mode 100644 index 01fd541..0000000 Binary files a/_book/images/Cholera2.png and /dev/null differ diff --git a/_book/images/HotlineDrake1.jpg b/_book/images/HotlineDrake1.jpg deleted file mode 100644 index cbcfe9e..0000000 Binary files a/_book/images/HotlineDrake1.jpg and /dev/null differ diff --git a/_book/images/LINE.png b/_book/images/LINE.png deleted file mode 100644 index 0e45da3..0000000 Binary files a/_book/images/LINE.png and /dev/null differ diff --git a/_book/images/Languages02.jpg b/_book/images/Languages02.jpg deleted file mode 100644 index 16bcf33..0000000 Binary files a/_book/images/Languages02.jpg and /dev/null differ diff --git a/_book/images/Least_squares_as_springs.png b/_book/images/Least_squares_as_springs.png deleted file mode 100644 index 142f069..0000000 Binary files a/_book/images/Least_squares_as_springs.png and /dev/null differ diff --git a/_book/images/One_Picture.png b/_book/images/One_Picture.png deleted file mode 100644 index 2542c32..0000000 Binary files a/_book/images/One_Picture.png and /dev/null differ diff --git a/_book/images/QQgroup.png b/_book/images/QQgroup.png deleted file mode 100644 index a41251a..0000000 Binary files a/_book/images/QQgroup.png and /dev/null differ diff --git a/_book/images/R_Excel.png b/_book/images/R_Excel.png deleted file mode 100644 index 8bf9ee2..0000000 Binary files a/_book/images/R_Excel.png and /dev/null differ diff --git a/_book/images/R_logo.png b/_book/images/R_logo.png deleted file mode 100644 index 55fff32..0000000 Binary files a/_book/images/R_logo.png and /dev/null differ diff --git a/_book/images/Resampling.jpg b/_book/images/Resampling.jpg deleted file mode 100644 index 0f7fa85..0000000 Binary files 
a/_book/images/Resampling.jpg and /dev/null differ diff --git a/_book/images/Rhelp.png b/_book/images/Rhelp.png deleted file mode 100644 index 8b67e6a..0000000 Binary files a/_book/images/Rhelp.png and /dev/null differ diff --git a/_book/images/Rinstall.png b/_book/images/Rinstall.png deleted file mode 100644 index 1806295..0000000 Binary files a/_book/images/Rinstall.png and /dev/null differ diff --git a/_book/images/Rinventor.png b/_book/images/Rinventor.png deleted file mode 100644 index 4980fe6..0000000 Binary files a/_book/images/Rinventor.png and /dev/null differ diff --git a/_book/images/Rproject.png b/_book/images/Rproject.png deleted file mode 100644 index 88b4856..0000000 Binary files a/_book/images/Rproject.png and /dev/null differ diff --git a/_book/images/Rstudio_install.png b/_book/images/Rstudio_install.png deleted file mode 100644 index 496d614..0000000 Binary files a/_book/images/Rstudio_install.png and /dev/null differ diff --git a/_book/images/a-14.png b/_book/images/a-14.png deleted file mode 100644 index cb64586..0000000 Binary files a/_book/images/a-14.png and /dev/null differ diff --git a/_book/images/a-21new.png b/_book/images/a-21new.png deleted file mode 100644 index d50c486..0000000 Binary files a/_book/images/a-21new.png and /dev/null differ diff --git a/_book/images/a-3new.png b/_book/images/a-3new.png deleted file mode 100644 index de74610..0000000 Binary files a/_book/images/a-3new.png and /dev/null differ diff --git a/_book/images/across.png b/_book/images/across.png deleted file mode 100644 index 1b9ea3b..0000000 Binary files a/_book/images/across.png and /dev/null differ diff --git a/_book/images/across_cover.jpg b/_book/images/across_cover.jpg deleted file mode 100644 index 47b60e2..0000000 Binary files a/_book/images/across_cover.jpg and /dev/null differ diff --git a/_book/images/advisor_email.png b/_book/images/advisor_email.png deleted file mode 100644 index 641dbd8..0000000 Binary files a/_book/images/advisor_email.png and 
/dev/null differ diff --git a/_book/images/alerts/danger.svg b/_book/images/alerts/danger.svg deleted file mode 100644 index 53870a2..0000000 --- a/_book/images/alerts/danger.svg +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/_book/images/alerts/info.svg b/_book/images/alerts/info.svg deleted file mode 100644 index 9c0b095..0000000 --- a/_book/images/alerts/info.svg +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - diff --git a/_book/images/alerts/rainbow.svg b/_book/images/alerts/rainbow.svg deleted file mode 100644 index 293c550..0000000 --- a/_book/images/alerts/rainbow.svg +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - - - diff --git a/_book/images/alerts/warning.svg b/_book/images/alerts/warning.svg deleted file mode 100644 index 97ba2ad..0000000 --- a/_book/images/alerts/warning.svg +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - diff --git a/_book/images/algorithms.png b/_book/images/algorithms.png deleted file mode 100644 index e7db1b7..0000000 Binary files a/_book/images/algorithms.png and /dev/null differ diff --git a/_book/images/ames.png b/_book/images/ames.png deleted file mode 100644 index 87cac46..0000000 Binary files a/_book/images/ames.png and /dev/null differ diff --git a/_book/images/arrival-movie.png b/_book/images/arrival-movie.png deleted file mode 100644 index 90b0094..0000000 Binary files a/_book/images/arrival-movie.png and /dev/null differ diff --git a/_book/images/bayes-three-panels.png b/_book/images/bayes-three-panels.png deleted file mode 100644 index bccc542..0000000 Binary files a/_book/images/bayes-three-panels.png and /dev/null differ diff --git a/_book/images/best_fit.png b/_book/images/best_fit.png deleted file mode 100644 index 18a1f8d..0000000 Binary files a/_book/images/best_fit.png and /dev/null differ diff --git a/_book/images/boxplot-schematic.png b/_book/images/boxplot-schematic.png deleted file mode 100644 index 4579e69..0000000 Binary files a/_book/images/boxplot-schematic.png 
and /dev/null differ diff --git a/_book/images/boxplot.png b/_book/images/boxplot.png deleted file mode 100644 index 4d41dfb..0000000 Binary files a/_book/images/boxplot.png and /dev/null differ diff --git a/_book/images/caribou_location.png b/_book/images/caribou_location.png deleted file mode 100644 index 66ca0f1..0000000 Binary files a/_book/images/caribou_location.png and /dev/null differ diff --git a/_book/images/code_utf_8.png b/_book/images/code_utf_8.png deleted file mode 100644 index d2e9f8b..0000000 Binary files a/_book/images/code_utf_8.png and /dev/null differ diff --git a/_book/images/colwise_vs_rowwise.jpg b/_book/images/colwise_vs_rowwise.jpg deleted file mode 100644 index 581283f..0000000 Binary files a/_book/images/colwise_vs_rowwise.jpg and /dev/null differ diff --git a/_book/images/common-aesthetics.png b/_book/images/common-aesthetics.png deleted file mode 100644 index b3a7fe9..0000000 Binary files a/_book/images/common-aesthetics.png and /dev/null differ diff --git a/_book/images/culmen_depth.png b/_book/images/culmen_depth.png deleted file mode 100644 index 658652e..0000000 Binary files a/_book/images/culmen_depth.png and /dev/null differ diff --git a/_book/images/data-science-explore.png b/_book/images/data-science-explore.png deleted file mode 100644 index bd84df3..0000000 Binary files a/_book/images/data-science-explore.png and /dev/null differ diff --git a/_book/images/data_science.png b/_book/images/data_science.png deleted file mode 100644 index 693065e..0000000 Binary files a/_book/images/data_science.png and /dev/null differ diff --git a/_book/images/data_struction1.png b/_book/images/data_struction1.png deleted file mode 100644 index 15fb38c..0000000 Binary files a/_book/images/data_struction1.png and /dev/null differ diff --git a/_book/images/data_type.png b/_book/images/data_type.png deleted file mode 100644 index a2a2302..0000000 Binary files a/_book/images/data_type.png and /dev/null differ diff --git 
a/_book/images/dont-load-data.png b/_book/images/dont-load-data.png deleted file mode 100644 index 3e6f3f7..0000000 Binary files a/_book/images/dont-load-data.png and /dev/null differ diff --git a/_book/images/downey.png b/_book/images/downey.png deleted file mode 100644 index 2044228..0000000 Binary files a/_book/images/downey.png and /dev/null differ diff --git a/_book/images/dplyr-arrange.png b/_book/images/dplyr-arrange.png deleted file mode 100644 index 6148e60..0000000 Binary files a/_book/images/dplyr-arrange.png and /dev/null differ diff --git a/_book/images/dplyr-filter.png b/_book/images/dplyr-filter.png deleted file mode 100644 index 49e99e1..0000000 Binary files a/_book/images/dplyr-filter.png and /dev/null differ diff --git a/_book/images/dplyr-group-by.png b/_book/images/dplyr-group-by.png deleted file mode 100644 index 7de2017..0000000 Binary files a/_book/images/dplyr-group-by.png and /dev/null differ diff --git a/_book/images/dplyr-mutate.png b/_book/images/dplyr-mutate.png deleted file mode 100644 index 8139fa2..0000000 Binary files a/_book/images/dplyr-mutate.png and /dev/null differ diff --git a/_book/images/dplyr-select.png b/_book/images/dplyr-select.png deleted file mode 100644 index e388dfb..0000000 Binary files a/_book/images/dplyr-select.png and /dev/null differ diff --git a/_book/images/dplyr-summarize.png b/_book/images/dplyr-summarize.png deleted file mode 100644 index 95d19e4..0000000 Binary files a/_book/images/dplyr-summarize.png and /dev/null differ diff --git a/_book/images/dplyr-verbs.png b/_book/images/dplyr-verbs.png deleted file mode 100644 index 6b8dbdc..0000000 Binary files a/_book/images/dplyr-verbs.png and /dev/null differ diff --git a/_book/images/ease.png b/_book/images/ease.png deleted file mode 100644 index 39596bf..0000000 Binary files a/_book/images/ease.png and /dev/null differ diff --git a/_book/images/engine_dashboard.png b/_book/images/engine_dashboard.png deleted file mode 100644 index 20d34f3..0000000 Binary 
files a/_book/images/engine_dashboard.png and /dev/null differ diff --git a/_book/images/fail_to_reject_you.png b/_book/images/fail_to_reject_you.png deleted file mode 100644 index b9e8edc..0000000 Binary files a/_book/images/fail_to_reject_you.png and /dev/null differ diff --git a/_book/images/fishes.png b/_book/images/fishes.png deleted file mode 100644 index ce23b74..0000000 Binary files a/_book/images/fishes.png and /dev/null differ diff --git a/_book/images/ft_coronavirus.jpg b/_book/images/ft_coronavirus.jpg deleted file mode 100644 index 5255fc1..0000000 Binary files a/_book/images/ft_coronavirus.jpg and /dev/null differ diff --git a/_book/images/ggplot2-scales.png b/_book/images/ggplot2-scales.png deleted file mode 100644 index 32cb9db..0000000 Binary files a/_book/images/ggplot2-scales.png and /dev/null differ diff --git a/_book/images/ggplot2-themes.jpg b/_book/images/ggplot2-themes.jpg deleted file mode 100644 index 8aa0468..0000000 Binary files a/_book/images/ggplot2-themes.jpg and /dev/null differ diff --git a/_book/images/ggplot2_guides.jpg b/_book/images/ggplot2_guides.jpg deleted file mode 100644 index a609753..0000000 Binary files a/_book/images/ggplot2_guides.jpg and /dev/null differ diff --git a/_book/images/ggplot2_scales_cheat.jpg b/_book/images/ggplot2_scales_cheat.jpg deleted file mode 100644 index b648d89..0000000 Binary files a/_book/images/ggplot2_scales_cheat.jpg and /dev/null differ diff --git a/_book/images/ggplot2_system.png b/_book/images/ggplot2_system.png deleted file mode 100644 index 2034c2d..0000000 Binary files a/_book/images/ggplot2_system.png and /dev/null differ diff --git a/_book/images/ggplot_aesthetics_cheatsheet.png b/_book/images/ggplot_aesthetics_cheatsheet.png deleted file mode 100644 index 66523b7..0000000 Binary files a/_book/images/ggplot_aesthetics_cheatsheet.png and /dev/null differ diff --git a/_book/images/ggplot_template.png b/_book/images/ggplot_template.png deleted file mode 100644 index 94e7bc8..0000000 
Binary files a/_book/images/ggplot_template.png and /dev/null differ diff --git a/_book/images/github_COVID-19_download.png b/_book/images/github_COVID-19_download.png deleted file mode 100644 index 68fbf88..0000000 Binary files a/_book/images/github_COVID-19_download.png and /dev/null differ diff --git a/_book/images/github_COVID-19_files.png b/_book/images/github_COVID-19_files.png deleted file mode 100644 index 3ab0d46..0000000 Binary files a/_book/images/github_COVID-19_files.png and /dev/null differ diff --git a/_book/images/grammar-of-graphics.png b/_book/images/grammar-of-graphics.png deleted file mode 100644 index 8695979..0000000 Binary files a/_book/images/grammar-of-graphics.png and /dev/null differ diff --git a/_book/images/hadley-wickham.jpg b/_book/images/hadley-wickham.jpg deleted file mode 100644 index e1067b5..0000000 Binary files a/_book/images/hadley-wickham.jpg and /dev/null differ diff --git a/_book/images/hcl-palettes-principles.png b/_book/images/hcl-palettes-principles.png deleted file mode 100644 index 94a029d..0000000 Binary files a/_book/images/hcl-palettes-principles.png and /dev/null differ diff --git a/_book/images/hex/dplyr.png b/_book/images/hex/dplyr.png deleted file mode 100644 index 15754e4..0000000 Binary files a/_book/images/hex/dplyr.png and /dev/null differ diff --git a/_book/images/hex/forcats.png b/_book/images/hex/forcats.png deleted file mode 100644 index 968a1a7..0000000 Binary files a/_book/images/hex/forcats.png and /dev/null differ diff --git a/_book/images/hex/ggplot2.png b/_book/images/hex/ggplot2.png deleted file mode 100644 index 50ffcbb..0000000 Binary files a/_book/images/hex/ggplot2.png and /dev/null differ diff --git a/_book/images/hex/purrr.png b/_book/images/hex/purrr.png deleted file mode 100644 index 41eeb95..0000000 Binary files a/_book/images/hex/purrr.png and /dev/null differ diff --git a/_book/images/hex/readr.png b/_book/images/hex/readr.png deleted file mode 100644 index 349db07..0000000 Binary files 
a/_book/images/hex/readr.png and /dev/null differ diff --git a/_book/images/hex/stringr.png b/_book/images/hex/stringr.png deleted file mode 100644 index 6cd25fb..0000000 Binary files a/_book/images/hex/stringr.png and /dev/null differ diff --git a/_book/images/hex/tibble.png b/_book/images/hex/tibble.png deleted file mode 100644 index 15bf5ec..0000000 Binary files a/_book/images/hex/tibble.png and /dev/null differ diff --git a/_book/images/how_to_plot.png b/_book/images/how_to_plot.png deleted file mode 100644 index 42a5508..0000000 Binary files a/_book/images/how_to_plot.png and /dev/null differ diff --git a/_book/images/icons/link_external.svg b/_book/images/icons/link_external.svg deleted file mode 100644 index d05a889..0000000 --- a/_book/images/icons/link_external.svg +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - diff --git a/_book/images/icons/link_external1.svg b/_book/images/icons/link_external1.svg deleted file mode 100644 index 803b6b4..0000000 --- a/_book/images/icons/link_external1.svg +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - diff --git a/_book/images/icons/link_external2.svg b/_book/images/icons/link_external2.svg deleted file mode 100644 index 243733a..0000000 --- a/_book/images/icons/link_external2.svg +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - diff --git a/_book/images/imdb.png b/_book/images/imdb.png deleted file mode 100644 index 7741210..0000000 Binary files a/_book/images/imdb.png and /dev/null differ diff --git a/_book/images/import_datatype01.png b/_book/images/import_datatype01.png deleted file mode 100644 index 4287d74..0000000 Binary files a/_book/images/import_datatype01.png and /dev/null differ diff --git a/_book/images/infer-ht-diagram.png b/_book/images/infer-ht-diagram.png deleted file mode 100644 index b940396..0000000 Binary files a/_book/images/infer-ht-diagram.png and /dev/null differ diff --git a/_book/images/infer_workflow.jpeg b/_book/images/infer_workflow.jpeg deleted file mode 100644 index eece5f8..0000000 
Binary files a/_book/images/infer_workflow.jpeg and /dev/null differ diff --git a/_book/images/join_examples.jpg b/_book/images/join_examples.jpg deleted file mode 100644 index c2b8827..0000000 Binary files a/_book/images/join_examples.jpg and /dev/null differ diff --git a/_book/images/lego_example.jpg b/_book/images/lego_example.jpg deleted file mode 100644 index 7df0cbf..0000000 Binary files a/_book/images/lego_example.jpg and /dev/null differ diff --git a/_book/images/linear_tests_cheat_sheet.png b/_book/images/linear_tests_cheat_sheet.png deleted file mode 100644 index 079aa9f..0000000 Binary files a/_book/images/linear_tests_cheat_sheet.png and /dev/null differ diff --git a/_book/images/list_subset.png b/_book/images/list_subset.png deleted file mode 100644 index b04fde8..0000000 Binary files a/_book/images/list_subset.png and /dev/null differ diff --git a/_book/images/lm-object-schematic.png b/_book/images/lm-object-schematic.png deleted file mode 100644 index 3f96599..0000000 Binary files a/_book/images/lm-object-schematic.png and /dev/null differ diff --git a/_book/images/make_data_tidy.png b/_book/images/make_data_tidy.png deleted file mode 100644 index 37fadd5..0000000 Binary files a/_book/images/make_data_tidy.png and /dev/null differ diff --git a/_book/images/make_data_tidy2.jpg b/_book/images/make_data_tidy2.jpg deleted file mode 100644 index 274f779..0000000 Binary files a/_book/images/make_data_tidy2.jpg and /dev/null differ diff --git a/_book/images/map_fun.png b/_book/images/map_fun.png deleted file mode 100644 index 7392fdc..0000000 Binary files a/_book/images/map_fun.png and /dev/null differ diff --git a/_book/images/map_function1.png b/_book/images/map_function1.png deleted file mode 100644 index 1f98568..0000000 Binary files a/_book/images/map_function1.png and /dev/null differ diff --git a/_book/images/map_function2.png b/_book/images/map_function2.png deleted file mode 100644 index 20349c3..0000000 Binary files 
a/_book/images/map_function2.png and /dev/null differ diff --git a/_book/images/map_function3.png b/_book/images/map_function3.png deleted file mode 100644 index 5c50a24..0000000 Binary files a/_book/images/map_function3.png and /dev/null differ diff --git a/_book/images/mapping.png b/_book/images/mapping.png deleted file mode 100644 index f14fcc4..0000000 Binary files a/_book/images/mapping.png and /dev/null differ diff --git a/_book/images/meme.png b/_book/images/meme.png deleted file mode 100644 index 24cc0cc..0000000 Binary files a/_book/images/meme.png and /dev/null differ diff --git a/_book/images/mirror1.png b/_book/images/mirror1.png deleted file mode 100644 index b3467cc..0000000 Binary files a/_book/images/mirror1.png and /dev/null differ diff --git a/_book/images/mirror2.png b/_book/images/mirror2.png deleted file mode 100644 index 552e870..0000000 Binary files a/_book/images/mirror2.png and /dev/null differ diff --git a/_book/images/model_evaluation.png b/_book/images/model_evaluation.png deleted file mode 100644 index e776f81..0000000 Binary files a/_book/images/model_evaluation.png and /dev/null differ diff --git a/_book/images/movie_contagion.jpg b/_book/images/movie_contagion.jpg deleted file mode 100644 index 789cac1..0000000 Binary files a/_book/images/movie_contagion.jpg and /dev/null differ diff --git a/_book/images/movie_flu.png b/_book/images/movie_flu.png deleted file mode 100644 index 4e2b232..0000000 Binary files a/_book/images/movie_flu.png and /dev/null differ diff --git a/_book/images/nature_editorial.png b/_book/images/nature_editorial.png deleted file mode 100644 index 772431f..0000000 Binary files a/_book/images/nature_editorial.png and /dev/null differ diff --git a/_book/images/night_king.jpg b/_book/images/night_king.jpg deleted file mode 100644 index 4e1dc74..0000000 Binary files a/_book/images/night_king.jpg and /dev/null differ diff --git a/_book/images/nobel_prize_winners_list.jpg b/_book/images/nobel_prize_winners_list.jpg 
deleted file mode 100644 index a1fe111..0000000 Binary files a/_book/images/nobel_prize_winners_list.jpg and /dev/null differ diff --git a/_book/images/node_edge01.png b/_book/images/node_edge01.png deleted file mode 100644 index 2e2c287..0000000 Binary files a/_book/images/node_edge01.png and /dev/null differ diff --git a/_book/images/node_edge02.png b/_book/images/node_edge02.png deleted file mode 100644 index 031f960..0000000 Binary files a/_book/images/node_edge02.png and /dev/null differ diff --git a/_book/images/node_edge03.png b/_book/images/node_edge03.png deleted file mode 100644 index a0f0478..0000000 Binary files a/_book/images/node_edge03.png and /dev/null differ diff --git a/_book/images/patchwork.png b/_book/images/patchwork.png deleted file mode 100644 index 1dcb115..0000000 Binary files a/_book/images/patchwork.png and /dev/null differ diff --git a/_book/images/penguins.png b/_book/images/penguins.png deleted file mode 100644 index c73248b..0000000 Binary files a/_book/images/penguins.png and /dev/null differ diff --git a/_book/images/pi.jpg b/_book/images/pi.jpg deleted file mode 100644 index 6c128f0..0000000 Binary files a/_book/images/pi.jpg and /dev/null differ diff --git a/_book/images/pipe1.png b/_book/images/pipe1.png deleted file mode 100644 index 943cf63..0000000 Binary files a/_book/images/pipe1.png and /dev/null differ diff --git a/_book/images/pipe2.png b/_book/images/pipe2.png deleted file mode 100644 index c373ef8..0000000 Binary files a/_book/images/pipe2.png and /dev/null differ diff --git a/_book/images/pivot.png b/_book/images/pivot.png deleted file mode 100644 index acf6e38..0000000 Binary files a/_book/images/pivot.png and /dev/null differ diff --git a/_book/images/pivot_longer_values.jpg b/_book/images/pivot_longer_values.jpg deleted file mode 100644 index 83c0210..0000000 Binary files a/_book/images/pivot_longer_values.jpg and /dev/null differ diff --git a/_book/images/rbook1.png b/_book/images/rbook1.png deleted file mode 
100644 index 4fd0542..0000000 Binary files a/_book/images/rbook1.png and /dev/null differ diff --git a/_book/images/recipes-process.png b/_book/images/recipes-process.png deleted file mode 100644 index aad44e8..0000000 Binary files a/_book/images/recipes-process.png and /dev/null differ diff --git a/_book/images/regex_repeat.jpg b/_book/images/regex_repeat.jpg deleted file mode 100644 index 8e9bdc9..0000000 Binary files a/_book/images/regex_repeat.jpg and /dev/null differ diff --git a/_book/images/rmarkdown.png b/_book/images/rmarkdown.png deleted file mode 100644 index 2056184..0000000 Binary files a/_book/images/rmarkdown.png and /dev/null differ diff --git a/_book/images/rstudio-editor.png b/_book/images/rstudio-editor.png deleted file mode 100644 index e172c84..0000000 Binary files a/_book/images/rstudio-editor.png and /dev/null differ diff --git a/_book/images/rstudio-markdown.png b/_book/images/rstudio-markdown.png deleted file mode 100644 index da3456d..0000000 Binary files a/_book/images/rstudio-markdown.png and /dev/null differ diff --git a/_book/images/script1.png b/_book/images/script1.png deleted file mode 100644 index 2d05dfb..0000000 Binary files a/_book/images/script1.png and /dev/null differ diff --git a/_book/images/script2.png b/_book/images/script2.png deleted file mode 100644 index f6b7294..0000000 Binary files a/_book/images/script2.png and /dev/null differ diff --git a/_book/images/social_science.jpg b/_book/images/social_science.jpg deleted file mode 100644 index e56aaf1..0000000 Binary files a/_book/images/social_science.jpg and /dev/null differ diff --git a/_book/images/styler.png b/_book/images/styler.png deleted file mode 100644 index 762d45a..0000000 Binary files a/_book/images/styler.png and /dev/null differ diff --git a/_book/images/support.jpg b/_book/images/support.jpg deleted file mode 100644 index a70004d..0000000 Binary files a/_book/images/support.jpg and /dev/null differ diff --git a/_book/images/tbl_graph02.png 
b/_book/images/tbl_graph02.png deleted file mode 100644 index e0b8068..0000000 Binary files a/_book/images/tbl_graph02.png and /dev/null differ diff --git a/_book/images/tbl_graph04.png b/_book/images/tbl_graph04.png deleted file mode 100644 index fcd60e5..0000000 Binary files a/_book/images/tbl_graph04.png and /dev/null differ diff --git a/_book/images/tidyr-fig.png b/_book/images/tidyr-fig.png deleted file mode 100644 index 44b663e..0000000 Binary files a/_book/images/tidyr-fig.png and /dev/null differ diff --git a/_book/images/tidyverse-workflow.png b/_book/images/tidyverse-workflow.png deleted file mode 100644 index 61f75c1..0000000 Binary files a/_book/images/tidyverse-workflow.png and /dev/null differ diff --git a/_book/images/tidyverse.png b/_book/images/tidyverse.png deleted file mode 100644 index 62a62a8..0000000 Binary files a/_book/images/tidyverse.png and /dev/null differ diff --git a/_book/images/tiobe-index.png b/_book/images/tiobe-index.png deleted file mode 100644 index 405ea4a..0000000 Binary files a/_book/images/tiobe-index.png and /dev/null differ diff --git a/_book/images/ukmss-36386-f0001.jpg b/_book/images/ukmss-36386-f0001.jpg deleted file mode 100644 index ca8683c..0000000 Binary files a/_book/images/ukmss-36386-f0001.jpg and /dev/null differ diff --git a/_book/images/update_packages.png b/_book/images/update_packages.png deleted file mode 100644 index 34ec44b..0000000 Binary files a/_book/images/update_packages.png and /dev/null differ diff --git a/_book/images/vaccine.png b/_book/images/vaccine.png deleted file mode 100644 index 9b9a325..0000000 Binary files a/_book/images/vaccine.png and /dev/null differ diff --git a/_book/images/variables.png b/_book/images/variables.png deleted file mode 100644 index bd75f6f..0000000 Binary files a/_book/images/variables.png and /dev/null differ diff --git a/_book/images/vctr.png b/_book/images/vctr.png deleted file mode 100644 index a4177e8..0000000 Binary files a/_book/images/vctr.png and /dev/null 
differ diff --git a/_book/images/violin-schematic-1.png b/_book/images/violin-schematic-1.png deleted file mode 100644 index 17b0ae6..0000000 Binary files a/_book/images/violin-schematic-1.png and /dev/null differ diff --git a/_book/images/what_is_R.png b/_book/images/what_is_R.png deleted file mode 100644 index 44af98e..0000000 Binary files a/_book/images/what_is_R.png and /dev/null differ diff --git a/_book/images/why_R_is_best_language.png b/_book/images/why_R_is_best_language.png deleted file mode 100644 index 9de1805..0000000 Binary files a/_book/images/why_R_is_best_language.png and /dev/null differ diff --git a/_book/images/workflow_c.png b/_book/images/workflow_c.png deleted file mode 100644 index 5bbd397..0000000 Binary files a/_book/images/workflow_c.png and /dev/null differ diff --git a/_book/index.html b/_book/index.html deleted file mode 100644 index 389c093..0000000 --- a/_book/index.html +++ /dev/null @@ -1,1444 +0,0 @@ - - - - - - - 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
- -
-

前言

-

你好,这里是四川师范大学研究生公选课《数据科学中的R语言》的课程内容。考虑到大家来自不同的学院,有着不同的学科背景,因此讲授的内容不会太深奥(要有信心喔)。

-

比如在课程中以下内容就不会出现

-

\[ -f(x)=\frac{1}{\sqrt{2 \pi}} e^{-\frac{1}{2} x^{2}} -\]

-

而出现更多的是

-
library(tidyverse)
-summary_monthly_temp <- weather %>% 
-  group_by(month) %>% 
-  summarize(mean = mean(temp), 
-            std_dev = sd(temp))
-

跟进本课程的同时, 我强烈推荐大家阅读Hadley Wickham的 -r4ds这本书 (Grolemund and Wickham 2017)。作者可是2019年8月刚刚获得考普斯总统奖(被誉为统计学的诺贝尔奖)的大神喔,点击这里可以看他照片。

-

-
-

关于课程

-

1、课程安排是这样的,每个章节研究的内容都是彼此独立的,大家可以单独阅读每章及运行代码。

-
    -
  • 基础篇 -
      -
    • 1 章介绍数据科学基础
    • -
    • 2 章介绍R语言基本概念
    • -
    • 3 章介绍R语言中的子集选取
    • -
  • -
  • tidyverse篇 -
      -
    • 4 章介绍可重复性报告
    • -
    • 5 章介绍数据读入
    • -
    • 6 章介绍数据处理
    • -
    • 7 章介绍数据可视化
    • -
    • 8 章介绍数据规整
    • -
    • 9 章介绍字符串处理
    • -
    • 10 章介绍因子类型数据
    • -
    • 11 章介绍函数式编程
    • -
    • 12 章介绍简单数据框
    • -
    • 13 章ggplot2几何对象
    • -
    • 14 章ggplot2的主题
    • -
    • 15 章ggplot2的标度
    • -
    • 16 章ggplot2的图例
    • -
    • 17 章ggplot2扩展内容
    • -
    • 18 章ggplot2统计图层
    • -
    • 19 章回望tidyverse之旅
    • -
    • 20 章介绍tidyverse常用技巧
    • -
    • 21 章介绍tidyverse进阶技巧
    • -
    • 22 章介绍数据框的列方向和行方向
    • -
    • 23 章介绍tidyverse中的across()之美
    • -
    • 24 章介绍tidyverse中的NA
    • -
    • 25 章介绍tidyverse中的dot
    • -
    • 26 章介绍非标准性评估
    • -
  • -
  • 建模篇 -
      -
    • 27 章介绍模拟与抽样
    • -
    • 28 章介绍线性模型
    • -
    • 29 章介绍模型输出结果的规整
    • -
    • 30 章介绍方差分析
    • -
    • 31 章介绍统计检验与线性模型的等价性
    • -
    • 32 章介绍统计推断
    • -
    • 33 章介绍多层线性模型
    • -
    • 34 章介绍广义线性模型中的泊松回归
    • -
    • 35 章介绍logistic回归模型
    • -
    • 36 章介绍有序logistic回归模型
    • -
    • 37 章介绍机器学习
    • -
    • 38 章介绍贝叶斯模型和Stan
    • -
  • -
  • 应用篇 -
      -
    • 39 章介绍探索性数据分析-诺奖获得者
      -
    • -
    • 40 章介绍探索性数据分析-奥林匹克
      -
    • -
    • 41 章介绍探索性数据分析-新冠疫情
      -
    • -
    • 42 章介绍探索性数据分析-anscombe数据集
      -
    • -
    • 43 章介绍探索性数据分析-身高体重
    • -
    • 44 章介绍探索性数据分析-驯鹿迁移
      -
    • -
    • 45 章介绍探索性数据分析-企鹅的故事
    • -
    • 46 章介绍探索性数据分析-大学生职业决策
    • -
    • 47 章介绍探索性数据分析-ames房屋价格
    • -
    • 48 章介绍探索性数据分析-新冠疫苗有效率的计算
    • -
    • 49 章介绍网页爬虫
    • -
    • 50 章介绍社会网络分析
    • -
    • 51 章介绍文本挖掘
    • -
    • 52 章介绍时间序列分析
    • -
    • 53 章介绍地理数据处理
    • -
    • 54 章介绍tidyverse中行方向的操作
    • -
    • 55 章介绍科研数据可视化中的统计分布图
    • -
    • 56 章介绍数据可视化中的配色
    • -
    • 57 章让你的数据骚动起来
    • -
    • 58 章介绍我收集的一些有用和有趣的宏包
    • -
  • -
-

2、课件源代码和数据

-

我将持续改进课件,所以欢迎大家提出建议

- - - -

4、关于课程目标

-
    -
  • 课程目标: 熟悉数据科学流程,掌握统计编程技能,能运用探索性分析方法,解决基本的实际应用问题,做到学以致用,不是 learning R,而是 learning with R

  • -
  • 授课方式:

    -
      -
    • 边写代码边讲
    • -
    • 通过案例式、问题式的方法,增强参与感和目标感
    • -
  • -
  • 课堂要求

    -
      -
    • 自带电脑,配好运行环境
    • -
    • 光看李小龙的电影,是学不会功夫的
    • -
  • -
  • 科学脚手架

    -
      -
    • 科学脚手架,我个人比较喜欢这个比喻(我微信公众号就使用了这个名字)。在教育中,各种知识或技巧就好比建房子用的脚手架,它帮助我们加深理解,逐渐获得独立自主学习的能力。
    • -
  • -
-

5、关于如何提问

-

有的同学,这样一上来就问:老师,我的代码怎么运行不出来呢?或者图省事,干脆手机拍个照片一发。

-
    -
  • 我想说,要想获得快速的帮助,在问问题之前,请先告诉对方三个信息: -
      -
    • 想解决的问题是什么?
    • -
    • 代码是什么?
    • -
    • 报错信息是什么?
    • -
  • -
-
-
-

课件中用到的宏包

-
my_packages <- 
-   c("brms", "broom", "broom.mixed", "colorspace", "corrr", "countrycode", "cowplot", "cranlogs", "datapasta", "datasauRus", "devtools", "dplyr", "equatiomatic", "forcats", "gapminder", "geosphere", "gganimate", "ggbeeswarm", "ggeffects", "ggforce", "gghighlight", "ggimage", "ggplot2", "ggpubr", "ggraph", "ggrepel", "ggridges", "ggstatsplot", "ggtext", "ggthemes", "gt", "gtsummary", "haven", "here", "janitor", "knitr", "latex2exp", "lme4", "lubridate", "maps", "margins", "MASS", "modelr", "naniar", "nycflights13", "ordinal", "pacman", "pacman", "paletteer", "palmerpenguins", "patchwork", "performance", "purrr", "readr", "readxl", "remotes", "reprex", "rlang", "rmarkdown", "rstan", "rvest", "scales", "sf", "shadowtext", "showtext", "slider", "stars", "statsExpressions", "stringr", "styler", "tibble", "tibbletime", "tidybayes", "tidygraph", "tidymodels", "tidyr", "tidytext", "tidyverse", "tinytex", "viridis", "visdat", "namer")
-
install.packages(my_packages, repos = "http://cran.rstudio.com", dependencies = T)
-

可能用到的开发版本的宏包

-
#remotes::install_github("datalorax/equatiomatic")
-devtools::install_github("easystats/report")
-devtools::install_github("kassambara/navdata")
-devtools::install_github('cttobin/ggthemr')
-remotes::install_github("daranzolin/inferregex")
-devtools::install_github("EmilHvitfeldt/gganonymize")
-remotes::install_github("ThinkR-open/remedy") 
-remotes::install_git("https://git.rud.is/hrbrmstr/hrbraddins.git") 
-devtools::install_github("hadley/emo") 
-remotes::install_github("romainfrancois/lay")
-remotes::install_github("kjhealy/covdata")
-devtools::install_github("kbodwin/flair")
-devtools::install_github("seasmith/AlignAssign")
-
-
-

RYouWithMe

-

-
-
-

致谢

-

非常感谢川师研究生院的信任,有了您的支持,才会有更多的川师学子了解R的美!

- -

-王敏杰
-于 川师图书馆某角落 -

- -
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/infer.html b/_book/infer.html deleted file mode 100644 index e5eddbf..0000000 --- a/_book/infer.html +++ /dev/null @@ -1,1787 +0,0 @@ - - - - - - - 第 32 章 统计推断 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 32 章 统计推断

-

Statistical Inference: A Tidy Approach

-
-

32.1 案例1:你会给爱情片还是动作片高分?

-

-

这是一个关于电影评分的数据集3

-
library(tidyverse)
-d <- ggplot2movies::movies
-d
-
## # A tibble: 58,788 x 24
-##    title    year length budget rating votes    r1    r2
-##    <chr>   <int>  <int>  <int>  <dbl> <int> <dbl> <dbl>
-##  1 $        1971    121     NA    6.4   348   4.5   4.5
-##  2 $1000 ~  1939     71     NA    6      20   0    14.5
-##  3 $21 a ~  1941      7     NA    8.2     5   0     0  
-##  4 $40,000  1996     70     NA    8.2     6  14.5   0  
-##  5 $50,00~  1975     71     NA    3.4    17  24.5   4.5
-##  6 $pent    2000     91     NA    4.3    45   4.5   4.5
-##  7 $windle  2002     93     NA    5.3   200   4.5   0  
-##  8 '15'     2002     25     NA    6.7    24   4.5   4.5
-##  9 '38      1987     97     NA    6.6    18   4.5   4.5
-## 10 '49-'17  1917     61     NA    6      51   4.5   0  
-## # ... with 58,778 more rows, and 16 more variables:
-## #   r3 <dbl>, r4 <dbl>, r5 <dbl>, r6 <dbl>, r7 <dbl>,
-## #   r8 <dbl>, r9 <dbl>, r10 <dbl>, mpaa <chr>,
-## #   Action <int>, Animation <int>, Comedy <int>,
-## #   Drama <int>, Documentary <int>, Romance <int>,
-## #   Short <int>
-

数据集包含 58788 行和 24 个变量

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
variabledescription
title电影名
year发行年份
budget预算金额
length电影时长
rating平均得分
votes投票人数
r1-10各分段投票人占比
mpaaMPAA 分级
action动作片
animation动画片
comedy喜剧片
drama戏剧
documentary纪录片
romance爱情片
short短片
-

我们想看下爱情片与动作片(不是爱情动作片)的平均得分是否显著不同。

-
    -
  • 首先我们简单的整理下数据,主要是剔除既是爱情片又是动作片的电影
  • -
-
movies_genre_sample <- d %>%
-  select(title, year, rating, Action, Romance) %>%
-  filter(!(Action == 1 & Romance == 1)) %>% # 既是爱情片又是动作片的,删去
-  mutate(genre = case_when(
-    Action == 1 ~ "Action",
-    Romance == 1 ~ "Romance",
-    TRUE ~ "Neither"
-  )) %>%
-  filter(genre != "Neither") %>%
-  select(-Action, -Romance) %>%
-  group_by(genre) %>%
-  slice_sample(n = 34) %>% # 每种题材的电影只选取了34个
-  ungroup()
-
-movies_genre_sample
-
## # A tibble: 68 x 4
-##    title                              year rating genre
-##    <chr>                             <int>  <dbl> <chr>
-##  1 Alley Cat                          1982    6.1 Acti~
-##  2 Purple Plain, The                  1954    6.3 Acti~
-##  3 Martial Law                        1990    4   Acti~
-##  4 Vodkaa, komisario Palmu            1969    5.7 Acti~
-##  5 Vendetta                           1986    5.1 Acti~
-##  6 Confessione di un commissario di~  1971    7   Acti~
-##  7 Full Clip                          2004    4.1 Acti~
-##  8 Tailspin Tommy in The Great Air ~  1935    9.6 Acti~
-##  9 Cyclone                            1978    3.1 Acti~
-## 10 Gharaana Mogudu                    1992    9   Acti~
-## # ... with 58 more rows
-
    -
  • 先看下图形
  • -
-
movies_genre_sample %>%
-  ggplot(aes(x = genre, y = rating)) +
-  geom_boxplot() +
-  geom_jitter()
-

-
    -
  • 看下两种题材电影评分的分布
  • -
-
movies_genre_sample %>%
-  ggplot(mapping = aes(x = rating)) +
-  geom_histogram(binwidth = 1, color = "white") +
-  facet_grid(vars(genre))
-

-
    -
  • 统计两种题材电影评分的均值
  • -
-
summary_ratings <- movies_genre_sample %>%
-  group_by(genre) %>%
-  summarize(
-    mean = mean(rating),
-    std_dev = sd(rating),
-    n = n()
-  )
-summary_ratings
-
## # A tibble: 2 x 4
-##   genre    mean std_dev     n
-##   <chr>   <dbl>   <dbl> <int>
-## 1 Action   5.54    1.72    34
-## 2 Romance  6.08    1.41    34
-
-

32.1.1 传统的基于频率方法的t检验

-

假设:

-
    -
  • 零假设:

    -
      -
    • \(H_0: \mu_{1} - \mu_{2} = 0\)
    • -
  • -
  • 备选假设:

    -
      -
    • \(H_A: \mu_{1} - \mu_{2} \neq 0\)
    • -
  • -
-

两种可能的结论:

-
    -
  • 拒绝 \(H_0\)
  • -
  • 不能拒绝 \(H_0\)
  • -
-
t_test_eq <- t.test(rating ~ genre,
-  data = movies_genre_sample,
-  var.equal = TRUE
-) %>%
-  broom::tidy()
-t_test_eq
-
## # A tibble: 1 x 10
-##   estimate estimate1 estimate2 statistic p.value
-##      <dbl>     <dbl>     <dbl>     <dbl>   <dbl>
-## 1   -0.541      5.54      6.08     -1.42   0.161
-## # ... with 5 more variables: parameter <dbl>,
-## #   conf.low <dbl>, conf.high <dbl>, method <chr>,
-## #   alternative <chr>
-
t_test_uneq <- t.test(rating ~ genre,
-  data = movies_genre_sample,
-  var.equal = FALSE
-) %>%
-  broom::tidy()
-t_test_uneq
-
## # A tibble: 1 x 10
-##   estimate estimate1 estimate2 statistic p.value
-##      <dbl>     <dbl>     <dbl>     <dbl>   <dbl>
-## 1   -0.541      5.54      6.08     -1.42   0.161
-## # ... with 5 more variables: parameter <dbl>,
-## #   conf.low <dbl>, conf.high <dbl>, method <chr>,
-## #   alternative <chr>
-
-
-

32.1.2 infer:基于模拟的检验

-

所有的假设检验都符合这个框架4:

-
-Hypothesis Testing Framework -

-图 32.1: Hypothesis Testing Framework -

-
-
    -
  • 实际观察的差别
  • -
-
library(infer)
-
-obs_diff <- movies_genre_sample %>%
-  specify(formula = rating ~ genre) %>%
-  calculate(
-    stat = "diff in means",
-    order = c("Romance", "Action")
-  )
-obs_diff
-
## # A tibble: 1 x 1
-##    stat
-##   <dbl>
-## 1 0.541
-
    -
  • 模拟
  • -
-
null_dist <- movies_genre_sample %>%
-  specify(formula = rating ~ genre) %>%
-  hypothesize(null = "independence") %>%
-  generate(reps = 5000, type = "permute") %>% 
-  calculate(
-    stat = "diff in means",
-    order = c("Romance", "Action")
-  )
-head(null_dist)
-
## # A tibble: 6 x 2
-##   replicate    stat
-##       <int>   <dbl>
-## 1         1  0.135 
-## 2         2  0.0353
-## 3         3 -0.7   
-## 4         4  0.294 
-## 5         5 -0.141 
-## 6         6  0.247
-
    -
  • 可视化
  • -
-
null_dist %>%
-  visualize()
-

-
null_dist %>%
-  visualize() +
-  shade_p_value(obs_stat = obs_diff, direction = "both")
-

-
# shade_p_value(bins = 100, obs_stat = obs_diff, direction = "both")
-
    -
  • 计算p值
  • -
-
pvalue <- null_dist %>%
-  get_pvalue(obs_stat = obs_diff, direction = "two_sided")
-
-pvalue
-
## # A tibble: 1 x 1
-##   p_value
-##     <dbl>
-## 1   0.164
-
    -
  • 结论
  • -
-

在构建的虚拟(\(\Delta = 0\))的平行世界里,出现实际观察值(0.5412)或更极端情况的概率是(0.1644)。 如果以(p< 0.05)为标准,这个概率并不算小,因此我们没有足够的证据拒绝 H0,即不能认为爱情电影和动作电影的评分均值存在显著差异。虽然在这份样本中,动作电影的平均评分比爱情电影低些,但这个差别完全可能是由随机抽样造成的。

-
-
-
-

32.2 案例2: 航天事业的预算有党派门户之见?

-

美国国家航空航天局的预算是否存在党派门户之见?

-
gss <- read_rds("./demo_data/gss.rds")
-
-gss %>%
-  select(NASA, party) %>%
-  count(NASA, party) %>%
-  head(8)
-
## # A tibble: 8 x 3
-##   NASA        party     n
-##   <fct>       <fct> <int>
-## 1 TOO LITTLE  Dem       8
-## 2 TOO LITTLE  Ind      13
-## 3 TOO LITTLE  Rep       9
-## 4 ABOUT RIGHT Dem      22
-## 5 ABOUT RIGHT Ind      37
-## 6 ABOUT RIGHT Rep      17
-## 7 TOO MUCH    Dem      13
-## 8 TOO MUCH    Ind      22
-
gss %>%
-  ggplot(aes(x = party, fill = NASA)) +
-  geom_bar()
-

-

假设:

-
    -
  • 零假设 \(H_0\):

    -
      -
    • 不同党派对预算的态度的构成比(TOO LITTLE, ABOUT RIGHT, TOO MUCH) 没有区别
    • -
  • -
  • 备选假设 \(H_a\):

    -
      -
    • 不同党派对预算的态度的构成比(TOO LITTLE, ABOUT RIGHT, TOO MUCH) 存在区别
    • -
  • -
-

两种可能的结论:

-
    -
  • 拒绝 \(H_0\)
  • -
  • 不能拒绝 \(H_0\)
  • -
-
-

32.2.1 传统的方法

-
chisq.test(gss$party, gss$NASA)
-
## 
-##  Pearson's Chi-squared test
-## 
-## data:  gss$party and gss$NASA
-## X-squared = 1.3, df = 4, p-value = 0.9
-

或者

-
gss %>%
-  chisq_test(NASA ~ party) %>%
-  dplyr::select(p_value) %>%
-  dplyr::pull()
-
## [1] 0.8569
-
-
-

32.2.2 infer:Simulation-based tests

-
obs_stat <- gss %>%
-  specify(NASA ~ party) %>%
-  calculate(stat = "Chisq")
-obs_stat
-
## # A tibble: 1 x 1
-##    stat
-##   <dbl>
-## 1  1.33
-
null_dist <- gss %>%
-  specify(NASA ~ party) %>%                     # (1)
-  hypothesize(null = "independence") %>%        # (2)
-    generate(reps = 5000, type = "permute") %>% # (3)
-  calculate(stat = "Chisq")                     # (4)
-null_dist
-
## # A tibble: 5,000 x 2
-##    replicate  stat
-##        <int> <dbl>
-##  1         1  4.85
-##  2         2  1.01
-##  3         3  6.20
-##  4         4  4.08
-##  5         5  2.46
-##  6         6  2.72
-##  7         7  2.63
-##  8         8  1.27
-##  9         9  3.96
-## 10        10  3.24
-## # ... with 4,990 more rows
-
null_dist %>%
-  visualize() +
-  shade_p_value(obs_stat = obs_stat, method = "both", direction = "right")
-

-
null_dist %>%
-  get_pvalue(obs_stat = obs_stat, direction = "greater")
-
## # A tibble: 1 x 1
-##   p_value
-##     <dbl>
-## 1   0.851
-

看到 p_value > 0.05,不能拒绝 \(H_0\),我们没有足够的证据证明党派之间有显著差异

-

-
-
-
-

32.3 案例3:原住民中的女学生多?

-

案例 quine 数据集有 146 行 5 列,包含学生的生源、文化、性别和学习成效,具体说明如下

-
    -
  • Eth: 民族背景:原住民与否 (是“A”; 否 “N”)
  • -
  • Sex: 性别
  • -
  • Age: 年龄组 (“F0,” “F1,” “F2” or “F3”)
  • -
  • Lrn: 学习者状态(平均水平 “AL”, 学习缓慢 “SL”)
  • -
  • Days:一年中缺勤天数
  • -
-
td <- MASS::quine %>%
-  as_tibble() %>%
-  mutate(
-    across(c(Sex, Eth), as_factor)
-  )
-td
-
## # A tibble: 146 x 5
-##    Eth   Sex   Age   Lrn    Days
-##    <fct> <fct> <fct> <fct> <int>
-##  1 A     M     F0    SL        2
-##  2 A     M     F0    SL       11
-##  3 A     M     F0    SL       14
-##  4 A     M     F0    AL        5
-##  5 A     M     F0    AL        5
-##  6 A     M     F0    AL       13
-##  7 A     M     F0    AL       20
-##  8 A     M     F0    AL       22
-##  9 A     M     F1    SL        6
-## 10 A     M     F1    SL        6
-## # ... with 136 more rows
-

从民族背景有两组(A, N)来看,性别为 F 的占比 是否有区别?

-
td %>% count(Eth, Sex)
-
## # A tibble: 4 x 3
-##   Eth   Sex       n
-##   <fct> <fct> <int>
-## 1 A     F        38
-## 2 A     M        31
-## 3 N     F        42
-## 4 N     M        35
-
-

32.3.1 传统方法

-
prop.test(table(td$Eth, td$Sex), correct = FALSE)
-
## 
-##  2-sample test for equality of proportions
-##  without continuity correction
-## 
-## data:  table(td$Eth, td$Sex)
-## X-squared = 0.0041, df = 1, p-value = 0.9
-## alternative hypothesis: two.sided
-## 95 percent confidence interval:
-##  -0.1564  0.1670
-## sample estimates:
-## prop 1 prop 2 
-## 0.5507 0.5455
-
-
-

32.3.2 基于模拟的方法

-
obs_diff <- td %>%
-  specify(Sex ~ Eth, success = "F") %>% # #被解释变量 sex中F的占比
-  calculate(
-    stat = "diff in props",
-    order = c("A", "N") # 解释变量中两组A,N
-  )
-
-obs_diff
-
## # A tibble: 1 x 1
-##      stat
-##     <dbl>
-## 1 0.00527
-
null_distribution <- td %>%
-  specify(Sex ~ Eth, success = "F") %>%
-  hypothesize(null = "independence") %>%
-  generate(reps = 5000, type = "permute") %>%
-  calculate(stat = "diff in props", order = c("A", "N"))
-
null_distribution %>%
-  visualize()
-

-
pvalue <- null_distribution %>%
-  get_pvalue(obs_stat = obs_diff, direction = "both")
-
-pvalue
-
## # A tibble: 1 x 1
-##   p_value
-##     <dbl>
-## 1       1
-
null_distribution %>%
-  get_ci(level = 0.95, type = "percentile")
-
## # A tibble: 1 x 2
-##   lower_ci upper_ci
-##      <dbl>    <dbl>
-## 1   -0.160    0.170
-
-
-
-

32.4 宏包infer

-

我比较喜欢infer宏包的设计思想,它把统计推断分成了四个步骤

-

-

下图可以更好的帮助我们理解infer的工作流程 -

-
    -
  • specify() 指定解释变量和被解释变量 (y ~ x)

  • -
  • hypothesize() 指定零假设 (比如, independence= yx 彼此独立)

  • -
  • generate() 从基于零假设的平行世界中抽样:

    -
      -
    • 指定每次重抽样的类型,通俗点讲就是数据洗牌:重抽样 type = "bootstrap"(有放回的),对应的零假设往往是 null = "point";重抽样 type = "permute"(无放回的),对应的零假设往往是 null = "independence",指的是 y 和 x 之间彼此独立,因此抽样后会重新排列,也就是说原先 value1-group1 可能变成了 value1-group2(因为我们假定它们是独立的,这种操作不会影响 y 和 x 的关系)
    • -
    • 指定多少组 (reps = 1000)
    • -
  • -
  • calculate() 计算每组(reps)的统计值 (stat = "diff in props")

  • -
  • visualize() 可视化,对比零假设的分布与实际观察值.

  • -
-

下面是我自己对重抽样的理解 -

-
- -
- -
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/intro-R.html b/_book/intro-R.html deleted file mode 100644 index b7f22c9..0000000 --- a/_book/intro-R.html +++ /dev/null @@ -1,1645 +0,0 @@ - - - - - - - 第 2 章 R语言基础 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 2 章 R语言基础

-

R 软件是一个自由、开源软件平台,具有统计分析、可视化和编程的强大功能。 -你可以从这里免费下载。 为了更好的使用 R 软件,我推荐大家使用 RStudio这个 IDE。这里有个在线教程帮助我们熟悉 R 和 RStudio。

-
-

2.1 安装 R

-

我们从官方网站http://cran.r-project.org下载, 网站界面感觉有点朴素:

-

-
-
-

2.2 安装 RStudio

-

安装完R, 还需要安装RStudio。有同学可能要问 R 与 RStudio 是什么关系呢?打个比方吧,R 就像汽车的发动机, RStudio 就是汽车的仪表盘。但我更觉得 R 是有趣的灵魂,而 Rstudio 是好看的皮囊。

-

-

同样,我们从官方网站下载并安装,如果你是苹果系统的用户,选择苹果系统对应的rstudio版本即可。

- -

-
-

-这里有个小小的提示: -

-
    -
  • -电脑不要用中文用户名,否则Rstudio会杠上中文用户名 -
  • -
  • -尽量安装在非系统盘,比如,可以选择安装在D盘 -
  • -
  • -安装路径不要有中文和空格。比如,这样就比较好 -
      -
    • -D:/R -
    • -
    • -D:/Rstudio -
    • -
    -
  • -
-
-
-
-

2.3 开始

-

安装完毕后,从 Windows 开始菜单,点开 RStudio 图标,就打开了 RStudio 的窗口,界面效果如下

-

-

RStudio 的用户界面十分友好,想要运行一段R代码,只需要在 RStudio 控制台面板最下面 (Console)一行内键入R 代码,然后回车即可。比如我们键入1 + 1 并按回车后,RStudio 将显示如下结果

-
1 + 1
-
## [1] 2
-
log(8)
-
## [1] 2.079
-
1:15
-
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
-
-
-

2.4 一切都是对象

-

在R中存储的数据称为对象, R语言数据处理实际上就是不断的创建和操控这些对象。创建一个R对象,首先确定一个名称,然后使用 -赋值操作符 <-,将数据赋值给它。比如,如果想给变量 x 赋值为5,在命令行中可以这样写 x <- 5 ,然后回车.

-
x <- 5
-

当键入x 然后回车,就打印出 x 的值。当然也可以使用命令print(x),结果一样。

-
x
-
## [1] 5
-
x + 2
-
## [1] 7
-
die <- 1:6
-
die
-
## [1] 1 2 3 4 5 6
-
die / 2
-
## [1] 0.5 1.0 1.5 2.0 2.5 3.0
-
die * die
-
## [1]  1  4  9 16 25 36
-
die %*% die
-
##      [,1]
-## [1,]   91
-
die %o% die
-
##      [,1] [,2] [,3] [,4] [,5] [,6]
-## [1,]    1    2    3    4    5    6
-## [2,]    2    4    6    8   10   12
-## [3,]    3    6    9   12   15   18
-## [4,]    4    8   12   16   20   24
-## [5,]    5   10   15   20   25   30
-## [6,]    6   12   18   24   30   36
-
-
-

2.5 数据类型

-

-
    -
  • 数值型
  • -
-
3
-
## [1] 3
-
5000
-
## [1] 5000
-
3e+06
-
## [1] 3e+06
-
class(0.0001)
-
## [1] "numeric"
-
    -
  • 字符串型
  • -
-
"hello"
-
## [1] "hello"
-
"girl"
-
## [1] "girl"
-
"1"     # 注意 1 和 "1" 的区别
-
## [1] "1"
-
class("1")
-
## [1] "character"
-
    -
  • 逻辑型
  • -
-
TRUE
-
## [1] TRUE
-
FALSE
-
## [1] FALSE
-
3 < 4
-
## [1] TRUE
-
class(T)
-
## [1] "logical"
-
3 < 4
-
## [1] TRUE
-
    -
  • 因子型
  • -
-

因子型可以看作是字符串向量的增强版,比如 “Alice”、“Bob”、“Carol”、“Ted” 是四个人名的字符串,因子型就在字符串的基础上,告诉计算机他们每个人都是有官阶层级的,比如 “排长”、“团长”、“师长”、“军长”,也就是说 “Ted” 排第一,“Carol” 排第二,“Bob” 排第三,“Alice” 排最后,相比字符串而言,多了官阶层级信息。

-
fac <- factor(c("Alice", "Bob", "Carol", "Ted"), 
-              levels = c("Ted", "Carol", "Bob", "Alice")
-       )
-fac
-
## [1] Alice Bob   Carol Ted  
-## Levels: Ted Carol Bob Alice
-
class(fac)
-
## [1] "factor"
-

再比如,General上将;Colonel上校;Captain上尉, 如果没有指定层级levels,c("Colonel", "General", "Captain")就是一个常规的字符串向量,若指定了层级levels,这个字符串就有了军衔信息.

-
factor(c("Colonel", "General", "Captain"), 
-              levels = c("General", "Colonel", "Captain")
-       )
-
## [1] Colonel General Captain
-## Levels: General Colonel Captain
-
-
-

2.6 数据结构

-
    -
  • 大家前面看到x <- 1x <- c(1, 2, 3),这就是最简单的数据对象,叫原子型向量
  • -
  • c函数将一组数据构造成向量,要求每个元素用逗 -号分隔,且每个元素的数据类型是一致的,可以把它想象成手里拿着一个糖葫芦
  • -
-
die <- c(2, 4, 3, 1, 5, 7)
-die
-
## [1] 2 4 3 1 5 7
-

长度为 1 的原子型向量

-
x <- c(1) # or
-x <- 1 
-

强制转换

-
vec <- c("R", 1, TRUE)
-class(vec)
-
## [1] "character"
-

你依次输入,就发现三种类型的优先级关系

-
c(TRUE, 1)                   # 被转换成了数值型
-
## [1] 1 1
-
c(      1,  "R")         # 被转换成了字符串型
-
## [1] "1" "R"
-
c(TRUE, 1,  "R")             # 被转换成了字符串型
-
## [1] "TRUE" "1"    "R"
-
    -
  • 大家看到前面die %o% die矩阵类型,矩阵就是二维数组
  • -
  • 可以用matrix 函数创建,可以想象成糖葫芦太多,一个棒子串不下,就多用几根棒子串。
  • -
-
m <- matrix(c(2, 4, 3, 1, 5, 7),
-  nrow = 2, ncol = 3, byrow = TRUE
-)
-
m
-
##      [,1] [,2] [,3]
-## [1,]    2    4    3
-## [2,]    1    5    7
-
    -
  • 数据对象:数组
  • -
  • array 函数生成n维数组,可以想象成我们吃的土司面包一样。
  • -
-
ar <- array(c(11:14, 21:24, 31:34), dim = c(2, 2, 3))
-ar
-
## , , 1
-## 
-##      [,1] [,2]
-## [1,]   11   13
-## [2,]   12   14
-## 
-## , , 2
-## 
-##      [,1] [,2]
-## [1,]   21   23
-## [2,]   22   24
-## 
-## , , 3
-## 
-##      [,1] [,2]
-## [1,]   31   33
-## [2,]   32   34
-
    -
  • 数据对象:列表
  • -
  • c函数创建向量的方式相似,不同的元素用逗号分开。不同的是,列表允许不同的数据类型(数值型,字符型,逻辑型等), 而向量要求每个元素的数据类型必须相同。可以想象成小火车,每节车厢可以装自己喜欢的东西
  • -
-
list1 <- list(100:110, "R", c(2, 4, 3, 1, 5, 7))
-list1
-
## [[1]]
-##  [1] 100 101 102 103 104 105 106 107 108 109 110
-## 
-## [[2]]
-## [1] "R"
-## 
-## [[3]]
-## [1] 2 4 3 1 5 7
-
    -
  • 数据对象:数据框,这个不用想象,它与我们经常用的excel表格一个样
  • -
  • data.frame函数构建
  • -
-
df <- data.frame(
-  name = c("ace", "bob", "carl", "kaite"),
-  age = c(21, 14, 13, 15),
-  sex = c("girl", "boy", "boy", "girl")
-)
-df
-
- -
-

R 对象的数据结构(向量、矩阵、数组、列表和数据框),总结如下

-

-

为了更好地理解相关概念,建议大家阅读Garrett Grolemund的 -hopr这本书 (Grolemund 2014)

-
-
-

2.7 函数

-

R 语言的强大在于使用函数操控各种对象,你可以把对象看作是名词,而函数看作是动词。 -我们用一个简单的例子,sum()来演示函数如何工作的。这个函数的功能正如它的名字一样,对输入的各个对象求和,然后返回求和后的值,你可以在命令行中键入?sum()查看其官方文档。 -sum()后的结果可以直接显示出来,也可以赋名。比如下面代码,首先计算x + 10并赋以名字y, 然后第二行中打印出来这个新创建的对象y

-
y <- sum(x, 10)
-y
-
## [1] 11
-

因为代码的灵活性,可以不断地重新定义对象。只要数据发生改变,原来的代码就会返回新的值。比如,对x重新赋值为 15, 同样运行sum()函数,这次我们不赋值给对象y,而是让它直接显示

-
x <- 15
-sum(x, 10)
-
## [1] 25
-

再比如

-
round(3.14159)
-
## [1] 3
-
mean(1:6)
-
## [1] 3.5
-
n <- 100
-x <- seq(1, n)
-sum(x)
-
## [1] 5050
-
dt <- mtcars[, 1:4]
-head(dt)
-
- -
-
cor(dt)
-
##          mpg     cyl    disp      hp
-## mpg   1.0000 -0.8522 -0.8476 -0.7762
-## cyl  -0.8522  1.0000  0.9020  0.8324
-## disp -0.8476  0.9020  1.0000  0.7909
-## hp   -0.7762  0.8324  0.7909  1.0000
-
-
-

2.8 脚本

-

如果我们已经写好了一段R程序,我们可以保存为脚本文件,脚本文件通常以.R作为文件的后缀名。比如我们可以将刚才创建xy对象的命令,保存为脚本文件my_script.R。 -这样我们可以在其它时间修改和重新运行它。

-

在RStudio中,你可以通过菜单栏依次点击File > New File > R Script 来创建一个新的脚本。 -强烈建议大家在运行代码之前,使用脚本的形式编写和编辑自己的程序,养成这样的习惯后,你今后所有的工作都有案可查,并且具有可重复性。

-

-
    -
  • 点击 Run 或者 Source 运行脚本
  • -
-

-
    -
  • 点击 Run, 运行光标所在行的代码
  • -
  • 点击 Source,从头到尾运行全部代码
  • -
-
-
-

2.9 宏包

-

R 语言的强大还在于各种宏包,一般在The Comprehensive R Archive Network (CRAN)下载安装。宏包扩展了R语言本身的各种功能,也为解决问题提供了各种方案。截至撰写本书时止,CRAN上大约有1.4万个宏包可以使用。但由于各种包接口不统一,语法不一致,也带来一些困扰。为了解决这个问题,RStudio 公司的Hadley Wickham 与其带领的团队推出了tidyverse宏包, tidyverse将常用的宏包整合在一起,并保持了语法的一致性。可以说,tidyverse宏包是R语言入门 学习的首选。 -本书正是基于tidyverse宏包而成的,本书也将通过一些例子不断地展示tidyverse在数据分析和可视化的应用。

-

可以用如下命令安装宏包:

-
# 安装单个包
-install.packages("tidyverse")
-
# 安装多个包
-install.packages(c("ggplot2", "devtools", "dplyr"))
-
-
-

2.10 可能的问题

-
    -
  • 问题1:如果下载速度太慢,可以选择国内镜像,
  • -
-

-

然后再输入命令install.packages("tidyverse"),或者直接指定清华大学镜像

-
install.packages("tidyverse", repos = "http://mirrors.tuna.tsinghua.edu.cn/CRAN")
-
    -
  • 问题2:如果遇到如下报错信息
  • -
-
Warning in install.packages :
-  unable to access index for repository http://cran.rstudio.com/src/contrib:
-  cannot open URL 'http://cran.rstudio.com/src/contrib/PACKAGES'
-

输入下面命令后,再试试

-
options(download.file.method="libcurl")
-

或者打开D:\R\etc\Rprofile.site,添加以下内容:

-
local({r <- getOption("repos")
-       r["CRAN"] <- "http://mirrors.tuna.tsinghua.edu.cn/CRAN"
-       options(repos=r)})
-
-options(download.file.method="libcurl")
-
    -
  • 问题3:如果打开代码是乱码,可以试试修改如下设置
  • -
-

-
    -
  • 问题4:如果每次打开Rstudio非常慢,可以在Rstudio里将这几个选项取消 -

  • -
  • 问题5:如果 Rstudio 打开是空白

  • -
-

很大的可能是你的电脑用户名是中文的,修改用户名再试试

-
    -
  • 问题6:安装过程中提示,我的系统不能兼容 64 位的 Rstudio。
  • -
-

可能你是低版本的windows系统,建议安装旧版本的Rstudio,可以在这里找到旧版本.

-

更多Rstudio的使用,可参考这里introducing-the-rstudio

-
-
-

2.11 如何获取帮助

-
    -
  • 记住和学习所有的函数几乎是不可能的
  • -
  • 打开函数的帮助页面(Rstudio右下面板的Help选项卡)
  • -
-
?sqrt
-?gather
-?spread
-?ggplot2
-?scale
-?map_dfr
-

比如:

-

-
-
-

2.12 R 语言社区

-

R 语言社区非常友好,可以在这里找到你问题的答案

- -
-
-

2.13 延伸阅读

-
    -
  • 如何获取向量a <- c("a", "c", "e")的第二个元素?矩阵和列表的时候,又该如何?
  • -
  • 试试 c(1, FALSE) 和 c("a", TRUE) 会是什么?
  • -
  • 1 == "1" 和 -1 < FALSE 为什么为真? "one" < 2 为什么为假?
  • -
  • R语言里可以构造哪些数据对象?
  • -
  • 数据框可以装载哪些数据类型的数据?
  • -
  • 数据框和列表区别在哪里?
  • -
  • ()与[]区别?
  • -
- -
-
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/intro-ds.html b/_book/intro-ds.html deleted file mode 100644 index bb28500..0000000 --- a/_book/intro-ds.html +++ /dev/null @@ -1,1557 +0,0 @@ - - - - - - - 第 1 章 数据科学与R语言 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 1 章 数据科学与R语言

-

马克思曾说过:“一门科学只有当它达到能够成功运用数学时,才算真正得到发展。”数学为数据科学提供了坚实的理论基础,数据科学也为数学与实际应用之间建立起一个直接的桥梁。

-
-

1.1 什么是数据科学

-

数据科学是综合了统计学、计算机科学和领域知识的交叉学科,其基本内容就是用数据的方法研究科学,用科学的方法研究数据(鄂维南院士)。2010年,Drew Conway画了一张数据科学的韦恩图

-

-

从数据科学所涉及的学科领域来看,其知识结构不仅仅包括数学、统计学、计算机科学、信息科学等在内的基础性理论,还应该包括社会学、物理学、情报学、生物医学等在内的专业性领域理论。

-

(事实上,编程是工具,统计是灵魂,专业是核心,最重要的是最下面那个部分,即专业领域的知识)

-
-
-

1.2 什么是R

-
-

1.2.1 R那些事

-
    -
  • 1992年,新西兰奥克兰大学统计学教授 Ross Ihaka 和 Robert Gentleman,为了方便地给学生教授统计学课程,他们设计开发了R语言(他们名字的首字母都是R)。
  • -
-

-
    -
  • 2000年,R1.0.0 发布
  • -
  • 2004年,第一届国际useR!会议(随后每年举办一次)
  • -
  • 2005年,ggplot2宏包(2018.8 - 2019.8下载量超过 1.3 亿次)
  • -
  • 2012年,R2.15.2 发布
  • -
  • 2013年,R3.0.2 发布, CRAN上的宏包数量5026个
  • -
  • 2016年,Rstudio公司推出 tidyverse 宏包(数据科学当前最流行的R宏包)
  • -
  • 2017年,R3.4.1 发布,CRAN上的宏包数量10875个
  • -
  • 2019年,R3.6.1 发布,CRAN上的宏包数量15102个
  • -
  • 2020年,R4.0.0 发布,CRAN上的宏包数量16054个
  • -
-

想了解R语言的发展历史,可阅读The History of R

-
-
-

1.2.2 R是什么

-

官网定义:https://www.r-project.org/

-

-

R语言是用于统计分析、图形表示和报告的编程语言:

-
    -
  • R 是一个统计编程语言(statistical programming)
  • -
  • R 可运行于多种平台之上,包括Windows、UNIX 和 Mac OS X
  • -
  • R 拥有顶尖水准的制图功能
  • -
  • R 是免费的
  • -
  • R 应用广泛,拥有丰富的库包
  • -
  • 活跃的社区
  • -
-
-
-

1.2.3 R语言发展趋势

-

-

TIOBE index

-
-
-

1.2.4 R路上的大神

-

2019 年 8 月,国际统计学年会将考普斯总统奖(The Committee of Presidents of Statistical Societies -Awards,简称 COPSS 奖,被誉为统计学的诺贝尔奖)颁给 tidyverse 的作者 Hadley Wickham,这充分说明R语言得到了学术界的肯定和认可,我相信未来它在自然科学、社会科学和工业领域中的应用前景会非常光明。

-

- -
-
-
-

1.3 R能干什么

-
-

1.3.1 数据科学流程

-

Hadley Wickham将数据科学流程分解成6个环节

-

-

即数据导入、数据规整、数据处理、可视化、建模以及形成可重复性报告,整个分析和探索过程都在一个程序代码中完成,这种方式对训练我们的数据思维非常有帮助。

-
-
-

1.3.2 tidyverse家族

-

-

tidyverse套餐,其主要成员包括

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
功能宏包
有颜值担当ggplot2
数据处理王者dplyr
数据转换专家tidyr
数据载入利器readr
循环加速器purrr
强化数据框tibble
字符串处理stringr
因子处理forcats
-
-
-

1.3.3 R & tidyverse 四大优势

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
序号内容代码演示
1统计Download 01_stats.R
2可视化Download 02_visual.R
3探索性分析Download 03_eda.R
4可重复性报告Download 04_reproducible.Rmd
-看了这些代码,可能第一眼感觉是这样的 -
-图片来自电影《降临》 -

-图 1.1: 图片来自电影《降临》 -

-
-但我更希望这门课结束后,大家的感觉是这样的 -
-图片来自美剧《权力的游戏》 -

-图 1.2: 图片来自美剧《权力的游戏》 -

-
-
-
-
-

1.4 为什么选择 R

-
-

1.4.1 社会科学需要统计

-

-

看到这图,有同学可能会有同感。我认为,一个学科之所以成为一门科学,必须要有数学作 -为基础。我说这话,相信很多人会反驳我。我接受反驳。但我还是会坚持我的观点。很多同学在选专业的时候,导师会说,这个专业不会用太多数学,事实上被忽悠了,尤其在(新文科建设、跨学科研究)背景下,社会科学(包括心理学、语言学)都在交叉融合,都需要用数学和计算机。所以,我们不是学统计的,但需要用统计。一个更残酷的现实,用统计的,往往不是学统计的

-
-
-

1.4.2 社会科学需要可视化

-

-

我们人,都是视觉动物,都喜欢看漂亮美好的东西。如果文章或者报告太多表格,不会 -给人留下深刻印象;相反,用图片,重点突出、观点明确,一图胜千言,很容易传递信息。当然,前提是,画图要画得好。 事实上,可视化,一半是科学、一半是艺术。

-

又一个残酷的现实,在这个看脸的时代,没有好看的皮囊,没人愿意了解你的灵魂。

-
-
-

1.4.3 社会科学需要编程

-

为什么要统计编程,回答这个问题,相 -当于回答,为什么不能用 excel 做数据分析?画个图说明下

-

-

对于数据量不大,或者复杂程度不高的需求来说,excel很方便也很直观。但随着数据量或复杂程度不断增大,excel解决起来难度系数就陡增,或者无法搞定,这就需要借助编程完成。也就是说,掌握了编程技能,对于简单的问题和复杂的问题,难度系数是差不多的。

-

所以,第三个残酷的现实:现在小学生都开始学编程了。 -

-
-
-

1.4.4 社会科学需要可重复性

-

- - -

科学的可重复性危机,已经成为举世瞩目的热点议题。 -科研结果可重复性低的原因很多很多。不可重复,说明事情没那么简单。 -或许,科学固有不确定性,但我们需要从研究方法、实验 -设计和统计方法方面改进。 -所以,第四个残酷的现实:科学研究的方向是(开放科学 -框架 (Open Science Framework, OSF)), 正如 Nature 期刊 -要求的一样,需要公布原始数据和如何分析的代码

-
-
-
-

1.5 R 语言之美,你值得拥有

-

我想,R语言之美,你值得拥有,因为它可以缓解你的压力

-
    -
  • 首先,R语言做统计分析,是它的看家本领,非常好用 (可以缓解第一个残酷)
  • -
  • 其次,ggplot2画图,是颜值担当,非常好看,一直被模仿,从未被超越(可以解决第二个残酷)
  • -
  • tidyverse来编程,代码可读性强,用的是人类语言, 非常好学 (在解决第三个残酷现实的同时,还让你感受到乐趣)
  • -
  • 关于第四点,需要特别说明下,Rmarkdown 并不能保证研究结果可重复性,因为影响结果可重复性的原因很多很多,这不是程序语言能解决的事。但是,R语言能帮你的,就是减少低级的计算错误和复制粘贴等繁琐工作,可以生成html、word或者pdf 格式的可重复性报告文档,可以方便快捷做幻灯片、海报、论文、书籍、网页。所以还是挺好玩的.
  • -
-

所以,R语言之美,体现在好用、好看、好学、好玩。

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
序号内容特性评价
1统计分析看家本领好用
2ggplot2画图颜值担当好看
3tidyverse语法人类语言好学
4可重复性报告方便快捷好玩
-
-
-

1.6 当今最值得学习的数据科学语言

-

2016年权威机构KDnuggets做过调研,显示数据科学领域最受欢迎的工具,是python和R两种语言

-

-

事实上,python和R都是非常强大的工具,两者各有优劣,作为初学者,究竟选择谁? -可以参考这篇文章,这篇文章旗帜鲜明地指出,R语言,是当今最值得学习的数据科学语言。为此做了详细的对比,并罗列了很多理由,其中的3点理由很重要,我圈了出来(传统的统计学,贝叶斯新统计、数据可视化)。

-

-

事实上,数据科学,是和数据打交道(定义:用科学的方法研究数据,用数据的方法研究科学),目的要利用(计算机和统计知识)推动学科发展,不是把大家培养成程序员。

-

所以,我看完这篇文章的感受是:

-
    -
  • 第一、在数据科学领域,python能做的,R也能做,甚至更好,比如可视化。
  • -
  • 第二、有一定R基础后,对统计学的学习帮助很大,这是 python 语言不具备的
  • -
  • 第三、我觉得 R的语法 更符合人的思维方式。尤其 tidyverse -
      -
    • 语法一致性(学习一个宏包,可以帮助理解其他宏包)
    • -
    • 代码可读性,接近人类语言 ( %>% 太酷了 )
    • -
  • -
- -
-
-

1.7 一见钟情,还是相见恨晚?

-

-
- -
-
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/lazyman.html b/_book/lazyman.html deleted file mode 100644 index 97f3a31..0000000 --- a/_book/lazyman.html +++ /dev/null @@ -1,1572 +0,0 @@ - - - - - - - 第 58 章 懒人系列 | 数据科学中的 R 语言 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- -
- -
-
- - -
-
- -
-
-

第 58 章 懒人系列

-

R社区上很多大神,贡献了很多非常优秀的工具,节省了我们的时间,也给我们的生活增添了无限乐趣。我平时逛github的时候整理了一些,现在分享出来供像我一样的懒人用,因此本文档叫“懒人系列”。欢迎大家补充。

-
-

58.1 列名太乱了

-
library(tidyverse)
-library(janitor)
-## install.packages("janitor")
-## https://github.com/sfirke/janitor
-
fake_raw <- tibble::tribble(
-  ~id, ~`count/num`, ~W.t, ~Case, ~`time--d`, ~`%percent`,
-  1L, "china", 3L, "w", 5L, 25L,
-  2L, "us", 4L, "f", 6L, 34L,
-  3L, "india", 5L, "q", 8L, 78L
-)
-fake_raw
-
## # A tibble: 3 x 6
-##      id `count/num`   W.t Case  `time--d` `%percent`
-##   <int> <chr>       <int> <chr>     <int>      <int>
-## 1     1 china           3 w             5         25
-## 2     2 us              4 f             6         34
-## 3     3 india           5 q             8         78
-
fake_raw %>% janitor::clean_names()
-
## # A tibble: 3 x 6
-##      id count_num   w_t case  time_d percent_percent
-##   <int> <chr>     <int> <chr>  <int>           <int>
-## 1     1 china         3 w          5              25
-## 2     2 us            4 f          6              34
-## 3     3 india         5 q          8              78
-
-
-

58.2 比count()更懂我的心

-
mtcars %>%
-  dplyr::count(cyl)
-
## # A tibble: 3 x 2
-##     cyl     n
-##   <dbl> <int>
-## 1     4    11
-## 2     6     7
-## 3     8    14
-
mtcars %>%
-  janitor::tabyl(cyl)
-
##  cyl  n percent
-##    4 11  0.3438
-##    6  7  0.2188
-##    8 14  0.4375
-
-
-

58.3 比distinct()更知我心

-
df <- tribble(
-  ~id, ~date, ~store_id, ~sales,
-  1, "2020-03-01", 1, 100,
-  2, "2020-03-01", 2, 100,
-  3, "2020-03-01", 3, 150,
-  4, "2020-03-02", 1, 110,
-  5, "2020-03-02", 3, 101
-)
-
-df %>%
-  janitor::get_dupes(store_id)
-
## # A tibble: 4 x 5
-##   store_id dupe_count    id date       sales
-##      <dbl>      <int> <dbl> <chr>      <dbl>
-## 1        1          2     1 2020-03-01   100
-## 2        1          2     4 2020-03-02   110
-## 3        3          2     3 2020-03-01   150
-## 4        3          2     5 2020-03-02   101
-
df %>%
-  janitor::get_dupes(date)
-
## # A tibble: 5 x 5
-##   date       dupe_count    id store_id sales
-##   <chr>           <int> <dbl>    <dbl> <dbl>
-## 1 2020-03-01          3     1        1   100
-## 2 2020-03-01          3     2        2   100
-## 3 2020-03-01          3     3        3   150
-## 4 2020-03-02          2     4        1   110
-## 5 2020-03-02          2     5        3   101
-
-
-

58.4 代码太乱了,谁帮我整理下

-
## install.packages("styler")
-

-

安装后,然后这两个地方点两下,就发现你的代码整齐很多了。或者直接输入

-
styler:::style_active_file()
-
-
-

58.5 谁帮我敲模型的公式

-
library(equatiomatic)
-## https://github.com/datalorax/equatiomatic
-
mod1 <- lm(mpg ~ cyl + disp, mtcars)
-
extract_eq(mod1)
-

\[ -\operatorname{mpg} = \alpha + \beta_{1}(\operatorname{cyl}) + \beta_{2}(\operatorname{disp}) + \epsilon -\]

-
extract_eq(mod1, use_coefs = TRUE)
-

\[ -\operatorname{mpg} = 34.66 - 1.59(\operatorname{cyl}) - 0.02(\operatorname{disp}) + \epsilon -\]

-
-
-

58.6 模型有了,不知道怎么写论文?

-
library(report)
-## https://github.com/easystats/report
-
model <- lm(Sepal.Length ~ Species, data = iris)
-report(model)
-

We fitted a linear model (estimated using OLS) to predict Sepal.Length with Species (formula = Sepal.Length ~ Species). Standardized parameters were obtained by fitting the model on a standardized version of the dataset. Effect sizes were labelled following Cohen’s (1988) recommendations.

-

The model explains a significant and substantial proportion of variance (R2 = 0.62, F(2, 147) = 119.26, p < .001, adj. R2 = 0.61). The model’s intercept, corresponding to Sepal.Length = 0 and Species = setosa, is at 5.01 (SE = 0.07, 95% CI [4.86, 5.15], p < .001). Within this model:

-
    -
  • The effect of Species [versicolor] is positive and can be considered as large and significant (beta = 0.93, SE = 0.10, 95% CI [0.73, 1.13], std. beta = 1.12, p < .001).
  • -
  • The effect of Species [virginica] is positive and can be considered as large and significant (beta = 1.58, SE = 0.10, 95% CI [1.38, 1.79], std. beta = 1.91, p < .001).
  • -
-
-
-

58.7 模型评估一步到位

-
library(performance)
-
-model <- lm(mpg ~ wt * cyl + gear, data = mtcars)
-performance::check_model(model)
-
-
-

58.8 统计表格不用愁

-
library(gtsummary)
-## https://github.com/ddsjoberg/gtsummary
-
-
-gtsummary::trial %>%
-  dplyr::select(trt, age, grade, response) %>%
-  gtsummary::tbl_summary(
-    by = trt,
-    missing = "no"
-  ) %>%
-  gtsummary::add_p() %>%
-  gtsummary::add_overall() %>%
-  gtsummary::add_n() %>%
-  gtsummary::bold_labels()
-

直接复制到论文即可

-
t1 <-
-  glm(response ~ trt + age + grade, trial, family = binomial) %>%
-  gtsummary::tbl_regression(exponentiate = TRUE)
-
-t2 <-
-  survival::coxph(survival::Surv(ttdeath, death) ~ trt + grade + age, trial) %>%
-  gtsummary::tbl_regression(exponentiate = TRUE)
-
-
-
-gtsummary::tbl_merge(
-  tbls = list(t1, t2),
-  tab_spanner = c("**Tumor Response**", "**Time to Death**")
-)
-
-
-

58.9 统计结果写图上

-
library(ggplot2)
-library(statsExpressions)
-# https://github.com/IndrajeetPatil/statsExpressions
-
-
-ggplot(mtcars, aes(x = mpg, y = wt)) +
-  geom_point() +
-  geom_smooth(method = "lm") +
-  labs(
-    title = "Spearman's rank correlation coefficient",
-    subtitle = expr_corr_test(mtcars, mpg, wt, type = "nonparametric")
-  )
-
-
-

58.10 正则表达式太南了

-
library(inferregex)
-## remotes::install_github("daranzolin/inferregex")
-
s <- "abcd-9999-ab9"
-infer_regex(s)$regex
-
## [1] "^[a-z]{4}-\\d{4}-[a-z]{2}\\d$"
-

有了它,妈妈再也不担心我的正则表达式了

-
-
-

58.11 颜控怎么配色?

-
library(ggthemr) ## devtools::install_github('cttobin/ggthemr')
-ggthemr("dust")
-
mtcars %>%
-  mutate(cyl = factor(cyl)) %>%
-  ggplot(aes(x = mpg, fill = cyl, colour = cyl)) +
-  geom_density(alpha = 0.75) +
-  labs(fill = "Cylinders", colour = "Cylinders", x = "MPG", y = "Density") +
-  legend_top()
-

-

用完别忘了

-
ggthemr_reset()
-
-
-

58.12 画图颜色好看不

-

scales也是大神的作品,功能多多

-
## https://github.com/r-lib/scales
-library(scales)
-
-show_col(viridis_pal()(10))
-

-

不推荐个人配色,因为我们不专业。直接用专业的配色网站 -colorbrewer

-

先看看颜色,再选择

-
-
-

58.13 宏包太多

-
library(pacman)
-## p_load(lattice, foreign, boot, rpart)
-

唉,这个library()都要偷懒,真服了你们了

-
-
-

58.14 犹抱琵琶半遮面

-
## https://github.com/EmilHvitfeldt/gganonymize
-library(ggplot2)
-library(gganonymize)
-
-ggg <-
-  ggplot(mtcars, aes(as.factor(cyl))) +
-  geom_bar() +
-  labs(
-    title = "Test title",
-    subtitle = "Test subtitle, this one have a lot lot lot lot lot more text then the rest",
-    caption = "Test caption",
-    tag = 1
-  ) +
-  facet_wrap(~vs)
-
-gganonomize(ggg)
-

-

你可以看我的图,但就不想告诉你图什么意思,因为我加密了

-
-
-

58.15 整理Rmarkdown

-
# remotes::install_github("tjmahr/WrapRmd")
-# remotes::install_github("fkeck/quickview")
-# remotes::install_github("mwip/beautifyR")
-
-
-

58.16 如何有效的提问

-

直接看官方网站,这里不举例了

-
## install.packages("reprex")
-## https://reprex.tidyverse.org/
-
-
-

58.17 程序结束后记得提醒我

-
## beepr::beep(sound = "mario")
-

你听到了声音吗?

-
-
-

58.18 多张图摆放

-
library(patchwork)
-p1 <- ggplot(mtcars) +
-  geom_point(aes(mpg, disp))
-p2 <- ggplot(mtcars) +
-  geom_boxplot(aes(gear, disp, group = gear))
-p3 <- ggplot(mtcars) +
-  geom_smooth(aes(disp, qsec))
-p1 + p2 + p3
-
-
-

58.19 缺失值处理

-
library(naniar)
-## https://github.com/njtierney/naniar
-
-airquality %>%
-  group_by(Month) %>%
-  naniar::miss_var_summary()
-
## # A tibble: 25 x 4
-## # Groups:   Month [5]
-##    Month variable n_miss pct_miss
-##    <int> <chr>     <int>    <dbl>
-##  1     5 Ozone         5     16.1
-##  2     5 Solar.R       4     12.9
-##  3     5 Wind          0      0  
-##  4     5 Temp          0      0  
-##  5     5 Day           0      0  
-##  6     6 Ozone        21     70  
-##  7     6 Solar.R       0      0  
-##  8     6 Wind          0      0  
-##  9     6 Temp          0      0  
-## 10     6 Day           0      0  
-## # ... with 15 more rows
-
-
-

58.20 看看数据什么情况

-
library(visdat)
-
-vis_dat(airquality)
-
-
-

58.21 管道都不想

-

管道都不想写, 写代码还有美感?

-
## library(nakedpipe)
-
-
-

58.22 各种插件,任君选取

-
## https://github.com/daattali/addinslist
- -
-
- - - -
- -
-
-
- - -
-
- - - - - - - - - - - - - - - - diff --git a/_book/libs/gitbook/css/fontawesome/fontawesome-webfont.ttf b/_book/libs/gitbook/css/fontawesome/fontawesome-webfont.ttf deleted file mode 100644 index 35acda2..0000000 Binary files a/_book/libs/gitbook/css/fontawesome/fontawesome-webfont.ttf and /dev/null differ diff --git a/_book/libs/gitbook/css/plugin-bookdown.css b/_book/libs/gitbook/css/plugin-bookdown.css deleted file mode 100644 index 0ae0f66..0000000 --- a/_book/libs/gitbook/css/plugin-bookdown.css +++ /dev/null @@ -1,99 +0,0 @@ -.book .book-header h1 { - padding-left: 20px; - padding-right: 20px; -} -.book .book-header.fixed { - position: fixed; - right: 0; - top: 0; - left: 0; - border-bottom: 1px solid rgba(0,0,0,.07); -} -span.search-highlight { - background-color: #ffff88; -} -@media (min-width: 600px) { - .book.with-summary .book-header.fixed { - left: 300px; - } -} -@media (max-width: 1240px) { - .book .book-body.fixed { - top: 50px; - } - .book .book-body.fixed .body-inner { - top: auto; - } -} -@media (max-width: 600px) { - .book.with-summary .book-header.fixed { - left: calc(100% - 60px); - min-width: 300px; - } - .book.with-summary .book-body { - transform: none; - left: calc(100% - 60px); - min-width: 300px; - } - .book .book-body.fixed { - top: 0; - } -} - -.book .book-body.fixed .body-inner { - top: 50px; -} -.book .book-body .page-wrapper .page-inner section.normal sub, .book .book-body .page-wrapper .page-inner section.normal sup { - font-size: 85%; -} - -@media print { - .book .book-summary, .book .book-body .book-header, .fa { - display: none !important; - } - .book .book-body.fixed { - left: 0px; - } - .book .book-body,.book .book-body .body-inner, .book.with-summary { - overflow: visible !important; - } -} -.kable_wrapper { - border-spacing: 20px 0; - border-collapse: separate; - border: none; - margin: auto; -} -.kable_wrapper > tbody > tr > td { - vertical-align: top; -} -.book .book-body .page-wrapper .page-inner section.normal table 
tr.header { - border-top-width: 2px; -} -.book .book-body .page-wrapper .page-inner section.normal table tr:last-child td { - border-bottom-width: 2px; -} -.book .book-body .page-wrapper .page-inner section.normal table td, .book .book-body .page-wrapper .page-inner section.normal table th { - border-left: none; - border-right: none; -} -.book .book-body .page-wrapper .page-inner section.normal table.kable_wrapper > tbody > tr, .book .book-body .page-wrapper .page-inner section.normal table.kable_wrapper > tbody > tr > td { - border-top: none; -} -.book .book-body .page-wrapper .page-inner section.normal table.kable_wrapper > tbody > tr:last-child > td { - border-bottom: none; -} - -div.theorem, div.lemma, div.corollary, div.proposition, div.conjecture { - font-style: italic; -} -span.theorem, span.lemma, span.corollary, span.proposition, span.conjecture { - font-style: normal; -} -div.proof>*:last-child:after { - content: "\25a2"; - float: right; -} -.header-section-number { - padding-right: .5em; -} diff --git a/_book/libs/gitbook/css/plugin-clipboard.css b/_book/libs/gitbook/css/plugin-clipboard.css deleted file mode 100644 index 6844a70..0000000 --- a/_book/libs/gitbook/css/plugin-clipboard.css +++ /dev/null @@ -1,18 +0,0 @@ -div.sourceCode { - position: relative; -} - -.copy-to-clipboard-button { - position: absolute; - right: 0; - top: 0; - visibility: hidden; -} - -.copy-to-clipboard-button:focus { - outline: 0; -} - -div.sourceCode:hover > .copy-to-clipboard-button { - visibility: visible; -} diff --git a/_book/libs/gitbook/css/plugin-fontsettings.css b/_book/libs/gitbook/css/plugin-fontsettings.css deleted file mode 100644 index 87236b4..0000000 --- a/_book/libs/gitbook/css/plugin-fontsettings.css +++ /dev/null @@ -1,292 +0,0 @@ -/* - * Theme 1 - */ -.color-theme-1 .dropdown-menu { - background-color: #111111; - border-color: #7e888b; -} -.color-theme-1 .dropdown-menu .dropdown-caret .caret-inner { - border-bottom: 9px solid #111111; -} -.color-theme-1 
.dropdown-menu .buttons { - border-color: #7e888b; -} -.color-theme-1 .dropdown-menu .button { - color: #afa790; -} -.color-theme-1 .dropdown-menu .button:hover { - color: #73553c; -} -/* - * Theme 2 - */ -.color-theme-2 .dropdown-menu { - background-color: #2d3143; - border-color: #272a3a; -} -.color-theme-2 .dropdown-menu .dropdown-caret .caret-inner { - border-bottom: 9px solid #2d3143; -} -.color-theme-2 .dropdown-menu .buttons { - border-color: #272a3a; -} -.color-theme-2 .dropdown-menu .button { - color: #62677f; -} -.color-theme-2 .dropdown-menu .button:hover { - color: #f4f4f5; -} -.book .book-header .font-settings .font-enlarge { - line-height: 30px; - font-size: 1.4em; -} -.book .book-header .font-settings .font-reduce { - line-height: 30px; - font-size: 1em; -} -.book.color-theme-1 .book-body { - color: #704214; - background: #f3eacb; -} -.book.color-theme-1 .book-body .page-wrapper .page-inner section { - background: #f3eacb; -} -.book.color-theme-2 .book-body { - color: #bdcadb; - background: #1c1f2b; -} -.book.color-theme-2 .book-body .page-wrapper .page-inner section { - background: #1c1f2b; -} -.book.font-size-0 .book-body .page-inner section { - font-size: 1.2rem; -} -.book.font-size-1 .book-body .page-inner section { - font-size: 1.4rem; -} -.book.font-size-2 .book-body .page-inner section { - font-size: 1.6rem; -} -.book.font-size-3 .book-body .page-inner section { - font-size: 2.2rem; -} -.book.font-size-4 .book-body .page-inner section { - font-size: 4rem; -} -.book.font-family-0 { - font-family: Georgia, serif; -} -.book.font-family-1 { - font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; -} -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal { - color: #704214; -} -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal a { - color: inherit; -} -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal h1, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal h2, 
-.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal h3, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal h4, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal h5, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal h6 { - color: inherit; -} -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal h1, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal h2 { - border-color: inherit; -} -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal h6 { - color: inherit; -} -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal hr { - background-color: inherit; -} -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal blockquote { - border-color: #c4b29f; - opacity: 0.9; -} -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code { - background: #fdf6e3; - color: #657b83; - border-color: #f8df9c; -} -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal .highlight { - background-color: inherit; -} -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal table th, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal table td { - border-color: #f5d06c; -} -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal table tr { - color: inherit; - background-color: #fdf6e3; - border-color: #444444; -} -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal table tr:nth-child(2n) { - background-color: #fbeecb; -} -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal { - color: #bdcadb; -} -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal a { - color: #3eb1d0; -} -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal h1, -.book.color-theme-2 .book-body .page-wrapper 
.page-inner section.normal h2, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal h3, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal h4, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal h5, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal h6 { - color: #fffffa; -} -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal h1, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal h2 { - border-color: #373b4e; -} -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal h6 { - color: #373b4e; -} -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal hr { - background-color: #373b4e; -} -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal blockquote { - border-color: #373b4e; -} -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code { - color: #9dbed8; - background: #2d3143; - border-color: #2d3143; -} -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal .highlight { - background-color: #282a39; -} -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal table th, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal table td { - border-color: #3b3f54; -} -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal table tr { - color: #b6c2d2; - background-color: #2d3143; - border-color: #3b3f54; -} -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal table tr:nth-child(2n) { - background-color: #35394b; -} -.book.color-theme-1 .book-header { - color: #afa790; - background: transparent; -} -.book.color-theme-1 .book-header .btn { - color: #afa790; -} -.book.color-theme-1 .book-header .btn:hover { - color: #73553c; - background: none; -} -.book.color-theme-1 .book-header h1 { - color: #704214; -} 
-.book.color-theme-2 .book-header { - color: #7e888b; - background: transparent; -} -.book.color-theme-2 .book-header .btn { - color: #3b3f54; -} -.book.color-theme-2 .book-header .btn:hover { - color: #fffff5; - background: none; -} -.book.color-theme-2 .book-header h1 { - color: #bdcadb; -} -.book.color-theme-1 .book-body .navigation { - color: #afa790; -} -.book.color-theme-1 .book-body .navigation:hover { - color: #73553c; -} -.book.color-theme-2 .book-body .navigation { - color: #383f52; -} -.book.color-theme-2 .book-body .navigation:hover { - color: #fffff5; -} -/* - * Theme 1 - */ -.book.color-theme-1 .book-summary { - color: #afa790; - background: #111111; - border-right: 1px solid rgba(0, 0, 0, 0.07); -} -.book.color-theme-1 .book-summary .book-search { - background: transparent; -} -.book.color-theme-1 .book-summary .book-search input, -.book.color-theme-1 .book-summary .book-search input:focus { - border: 1px solid transparent; -} -.book.color-theme-1 .book-summary ul.summary li.divider { - background: #7e888b; - box-shadow: none; -} -.book.color-theme-1 .book-summary ul.summary li i.fa-check { - color: #33cc33; -} -.book.color-theme-1 .book-summary ul.summary li.done > a { - color: #877f6a; -} -.book.color-theme-1 .book-summary ul.summary li a, -.book.color-theme-1 .book-summary ul.summary li span { - color: #877f6a; - background: transparent; - font-weight: normal; -} -.book.color-theme-1 .book-summary ul.summary li.active > a, -.book.color-theme-1 .book-summary ul.summary li a:hover { - color: #704214; - background: transparent; - font-weight: normal; -} -/* - * Theme 2 - */ -.book.color-theme-2 .book-summary { - color: #bcc1d2; - background: #2d3143; - border-right: none; -} -.book.color-theme-2 .book-summary .book-search { - background: transparent; -} -.book.color-theme-2 .book-summary .book-search input, -.book.color-theme-2 .book-summary .book-search input:focus { - border: 1px solid transparent; -} -.book.color-theme-2 .book-summary ul.summary 
li.divider { - background: #272a3a; - box-shadow: none; -} -.book.color-theme-2 .book-summary ul.summary li i.fa-check { - color: #33cc33; -} -.book.color-theme-2 .book-summary ul.summary li.done > a { - color: #62687f; -} -.book.color-theme-2 .book-summary ul.summary li a, -.book.color-theme-2 .book-summary ul.summary li span { - color: #c1c6d7; - background: transparent; - font-weight: 600; -} -.book.color-theme-2 .book-summary ul.summary li.active > a, -.book.color-theme-2 .book-summary ul.summary li a:hover { - color: #f4f4f5; - background: #252737; - font-weight: 600; -} diff --git a/_book/libs/gitbook/css/plugin-highlight.css b/_book/libs/gitbook/css/plugin-highlight.css deleted file mode 100644 index 2aabd3d..0000000 --- a/_book/libs/gitbook/css/plugin-highlight.css +++ /dev/null @@ -1,426 +0,0 @@ -.book .book-body .page-wrapper .page-inner section.normal pre, -.book .book-body .page-wrapper .page-inner section.normal code { - /* http://jmblog.github.com/color-themes-for-google-code-highlightjs */ - /* Tomorrow Comment */ - /* Tomorrow Red */ - /* Tomorrow Orange */ - /* Tomorrow Yellow */ - /* Tomorrow Green */ - /* Tomorrow Aqua */ - /* Tomorrow Blue */ - /* Tomorrow Purple */ -} -.book .book-body .page-wrapper .page-inner section.normal pre .hljs-comment, -.book .book-body .page-wrapper .page-inner section.normal code .hljs-comment, -.book .book-body .page-wrapper .page-inner section.normal pre .hljs-title, -.book .book-body .page-wrapper .page-inner section.normal code .hljs-title { - color: #8e908c; -} -.book .book-body .page-wrapper .page-inner section.normal pre .hljs-variable, -.book .book-body .page-wrapper .page-inner section.normal code .hljs-variable, -.book .book-body .page-wrapper .page-inner section.normal pre .hljs-attribute, -.book .book-body .page-wrapper .page-inner section.normal code .hljs-attribute, -.book .book-body .page-wrapper .page-inner section.normal pre .hljs-tag, -.book .book-body .page-wrapper .page-inner section.normal code 
.hljs-tag, -.book .book-body .page-wrapper .page-inner section.normal pre .hljs-regexp, -.book .book-body .page-wrapper .page-inner section.normal code .hljs-regexp, -.book .book-body .page-wrapper .page-inner section.normal pre .ruby .hljs-constant, -.book .book-body .page-wrapper .page-inner section.normal code .ruby .hljs-constant, -.book .book-body .page-wrapper .page-inner section.normal pre .xml .hljs-tag .hljs-title, -.book .book-body .page-wrapper .page-inner section.normal code .xml .hljs-tag .hljs-title, -.book .book-body .page-wrapper .page-inner section.normal pre .xml .hljs-pi, -.book .book-body .page-wrapper .page-inner section.normal code .xml .hljs-pi, -.book .book-body .page-wrapper .page-inner section.normal pre .xml .hljs-doctype, -.book .book-body .page-wrapper .page-inner section.normal code .xml .hljs-doctype, -.book .book-body .page-wrapper .page-inner section.normal pre .html .hljs-doctype, -.book .book-body .page-wrapper .page-inner section.normal code .html .hljs-doctype, -.book .book-body .page-wrapper .page-inner section.normal pre .css .hljs-id, -.book .book-body .page-wrapper .page-inner section.normal code .css .hljs-id, -.book .book-body .page-wrapper .page-inner section.normal pre .css .hljs-class, -.book .book-body .page-wrapper .page-inner section.normal code .css .hljs-class, -.book .book-body .page-wrapper .page-inner section.normal pre .css .hljs-pseudo, -.book .book-body .page-wrapper .page-inner section.normal code .css .hljs-pseudo { - color: #c82829; -} -.book .book-body .page-wrapper .page-inner section.normal pre .hljs-number, -.book .book-body .page-wrapper .page-inner section.normal code .hljs-number, -.book .book-body .page-wrapper .page-inner section.normal pre .hljs-preprocessor, -.book .book-body .page-wrapper .page-inner section.normal code .hljs-preprocessor, -.book .book-body .page-wrapper .page-inner section.normal pre .hljs-pragma, -.book .book-body .page-wrapper .page-inner section.normal code .hljs-pragma, 
-.book .book-body .page-wrapper .page-inner section.normal pre .hljs-built_in, -.book .book-body .page-wrapper .page-inner section.normal code .hljs-built_in, -.book .book-body .page-wrapper .page-inner section.normal pre .hljs-literal, -.book .book-body .page-wrapper .page-inner section.normal code .hljs-literal, -.book .book-body .page-wrapper .page-inner section.normal pre .hljs-params, -.book .book-body .page-wrapper .page-inner section.normal code .hljs-params, -.book .book-body .page-wrapper .page-inner section.normal pre .hljs-constant, -.book .book-body .page-wrapper .page-inner section.normal code .hljs-constant { - color: #f5871f; -} -.book .book-body .page-wrapper .page-inner section.normal pre .ruby .hljs-class .hljs-title, -.book .book-body .page-wrapper .page-inner section.normal code .ruby .hljs-class .hljs-title, -.book .book-body .page-wrapper .page-inner section.normal pre .css .hljs-rules .hljs-attribute, -.book .book-body .page-wrapper .page-inner section.normal code .css .hljs-rules .hljs-attribute { - color: #eab700; -} -.book .book-body .page-wrapper .page-inner section.normal pre .hljs-string, -.book .book-body .page-wrapper .page-inner section.normal code .hljs-string, -.book .book-body .page-wrapper .page-inner section.normal pre .hljs-value, -.book .book-body .page-wrapper .page-inner section.normal code .hljs-value, -.book .book-body .page-wrapper .page-inner section.normal pre .hljs-inheritance, -.book .book-body .page-wrapper .page-inner section.normal code .hljs-inheritance, -.book .book-body .page-wrapper .page-inner section.normal pre .hljs-header, -.book .book-body .page-wrapper .page-inner section.normal code .hljs-header, -.book .book-body .page-wrapper .page-inner section.normal pre .ruby .hljs-symbol, -.book .book-body .page-wrapper .page-inner section.normal code .ruby .hljs-symbol, -.book .book-body .page-wrapper .page-inner section.normal pre .xml .hljs-cdata, -.book .book-body .page-wrapper .page-inner section.normal code 
.xml .hljs-cdata { - color: #718c00; -} -.book .book-body .page-wrapper .page-inner section.normal pre .css .hljs-hexcolor, -.book .book-body .page-wrapper .page-inner section.normal code .css .hljs-hexcolor { - color: #3e999f; -} -.book .book-body .page-wrapper .page-inner section.normal pre .hljs-function, -.book .book-body .page-wrapper .page-inner section.normal code .hljs-function, -.book .book-body .page-wrapper .page-inner section.normal pre .python .hljs-decorator, -.book .book-body .page-wrapper .page-inner section.normal code .python .hljs-decorator, -.book .book-body .page-wrapper .page-inner section.normal pre .python .hljs-title, -.book .book-body .page-wrapper .page-inner section.normal code .python .hljs-title, -.book .book-body .page-wrapper .page-inner section.normal pre .ruby .hljs-function .hljs-title, -.book .book-body .page-wrapper .page-inner section.normal code .ruby .hljs-function .hljs-title, -.book .book-body .page-wrapper .page-inner section.normal pre .ruby .hljs-title .hljs-keyword, -.book .book-body .page-wrapper .page-inner section.normal code .ruby .hljs-title .hljs-keyword, -.book .book-body .page-wrapper .page-inner section.normal pre .perl .hljs-sub, -.book .book-body .page-wrapper .page-inner section.normal code .perl .hljs-sub, -.book .book-body .page-wrapper .page-inner section.normal pre .javascript .hljs-title, -.book .book-body .page-wrapper .page-inner section.normal code .javascript .hljs-title, -.book .book-body .page-wrapper .page-inner section.normal pre .coffeescript .hljs-title, -.book .book-body .page-wrapper .page-inner section.normal code .coffeescript .hljs-title { - color: #4271ae; -} -.book .book-body .page-wrapper .page-inner section.normal pre .hljs-keyword, -.book .book-body .page-wrapper .page-inner section.normal code .hljs-keyword, -.book .book-body .page-wrapper .page-inner section.normal pre .javascript .hljs-function, -.book .book-body .page-wrapper .page-inner section.normal code .javascript 
.hljs-function { - color: #8959a8; -} -.book .book-body .page-wrapper .page-inner section.normal pre .hljs, -.book .book-body .page-wrapper .page-inner section.normal code .hljs { - display: block; - background: white; - color: #4d4d4c; - padding: 0.5em; -} -.book .book-body .page-wrapper .page-inner section.normal pre .coffeescript .javascript, -.book .book-body .page-wrapper .page-inner section.normal code .coffeescript .javascript, -.book .book-body .page-wrapper .page-inner section.normal pre .javascript .xml, -.book .book-body .page-wrapper .page-inner section.normal code .javascript .xml, -.book .book-body .page-wrapper .page-inner section.normal pre .tex .hljs-formula, -.book .book-body .page-wrapper .page-inner section.normal code .tex .hljs-formula, -.book .book-body .page-wrapper .page-inner section.normal pre .xml .javascript, -.book .book-body .page-wrapper .page-inner section.normal code .xml .javascript, -.book .book-body .page-wrapper .page-inner section.normal pre .xml .vbscript, -.book .book-body .page-wrapper .page-inner section.normal code .xml .vbscript, -.book .book-body .page-wrapper .page-inner section.normal pre .xml .css, -.book .book-body .page-wrapper .page-inner section.normal code .xml .css, -.book .book-body .page-wrapper .page-inner section.normal pre .xml .hljs-cdata, -.book .book-body .page-wrapper .page-inner section.normal code .xml .hljs-cdata { - opacity: 0.5; -} -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code { - /* - -Orginal Style from ethanschoonover.com/solarized (c) Jeremy Hull - -*/ - /* Solarized Green */ - /* Solarized Cyan */ - /* Solarized Blue */ - /* Solarized Yellow */ - /* Solarized Orange */ - /* Solarized Red */ - /* Solarized Violet */ -} -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs { 
- display: block; - padding: 0.5em; - background: #fdf6e3; - color: #657b83; -} -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-comment, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-comment, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-template_comment, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-template_comment, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .diff .hljs-header, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .diff .hljs-header, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-doctype, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-doctype, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-pi, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-pi, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .lisp .hljs-string, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .lisp .hljs-string, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-javadoc, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-javadoc { - color: #93a1a1; -} -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-keyword, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-keyword, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-winutils, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-winutils, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .method, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .method, -.book.color-theme-1 
.book-body .page-wrapper .page-inner section.normal pre .hljs-addition, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-addition, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .css .hljs-tag, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .css .hljs-tag, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-request, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-request, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-status, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-status, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .nginx .hljs-title, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .nginx .hljs-title { - color: #859900; -} -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-number, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-number, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-command, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-command, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-string, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-string, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-tag .hljs-value, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-tag .hljs-value, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-rules .hljs-value, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-rules .hljs-value, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-phpdoc, 
-.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-phpdoc, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .tex .hljs-formula, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .tex .hljs-formula, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-regexp, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-regexp, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-hexcolor, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-hexcolor, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-link_url, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-link_url { - color: #2aa198; -} -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-title, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-title, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-localvars, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-localvars, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-chunk, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-chunk, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-decorator, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-decorator, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-built_in, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-built_in, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-identifier, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-identifier, 
-.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .vhdl .hljs-literal, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .vhdl .hljs-literal, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-id, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-id, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .css .hljs-function, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .css .hljs-function { - color: #268bd2; -} -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-attribute, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-attribute, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-variable, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-variable, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .lisp .hljs-body, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .lisp .hljs-body, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .smalltalk .hljs-number, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .smalltalk .hljs-number, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-constant, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-constant, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-class .hljs-title, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-class .hljs-title, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-parent, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-parent, -.book.color-theme-1 .book-body .page-wrapper .page-inner 
section.normal pre .haskell .hljs-type, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .haskell .hljs-type, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-link_reference, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-link_reference { - color: #b58900; -} -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-preprocessor, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-preprocessor, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-preprocessor .hljs-keyword, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-preprocessor .hljs-keyword, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-pragma, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-pragma, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-shebang, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-shebang, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-symbol, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-symbol, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-symbol .hljs-string, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-symbol .hljs-string, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .diff .hljs-change, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .diff .hljs-change, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-special, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-special, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre 
.hljs-attr_selector, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-attr_selector, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-subst, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-subst, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-cdata, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-cdata, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .clojure .hljs-title, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .clojure .hljs-title, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .css .hljs-pseudo, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .css .hljs-pseudo, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-header, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-header { - color: #cb4b16; -} -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-deletion, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-deletion, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-important, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-important { - color: #dc322f; -} -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .hljs-link_label, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .hljs-link_label { - color: #6c71c4; -} -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal pre .tex .hljs-formula, -.book.color-theme-1 .book-body .page-wrapper .page-inner section.normal code .tex .hljs-formula { - background: #eee8d5; -} -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre, 
-.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code { - /* Tomorrow Night Bright Theme */ - /* Original theme - https://github.com/chriskempson/tomorrow-theme */ - /* http://jmblog.github.com/color-themes-for-google-code-highlightjs */ - /* Tomorrow Comment */ - /* Tomorrow Red */ - /* Tomorrow Orange */ - /* Tomorrow Yellow */ - /* Tomorrow Green */ - /* Tomorrow Aqua */ - /* Tomorrow Blue */ - /* Tomorrow Purple */ -} -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .hljs-comment, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .hljs-comment, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .hljs-title, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .hljs-title { - color: #969896; -} -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .hljs-variable, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .hljs-variable, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .hljs-attribute, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .hljs-attribute, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .hljs-tag, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .hljs-tag, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .hljs-regexp, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .hljs-regexp, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .ruby .hljs-constant, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .ruby .hljs-constant, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .xml .hljs-tag .hljs-title, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .xml .hljs-tag .hljs-title, 
-.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .xml .hljs-pi, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .xml .hljs-pi, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .xml .hljs-doctype, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .xml .hljs-doctype, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .html .hljs-doctype, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .html .hljs-doctype, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .css .hljs-id, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .css .hljs-id, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .css .hljs-class, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .css .hljs-class, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .css .hljs-pseudo, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .css .hljs-pseudo { - color: #d54e53; -} -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .hljs-number, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .hljs-number, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .hljs-preprocessor, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .hljs-preprocessor, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .hljs-pragma, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .hljs-pragma, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .hljs-built_in, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .hljs-built_in, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre 
.hljs-literal, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .hljs-literal, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .hljs-params, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .hljs-params, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .hljs-constant, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .hljs-constant { - color: #e78c45; -} -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .ruby .hljs-class .hljs-title, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .ruby .hljs-class .hljs-title, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .css .hljs-rules .hljs-attribute, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .css .hljs-rules .hljs-attribute { - color: #e7c547; -} -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .hljs-string, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .hljs-string, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .hljs-value, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .hljs-value, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .hljs-inheritance, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .hljs-inheritance, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .hljs-header, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .hljs-header, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .ruby .hljs-symbol, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .ruby .hljs-symbol, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .xml .hljs-cdata, 
-.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .xml .hljs-cdata { - color: #b9ca4a; -} -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .css .hljs-hexcolor, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .css .hljs-hexcolor { - color: #70c0b1; -} -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .hljs-function, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .hljs-function, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .python .hljs-decorator, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .python .hljs-decorator, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .python .hljs-title, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .python .hljs-title, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .ruby .hljs-function .hljs-title, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .ruby .hljs-function .hljs-title, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .ruby .hljs-title .hljs-keyword, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .ruby .hljs-title .hljs-keyword, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .perl .hljs-sub, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .perl .hljs-sub, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .javascript .hljs-title, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .javascript .hljs-title, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .coffeescript .hljs-title, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .coffeescript .hljs-title { - color: #7aa6da; -} 
-.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .hljs-keyword, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .hljs-keyword, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .javascript .hljs-function, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .javascript .hljs-function { - color: #c397d8; -} -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .hljs, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .hljs { - display: block; - background: black; - color: #eaeaea; - padding: 0.5em; -} -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .coffeescript .javascript, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .coffeescript .javascript, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .javascript .xml, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .javascript .xml, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .tex .hljs-formula, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .tex .hljs-formula, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .xml .javascript, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .xml .javascript, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .xml .vbscript, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .xml .vbscript, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .xml .css, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .xml .css, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal pre .xml .hljs-cdata, -.book.color-theme-2 .book-body .page-wrapper .page-inner section.normal code .xml 
.hljs-cdata { - opacity: 0.5; -} diff --git a/_book/libs/gitbook/css/plugin-search.css b/_book/libs/gitbook/css/plugin-search.css deleted file mode 100644 index c85e557..0000000 --- a/_book/libs/gitbook/css/plugin-search.css +++ /dev/null @@ -1,31 +0,0 @@ -.book .book-summary .book-search { - padding: 6px; - background: transparent; - position: absolute; - top: -50px; - left: 0px; - right: 0px; - transition: top 0.5s ease; -} -.book .book-summary .book-search input, -.book .book-summary .book-search input:focus, -.book .book-summary .book-search input:hover { - width: 100%; - background: transparent; - border: 1px solid #ccc; - box-shadow: none; - outline: none; - line-height: 22px; - padding: 7px 4px; - color: inherit; - box-sizing: border-box; -} -.book.with-search .book-summary .book-search { - top: 0px; -} -.book.with-search .book-summary ul.summary { - top: 50px; -} -.with-search .summary li[data-level] a[href*=".html#"] { - display: none; -} diff --git a/_book/libs/gitbook/css/plugin-table.css b/_book/libs/gitbook/css/plugin-table.css deleted file mode 100644 index 7fba1b9..0000000 --- a/_book/libs/gitbook/css/plugin-table.css +++ /dev/null @@ -1 +0,0 @@ -.book .book-body .page-wrapper .page-inner section.normal table{display:table;width:100%;border-collapse:collapse;border-spacing:0;overflow:auto}.book .book-body .page-wrapper .page-inner section.normal table td,.book .book-body .page-wrapper .page-inner section.normal table th{padding:6px 13px;border:1px solid #ddd}.book .book-body .page-wrapper .page-inner section.normal table tr{background-color:#fff;border-top:1px solid #ccc}.book .book-body .page-wrapper .page-inner section.normal table tr:nth-child(2n){background-color:#f8f8f8}.book .book-body .page-wrapper .page-inner section.normal table th{font-weight:700} diff --git a/_book/libs/gitbook/css/style.css b/_book/libs/gitbook/css/style.css deleted file mode 100644 index b896892..0000000 --- a/_book/libs/gitbook/css/style.css +++ /dev/null @@ -1,10 +0,0 
@@ -/*! normalize.css v2.1.0 | MIT License | git.io/normalize */img,legend{border:0}*,.fa{-webkit-font-smoothing:antialiased}.fa-ul>li,sub,sup{position:relative}.book .book-body .page-wrapper .page-inner section.normal hr:after,.book-langs-index .inner .languages:after,.buttons:after,.dropdown-menu .buttons:after{clear:both}body,html{-ms-text-size-adjust:100%;-webkit-text-size-adjust:100%}article,aside,details,figcaption,figure,footer,header,hgroup,main,nav,section,summary{display:block}audio,canvas,video{display:inline-block}.hidden,[hidden]{display:none}audio:not([controls]){display:none;height:0}html{font-family:sans-serif}body,figure{margin:0}a:focus{outline:dotted thin}a:active,a:hover{outline:0}h1{font-size:2em;margin:.67em 0}abbr[title]{border-bottom:1px dotted}b,strong{font-weight:700}dfn{font-style:italic}hr{-moz-box-sizing:content-box;box-sizing:content-box;height:0}mark{background:#ff0;color:#000}code,kbd,pre,samp{font-family:monospace,serif;font-size:1em}pre{white-space:pre-wrap}q{quotes:"\201C" "\201D" "\2018" "\2019"}small{font-size:80%}sub,sup{font-size:75%;line-height:0;vertical-align:baseline}sup{top:-.5em}sub{bottom:-.25em}svg:not(:root){overflow:hidden}fieldset{border:1px solid silver;margin:0 2px;padding:.35em .625em .75em}legend{padding:0}button,input,select,textarea{font-family:inherit;font-size:100%;margin:0}button,input{line-height:normal}button,select{text-transform:none}button,html input[type=button],input[type=reset],input[type=submit]{-webkit-appearance:button;cursor:pointer}button[disabled],html 
input[disabled]{cursor:default}input[type=checkbox],input[type=radio]{box-sizing:border-box;padding:0}input[type=search]{-webkit-appearance:textfield;-moz-box-sizing:content-box;-webkit-box-sizing:content-box;box-sizing:content-box}input[type=search]::-webkit-search-cancel-button{margin-right:10px;}button::-moz-focus-inner,input::-moz-focus-inner{border:0;padding:0}textarea{overflow:auto;vertical-align:top}table{border-collapse:collapse;border-spacing:0}/*! - * Preboot v2 - * - * Open sourced under MIT license by @mdo. - * Some variables and mixins from Bootstrap (Apache 2 license). - */.link-inherit,.link-inherit:focus,.link-inherit:hover{color:inherit}.fa,.fa-stack{display:inline-block}/*! - * Font Awesome 4.1.0 by @davegandy - http://fontawesome.io - @fontawesome - * License - http://fontawesome.io/license (Font: SIL OFL 1.1, CSS: MIT License) - */@font-face{font-family:FontAwesome;src:url(./fontawesome/fontawesome-webfont.ttf?v=4.1.0) format('truetype');font-weight:400;font-style:normal}.fa{font-family:FontAwesome;font-style:normal;font-weight:400;line-height:1;-moz-osx-font-smoothing:grayscale}.book .book-header,.book .book-summary{font-family:"Helvetica Neue",Helvetica,Arial,sans-serif}.fa-lg{font-size:1.33333333em;line-height:.75em;vertical-align:-15%}.fa-2x{font-size:2em}.fa-3x{font-size:3em}.fa-4x{font-size:4em}.fa-5x{font-size:5em}.fa-fw{width:1.28571429em;text-align:center}.fa-ul{padding-left:0;margin-left:2.14285714em;list-style-type:none}.fa-li{position:absolute;left:-2.14285714em;width:2.14285714em;top:.14285714em;text-align:center}.fa-li.fa-lg{left:-1.85714286em}.fa-border{padding:.2em .25em .15em;border:.08em solid #eee;border-radius:.1em}.pull-right{float:right}.pull-left{float:left}.fa.pull-left{margin-right:.3em}.fa.pull-right{margin-left:.3em}.fa-spin{-webkit-animation:spin 2s infinite linear;-moz-animation:spin 2s infinite linear;-o-animation:spin 2s infinite linear;animation:spin 2s infinite linear}@-moz-keyframes 
spin{0%{-moz-transform:rotate(0)}100%{-moz-transform:rotate(359deg)}}@-webkit-keyframes spin{0%{-webkit-transform:rotate(0)}100%{-webkit-transform:rotate(359deg)}}@-o-keyframes spin{0%{-o-transform:rotate(0)}100%{-o-transform:rotate(359deg)}}@keyframes spin{0%{-webkit-transform:rotate(0);transform:rotate(0)}100%{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}.fa-rotate-90{filter:progid:DXImageTransform.Microsoft.BasicImage(rotation=1);-webkit-transform:rotate(90deg);-moz-transform:rotate(90deg);-ms-transform:rotate(90deg);-o-transform:rotate(90deg);transform:rotate(90deg)}.fa-rotate-180{filter:progid:DXImageTransform.Microsoft.BasicImage(rotation=2);-webkit-transform:rotate(180deg);-moz-transform:rotate(180deg);-ms-transform:rotate(180deg);-o-transform:rotate(180deg);transform:rotate(180deg)}.fa-rotate-270{filter:progid:DXImageTransform.Microsoft.BasicImage(rotation=3);-webkit-transform:rotate(270deg);-moz-transform:rotate(270deg);-ms-transform:rotate(270deg);-o-transform:rotate(270deg);transform:rotate(270deg)}.fa-flip-horizontal{filter:progid:DXImageTransform.Microsoft.BasicImage(rotation=0, mirror=1);-webkit-transform:scale(-1,1);-moz-transform:scale(-1,1);-ms-transform:scale(-1,1);-o-transform:scale(-1,1);transform:scale(-1,1)}.fa-flip-vertical{filter:progid:DXImageTransform.Microsoft.BasicImage(rotation=2, 
mirror=1);-webkit-transform:scale(1,-1);-moz-transform:scale(1,-1);-ms-transform:scale(1,-1);-o-transform:scale(1,-1);transform:scale(1,-1)}.fa-stack{position:relative;width:2em;height:2em;line-height:2em;vertical-align:middle}.fa-stack-1x,.fa-stack-2x{position:absolute;left:0;width:100%;text-align:center}.fa-stack-1x{line-height:inherit}.fa-stack-2x{font-size:2em}.fa-inverse{color:#fff}.fa-glass:before{content:"\f000"}.fa-music:before{content:"\f001"}.fa-search:before{content:"\f002"}.fa-envelope-o:before{content:"\f003"}.fa-heart:before{content:"\f004"}.fa-star:before{content:"\f005"}.fa-star-o:before{content:"\f006"}.fa-user:before{content:"\f007"}.fa-film:before{content:"\f008"}.fa-th-large:before{content:"\f009"}.fa-th:before{content:"\f00a"}.fa-th-list:before{content:"\f00b"}.fa-check:before{content:"\f00c"}.fa-times:before{content:"\f00d"}.fa-search-plus:before{content:"\f00e"}.fa-search-minus:before{content:"\f010"}.fa-power-off:before{content:"\f011"}.fa-signal:before{content:"\f012"}.fa-cog:before,.fa-gear:before{content:"\f013"}.fa-trash-o:before{content:"\f014"}.fa-home:before{content:"\f015"}.fa-file-o:before{content:"\f016"}.fa-clock-o:before{content:"\f017"}.fa-road:before{content:"\f018"}.fa-download:before{content:"\f019"}.fa-arrow-circle-o-down:before{content:"\f01a"}.fa-arrow-circle-o-up:before{content:"\f01b"}.fa-inbox:before{content:"\f01c"}.fa-play-circle-o:before{content:"\f01d"}.fa-repeat:before,.fa-rotate-right:before{content:"\f01e"}.fa-refresh:before{content:"\f021"}.fa-list-alt:before{content:"\f022"}.fa-lock:before{content:"\f023"}.fa-flag:before{content:"\f024"}.fa-headphones:before{content:"\f025"}.fa-volume-off:before{content:"\f026"}.fa-volume-down:before{content:"\f027"}.fa-volume-up:before{content:"\f028"}.fa-qrcode:before{content:"\f029"}.fa-barcode:before{content:"\f02a"}.fa-tag:before{content:"\f02b"}.fa-tags:before{content:"\f02c"}.fa-book:before{content:"\f02d"}.fa-bookmark:before{content:"\f02e"}.fa-print:before{content:"\f02
f"}.fa-camera:before{content:"\f030"}.fa-font:before{content:"\f031"}.fa-bold:before{content:"\f032"}.fa-italic:before{content:"\f033"}.fa-text-height:before{content:"\f034"}.fa-text-width:before{content:"\f035"}.fa-align-left:before{content:"\f036"}.fa-align-center:before{content:"\f037"}.fa-align-right:before{content:"\f038"}.fa-align-justify:before{content:"\f039"}.fa-list:before{content:"\f03a"}.fa-dedent:before,.fa-outdent:before{content:"\f03b"}.fa-indent:before{content:"\f03c"}.fa-video-camera:before{content:"\f03d"}.fa-image:before,.fa-photo:before,.fa-picture-o:before{content:"\f03e"}.fa-pencil:before{content:"\f040"}.fa-map-marker:before{content:"\f041"}.fa-adjust:before{content:"\f042"}.fa-tint:before{content:"\f043"}.fa-edit:before,.fa-pencil-square-o:before{content:"\f044"}.fa-share-square-o:before{content:"\f045"}.fa-check-square-o:before{content:"\f046"}.fa-arrows:before{content:"\f047"}.fa-step-backward:before{content:"\f048"}.fa-fast-backward:before{content:"\f049"}.fa-backward:before{content:"\f04a"}.fa-play:before{content:"\f04b"}.fa-pause:before{content:"\f04c"}.fa-stop:before{content:"\f04d"}.fa-forward:before{content:"\f04e"}.fa-fast-forward:before{content:"\f050"}.fa-step-forward:before{content:"\f051"}.fa-eject:before{content:"\f052"}.fa-chevron-left:before{content:"\f053"}.fa-chevron-right:before{content:"\f054"}.fa-plus-circle:before{content:"\f055"}.fa-minus-circle:before{content:"\f056"}.fa-times-circle:before{content:"\f057"}.fa-check-circle:before{content:"\f058"}.fa-question-circle:before{content:"\f059"}.fa-info-circle:before{content:"\f05a"}.fa-crosshairs:before{content:"\f05b"}.fa-times-circle-o:before{content:"\f05c"}.fa-check-circle-o:before{content:"\f05d"}.fa-ban:before{content:"\f05e"}.fa-arrow-left:before{content:"\f060"}.fa-arrow-right:before{content:"\f061"}.fa-arrow-up:before{content:"\f062"}.fa-arrow-down:before{content:"\f063"}.fa-mail-forward:before,.fa-share:before{content:"\f064"}.fa-expand:before{content:"\f065"}.fa-c
ompress:before{content:"\f066"}.fa-plus:before{content:"\f067"}.fa-minus:before{content:"\f068"}.fa-asterisk:before{content:"\f069"}.fa-exclamation-circle:before{content:"\f06a"}.fa-gift:before{content:"\f06b"}.fa-leaf:before{content:"\f06c"}.fa-fire:before{content:"\f06d"}.fa-eye:before{content:"\f06e"}.fa-eye-slash:before{content:"\f070"}.fa-exclamation-triangle:before,.fa-warning:before{content:"\f071"}.fa-plane:before{content:"\f072"}.fa-calendar:before{content:"\f073"}.fa-random:before{content:"\f074"}.fa-comment:before{content:"\f075"}.fa-magnet:before{content:"\f076"}.fa-chevron-up:before{content:"\f077"}.fa-chevron-down:before{content:"\f078"}.fa-retweet:before{content:"\f079"}.fa-shopping-cart:before{content:"\f07a"}.fa-folder:before{content:"\f07b"}.fa-folder-open:before{content:"\f07c"}.fa-arrows-v:before{content:"\f07d"}.fa-arrows-h:before{content:"\f07e"}.fa-bar-chart-o:before{content:"\f080"}.fa-twitter-square:before{content:"\f081"}.fa-facebook-square:before{content:"\f082"}.fa-camera-retro:before{content:"\f083"}.fa-key:before{content:"\f084"}.fa-cogs:before,.fa-gears:before{content:"\f085"}.fa-comments:before{content:"\f086"}.fa-thumbs-o-up:before{content:"\f087"}.fa-thumbs-o-down:before{content:"\f088"}.fa-star-half:before{content:"\f089"}.fa-heart-o:before{content:"\f08a"}.fa-sign-out:before{content:"\f08b"}.fa-linkedin-square:before{content:"\f08c"}.fa-thumb-tack:before{content:"\f08d"}.fa-external-link:before{content:"\f08e"}.fa-sign-in:before{content:"\f090"}.fa-trophy:before{content:"\f091"}.fa-github-square:before{content:"\f092"}.fa-upload:before{content:"\f093"}.fa-lemon-o:before{content:"\f094"}.fa-phone:before{content:"\f095"}.fa-square-o:before{content:"\f096"}.fa-bookmark-o:before{content:"\f097"}.fa-phone-square:before{content:"\f098"}.fa-twitter:before{content:"\f099"}.fa-facebook:before{content:"\f09a"}.fa-github:before{content:"\f09b"}.fa-unlock:before{content:"\f09c"}.fa-credit-card:before{content:"\f09d"}.fa-rss:before{content:"\f
09e"}.fa-hdd-o:before{content:"\f0a0"}.fa-bullhorn:before{content:"\f0a1"}.fa-bell:before{content:"\f0f3"}.fa-certificate:before{content:"\f0a3"}.fa-hand-o-right:before{content:"\f0a4"}.fa-hand-o-left:before{content:"\f0a5"}.fa-hand-o-up:before{content:"\f0a6"}.fa-hand-o-down:before{content:"\f0a7"}.fa-arrow-circle-left:before{content:"\f0a8"}.fa-arrow-circle-right:before{content:"\f0a9"}.fa-arrow-circle-up:before{content:"\f0aa"}.fa-arrow-circle-down:before{content:"\f0ab"}.fa-globe:before{content:"\f0ac"}.fa-wrench:before{content:"\f0ad"}.fa-tasks:before{content:"\f0ae"}.fa-filter:before{content:"\f0b0"}.fa-briefcase:before{content:"\f0b1"}.fa-arrows-alt:before{content:"\f0b2"}.fa-group:before,.fa-users:before{content:"\f0c0"}.fa-chain:before,.fa-link:before{content:"\f0c1"}.fa-cloud:before{content:"\f0c2"}.fa-flask:before{content:"\f0c3"}.fa-cut:before,.fa-scissors:before{content:"\f0c4"}.fa-copy:before,.fa-files-o:before{content:"\f0c5"}.fa-paperclip:before{content:"\f0c6"}.fa-floppy-o:before,.fa-save:before{content:"\f0c7"}.fa-square:before{content:"\f0c8"}.fa-bars:before,.fa-navicon:before,.fa-reorder:before{content:"\f0c9"}.fa-list-ul:before{content:"\f0ca"}.fa-list-ol:before{content:"\f0cb"}.fa-strikethrough:before{content:"\f0cc"}.fa-underline:before{content:"\f0cd"}.fa-table:before{content:"\f0ce"}.fa-magic:before{content:"\f0d0"}.fa-truck:before{content:"\f0d1"}.fa-pinterest:before{content:"\f0d2"}.fa-pinterest-square:before{content:"\f0d3"}.fa-google-plus-square:before{content:"\f0d4"}.fa-google-plus:before{content:"\f0d5"}.fa-money:before{content:"\f0d6"}.fa-caret-down:before{content:"\f0d7"}.fa-caret-up:before{content:"\f0d8"}.fa-caret-left:before{content:"\f0d9"}.fa-caret-right:before{content:"\f0da"}.fa-columns:before{content:"\f0db"}.fa-sort:before,.fa-unsorted:before{content:"\f0dc"}.fa-sort-desc:before,.fa-sort-down:before{content:"\f0dd"}.fa-sort-asc:before,.fa-sort-up:before{content:"\f0de"}.fa-envelope:before{content:"\f0e0"}.fa-linkedin:before
{content:"\f0e1"}.fa-rotate-left:before,.fa-undo:before{content:"\f0e2"}.fa-gavel:before,.fa-legal:before{content:"\f0e3"}.fa-dashboard:before,.fa-tachometer:before{content:"\f0e4"}.fa-comment-o:before{content:"\f0e5"}.fa-comments-o:before{content:"\f0e6"}.fa-bolt:before,.fa-flash:before{content:"\f0e7"}.fa-sitemap:before{content:"\f0e8"}.fa-umbrella:before{content:"\f0e9"}.fa-clipboard:before,.fa-paste:before{content:"\f0ea"}.fa-lightbulb-o:before{content:"\f0eb"}.fa-exchange:before{content:"\f0ec"}.fa-cloud-download:before{content:"\f0ed"}.fa-cloud-upload:before{content:"\f0ee"}.fa-user-md:before{content:"\f0f0"}.fa-stethoscope:before{content:"\f0f1"}.fa-suitcase:before{content:"\f0f2"}.fa-bell-o:before{content:"\f0a2"}.fa-coffee:before{content:"\f0f4"}.fa-cutlery:before{content:"\f0f5"}.fa-file-text-o:before{content:"\f0f6"}.fa-building-o:before{content:"\f0f7"}.fa-hospital-o:before{content:"\f0f8"}.fa-ambulance:before{content:"\f0f9"}.fa-medkit:before{content:"\f0fa"}.fa-fighter-jet:before{content:"\f0fb"}.fa-beer:before{content:"\f0fc"}.fa-h-square:before{content:"\f0fd"}.fa-plus-square:before{content:"\f0fe"}.fa-angle-double-left:before{content:"\f100"}.fa-angle-double-right:before{content:"\f101"}.fa-angle-double-up:before{content:"\f102"}.fa-angle-double-down:before{content:"\f103"}.fa-angle-left:before{content:"\f104"}.fa-angle-right:before{content:"\f105"}.fa-angle-up:before{content:"\f106"}.fa-angle-down:before{content:"\f107"}.fa-desktop:before{content:"\f108"}.fa-laptop:before{content:"\f109"}.fa-tablet:before{content:"\f10a"}.fa-mobile-phone:before,.fa-mobile:before{content:"\f10b"}.fa-circle-o:before{content:"\f10c"}.fa-quote-left:before{content:"\f10d"}.fa-quote-right:before{content:"\f10e"}.fa-spinner:before{content:"\f110"}.fa-circle:before{content:"\f111"}.fa-mail-reply:before,.fa-reply:before{content:"\f112"}.fa-github-alt:before{content:"\f113"}.fa-folder-o:before{content:"\f114"}.fa-folder-open-o:before{content:"\f115"}.fa-smile-o:before{conten
t:"\f118"}.fa-frown-o:before{content:"\f119"}.fa-meh-o:before{content:"\f11a"}.fa-gamepad:before{content:"\f11b"}.fa-keyboard-o:before{content:"\f11c"}.fa-flag-o:before{content:"\f11d"}.fa-flag-checkered:before{content:"\f11e"}.fa-terminal:before{content:"\f120"}.fa-code:before{content:"\f121"}.fa-mail-reply-all:before,.fa-reply-all:before{content:"\f122"}.fa-star-half-empty:before,.fa-star-half-full:before,.fa-star-half-o:before{content:"\f123"}.fa-location-arrow:before{content:"\f124"}.fa-crop:before{content:"\f125"}.fa-code-fork:before{content:"\f126"}.fa-chain-broken:before,.fa-unlink:before{content:"\f127"}.fa-question:before{content:"\f128"}.fa-info:before{content:"\f129"}.fa-exclamation:before{content:"\f12a"}.fa-superscript:before{content:"\f12b"}.fa-subscript:before{content:"\f12c"}.fa-eraser:before{content:"\f12d"}.fa-puzzle-piece:before{content:"\f12e"}.fa-microphone:before{content:"\f130"}.fa-microphone-slash:before{content:"\f131"}.fa-shield:before{content:"\f132"}.fa-calendar-o:before{content:"\f133"}.fa-fire-extinguisher:before{content:"\f134"}.fa-rocket:before{content:"\f135"}.fa-maxcdn:before{content:"\f136"}.fa-chevron-circle-left:before{content:"\f137"}.fa-chevron-circle-right:before{content:"\f138"}.fa-chevron-circle-up:before{content:"\f139"}.fa-chevron-circle-down:before{content:"\f13a"}.fa-html5:before{content:"\f13b"}.fa-css3:before{content:"\f13c"}.fa-anchor:before{content:"\f13d"}.fa-unlock-alt:before{content:"\f13e"}.fa-bullseye:before{content:"\f140"}.fa-ellipsis-h:before{content:"\f141"}.fa-ellipsis-v:before{content:"\f142"}.fa-rss-square:before{content:"\f143"}.fa-play-circle:before{content:"\f144"}.fa-ticket:before{content:"\f145"}.fa-minus-square:before{content:"\f146"}.fa-minus-square-o:before{content:"\f147"}.fa-level-up:before{content:"\f148"}.fa-level-down:before{content:"\f149"}.fa-check-square:before{content:"\f14a"}.fa-pencil-square:before{content:"\f14b"}.fa-external-link-square:before{content:"\f14c"}.fa-share-square:before{c
ontent:"\f14d"}.fa-compass:before{content:"\f14e"}.fa-caret-square-o-down:before,.fa-toggle-down:before{content:"\f150"}.fa-caret-square-o-up:before,.fa-toggle-up:before{content:"\f151"}.fa-caret-square-o-right:before,.fa-toggle-right:before{content:"\f152"}.fa-eur:before,.fa-euro:before{content:"\f153"}.fa-gbp:before{content:"\f154"}.fa-dollar:before,.fa-usd:before{content:"\f155"}.fa-inr:before,.fa-rupee:before{content:"\f156"}.fa-cny:before,.fa-jpy:before,.fa-rmb:before,.fa-yen:before{content:"\f157"}.fa-rouble:before,.fa-rub:before,.fa-ruble:before{content:"\f158"}.fa-krw:before,.fa-won:before{content:"\f159"}.fa-bitcoin:before,.fa-btc:before{content:"\f15a"}.fa-file:before{content:"\f15b"}.fa-file-text:before{content:"\f15c"}.fa-sort-alpha-asc:before{content:"\f15d"}.fa-sort-alpha-desc:before{content:"\f15e"}.fa-sort-amount-asc:before{content:"\f160"}.fa-sort-amount-desc:before{content:"\f161"}.fa-sort-numeric-asc:before{content:"\f162"}.fa-sort-numeric-desc:before{content:"\f163"}.fa-thumbs-up:before{content:"\f164"}.fa-thumbs-down:before{content:"\f165"}.fa-youtube-square:before{content:"\f166"}.fa-youtube:before{content:"\f167"}.fa-xing:before{content:"\f168"}.fa-xing-square:before{content:"\f169"}.fa-youtube-play:before{content:"\f16a"}.fa-dropbox:before{content:"\f16b"}.fa-stack-overflow:before{content:"\f16c"}.fa-instagram:before{content:"\f16d"}.fa-flickr:before{content:"\f16e"}.fa-adn:before{content:"\f170"}.fa-bitbucket:before{content:"\f171"}.fa-bitbucket-square:before{content:"\f172"}.fa-tumblr:before{content:"\f173"}.fa-tumblr-square:before{content:"\f174"}.fa-long-arrow-down:before{content:"\f175"}.fa-long-arrow-up:before{content:"\f176"}.fa-long-arrow-left:before{content:"\f177"}.fa-long-arrow-right:before{content:"\f178"}.fa-apple:before{content:"\f179"}.fa-windows:before{content:"\f17a"}.fa-android:before{content:"\f17b"}.fa-linux:before{content:"\f17c"}.fa-dribbble:before{content:"\f17d"}.fa-skype:before{content:"\f17e"}.fa-foursquare:before{co
ntent:"\f180"}.fa-trello:before{content:"\f181"}.fa-female:before{content:"\f182"}.fa-male:before{content:"\f183"}.fa-gittip:before{content:"\f184"}.fa-sun-o:before{content:"\f185"}.fa-moon-o:before{content:"\f186"}.fa-archive:before{content:"\f187"}.fa-bug:before{content:"\f188"}.fa-vk:before{content:"\f189"}.fa-weibo:before{content:"\f18a"}.fa-renren:before{content:"\f18b"}.fa-pagelines:before{content:"\f18c"}.fa-stack-exchange:before{content:"\f18d"}.fa-arrow-circle-o-right:before{content:"\f18e"}.fa-arrow-circle-o-left:before{content:"\f190"}.fa-caret-square-o-left:before,.fa-toggle-left:before{content:"\f191"}.fa-dot-circle-o:before{content:"\f192"}.fa-wheelchair:before{content:"\f193"}.fa-vimeo-square:before{content:"\f194"}.fa-try:before,.fa-turkish-lira:before{content:"\f195"}.fa-plus-square-o:before{content:"\f196"}.fa-space-shuttle:before{content:"\f197"}.fa-slack:before{content:"\f198"}.fa-envelope-square:before{content:"\f199"}.fa-wordpress:before{content:"\f19a"}.fa-openid:before{content:"\f19b"}.fa-bank:before,.fa-institution:before,.fa-university:before{content:"\f19c"}.fa-graduation-cap:before,.fa-mortar-board:before{content:"\f19d"}.fa-yahoo:before{content:"\f19e"}.fa-google:before{content:"\f1a0"}.fa-reddit:before{content:"\f1a1"}.fa-reddit-square:before{content:"\f1a2"}.fa-stumbleupon-circle:before{content:"\f1a3"}.fa-stumbleupon:before{content:"\f1a4"}.fa-delicious:before{content:"\f1a5"}.fa-digg:before{content:"\f1a6"}.fa-pied-piper-square:before,.fa-pied-piper:before{content:"\f1a7"}.fa-pied-piper-alt:before{content:"\f1a8"}.fa-drupal:before{content:"\f1a9"}.fa-joomla:before{content:"\f1aa"}.fa-language:before{content:"\f1ab"}.fa-fax:before{content:"\f1ac"}.fa-building:before{content:"\f1ad"}.fa-child:before{content:"\f1ae"}.fa-paw:before{content:"\f1b0"}.fa-spoon:before{content:"\f1b1"}.fa-cube:before{content:"\f1b2"}.fa-cubes:before{content:"\f1b3"}.fa-behance:before{content:"\f1b4"}.fa-behance-square:before{content:"\f1b5"}.fa-steam:before{c
ontent:"\f1b6"}.fa-steam-square:before{content:"\f1b7"}.fa-recycle:before{content:"\f1b8"}.fa-automobile:before,.fa-car:before{content:"\f1b9"}.fa-cab:before,.fa-taxi:before{content:"\f1ba"}.fa-tree:before{content:"\f1bb"}.fa-spotify:before{content:"\f1bc"}.fa-deviantart:before{content:"\f1bd"}.fa-soundcloud:before{content:"\f1be"}.fa-database:before{content:"\f1c0"}.fa-file-pdf-o:before{content:"\f1c1"}.fa-file-word-o:before{content:"\f1c2"}.fa-file-excel-o:before{content:"\f1c3"}.fa-file-powerpoint-o:before{content:"\f1c4"}.fa-file-image-o:before,.fa-file-photo-o:before,.fa-file-picture-o:before{content:"\f1c5"}.fa-file-archive-o:before,.fa-file-zip-o:before{content:"\f1c6"}.fa-file-audio-o:before,.fa-file-sound-o:before{content:"\f1c7"}.fa-file-movie-o:before,.fa-file-video-o:before{content:"\f1c8"}.fa-file-code-o:before{content:"\f1c9"}.fa-vine:before{content:"\f1ca"}.fa-codepen:before{content:"\f1cb"}.fa-jsfiddle:before{content:"\f1cc"}.fa-life-bouy:before,.fa-life-ring:before,.fa-life-saver:before,.fa-support:before{content:"\f1cd"}.fa-circle-o-notch:before{content:"\f1ce"}.fa-ra:before,.fa-rebel:before{content:"\f1d0"}.fa-empire:before,.fa-ge:before{content:"\f1d1"}.fa-git-square:before{content:"\f1d2"}.fa-git:before{content:"\f1d3"}.fa-hacker-news:before{content:"\f1d4"}.fa-tencent-weibo:before{content:"\f1d5"}.fa-qq:before{content:"\f1d6"}.fa-wechat:before,.fa-weixin:before{content:"\f1d7"}.fa-paper-plane:before,.fa-send:before{content:"\f1d8"}.fa-paper-plane-o:before,.fa-send-o:before{content:"\f1d9"}.fa-history:before{content:"\f1da"}.fa-circle-thin:before{content:"\f1db"}.fa-header:before{content:"\f1dc"}.fa-paragraph:before{content:"\f1dd"}.fa-sliders:before{content:"\f1de"}.fa-share-alt:before{content:"\f1e0"}.fa-share-alt-square:before{content:"\f1e1"}.fa-bomb:before{content:"\f1e2"}.book-langs-index{width:100%;height:100%;padding:40px 0;margin:0;overflow:auto}@media (max-width:600px){.book-langs-index{padding:0}}.book-langs-index 
.inner{max-width:600px;width:100%;margin:0 auto;padding:30px;background:#fff;border-radius:3px}.book-langs-index .inner h3{margin:0}.book-langs-index .inner .languages{list-style:none;padding:20px 30px;margin-top:20px;border-top:1px solid #eee}.book-langs-index .inner .languages:after,.book-langs-index .inner .languages:before{content:" ";display:table;line-height:0}.book-langs-index .inner .languages li{width:50%;float:left;padding:10px 5px;font-size:16px}@media (max-width:600px){.book-langs-index .inner .languages li{width:100%;max-width:100%}}.book .book-header{overflow:visible;height:50px;padding:0 8px;z-index:2;font-size:.85em;color:#7e888b;background:0 0}.book .book-header .btn{display:block;height:50px;padding:0 15px;border-bottom:none;color:#ccc;text-transform:uppercase;line-height:50px;-webkit-box-shadow:none!important;box-shadow:none!important;position:relative;font-size:14px}.book .book-header .btn:hover{position:relative;text-decoration:none;color:#444;background:0 0}.book .book-header h1{margin:0;font-size:20px;font-weight:200;text-align:center;line-height:50px;opacity:0;padding-left:200px;padding-right:200px;-webkit-transition:opacity .2s ease;-moz-transition:opacity .2s ease;-o-transition:opacity .2s ease;transition:opacity .2s ease;overflow:hidden;text-overflow:ellipsis;white-space:nowrap}.book .book-header h1 a,.book .book-header h1 a:hover{color:inherit;text-decoration:none}@media (max-width:1000px){.book .book-header h1{display:none}}.book .book-header h1 i{display:none}.book .book-header:hover h1{opacity:1}.book.is-loading .book-header h1 i{display:inline-block}.book.is-loading .book-header h1 a{display:none}.dropdown{position:relative}.dropdown-menu{position:absolute;top:100%;left:0;z-index:100;display:none;float:left;min-width:160px;padding:0;margin:2px 0 0;list-style:none;font-size:14px;background-color:#fafafa;border:1px solid rgba(0,0,0,.07);border-radius:1px;-webkit-box-shadow:0 6px 12px rgba(0,0,0,.175);box-shadow:0 6px 12px 
rgba(0,0,0,.175);background-clip:padding-box}.dropdown-menu.open{display:block}.dropdown-menu.dropdown-left{left:auto;right:4%}.dropdown-menu.dropdown-left .dropdown-caret{right:14px;left:auto}.dropdown-menu .dropdown-caret{position:absolute;top:-8px;left:14px;width:18px;height:10px;float:left;overflow:hidden}.dropdown-menu .dropdown-caret .caret-inner,.dropdown-menu .dropdown-caret .caret-outer{display:inline-block;top:0;border-left:9px solid transparent;border-right:9px solid transparent;position:absolute}.dropdown-menu .dropdown-caret .caret-outer{border-bottom:9px solid rgba(0,0,0,.1);height:auto;left:0;width:auto;margin-left:-1px}.dropdown-menu .dropdown-caret .caret-inner{margin-top:-1px;top:1px;border-bottom:9px solid #fafafa}.dropdown-menu .buttons{border-bottom:1px solid rgba(0,0,0,.07)}.dropdown-menu .buttons:after,.dropdown-menu .buttons:before{content:" ";display:table;line-height:0}.dropdown-menu .buttons:last-child{border-bottom:none}.dropdown-menu .buttons .button{border:0;background-color:transparent;color:#a6a6a6;width:100%;text-align:center;float:left;line-height:1.42857143;padding:8px 4px}.alert,.dropdown-menu .buttons .button:hover{color:#444}.dropdown-menu .buttons .button:focus,.dropdown-menu .buttons .button:hover{outline:0}.dropdown-menu .buttons .button.size-2{width:50%}.dropdown-menu .buttons .button.size-3{width:33%}.alert{padding:15px;margin-bottom:20px;background:#eee;border-bottom:5px solid #ddd}.alert-success{background:#dff0d8;border-color:#d6e9c6;color:#3c763d}.alert-info{background:#d9edf7;border-color:#bce8f1;color:#31708f}.alert-danger{background:#f2dede;border-color:#ebccd1;color:#a94442}.alert-warning{background:#fcf8e3;border-color:#faebcc;color:#8a6d3b}.book .book-summary{position:absolute;top:0;left:-300px;bottom:0;z-index:1;width:300px;color:#364149;background:#fafafa;border-right:1px solid rgba(0,0,0,.07);-webkit-transition:left 250ms ease;-moz-transition:left 250ms ease;-o-transition:left 250ms ease;transition:left 250ms 
ease}.book .book-summary ul.summary{position:absolute;top:0;left:0;right:0;bottom:0;overflow-y:auto;list-style:none;margin:0;padding:0;-webkit-transition:top .5s ease;-moz-transition:top .5s ease;-o-transition:top .5s ease;transition:top .5s ease}.book .book-summary ul.summary li{list-style:none}.book .book-summary ul.summary li.divider{height:1px;margin:7px 0;overflow:hidden;background:rgba(0,0,0,.07)}.book .book-summary ul.summary li i.fa-check{display:none;position:absolute;right:9px;top:16px;font-size:9px;color:#3c3}.book .book-summary ul.summary li.done>a{color:#364149;font-weight:400}.book .book-summary ul.summary li.done>a i{display:inline}.book .book-summary ul.summary li a,.book .book-summary ul.summary li span{display:block;padding:10px 15px;border-bottom:none;color:#364149;background:0 0;text-overflow:ellipsis;overflow:hidden;white-space:nowrap;position:relative}.book .book-summary ul.summary li span{cursor:not-allowed;opacity:.3;filter:alpha(opacity=30)}.book .book-summary ul.summary li a:hover,.book .book-summary ul.summary li.active>a{color:#008cff;background:0 0;text-decoration:none}.book .book-summary ul.summary li ul{padding-left:20px}@media (max-width:600px){.book .book-summary{width:calc(100% - 60px);bottom:0;left:-100%}}.book.with-summary .book-summary{left:0}.book.without-animation .book-summary{-webkit-transition:none!important;-moz-transition:none!important;-o-transition:none!important;transition:none!important}.book{position:relative;width:100%;height:100%}.book .book-body,.book .book-body .body-inner{position:absolute;top:0;left:0;overflow-y:auto;bottom:0;right:0}.book .book-body{color:#000;background:#fff;-webkit-transition:left 250ms ease;-moz-transition:left 250ms ease;-o-transition:left 250ms ease;transition:left 250ms ease}.book .book-body .page-wrapper{position:relative;outline:0}.book .book-body .page-wrapper .page-inner{max-width:800px;margin:0 auto;padding:20px 0 40px}.book .book-body .page-wrapper .page-inner 
section{margin:0;padding:5px 15px;background:#fff;border-radius:2px;line-height:1.7;font-size:1.6rem}.book .book-body .page-wrapper .page-inner .btn-group .btn{border-radius:0;background:#eee;border:0}@media (max-width:1240px){.book .book-body{-webkit-transition:-webkit-transform 250ms ease;-moz-transition:-moz-transform 250ms ease;-o-transition:-o-transform 250ms ease;transition:transform 250ms ease;padding-bottom:20px}.book .book-body .body-inner{position:static;min-height:calc(100% - 50px)}}@media (min-width:600px){.book.with-summary .book-body{left:300px}}@media (max-width:600px){.book.with-summary{overflow:hidden}.book.with-summary .book-body{-webkit-transform:translate(calc(100% - 60px),0);-moz-transform:translate(calc(100% - 60px),0);-ms-transform:translate(calc(100% - 60px),0);-o-transform:translate(calc(100% - 60px),0);transform:translate(calc(100% - 60px),0)}}.book.without-animation .book-body{-webkit-transition:none!important;-moz-transition:none!important;-o-transition:none!important;transition:none!important}.buttons:after,.buttons:before{content:" ";display:table;line-height:0}.button{border:0;background:#eee;color:#666;width:100%;text-align:center;float:left;line-height:1.42857143;padding:8px 4px}.button:hover{color:#444}.button:focus,.button:hover{outline:0}.button.size-2{width:50%}.button.size-3{width:33%}.book .book-body .page-wrapper .page-inner section{display:none}.book .book-body .page-wrapper .page-inner section.normal{display:block;word-wrap:break-word;overflow:hidden;color:#333;line-height:1.7;text-size-adjust:100%;-ms-text-size-adjust:100%;-webkit-text-size-adjust:100%;-moz-text-size-adjust:100%}.book .book-body .page-wrapper .page-inner section.normal *{box-sizing:border-box;-webkit-box-sizing:border-box;}.book .book-body .page-wrapper .page-inner section.normal>:first-child{margin-top:0!important}.book .book-body .page-wrapper .page-inner section.normal>:last-child{margin-bottom:0!important}.book .book-body .page-wrapper .page-inner 
section.normal blockquote,.book .book-body .page-wrapper .page-inner section.normal code,.book .book-body .page-wrapper .page-inner section.normal figure,.book .book-body .page-wrapper .page-inner section.normal img,.book .book-body .page-wrapper .page-inner section.normal pre,.book .book-body .page-wrapper .page-inner section.normal table,.book .book-body .page-wrapper .page-inner section.normal tr{page-break-inside:avoid}.book .book-body .page-wrapper .page-inner section.normal h2,.book .book-body .page-wrapper .page-inner section.normal h3,.book .book-body .page-wrapper .page-inner section.normal h4,.book .book-body .page-wrapper .page-inner section.normal h5,.book .book-body .page-wrapper .page-inner section.normal p{orphans:3;widows:3}.book .book-body .page-wrapper .page-inner section.normal h1,.book .book-body .page-wrapper .page-inner section.normal h2,.book .book-body .page-wrapper .page-inner section.normal h3,.book .book-body .page-wrapper .page-inner section.normal h4,.book .book-body .page-wrapper .page-inner section.normal h5{page-break-after:avoid}.book .book-body .page-wrapper .page-inner section.normal b,.book .book-body .page-wrapper .page-inner section.normal strong{font-weight:700}.book .book-body .page-wrapper .page-inner section.normal em{font-style:italic}.book .book-body .page-wrapper .page-inner section.normal blockquote,.book .book-body .page-wrapper .page-inner section.normal dl,.book .book-body .page-wrapper .page-inner section.normal ol,.book .book-body .page-wrapper .page-inner section.normal p,.book .book-body .page-wrapper .page-inner section.normal table,.book .book-body .page-wrapper .page-inner section.normal ul{margin-top:0;margin-bottom:.85em}.book .book-body .page-wrapper .page-inner section.normal a{color:#4183c4;text-decoration:none;background:0 0}.book .book-body .page-wrapper .page-inner section.normal a:active,.book .book-body .page-wrapper .page-inner section.normal a:focus,.book .book-body .page-wrapper .page-inner 
section.normal a:hover{outline:0;text-decoration:underline}.book .book-body .page-wrapper .page-inner section.normal img{border:0;max-width:100%}.book .book-body .page-wrapper .page-inner section.normal hr{height:4px;padding:0;margin:1.7em 0;overflow:hidden;background-color:#e7e7e7;border:none}.book .book-body .page-wrapper .page-inner section.normal hr:after,.book .book-body .page-wrapper .page-inner section.normal hr:before{display:table;content:" "}.book .book-body .page-wrapper .page-inner section.normal h1,.book .book-body .page-wrapper .page-inner section.normal h2,.book .book-body .page-wrapper .page-inner section.normal h3,.book .book-body .page-wrapper .page-inner section.normal h4,.book .book-body .page-wrapper .page-inner section.normal h5,.book .book-body .page-wrapper .page-inner section.normal h6{margin-top:1.275em;margin-bottom:.85em;}.book .book-body .page-wrapper .page-inner section.normal h1{font-size:2em}.book .book-body .page-wrapper .page-inner section.normal h2{font-size:1.75em}.book .book-body .page-wrapper .page-inner section.normal h3{font-size:1.5em}.book .book-body .page-wrapper .page-inner section.normal h4{font-size:1.25em}.book .book-body .page-wrapper .page-inner section.normal h5{font-size:1em}.book .book-body .page-wrapper .page-inner section.normal h6{font-size:1em;color:#777}.book .book-body .page-wrapper .page-inner section.normal code,.book .book-body .page-wrapper .page-inner section.normal pre{font-family:Consolas,"Liberation Mono",Menlo,Courier,monospace;direction:ltr;border:none;color:inherit}.book .book-body .page-wrapper .page-inner section.normal pre{overflow:auto;word-wrap:normal;margin:0 0 1.275em;padding:.85em 1em;background:#f7f7f7}.book .book-body .page-wrapper .page-inner section.normal pre>code{display:inline;max-width:initial;padding:0;margin:0;overflow:initial;line-height:inherit;font-size:.85em;white-space:pre;background:0 0}.book .book-body .page-wrapper .page-inner section.normal pre>code:after,.book 
.book-body .page-wrapper .page-inner section.normal pre>code:before{content:normal}.book .book-body .page-wrapper .page-inner section.normal code{padding:.2em;margin:0;font-size:.85em;background-color:#f7f7f7}.book .book-body .page-wrapper .page-inner section.normal code:after,.book .book-body .page-wrapper .page-inner section.normal code:before{letter-spacing:-.2em;content:"\00a0"}.book .book-body .page-wrapper .page-inner section.normal ol,.book .book-body .page-wrapper .page-inner section.normal ul{padding:0 0 0 2em;margin:0 0 .85em}.book .book-body .page-wrapper .page-inner section.normal ol ol,.book .book-body .page-wrapper .page-inner section.normal ol ul,.book .book-body .page-wrapper .page-inner section.normal ul ol,.book .book-body .page-wrapper .page-inner section.normal ul ul{margin-top:0;margin-bottom:0}.book .book-body .page-wrapper .page-inner section.normal ol ol{list-style-type:lower-roman}.book .book-body .page-wrapper .page-inner section.normal blockquote{margin:0 0 .85em;padding:0 15px;opacity:0.75;border-left:4px solid #dcdcdc}.book .book-body .page-wrapper .page-inner section.normal blockquote:first-child{margin-top:0}.book .book-body .page-wrapper .page-inner section.normal blockquote:last-child{margin-bottom:0}.book .book-body .page-wrapper .page-inner section.normal dl{padding:0}.book .book-body .page-wrapper .page-inner section.normal dl dt{padding:0;margin-top:.85em;font-style:italic;font-weight:700}.book .book-body .page-wrapper .page-inner section.normal dl dd{padding:0 .85em;margin-bottom:.85em}.book .book-body .page-wrapper .page-inner section.normal dd{margin-left:0}.book .book-body .page-wrapper .page-inner section.normal .glossary-term{cursor:help;text-decoration:underline}.book .book-body .navigation{position:absolute;top:50px;bottom:0;margin:0;max-width:150px;min-width:90px;display:flex;justify-content:center;align-content:center;flex-direction:column;font-size:40px;color:#ccc;text-align:center;-webkit-transition:all 350ms 
ease;-moz-transition:all 350ms ease;-o-transition:all 350ms ease;transition:all 350ms ease}.book .book-body .navigation:hover{text-decoration:none;color:#444}.book .book-body .navigation.navigation-next{right:0}.book .book-body .navigation.navigation-prev{left:0}@media (max-width:1240px){.book .book-body .navigation{position:static;top:auto;max-width:50%;width:50%;display:inline-block;float:left}.book .book-body .navigation.navigation-unique{max-width:100%;width:100%}}.book .book-body .page-wrapper .page-inner section.glossary{margin-bottom:40px}.book .book-body .page-wrapper .page-inner section.glossary h2 a,.book .book-body .page-wrapper .page-inner section.glossary h2 a:hover{color:inherit;text-decoration:none}.book .book-body .page-wrapper .page-inner section.glossary .glossary-index{list-style:none;margin:0;padding:0}.book .book-body .page-wrapper .page-inner section.glossary .glossary-index li{display:inline;margin:0 8px;white-space:nowrap}*{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box;-webkit-overflow-scrolling:touch;-webkit-tap-highlight-color:transparent;-webkit-text-size-adjust:none;-webkit-touch-callout:none}a{text-decoration:none}body,html{height:100%}html{font-size:62.5%}body{text-rendering:optimizeLegibility;font-smoothing:antialiased;font-family:"Helvetica Neue",Helvetica,Arial,sans-serif;font-size:14px;letter-spacing:.2px;text-size-adjust:100%} -.book .book-summary ul.summary li a span {display:inline;padding:initial;overflow:visible;cursor:auto;opacity:1;} diff --git a/_book/libs/gitbook/js/app.min.js b/_book/libs/gitbook/js/app.min.js deleted file mode 100644 index 643f1f9..0000000 --- a/_book/libs/gitbook/js/app.min.js +++ /dev/null @@ -1 +0,0 @@ -(function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var 
l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o"'`]/g,reHasEscapedHtml=RegExp(reEscapedHtml.source),reHasUnescapedHtml=RegExp(reUnescapedHtml.source);var reEscape=/<%-([\s\S]+?)%>/g,reEvaluate=/<%([\s\S]+?)%>/g,reInterpolate=/<%=([\s\S]+?)%>/g;var reIsDeepProp=/\.|\[(?:[^[\]]*|(["'])(?:(?!\1)[^\n\\]|\\.)*?\1)\]/,reIsPlainProp=/^\w*$/,rePropName=/[^.[\]]+|\[(?:(-?\d+(?:\.\d+)?)|(["'])((?:(?!\2)[^\n\\]|\\.)*?)\2)\]/g;var reRegExpChars=/^[:!,]|[\\^$.*+?()[\]{}|\/]|(^[0-9a-fA-Fnrtuvx])|([\n\r\u2028\u2029])/g,reHasRegExpChars=RegExp(reRegExpChars.source);var reComboMark=/[\u0300-\u036f\ufe20-\ufe23]/g;var reEscapeChar=/\\(\\)?/g;var reEsTemplate=/\$\{([^\\}]*(?:\\.[^\\}]*)*)\}/g;var reFlags=/\w*$/;var reHasHexPrefix=/^0[xX]/;var reIsHostCtor=/^\[object .+?Constructor\]$/;var reIsUint=/^\d+$/;var reLatin1=/[\xc0-\xd6\xd8-\xde\xdf-\xf6\xf8-\xff]/g;var reNoMatch=/($^)/;var reUnescapedString=/['\n\r\u2028\u2029\\]/g;var reWords=function(){var upper="[A-Z\\xc0-\\xd6\\xd8-\\xde]",lower="[a-z\\xdf-\\xf6\\xf8-\\xff]+";return RegExp(upper+"+(?="+upper+lower+")|"+upper+"?"+lower+"|"+upper+"+|[0-9]+","g")}();var contextProps=["Array","ArrayBuffer","Date","Error","Float32Array","Float64Array","Function","Int8Array","Int16Array","Int32Array","Math","Number","Object","RegExp","Set","String","_","clearTimeout","isFinite","parseFloat","parseInt","setTimeout","TypeError","Uint8Array","Uint8ClampedArray","Uint16Array","Uint32Array","WeakMap"];var templateCounter=-1;var 
typedArrayTags={};typedArrayTags[float32Tag]=typedArrayTags[float64Tag]=typedArrayTags[int8Tag]=typedArrayTags[int16Tag]=typedArrayTags[int32Tag]=typedArrayTags[uint8Tag]=typedArrayTags[uint8ClampedTag]=typedArrayTags[uint16Tag]=typedArrayTags[uint32Tag]=true;typedArrayTags[argsTag]=typedArrayTags[arrayTag]=typedArrayTags[arrayBufferTag]=typedArrayTags[boolTag]=typedArrayTags[dateTag]=typedArrayTags[errorTag]=typedArrayTags[funcTag]=typedArrayTags[mapTag]=typedArrayTags[numberTag]=typedArrayTags[objectTag]=typedArrayTags[regexpTag]=typedArrayTags[setTag]=typedArrayTags[stringTag]=typedArrayTags[weakMapTag]=false;var cloneableTags={};cloneableTags[argsTag]=cloneableTags[arrayTag]=cloneableTags[arrayBufferTag]=cloneableTags[boolTag]=cloneableTags[dateTag]=cloneableTags[float32Tag]=cloneableTags[float64Tag]=cloneableTags[int8Tag]=cloneableTags[int16Tag]=cloneableTags[int32Tag]=cloneableTags[numberTag]=cloneableTags[objectTag]=cloneableTags[regexpTag]=cloneableTags[stringTag]=cloneableTags[uint8Tag]=cloneableTags[uint8ClampedTag]=cloneableTags[uint16Tag]=cloneableTags[uint32Tag]=true;cloneableTags[errorTag]=cloneableTags[funcTag]=cloneableTags[mapTag]=cloneableTags[setTag]=cloneableTags[weakMapTag]=false;var deburredLetters={"À":"A","Á":"A","Â":"A","Ã":"A","Ä":"A","Å":"A","à":"a","á":"a","â":"a","ã":"a","ä":"a","å":"a","Ç":"C","ç":"c","Ð":"D","ð":"d","È":"E","É":"E","Ê":"E","Ë":"E","è":"e","é":"e","ê":"e","ë":"e","Ì":"I","Í":"I","Î":"I","Ï":"I","ì":"i","í":"i","î":"i","ï":"i","Ñ":"N","ñ":"n","Ò":"O","Ó":"O","Ô":"O","Õ":"O","Ö":"O","Ø":"O","ò":"o","ó":"o","ô":"o","õ":"o","ö":"o","ø":"o","Ù":"U","Ú":"U","Û":"U","Ü":"U","ù":"u","ú":"u","û":"u","ü":"u","Ý":"Y","ý":"y","ÿ":"y","Æ":"Ae","æ":"ae","Þ":"Th","þ":"th","ß":"ss"};var htmlEscapes={"&":"&","<":"<",">":">",'"':""","'":"'","`":"`"};var htmlUnescapes={"&":"&","<":"<",">":">",""":'"',"'":"'","`":"`"};var objectTypes={function:true,object:true};var 
regexpEscapes={0:"x30",1:"x31",2:"x32",3:"x33",4:"x34",5:"x35",6:"x36",7:"x37",8:"x38",9:"x39",A:"x41",B:"x42",C:"x43",D:"x44",E:"x45",F:"x46",a:"x61",b:"x62",c:"x63",d:"x64",e:"x65",f:"x66",n:"x6e",r:"x72",t:"x74",u:"x75",v:"x76",x:"x78"};var stringEscapes={"\\":"\\","'":"'","\n":"n","\r":"r","\u2028":"u2028","\u2029":"u2029"};var freeExports=objectTypes[typeof exports]&&exports&&!exports.nodeType&&exports;var freeModule=objectTypes[typeof module]&&module&&!module.nodeType&&module;var freeGlobal=freeExports&&freeModule&&typeof global=="object"&&global&&global.Object&&global;var freeSelf=objectTypes[typeof self]&&self&&self.Object&&self;var freeWindow=objectTypes[typeof window]&&window&&window.Object&&window;var moduleExports=freeModule&&freeModule.exports===freeExports&&freeExports;var root=freeGlobal||freeWindow!==(this&&this.window)&&freeWindow||freeSelf||this;function baseCompareAscending(value,other){if(value!==other){var valIsNull=value===null,valIsUndef=value===undefined,valIsReflexive=value===value;var othIsNull=other===null,othIsUndef=other===undefined,othIsReflexive=other===other;if(value>other&&!othIsNull||!valIsReflexive||valIsNull&&!othIsUndef&&othIsReflexive||valIsUndef&&othIsReflexive){return 1}if(value-1){}return index}function charsRightIndex(string,chars){var index=string.length;while(index--&&chars.indexOf(string.charAt(index))>-1){}return index}function compareAscending(object,other){return baseCompareAscending(object.criteria,other.criteria)||object.index-other.index}function compareMultiple(object,other,orders){var index=-1,objCriteria=object.criteria,othCriteria=other.criteria,length=objCriteria.length,ordersLength=orders.length;while(++index=ordersLength){return result}var order=orders[index];return result*(order==="asc"||order===true?1:-1)}}return object.index-other.index}function deburrLetter(letter){return deburredLetters[letter]}function escapeHtmlChar(chr){return htmlEscapes[chr]}function 
escapeRegExpChar(chr,leadingChar,whitespaceChar){if(leadingChar){chr=regexpEscapes[chr]}else if(whitespaceChar){chr=stringEscapes[chr]}return"\\"+chr}function escapeStringChar(chr){return"\\"+stringEscapes[chr]}function indexOfNaN(array,fromIndex,fromRight){var length=array.length,index=fromIndex+(fromRight?0:-1);while(fromRight?index--:++index=9&&charCode<=13)||charCode==32||charCode==160||charCode==5760||charCode==6158||charCode>=8192&&(charCode<=8202||charCode==8232||charCode==8233||charCode==8239||charCode==8287||charCode==12288||charCode==65279)}function replaceHolders(array,placeholder){var index=-1,length=array.length,resIndex=-1,result=[];while(++index>>1;var MAX_SAFE_INTEGER=9007199254740991;var metaMap=WeakMap&&new WeakMap;var realNames={};function lodash(value){if(isObjectLike(value)&&!isArray(value)&&!(value instanceof LazyWrapper)){if(value instanceof LodashWrapper){return value}if(hasOwnProperty.call(value,"__chain__")&&hasOwnProperty.call(value,"__wrapped__")){return wrapperClone(value)}}return new LodashWrapper(value)}function baseLodash(){}function LodashWrapper(value,chainAll,actions){this.__wrapped__=value;this.__actions__=actions||[];this.__chain__=!!chainAll}var support=lodash.support={};lodash.templateSettings={escape:reEscape,evaluate:reEvaluate,interpolate:reInterpolate,variable:"",imports:{_:lodash}};function LazyWrapper(value){this.__wrapped__=value;this.__actions__=[];this.__dir__=1;this.__filtered__=false;this.__iteratees__=[];this.__takeCount__=POSITIVE_INFINITY;this.__views__=[]}function lazyClone(){var result=new LazyWrapper(this.__wrapped__);result.__actions__=arrayCopy(this.__actions__);result.__dir__=this.__dir__;result.__filtered__=this.__filtered__;result.__iteratees__=arrayCopy(this.__iteratees__);result.__takeCount__=this.__takeCount__;result.__views__=arrayCopy(this.__views__);return result}function lazyReverse(){if(this.__filtered__){var result=new 
LazyWrapper(this);result.__dir__=-1;result.__filtered__=true}else{result=this.clone();result.__dir__*=-1}return result}function lazyValue(){var array=this.__wrapped__.value(),dir=this.__dir__,isArr=isArray(array),isRight=dir<0,arrLength=isArr?array.length:0,view=getView(0,arrLength,this.__views__),start=view.start,end=view.end,length=end-start,index=isRight?end:start-1,iteratees=this.__iteratees__,iterLength=iteratees.length,resIndex=0,takeCount=nativeMin(length,this.__takeCount__);if(!isArr||arrLength=LARGE_ARRAY_SIZE?createCache(values):null,valuesLength=values.length;if(cache){indexOf=cacheIndexOf;isCommon=false;values=cache}outer:while(++indexlength?0:length+start}end=end===undefined||end>length?length:+end||0;if(end<0){end+=length}length=start>end?0:end>>>0;start>>>=0;while(startlength?0:length+start}end=end===undefined||end>length?length:+end||0;if(end<0){end+=length}length=start>end?0:end-start>>>0;start>>>=0;var result=Array(length);while(++index=LARGE_ARRAY_SIZE,seen=isLarge?createCache():null,result=[];if(seen){indexOf=cacheIndexOf;isCommon=false}else{isLarge=false;seen=iteratee?[]:result}outer:while(++index>>1,computed=array[mid];if((retHighest?computed<=value:computed2?sources[length-2]:undefined,guard=length>2?sources[2]:undefined,thisArg=length>1?sources[length-1]:undefined;if(typeof customizer=="function"){customizer=bindCallback(customizer,thisArg,5);length-=2}else{customizer=typeof thisArg=="function"?thisArg:undefined;length-=customizer?1:0}if(guard&&isIterateeCall(sources[0],sources[1],guard)){customizer=length<3?undefined:customizer;length=1}while(++index-1?collection[index]:undefined}return baseFind(collection,predicate,eachFunc)}}function createFindIndex(fromRight){return function(array,predicate,thisArg){if(!(array&&array.length)){return-1}predicate=getCallback(predicate,thisArg,3);return baseFindIndex(array,predicate,fromRight)}}function createFindKey(objectFunc){return 
function(object,predicate,thisArg){predicate=getCallback(predicate,thisArg,3);return baseFind(object,predicate,objectFunc,true)}}function createFlow(fromRight){return function(){var wrapper,length=arguments.length,index=fromRight?length:-1,leftIndex=0,funcs=Array(length);while(fromRight?index--:++index=LARGE_ARRAY_SIZE){return wrapper.plant(value).value()}var index=0,result=length?funcs[index].apply(this,args):value;while(++index=length||!nativeIsFinite(length)){return""}var padLength=length-strLength;chars=chars==null?" ":chars+"";return repeat(chars,nativeCeil(padLength/chars.length)).slice(0,padLength)}function createPartialWrapper(func,bitmask,thisArg,partials){var isBind=bitmask&BIND_FLAG,Ctor=createCtorWrapper(func);function wrapper(){var argsIndex=-1,argsLength=arguments.length,leftIndex=-1,leftLength=partials.length,args=Array(leftLength+argsLength);while(++leftIndexarrLength)){return false}while(++index-1&&value%1==0&&value-1&&value%1==0&&value<=MAX_SAFE_INTEGER}function isStrictComparable(value){return value===value&&!isObject(value)}function mergeData(data,source){var bitmask=data[1],srcBitmask=source[1],newBitmask=bitmask|srcBitmask,isCommon=newBitmask0){if(++count>=HOT_COUNT){return key}}else{count=0}return baseSetData(key,value)}}();function shimKeys(object){var props=keysIn(object),propsLength=props.length,length=propsLength&&object.length;var allowIndexes=!!length&&isLength(length)&&(isArray(object)||isArguments(object));var index=-1,result=[];while(++index=120?createCache(othIndex&&value):null}var array=arrays[0],index=-1,length=array?array.length:0,seen=caches[0];outer:while(++index-1){splice.call(array,fromIndex,1)}}return array}var pullAt=restParam(function(array,indexes){indexes=baseFlatten(indexes);var result=baseAt(array,indexes);basePullAt(array,indexes.sort(baseCompareAscending));return result});function remove(array,predicate,thisArg){var result=[];if(!(array&&array.length)){return result}var 
index=-1,indexes=[],length=array.length;predicate=getCallback(predicate,thisArg,3);while(++index2?arrays[length-2]:undefined,thisArg=length>1?arrays[length-1]:undefined;if(length>2&&typeof iteratee=="function"){length-=2}else{iteratee=length>1&&typeof thisArg=="function"?(--length,thisArg):undefined;thisArg=undefined}arrays.length=length;return unzipWith(arrays,iteratee,thisArg)});function chain(value){var result=lodash(value);result.__chain__=true;return result}function tap(value,interceptor,thisArg){interceptor.call(thisArg,value);return value}function thru(value,interceptor,thisArg){return interceptor.call(thisArg,value)}function wrapperChain(){return chain(this)}function wrapperCommit(){return new LodashWrapper(this.value(),this.__chain__)}var wrapperConcat=restParam(function(values){values=baseFlatten(values);return this.thru(function(array){return arrayConcat(isArray(array)?array:[toObject(array)],values)})});function wrapperPlant(value){var result,parent=this;while(parent instanceof baseLodash){var clone=wrapperClone(parent);if(result){previous.__wrapped__=clone}else{result=clone}var previous=clone;parent=parent.__wrapped__}previous.__wrapped__=value;return result}function wrapperReverse(){var value=this.__wrapped__;var interceptor=function(value){return wrapped&&wrapped.__dir__<0?value:value.reverse()};if(value instanceof LazyWrapper){var wrapped=value;if(this.__actions__.length){wrapped=new LazyWrapper(this)}wrapped=wrapped.reverse();wrapped.__actions__.push({func:thru,args:[interceptor],thisArg:undefined});return new LodashWrapper(wrapped,this.__chain__)}return this.thru(interceptor)}function wrapperToString(){return this.value()+""}function wrapperValue(){return baseWrapperValue(this.__wrapped__,this.__actions__)}var at=restParam(function(collection,props){return baseAt(collection,baseFlatten(props))});var countBy=createAggregator(function(result,value,key){hasOwnProperty.call(result,key)?++result[key]:result[key]=1});function 
every(collection,predicate,thisArg){var func=isArray(collection)?arrayEvery:baseEvery;if(thisArg&&isIterateeCall(collection,predicate,thisArg)){predicate=undefined}if(typeof predicate!="function"||thisArg!==undefined){predicate=getCallback(predicate,thisArg,3)}return func(collection,predicate)}function filter(collection,predicate,thisArg){var func=isArray(collection)?arrayFilter:baseFilter;predicate=getCallback(predicate,thisArg,3);return func(collection,predicate)}var find=createFind(baseEach);var findLast=createFind(baseEachRight,true);function findWhere(collection,source){return find(collection,baseMatches(source))}var forEach=createForEach(arrayEach,baseEach);var forEachRight=createForEach(arrayEachRight,baseEachRight);var groupBy=createAggregator(function(result,value,key){if(hasOwnProperty.call(result,key)){result[key].push(value)}else{result[key]=[value]}});function includes(collection,target,fromIndex,guard){var length=collection?getLength(collection):0;if(!isLength(length)){collection=values(collection);length=collection.length}if(typeof fromIndex!="number"||guard&&isIterateeCall(target,fromIndex,guard)){fromIndex=0}else{fromIndex=fromIndex<0?nativeMax(length+fromIndex,0):fromIndex||0}return typeof collection=="string"||!isArray(collection)&&isString(collection)?fromIndex<=length&&collection.indexOf(target,fromIndex)>-1:!!length&&getIndexOf(collection,target,fromIndex)>-1}var indexBy=createAggregator(function(result,value,key){result[key]=value});var invoke=restParam(function(collection,path,args){var index=-1,isFunc=typeof path=="function",isProp=isKey(path),result=isArrayLike(collection)?Array(collection.length):[];baseEach(collection,function(value){var func=isFunc?path:isProp&&value!=null?value[path]:undefined;result[++index]=func?func.apply(value,args):invokePath(value,path,args)});return result});function map(collection,iteratee,thisArg){var func=isArray(collection)?arrayMap:baseMap;iteratee=getCallback(iteratee,thisArg,3);return 
func(collection,iteratee)}var partition=createAggregator(function(result,value,key){result[key?0:1].push(value)},function(){return[[],[]]});function pluck(collection,path){return map(collection,property(path))}var reduce=createReduce(arrayReduce,baseEach);var reduceRight=createReduce(arrayReduceRight,baseEachRight);function reject(collection,predicate,thisArg){var func=isArray(collection)?arrayFilter:baseFilter;predicate=getCallback(predicate,thisArg,3);return func(collection,function(value,index,collection){return!predicate(value,index,collection)})}function sample(collection,n,guard){if(guard?isIterateeCall(collection,n,guard):n==null){collection=toIterable(collection);var length=collection.length;return length>0?collection[baseRandom(0,length-1)]:undefined}var index=-1,result=toArray(collection),length=result.length,lastIndex=length-1;n=nativeMin(n<0?0:+n||0,length);while(++index0){result=func.apply(this,arguments)}if(n<=1){func=undefined}return result}}var bind=restParam(function(func,thisArg,partials){var bitmask=BIND_FLAG;if(partials.length){var holders=replaceHolders(partials,bind.placeholder);bitmask|=PARTIAL_FLAG}return createWrapper(func,bitmask,thisArg,partials,holders)});var bindAll=restParam(function(object,methodNames){methodNames=methodNames.length?baseFlatten(methodNames):functions(object);var index=-1,length=methodNames.length;while(++indexwait){complete(trailingCall,maxTimeoutId)}else{timeoutId=setTimeout(delayed,remaining)}}function maxDelayed(){complete(trailing,timeoutId)}function debounced(){args=arguments;stamp=now();thisArg=this;trailingCall=trailing&&(timeoutId||!leading);if(maxWait===false){var leadingCall=leading&&!timeoutId}else{if(!maxTimeoutId&&!leading){lastCalled=stamp}var remaining=maxWait-(stamp-lastCalled),isCalled=remaining<=0||remaining>maxWait;if(isCalled){if(maxTimeoutId){maxTimeoutId=clearTimeout(maxTimeoutId)}lastCalled=stamp;result=func.apply(thisArg,args)}else 
if(!maxTimeoutId){maxTimeoutId=setTimeout(maxDelayed,remaining)}}if(isCalled&&timeoutId){timeoutId=clearTimeout(timeoutId)}else if(!timeoutId&&wait!==maxWait){timeoutId=setTimeout(delayed,wait)}if(leadingCall){isCalled=true;result=func.apply(thisArg,args)}if(isCalled&&!timeoutId&&!maxTimeoutId){args=thisArg=undefined}return result}debounced.cancel=cancel;return debounced}var defer=restParam(function(func,args){return baseDelay(func,1,args)});var delay=restParam(function(func,wait,args){return baseDelay(func,wait,args)});var flow=createFlow();var flowRight=createFlow(true);function memoize(func,resolver){if(typeof func!="function"||resolver&&typeof resolver!="function"){throw new TypeError(FUNC_ERROR_TEXT)}var memoized=function(){var args=arguments,key=resolver?resolver.apply(this,args):args[0],cache=memoized.cache;if(cache.has(key)){return cache.get(key)}var result=func.apply(this,args);memoized.cache=cache.set(key,result);return result};memoized.cache=new memoize.Cache;return memoized}var modArgs=restParam(function(func,transforms){transforms=baseFlatten(transforms);if(typeof func!="function"||!arrayEvery(transforms,baseIsFunction)){throw new TypeError(FUNC_ERROR_TEXT)}var length=transforms.length;return restParam(function(args){var index=nativeMin(args.length,length);while(index--){args[index]=transforms[index](args[index])}return func.apply(this,args)})});function negate(predicate){if(typeof predicate!="function"){throw new TypeError(FUNC_ERROR_TEXT)}return function(){return!predicate.apply(this,arguments)}}function once(func){return before(2,func)}var partial=createPartial(PARTIAL_FLAG);var partialRight=createPartial(PARTIAL_RIGHT_FLAG);var rearg=restParam(function(func,indexes){return createWrapper(func,REARG_FLAG,undefined,undefined,undefined,baseFlatten(indexes))});function restParam(func,start){if(typeof func!="function"){throw new TypeError(FUNC_ERROR_TEXT)}start=nativeMax(start===undefined?func.length-1:+start||0,0);return function(){var 
args=arguments,index=-1,length=nativeMax(args.length-start,0),rest=Array(length);while(++indexother}function gte(value,other){return value>=other}function isArguments(value){return isObjectLike(value)&&isArrayLike(value)&&hasOwnProperty.call(value,"callee")&&!propertyIsEnumerable.call(value,"callee")}var isArray=nativeIsArray||function(value){return isObjectLike(value)&&isLength(value.length)&&objToString.call(value)==arrayTag};function isBoolean(value){return value===true||value===false||isObjectLike(value)&&objToString.call(value)==boolTag}function isDate(value){return isObjectLike(value)&&objToString.call(value)==dateTag}function isElement(value){return!!value&&value.nodeType===1&&isObjectLike(value)&&!isPlainObject(value)}function isEmpty(value){if(value==null){return true}if(isArrayLike(value)&&(isArray(value)||isString(value)||isArguments(value)||isObjectLike(value)&&isFunction(value.splice))){return!value.length}return!keys(value).length}function isEqual(value,other,customizer,thisArg){customizer=typeof customizer=="function"?bindCallback(customizer,thisArg,3):undefined;var result=customizer?customizer(value,other):undefined;return result===undefined?baseIsEqual(value,other,customizer):!!result}function isError(value){return isObjectLike(value)&&typeof value.message=="string"&&objToString.call(value)==errorTag}function isFinite(value){return typeof value=="number"&&nativeIsFinite(value)}function isFunction(value){return isObject(value)&&objToString.call(value)==funcTag}function isObject(value){var type=typeof value;return!!value&&(type=="object"||type=="function")}function isMatch(object,source,customizer,thisArg){customizer=typeof customizer=="function"?bindCallback(customizer,thisArg,3):undefined;return baseIsMatch(object,getMatchData(source),customizer)}function isNaN(value){return isNumber(value)&&value!=+value}function isNative(value){if(value==null){return false}if(isFunction(value)){return reIsNative.test(fnToString.call(value))}return 
isObjectLike(value)&&reIsHostCtor.test(value)}function isNull(value){return value===null}function isNumber(value){return typeof value=="number"||isObjectLike(value)&&objToString.call(value)==numberTag}function isPlainObject(value){var Ctor;if(!(isObjectLike(value)&&objToString.call(value)==objectTag&&!isArguments(value))||!hasOwnProperty.call(value,"constructor")&&(Ctor=value.constructor,typeof Ctor=="function"&&!(Ctor instanceof Ctor))){return false}var result;baseForIn(value,function(subValue,key){result=key});return result===undefined||hasOwnProperty.call(value,result)}function isRegExp(value){return isObject(value)&&objToString.call(value)==regexpTag}function isString(value){return typeof value=="string"||isObjectLike(value)&&objToString.call(value)==stringTag}function isTypedArray(value){return isObjectLike(value)&&isLength(value.length)&&!!typedArrayTags[objToString.call(value)]}function isUndefined(value){return value===undefined}function lt(value,other){return value0;while(++index=nativeMin(start,end)&&value=0&&string.indexOf(target,position)==position}function escape(string){string=baseToString(string);return string&&reHasUnescapedHtml.test(string)?string.replace(reUnescapedHtml,escapeHtmlChar):string}function escapeRegExp(string){string=baseToString(string);return string&&reHasRegExpChars.test(string)?string.replace(reRegExpChars,escapeRegExpChar):string||"(?:)"}var kebabCase=createCompounder(function(result,word,index){return result+(index?"-":"")+word.toLowerCase()});function pad(string,length,chars){string=baseToString(string);length=+length;var strLength=string.length;if(strLength>=length||!nativeIsFinite(length)){return string}var mid=(length-strLength)/2,leftLength=nativeFloor(mid),rightLength=nativeCeil(mid);chars=createPadding("",rightLength,chars);return chars.slice(0,leftLength)+string+chars}var padLeft=createPadDir();var padRight=createPadDir(true);function 
parseInt(string,radix,guard){if(guard?isIterateeCall(string,radix,guard):radix==null){radix=0}else if(radix){radix=+radix}string=trim(string);return nativeParseInt(string,radix||(reHasHexPrefix.test(string)?16:10))}function repeat(string,n){var result="";string=baseToString(string);n=+n;if(n<1||!string||!nativeIsFinite(n)){return result}do{if(n%2){result+=string}n=nativeFloor(n/2);string+=string}while(n);return result}var snakeCase=createCompounder(function(result,word,index){return result+(index?"_":"")+word.toLowerCase()});var startCase=createCompounder(function(result,word,index){return result+(index?" ":"")+(word.charAt(0).toUpperCase()+word.slice(1))});function startsWith(string,target,position){string=baseToString(string);position=position==null?0:nativeMin(position<0?0:+position||0,string.length);return string.lastIndexOf(target,position)==position}function template(string,options,otherOptions){var settings=lodash.templateSettings;if(otherOptions&&isIterateeCall(string,options,otherOptions)){options=otherOptions=undefined}string=baseToString(string);options=assignWith(baseAssign({},otherOptions||options),settings,assignOwnDefaults);var imports=assignWith(baseAssign({},options.imports),settings.imports,assignOwnDefaults),importsKeys=keys(imports),importsValues=baseValues(imports,importsKeys);var isEscaping,isEvaluating,index=0,interpolate=options.interpolate||reNoMatch,source="__p += '";var reDelimiters=RegExp((options.escape||reNoMatch).source+"|"+interpolate.source+"|"+(interpolate===reInterpolate?reEsTemplate:reNoMatch).source+"|"+(options.evaluate||reNoMatch).source+"|$","g");var sourceURL="//# sourceURL="+("sourceURL"in options?options.sourceURL:"lodash.templateSources["+ 
++templateCounter+"]")+"\n";string.replace(reDelimiters,function(match,escapeValue,interpolateValue,esTemplateValue,evaluateValue,offset){interpolateValue||(interpolateValue=esTemplateValue);source+=string.slice(index,offset).replace(reUnescapedString,escapeStringChar);if(escapeValue){isEscaping=true;source+="' +\n__e("+escapeValue+") +\n'"}if(evaluateValue){isEvaluating=true;source+="';\n"+evaluateValue+";\n__p += '"}if(interpolateValue){source+="' +\n((__t = ("+interpolateValue+")) == null ? '' : __t) +\n'"}index=offset+match.length;return match});source+="';\n";var variable=options.variable;if(!variable){source="with (obj) {\n"+source+"\n}\n"}source=(isEvaluating?source.replace(reEmptyStringLeading,""):source).replace(reEmptyStringMiddle,"$1").replace(reEmptyStringTrailing,"$1;");source="function("+(variable||"obj")+") {\n"+(variable?"":"obj || (obj = {});\n")+"var __t, __p = ''"+(isEscaping?", __e = _.escape":"")+(isEvaluating?", __j = Array.prototype.join;\n"+"function print() { __p += __j.call(arguments, '') }\n":";\n")+source+"return __p\n}";var result=attempt(function(){return Function(importsKeys,sourceURL+"return "+source).apply(undefined,importsValues)});result.source=source;if(isError(result)){throw result}return result}function trim(string,chars,guard){var value=string;string=baseToString(string);if(!string){return string}if(guard?isIterateeCall(value,chars,guard):chars==null){return string.slice(trimmedLeftIndex(string),trimmedRightIndex(string)+1)}chars=chars+"";return string.slice(charsLeftIndex(string,chars),charsRightIndex(string,chars)+1)}function trimLeft(string,chars,guard){var value=string;string=baseToString(string);if(!string){return string}if(guard?isIterateeCall(value,chars,guard):chars==null){return string.slice(trimmedLeftIndex(string))}return string.slice(charsLeftIndex(string,chars+""))}function trimRight(string,chars,guard){var value=string;string=baseToString(string);if(!string){return 
string}if(guard?isIterateeCall(value,chars,guard):chars==null){return string.slice(0,trimmedRightIndex(string)+1)}return string.slice(0,charsRightIndex(string,chars+"")+1)}function trunc(string,options,guard){if(guard&&isIterateeCall(string,options,guard)){options=undefined}var length=DEFAULT_TRUNC_LENGTH,omission=DEFAULT_TRUNC_OMISSION;if(options!=null){if(isObject(options)){var separator="separator"in options?options.separator:separator;length="length"in options?+options.length||0:length;omission="omission"in options?baseToString(options.omission):omission}else{length=+options||0}}string=baseToString(string);if(length>=string.length){return string}var end=length-omission.length;if(end<1){return omission}var result=string.slice(0,end);if(separator==null){return result+omission}if(isRegExp(separator)){if(string.slice(end).search(separator)){var match,newEnd,substring=string.slice(0,end);if(!separator.global){separator=RegExp(separator.source,(reFlags.exec(separator)||"")+"g")}separator.lastIndex=0;while(match=separator.exec(substring)){newEnd=match.index}result=result.slice(0,newEnd==null?end:newEnd)}}else if(string.indexOf(separator,end)!=end){var index=result.lastIndexOf(separator);if(index>-1){result=result.slice(0,index)}}return result+omission}function unescape(string){string=baseToString(string);return string&&reHasEscapedHtml.test(string)?string.replace(reEscapedHtml,unescapeHtmlChar):string}function words(string,pattern,guard){if(guard&&isIterateeCall(string,pattern,guard)){pattern=undefined}string=baseToString(string);return string.match(pattern||reWords)||[]}var attempt=restParam(function(func,args){try{return func.apply(undefined,args)}catch(e){return isError(e)?e:new Error(e)}});function callback(func,thisArg,guard){if(guard&&isIterateeCall(func,thisArg,guard)){thisArg=undefined}return isObjectLike(func)?matches(func):baseCallback(func,thisArg)}function constant(value){return function(){return value}}function identity(value){return value}function 
matches(source){return baseMatches(baseClone(source,true))}function matchesProperty(path,srcValue){return baseMatchesProperty(path,baseClone(srcValue,true))}var method=restParam(function(path,args){return function(object){return invokePath(object,path,args)}});var methodOf=restParam(function(object,args){return function(path){return invokePath(object,path,args)}});function mixin(object,source,options){if(options==null){var isObj=isObject(source),props=isObj?keys(source):undefined,methodNames=props&&props.length?baseFunctions(source,props):undefined;if(!(methodNames?methodNames.length:isObj)){methodNames=false;options=source;source=object;object=this}}if(!methodNames){methodNames=baseFunctions(source,keys(source))}var chain=true,index=-1,isFunc=isFunction(object),length=methodNames.length;if(options===false){chain=false}else if(isObject(options)&&"chain"in options){chain=options.chain}while(++index0||end<0)){return new LazyWrapper(result)}if(start<0){result=result.takeRight(-start)}else if(start){result=result.drop(start)}if(end!==undefined){end=+end||0;result=end<0?result.dropRight(-end):result.take(end-start)}return result};LazyWrapper.prototype.takeRightWhile=function(predicate,thisArg){return this.reverse().takeWhile(predicate,thisArg).reverse()};LazyWrapper.prototype.toArray=function(){return this.take(POSITIVE_INFINITY)};baseForOwn(LazyWrapper.prototype,function(func,methodName){var checkIteratee=/^(?:filter|map|reject)|While$/.test(methodName),retUnwrapped=/^(?:first|last)$/.test(methodName),lodashFunc=lodash[retUnwrapped?"take"+(methodName=="last"?"Right":""):methodName];if(!lodashFunc){return}lodash.prototype[methodName]=function(){var args=retUnwrapped?[1]:arguments,chainAll=this.__chain__,value=this.__wrapped__,isHybrid=!!this.__actions__.length,isLazy=value instanceof LazyWrapper,iteratee=args[0],useLazy=isLazy||isArray(value);if(useLazy&&checkIteratee&&typeof iteratee=="function"&&iteratee.length!=1){isLazy=useLazy=false}var 
interceptor=function(value){return retUnwrapped&&chainAll?lodashFunc(value,1)[0]:lodashFunc.apply(undefined,arrayPush([value],args))};var action={func:thru,args:[interceptor],thisArg:undefined},onlyLazy=isLazy&&!isHybrid;if(retUnwrapped&&!chainAll){if(onlyLazy){value=value.clone();value.__actions__.push(action);return func.call(value)}return lodashFunc.call(undefined,this.value())[0]}if(!retUnwrapped&&useLazy){value=onlyLazy?value:new LazyWrapper(this);var result=func.apply(value,args);result.__actions__.push(action);return new LodashWrapper(result,chainAll)}return this.thru(interceptor)}});arrayEach(["join","pop","push","replace","shift","sort","splice","split","unshift"],function(methodName){var func=(/^(?:replace|split)$/.test(methodName)?stringProto:arrayProto)[methodName],chainName=/^(?:push|sort|unshift)$/.test(methodName)?"tap":"thru",retUnwrapped=/^(?:join|pop|replace|shift)$/.test(methodName);lodash.prototype[methodName]=function(){var args=arguments;if(retUnwrapped&&!this.__chain__){return func.apply(this.value(),args)}return this[chainName](function(value){return func.apply(value,args)})}});baseForOwn(LazyWrapper.prototype,function(func,methodName){var lodashFunc=lodash[methodName];if(lodashFunc){var 
key=lodashFunc.name,names=realNames[key]||(realNames[key]=[]);names.push({name:methodName,func:lodashFunc})}});realNames[createHybridWrapper(undefined,BIND_KEY_FLAG).name]=[{name:"wrapper",func:undefined}];LazyWrapper.prototype.clone=lazyClone;LazyWrapper.prototype.reverse=lazyReverse;LazyWrapper.prototype.value=lazyValue;lodash.prototype.chain=wrapperChain;lodash.prototype.commit=wrapperCommit;lodash.prototype.concat=wrapperConcat;lodash.prototype.plant=wrapperPlant;lodash.prototype.reverse=wrapperReverse;lodash.prototype.toString=wrapperToString;lodash.prototype.run=lodash.prototype.toJSON=lodash.prototype.valueOf=lodash.prototype.value=wrapperValue;lodash.prototype.collect=lodash.prototype.map;lodash.prototype.head=lodash.prototype.first;lodash.prototype.select=lodash.prototype.filter;lodash.prototype.tail=lodash.prototype.rest;return lodash}var _=runInContext();if(typeof define=="function"&&typeof define.amd=="object"&&define.amd){root._=_;define(function(){return _})}else if(freeExports&&freeModule){if(moduleExports){(freeModule.exports=_)._=_}else{freeExports._=_}}else{root._=_}}).call(this)}).call(this,typeof global!=="undefined"?global:typeof self!=="undefined"?self:typeof window!=="undefined"?window:{})},{}],3:[function(require,module,exports){(function(window,document,undefined){var _MAP={8:"backspace",9:"tab",13:"enter",16:"shift",17:"ctrl",18:"alt",20:"capslock",27:"esc",32:"space",33:"pageup",34:"pagedown",35:"end",36:"home",37:"left",38:"up",39:"right",40:"down",45:"ins",46:"del",91:"meta",93:"meta",224:"meta"};var _KEYCODE_MAP={106:"*",107:"+",109:"-",110:".",111:"/",186:";",187:"=",188:",",189:"-",190:".",191:"/",192:"`",219:"[",220:"\\",221:"]",222:"'"};var _SHIFT_MAP={"~":"`","!":"1","@":"2","#":"3",$:"4","%":"5","^":"6","&":"7","*":"8","(":"9",")":"0",_:"-","+":"=",":":";",'"':"'","<":",",">":".","?":"/","|":"\\"};var 
_SPECIAL_ALIASES={option:"alt",command:"meta",return:"enter",escape:"esc",plus:"+",mod:/Mac|iPod|iPhone|iPad/.test(navigator.platform)?"meta":"ctrl"};var _REVERSE_MAP;for(var i=1;i<20;++i){_MAP[111+i]="f"+i}for(i=0;i<=9;++i){_MAP[i+96]=i}function _addEvent(object,type,callback){if(object.addEventListener){object.addEventListener(type,callback,false);return}object.attachEvent("on"+type,callback)}function _characterFromEvent(e){if(e.type=="keypress"){var character=String.fromCharCode(e.which);if(!e.shiftKey){character=character.toLowerCase()}return character}if(_MAP[e.which]){return _MAP[e.which]}if(_KEYCODE_MAP[e.which]){return _KEYCODE_MAP[e.which]}return String.fromCharCode(e.which).toLowerCase()}function _modifiersMatch(modifiers1,modifiers2){return modifiers1.sort().join(",")===modifiers2.sort().join(",")}function _eventModifiers(e){var modifiers=[];if(e.shiftKey){modifiers.push("shift")}if(e.altKey){modifiers.push("alt")}if(e.ctrlKey){modifiers.push("ctrl")}if(e.metaKey){modifiers.push("meta")}return modifiers}function _preventDefault(e){if(e.preventDefault){e.preventDefault();return}e.returnValue=false}function _stopPropagation(e){if(e.stopPropagation){e.stopPropagation();return}e.cancelBubble=true}function _isModifier(key){return key=="shift"||key=="ctrl"||key=="alt"||key=="meta"}function _getReverseMap(){if(!_REVERSE_MAP){_REVERSE_MAP={};for(var key in _MAP){if(key>95&&key<112){continue}if(_MAP.hasOwnProperty(key)){_REVERSE_MAP[_MAP[key]]=key}}}return _REVERSE_MAP}function _pickBestAction(key,modifiers,action){if(!action){action=_getReverseMap()[key]?"keydown":"keypress"}if(action=="keypress"&&modifiers.length){action="keydown"}return action}function _keysFromString(combination){if(combination==="+"){return["+"]}combination=combination.replace(/\+{2}/g,"+plus");return combination.split("+")}function _getKeyInfo(combination,action){var keys;var key;var i;var 
modifiers=[];keys=_keysFromString(combination);for(i=0;i1){_bindSequence(combination,sequence,callback,action);return}info=_getKeyInfo(combination,action);self._callbacks[info.key]=self._callbacks[info.key]||[];_getMatches(info.key,info.modifiers,{type:info.action},sequenceName,combination,level);self._callbacks[info.key][sequenceName?"unshift":"push"]({callback:callback,modifiers:info.modifiers,action:info.action,seq:sequenceName,level:level,combo:combination})}self._bindMultiple=function(combinations,callback,action){for(var i=0;i-1){return false}if(_belongsTo(element,self.target)){return false}return element.tagName=="INPUT"||element.tagName=="SELECT"||element.tagName=="TEXTAREA"||element.isContentEditable};Mousetrap.prototype.handleKey=function(){var self=this;return self._handleKey.apply(self,arguments)};Mousetrap.init=function(){var documentMousetrap=Mousetrap(document);for(var method in documentMousetrap){if(method.charAt(0)!=="_"){Mousetrap[method]=function(method){return function(){return documentMousetrap[method].apply(documentMousetrap,arguments)}}(method)}}};Mousetrap.init();window.Mousetrap=Mousetrap;if(typeof module!=="undefined"&&module.exports){module.exports=Mousetrap}if(typeof define==="function"&&define.amd){define(function(){return Mousetrap})}})(window,document)},{}],4:[function(require,module,exports){(function(process){function normalizeArray(parts,allowAboveRoot){var up=0;for(var i=parts.length-1;i>=0;i--){var last=parts[i];if(last==="."){parts.splice(i,1)}else if(last===".."){parts.splice(i,1);up++}else if(up){parts.splice(i,1);up--}}if(allowAboveRoot){for(;up--;up){parts.unshift("..")}}return parts}var splitPathRe=/^(\/?|)([\s\S]*?)((?:\.{1,2}|[^\/]+?|)(\.[^.\/]*|))(?:[\/]*)$/;var splitPath=function(filename){return splitPathRe.exec(filename).slice(1)};exports.resolve=function(){var resolvedPath="",resolvedAbsolute=false;for(var i=arguments.length-1;i>=-1&&!resolvedAbsolute;i--){var path=i>=0?arguments[i]:process.cwd();if(typeof 
path!=="string"){throw new TypeError("Arguments to path.resolve must be strings")}else if(!path){continue}resolvedPath=path+"/"+resolvedPath;resolvedAbsolute=path.charAt(0)==="/"}resolvedPath=normalizeArray(filter(resolvedPath.split("/"),function(p){return!!p}),!resolvedAbsolute).join("/");return(resolvedAbsolute?"/":"")+resolvedPath||"."};exports.normalize=function(path){var isAbsolute=exports.isAbsolute(path),trailingSlash=substr(path,-1)==="/";path=normalizeArray(filter(path.split("/"),function(p){return!!p}),!isAbsolute).join("/");if(!path&&!isAbsolute){path="."}if(path&&trailingSlash){path+="/"}return(isAbsolute?"/":"")+path};exports.isAbsolute=function(path){return path.charAt(0)==="/"};exports.join=function(){var paths=Array.prototype.slice.call(arguments,0);return exports.normalize(filter(paths,function(p,index){if(typeof p!=="string"){throw new TypeError("Arguments to path.join must be strings")}return p}).join("/"))};exports.relative=function(from,to){from=exports.resolve(from).substr(1);to=exports.resolve(to).substr(1);function trim(arr){var start=0;for(;start=0;end--){if(arr[end]!=="")break}if(start>end)return[];return arr.slice(start,end-start+1)}var fromParts=trim(from.split("/"));var toParts=trim(to.split("/"));var length=Math.min(fromParts.length,toParts.length);var samePartsLength=length;for(var i=0;i1){for(var i=1;i= 0x80 (not a basic code point)","invalid-input":"Invalid input"},baseMinusTMin=base-tMin,floor=Math.floor,stringFromCharCode=String.fromCharCode,key;function error(type){throw RangeError(errors[type])}function map(array,fn){var length=array.length;var result=[];while(length--){result[length]=fn(array[length])}return result}function mapDomain(string,fn){var parts=string.split("@");var result="";if(parts.length>1){result=parts[0]+"@";string=parts[1]}string=string.replace(regexSeparators,".");var labels=string.split(".");var encoded=map(labels,fn).join(".");return result+encoded}function ucs2decode(string){var 
output=[],counter=0,length=string.length,value,extra;while(counter=55296&&value<=56319&&counter65535){value-=65536;output+=stringFromCharCode(value>>>10&1023|55296);value=56320|value&1023}output+=stringFromCharCode(value);return output}).join("")}function basicToDigit(codePoint){if(codePoint-48<10){return codePoint-22}if(codePoint-65<26){return codePoint-65}if(codePoint-97<26){return codePoint-97}return base}function digitToBasic(digit,flag){return digit+22+75*(digit<26)-((flag!=0)<<5)}function adapt(delta,numPoints,firstTime){var k=0;delta=firstTime?floor(delta/damp):delta>>1;delta+=floor(delta/numPoints);for(;delta>baseMinusTMin*tMax>>1;k+=base){delta=floor(delta/baseMinusTMin)}return floor(k+(baseMinusTMin+1)*delta/(delta+skew))}function decode(input){var output=[],inputLength=input.length,out,i=0,n=initialN,bias=initialBias,basic,j,index,oldi,w,k,digit,t,baseMinusT;basic=input.lastIndexOf(delimiter);if(basic<0){basic=0}for(j=0;j=128){error("not-basic")}output.push(input.charCodeAt(j))}for(index=basic>0?basic+1:0;index=inputLength){error("invalid-input")}digit=basicToDigit(input.charCodeAt(index++));if(digit>=base||digit>floor((maxInt-i)/w)){error("overflow")}i+=digit*w;t=k<=bias?tMin:k>=bias+tMax?tMax:k-bias;if(digitfloor(maxInt/baseMinusT)){error("overflow")}w*=baseMinusT}out=output.length+1;bias=adapt(i-oldi,out,oldi==0);if(floor(i/out)>maxInt-n){error("overflow")}n+=floor(i/out);i%=out;output.splice(i++,0,n)}return ucs2encode(output)}function encode(input){var n,delta,handledCPCount,basicLength,bias,j,m,q,k,t,currentValue,output=[],inputLength,handledCPCountPlusOne,baseMinusT,qMinusT;input=ucs2decode(input);inputLength=input.length;n=initialN;delta=0;bias=initialBias;for(j=0;j=n&¤tValuefloor((maxInt-delta)/handledCPCountPlusOne)){error("overflow")}delta+=(m-n)*handledCPCountPlusOne;n=m;for(j=0;jmaxInt){error("overflow")}if(currentValue==n){for(q=delta,k=base;;k+=base){t=k<=bias?tMin:k>=bias+tMax?tMax:k-bias;if(q0&&len>maxKeys){len=maxKeys}for(var 
i=0;i=0){kstr=x.substr(0,idx);vstr=x.substr(idx+1)}else{kstr=x;vstr=""}k=decodeURIComponent(kstr);v=decodeURIComponent(vstr);if(!hasOwnProperty(obj,k)){obj[k]=v}else if(isArray(obj[k])){obj[k].push(v)}else{obj[k]=[obj[k],v]}}return obj};var isArray=Array.isArray||function(xs){return Object.prototype.toString.call(xs)==="[object Array]"}},{}],8:[function(require,module,exports){"use strict";var stringifyPrimitive=function(v){switch(typeof v){case"string":return v;case"boolean":return v?"true":"false";case"number":return isFinite(v)?v:"";default:return""}};module.exports=function(obj,sep,eq,name){sep=sep||"&";eq=eq||"=";if(obj===null){obj=undefined}if(typeof obj==="object"){return map(objectKeys(obj),function(k){var ks=encodeURIComponent(stringifyPrimitive(k))+eq;if(isArray(obj[k])){return map(obj[k],function(v){return ks+encodeURIComponent(stringifyPrimitive(v))}).join(sep)}else{return ks+encodeURIComponent(stringifyPrimitive(obj[k]))}}).join(sep)}if(!name)return"";return encodeURIComponent(stringifyPrimitive(name))+eq+encodeURIComponent(stringifyPrimitive(obj))};var isArray=Array.isArray||function(xs){return Object.prototype.toString.call(xs)==="[object Array]"};function map(xs,f){if(xs.map)return xs.map(f);var res=[];for(var i=0;i",'"',"`"," ","\r","\n","\t"],unwise=["{","}","|","\\","^","`"].concat(delims),autoEscape=["'"].concat(unwise),nonHostChars=["%","/","?",";","#"].concat(autoEscape),hostEndingChars=["/","?","#"],hostnameMaxLen=255,hostnamePartPattern=/^[a-z0-9A-Z_-]{0,63}$/,hostnamePartStart=/^([a-z0-9A-Z_-]{0,63})(.*)$/,unsafeProtocol={javascript:true,"javascript:":true},hostlessProtocol={javascript:true,"javascript:":true},slashedProtocol={http:true,https:true,ftp:true,gopher:true,file:true,"http:":true,"https:":true,"ftp:":true,"gopher:":true,"file:":true},querystring=require("querystring");function urlParse(url,parseQueryString,slashesDenoteHost){if(url&&isObject(url)&&url instanceof Url)return url;var u=new 
Url;u.parse(url,parseQueryString,slashesDenoteHost);return u}Url.prototype.parse=function(url,parseQueryString,slashesDenoteHost){if(!isString(url)){throw new TypeError("Parameter 'url' must be a string, not "+typeof url)}var rest=url;rest=rest.trim();var proto=protocolPattern.exec(rest);if(proto){proto=proto[0];var lowerProto=proto.toLowerCase();this.protocol=lowerProto;rest=rest.substr(proto.length)}if(slashesDenoteHost||proto||rest.match(/^\/\/[^@\/]+@[^@\/]+/)){var slashes=rest.substr(0,2)==="//";if(slashes&&!(proto&&hostlessProtocol[proto])){rest=rest.substr(2);this.slashes=true}}if(!hostlessProtocol[proto]&&(slashes||proto&&!slashedProtocol[proto])){var hostEnd=-1;for(var i=0;i127){newpart+="x"}else{newpart+=part[j]}}if(!newpart.match(hostnamePartPattern)){var validParts=hostparts.slice(0,i);var notHost=hostparts.slice(i+1);var bit=part.match(hostnamePartStart);if(bit){validParts.push(bit[1]);notHost.unshift(bit[2])}if(notHost.length){rest="/"+notHost.join(".")+rest}this.hostname=validParts.join(".");break}}}}if(this.hostname.length>hostnameMaxLen){this.hostname=""}else{this.hostname=this.hostname.toLowerCase()}if(!ipv6Hostname){var domainArray=this.hostname.split(".");var newOut=[];for(var i=0;i0?result.host.split("@"):false;if(authInHost){result.auth=authInHost.shift();result.host=result.hostname=authInHost.shift()}}result.search=relative.search;result.query=relative.query;if(!isNull(result.pathname)||!isNull(result.search)){result.path=(result.pathname?result.pathname:"")+(result.search?result.search:"")}result.href=result.format();return result}if(!srcPath.length){result.pathname=null;if(result.search){result.path="/"+result.search}else{result.path=null}result.href=result.format();return result}var last=srcPath.slice(-1)[0];var hasTrailingSlash=(result.host||relative.host)&&(last==="."||last==="..")||last==="";var up=0;for(var i=srcPath.length;i>=0;i--){last=srcPath[i];if(last=="."){srcPath.splice(i,1)}else if(last===".."){srcPath.splice(i,1);up++}else 
if(up){srcPath.splice(i,1);up--}}if(!mustEndAbs&&!removeAllDots){for(;up--;up){srcPath.unshift("..")}}if(mustEndAbs&&srcPath[0]!==""&&(!srcPath[0]||srcPath[0].charAt(0)!=="/")){srcPath.unshift("")}if(hasTrailingSlash&&srcPath.join("/").substr(-1)!=="/"){srcPath.push("")}var isAbsolute=srcPath[0]===""||srcPath[0]&&srcPath[0].charAt(0)==="/";if(psychotic){result.hostname=result.host=isAbsolute?"":srcPath.length?srcPath.shift():"";var authInHost=result.host&&result.host.indexOf("@")>0?result.host.split("@"):false;if(authInHost){result.auth=authInHost.shift();result.host=result.hostname=authInHost.shift()}}mustEndAbs=mustEndAbs||result.host&&srcPath.length;if(mustEndAbs&&!isAbsolute){srcPath.unshift("")}if(!srcPath.length){result.pathname=null;result.path=null}else{result.pathname=srcPath.join("/")}if(!isNull(result.pathname)||!isNull(result.search)){result.path=(result.pathname?result.pathname:"")+(result.search?result.search:"")}result.auth=relative.auth||result.auth;result.slashes=result.slashes||relative.slashes;result.href=result.format();return result};Url.prototype.parseHost=function(){var host=this.host;var port=portPattern.exec(host);if(port){port=port[0];if(port!==":"){this.port=port.substr(1)}host=host.substr(0,host.length-port.length)}if(host)this.hostname=host};function isString(arg){return typeof arg==="string"}function isObject(arg){return typeof arg==="object"&&arg!==null}function isNull(arg){return arg===null}function isNullOrUndefined(arg){return arg==null}},{punycode:6,querystring:9}],11:[function(require,module,exports){var $=require("jquery");function toggleDropdown(e){var $dropdown=$(e.currentTarget).parent().find(".dropdown-menu");$dropdown.toggleClass("open");e.stopPropagation();e.preventDefault()}function closeDropdown(e){$(".dropdown-menu").removeClass("open")}function 
init(){$(document).on("click",".toggle-dropdown",toggleDropdown);$(document).on("click",".dropdown-menu",function(e){e.stopPropagation()});$(document).on("click",closeDropdown)}module.exports={init:init}},{jquery:1}],12:[function(require,module,exports){var $=require("jquery");module.exports=$({})},{jquery:1}],13:[function(require,module,exports){var $=require("jquery");var _=require("lodash");var storage=require("./storage");var dropdown=require("./dropdown");var events=require("./events");var state=require("./state");var keyboard=require("./keyboard");var navigation=require("./navigation");var sidebar=require("./sidebar");var toolbar=require("./toolbar");function start(config){sidebar.init();keyboard.init();dropdown.init();navigation.init();toolbar.createButton({index:0,icon:"fa fa-align-justify",label:"Toggle Sidebar",onClick:function(e){e.preventDefault();sidebar.toggle()}});events.trigger("start",config);navigation.notify()}var gitbook={start:start,events:events,state:state,toolbar:toolbar,sidebar:sidebar,storage:storage,keyboard:keyboard};var MODULES={gitbook:gitbook,jquery:$,lodash:_};window.gitbook=gitbook;window.$=$;window.jQuery=$;gitbook.require=function(mods,fn){mods=_.map(mods,function(mod){mod=mod.toLowerCase();if(!MODULES[mod]){throw new Error("GitBook module "+mod+" doesn't exist")}return MODULES[mod]});fn.apply(null,mods)};module.exports={}},{"./dropdown":11,"./events":12,"./keyboard":14,"./navigation":16,"./sidebar":18,"./state":19,"./storage":20,"./toolbar":21,jquery:1,lodash:2}],14:[function(require,module,exports){var Mousetrap=require("mousetrap");var navigation=require("./navigation");var sidebar=require("./sidebar");function bindShortcut(keys,fn){Mousetrap.bind(keys,function(e){fn();return false})}function 
init(){bindShortcut(["right"],function(e){navigation.goNext()});bindShortcut(["left"],function(e){navigation.goPrev()});bindShortcut(["s"],function(e){sidebar.toggle()})}module.exports={init:init,bind:bindShortcut}},{"./navigation":16,"./sidebar":18,mousetrap:3}],15:[function(require,module,exports){var state=require("./state");function showLoading(p){state.$book.addClass("is-loading");p.always(function(){state.$book.removeClass("is-loading")});return p}module.exports={show:showLoading}},{"./state":19}],16:[function(require,module,exports){var $=require("jquery");var url=require("url");var events=require("./events");var state=require("./state");var loading=require("./loading");var usePushState=typeof history.pushState!=="undefined";function handleNavigation(relativeUrl,push){var uri=url.resolve(window.location.pathname,relativeUrl);notifyPageChange();location.href=relativeUrl;return}function updateNavigationPosition(){var bodyInnerWidth,pageWrapperWidth;bodyInnerWidth=parseInt($(".body-inner").css("width"),10);pageWrapperWidth=parseInt($(".page-wrapper").css("width"),10);$(".navigation-next").css("margin-right",bodyInnerWidth-pageWrapperWidth+"px")}function notifyPageChange(){events.trigger("page.change")}function preparePage(notify){var $bookBody=$(".book-body");var $bookInner=$bookBody.find(".body-inner");var $pageWrapper=$bookInner.find(".page-wrapper");updateNavigationPosition();$bookInner.scrollTop(0);$bookBody.scrollTop(0);if(notify!==false)notifyPageChange()}function isLeftClickEvent(e){return e.button===0}function isModifiedEvent(e){return!!(e.metaKey||e.altKey||e.ctrlKey||e.shiftKey)}function handlePagination(e){if(isModifiedEvent(e)||!isLeftClickEvent(e)){return}e.stopPropagation();e.preventDefault();var url=$(this).attr("href");if(url)handleNavigation(url,true)}function goNext(){var url=$(".navigation-next").attr("href");if(url)handleNavigation(url,true)}function goPrev(){var 
url=$(".navigation-prev").attr("href");if(url)handleNavigation(url,true)}function init(){$.ajaxSetup({});if(location.protocol!=="file:"){history.replaceState({path:window.location.href},"")}window.onpopstate=function(event){if(event.state===null){return}return handleNavigation(event.state.path,false)};$(document).on("click",".navigation-prev",handlePagination);$(document).on("click",".navigation-next",handlePagination);$(document).on("click",".summary [data-path] a",handlePagination);$(window).resize(updateNavigationPosition);preparePage(false)}module.exports={init:init,goNext:goNext,goPrev:goPrev,notify:notifyPageChange}},{"./events":12,"./loading":15,"./state":19,jquery:1,url:10}],17:[function(require,module,exports){module.exports={isMobile:function(){return document.body.clientWidth<=600}}},{}],18:[function(require,module,exports){var $=require("jquery");var _=require("lodash");var storage=require("./storage");var platform=require("./platform");var state=require("./state");function toggleSidebar(_state,animation){if(state!=null&&isOpen()==_state)return;if(animation==null)animation=true;state.$book.toggleClass("without-animation",!animation);state.$book.toggleClass("with-summary",_state);storage.set("sidebar",isOpen())}function isOpen(){return state.$book.hasClass("with-summary")}function init(){if(platform.isMobile()){toggleSidebar(false,false)}else{toggleSidebar(storage.get("sidebar",true),false)}$(document).on("click",".book-summary li.chapter a",function(e){if(platform.isMobile())toggleSidebar(false,false)})}function filterSummary(paths){var $summary=$(".book-summary");$summary.find("li").each(function(){var path=$(this).data("path");var st=paths==null||_.contains(paths,path);$(this).toggle(st);if(st)$(this).parents("li").show()})}module.exports={init:init,isOpen:isOpen,toggle:toggleSidebar,filter:filterSummary}},{"./platform":17,"./state":19,"./storage":20,jquery:1,lodash:2}],19:[function(require,module,exports){var $=require("jquery");var 
url=require("url");var path=require("path");var state={};state.update=function(dom){var $book=$(dom.find(".book"));state.$book=$book;state.level=$book.data("level");state.basePath=$book.data("basepath");state.innerLanguage=$book.data("innerlanguage");state.revision=$book.data("revision");state.filepath=$book.data("filepath");state.chapterTitle=$book.data("chapter-title");state.root=url.resolve(location.protocol+"//"+location.host,path.dirname(path.resolve(location.pathname.replace(/\/$/,"/index.html"),state.basePath))).replace(/\/?$/,"/");state.bookRoot=state.innerLanguage?url.resolve(state.root,".."):state.root};state.update($);module.exports=state},{jquery:1,path:4,url:10}],20:[function(require,module,exports){var baseKey="";module.exports={setBaseKey:function(key){baseKey=key},set:function(key,value){key=baseKey+":"+key;try{sessionStorage[key]=JSON.stringify(value)}catch(e){}},get:function(key,def){key=baseKey+":"+key;if(sessionStorage[key]===undefined)return def;try{var v=JSON.parse(sessionStorage[key]);return v==null?def:v}catch(err){return sessionStorage[key]||def}},remove:function(key){key=baseKey+":"+key;sessionStorage.removeItem(key)}}},{}],21:[function(require,module,exports){var $=require("jquery");var _=require("lodash");var events=require("./events");var buttons=[];function insertAt(parent,selector,index,element){var lastIndex=parent.children(selector).length;if(index<0){index=Math.max(0,lastIndex+1+index)}parent.append(element);if(index",{class:"dropdown-menu",html:''});if(_.isString(dropdown)){$menu.append(dropdown)}else{var groups=_.map(dropdown,function(group){if(_.isArray(group))return group;else return[group]});_.each(groups,function(group){var $group=$("
",{class:"buttons"});var sizeClass="size-"+group.length;_.each(group,function(btn){btn=_.defaults(btn||{},{text:"",className:"",onClick:defaultOnClick});var $btn=$("'; - var clipboard; - - gitbook.events.bind("page.change", function() { - - if (!ClipboardJS.isSupported()) return; - - // the page.change event is thrown twice: before and after the page changes - if (clipboard) { - // clipboard is already defined - // we can deduct that we are before page changes - clipboard.destroy(); // destroy the previous events listeners - clipboard = undefined; // reset the clipboard object - return; - } - - $(copyButton).prependTo("div.sourceCode"); - - clipboard = new ClipboardJS(".copy-to-clipboard-button", { - text: function(trigger) { - return trigger.parentNode.textContent; - } - }); - - }); - -}); diff --git a/_book/libs/gitbook/js/plugin-fontsettings.js b/_book/libs/gitbook/js/plugin-fontsettings.js deleted file mode 100644 index a70f0fb..0000000 --- a/_book/libs/gitbook/js/plugin-fontsettings.js +++ /dev/null @@ -1,152 +0,0 @@ -gitbook.require(["gitbook", "lodash", "jQuery"], function(gitbook, _, $) { - var fontState; - - var THEMES = { - "white": 0, - "sepia": 1, - "night": 2 - }; - - var FAMILY = { - "serif": 0, - "sans": 1 - }; - - // Save current font settings - function saveFontSettings() { - gitbook.storage.set("fontState", fontState); - update(); - } - - // Increase font size - function enlargeFontSize(e) { - e.preventDefault(); - if (fontState.size >= 4) return; - - fontState.size++; - saveFontSettings(); - }; - - // Decrease font size - function reduceFontSize(e) { - e.preventDefault(); - if (fontState.size <= 0) return; - - fontState.size--; - saveFontSettings(); - }; - - // Change font family - function changeFontFamily(index, e) { - e.preventDefault(); - - fontState.family = index; - saveFontSettings(); - }; - - // Change type of color - function changeColorTheme(index, e) { - e.preventDefault(); - - var $book = $(".book"); - - if (fontState.theme !== 0) - 
$book.removeClass("color-theme-"+fontState.theme); - - fontState.theme = index; - if (fontState.theme !== 0) - $book.addClass("color-theme-"+fontState.theme); - - saveFontSettings(); - }; - - function update() { - var $book = gitbook.state.$book; - - $(".font-settings .font-family-list li").removeClass("active"); - $(".font-settings .font-family-list li:nth-child("+(fontState.family+1)+")").addClass("active"); - - $book[0].className = $book[0].className.replace(/\bfont-\S+/g, ''); - $book.addClass("font-size-"+fontState.size); - $book.addClass("font-family-"+fontState.family); - - if(fontState.theme !== 0) { - $book[0].className = $book[0].className.replace(/\bcolor-theme-\S+/g, ''); - $book.addClass("color-theme-"+fontState.theme); - } - }; - - function init(config) { - var $bookBody, $book; - - //Find DOM elements. - $book = gitbook.state.$book; - $bookBody = $book.find(".book-body"); - - // Instantiate font state object - fontState = gitbook.storage.get("fontState", { - size: config.size || 2, - family: FAMILY[config.family || "sans"], - theme: THEMES[config.theme || "white"] - }); - - update(); - }; - - - gitbook.events.bind("start", function(e, config) { - var opts = config.fontsettings; - if (!opts) return; - - // Create buttons in toolbar - gitbook.toolbar.createButton({ - icon: 'fa fa-font', - label: 'Font Settings', - className: 'font-settings', - dropdown: [ - [ - { - text: 'A', - className: 'font-reduce', - onClick: reduceFontSize - }, - { - text: 'A', - className: 'font-enlarge', - onClick: enlargeFontSize - } - ], - [ - { - text: 'Serif', - onClick: _.partial(changeFontFamily, 0) - }, - { - text: 'Sans', - onClick: _.partial(changeFontFamily, 1) - } - ], - [ - { - text: 'White', - onClick: _.partial(changeColorTheme, 0) - }, - { - text: 'Sepia', - onClick: _.partial(changeColorTheme, 1) - }, - { - text: 'Night', - onClick: _.partial(changeColorTheme, 2) - } - ] - ] - }); - - - // Init current settings - init(opts); - }); -}); - - diff --git 
a/_book/libs/gitbook/js/plugin-search.js b/_book/libs/gitbook/js/plugin-search.js deleted file mode 100644 index 31b5786..0000000 --- a/_book/libs/gitbook/js/plugin-search.js +++ /dev/null @@ -1,223 +0,0 @@ -gitbook.require(["gitbook", "lodash", "jQuery"], function(gitbook, _, $) { - var index = null; - var $searchInput, $searchLabel, $searchForm; - var $highlighted = [], hi, hiOpts = { className: 'search-highlight' }; - var collapse = false, toc_visible = []; - - // Use a specific index - function loadIndex(data) { - // [Yihui] In bookdown, I use a character matrix to store the chapter - // content, and the index is dynamically built on the client side. - // Gitbook prebuilds the index data instead: https://github.com/GitbookIO/plugin-search - // We can certainly do that via R packages V8 and jsonlite, but let's - // see how slow it really is before improving it. On the other hand, - // lunr cannot handle non-English text very well, e.g. the default - // tokenizer cannot deal with Chinese text, so we may want to replace - // lunr with a dumb simple text matching approach. - index = lunr(function () { - this.ref('url'); - this.field('title', { boost: 10 }); - this.field('body'); - }); - data.map(function(item) { - index.add({ - url: item[0], - title: item[1], - body: item[2] - }); - }); - } - - // Fetch the search index - function fetchIndex() { - return $.getJSON(gitbook.state.basePath+"/search_index.json") - .then(loadIndex); // [Yihui] we need to use this object later - } - - // Search for a term and return results - function search(q) { - if (!index) return; - - var results = _.chain(index.search(q)) - .map(function(result) { - var parts = result.ref.split("#"); - return { - path: parts[0], - hash: parts[1] - }; - }) - .value(); - - // [Yihui] Highlight the search keyword on current page - $highlighted = results.length === 0 ? 
[] : $('.page-inner') - .unhighlight(hiOpts).highlight(q, hiOpts).find('span.search-highlight'); - scrollToHighlighted(0); - - return results; - } - - // [Yihui] Scroll the chapter body to the i-th highlighted string - function scrollToHighlighted(d) { - var n = $highlighted.length; - hi = hi === undefined ? 0 : hi + d; - // navignate to the previous/next page in the search results if reached the top/bottom - var b = hi < 0; - if (d !== 0 && (b || hi >= n)) { - var path = currentPath(), n2 = toc_visible.length; - if (n2 === 0) return; - for (var i = b ? 0 : n2; (b && i < n2) || (!b && i >= 0); i += b ? 1 : -1) { - if (toc_visible.eq(i).data('path') === path) break; - } - i += b ? -1 : 1; - if (i < 0) i = n2 - 1; - if (i >= n2) i = 0; - var lnk = toc_visible.eq(i).find('a[href$=".html"]'); - if (lnk.length) lnk[0].click(); - return; - } - if (n === 0) return; - var $p = $highlighted.eq(hi); - $p[0].scrollIntoView(); - $highlighted.css('background-color', ''); - // an orange background color on the current item and removed later - $p.css('background-color', 'orange'); - setTimeout(function() { - $p.css('background-color', ''); - }, 2000); - } - - function currentPath() { - var href = window.location.pathname; - href = href.substr(href.lastIndexOf('/') + 1); - return href === '' ? 'index.html' : href; - } - - // Create search form - function createForm(value) { - if ($searchForm) $searchForm.remove(); - if ($searchLabel) $searchLabel.remove(); - if ($searchInput) $searchInput.remove(); - - $searchForm = $('
', { - 'class': 'book-search', - 'role': 'search' - }); - - $searchLabel = $('
").addClass(errClass).css("position", "absolute") - .css("top", el.offsetTop) - .css("left", el.offsetLeft) - // setting width can push out the page size, forcing otherwise - // unnecessary scrollbars to appear and making it impossible for - // the element to shrink; so use max-width instead - .css("maxWidth", el.offsetWidth) - .css("height", el.offsetHeight); - errorDiv.text(err.message); - $el.after(errorDiv); - - // Really dumb way to keep the size/position of the error in sync with - // the parent element as the window is resized or whatever. - var intId = setInterval(function() { - if (!errorDiv[0].parentElement) { - clearInterval(intId); - return; - } - errorDiv - .css("top", el.offsetTop) - .css("left", el.offsetLeft) - .css("maxWidth", el.offsetWidth) - .css("height", el.offsetHeight); - }, 500); - } - } - }, - clearError: function(el) { - var $el = $(el); - var display = $el.data("restore-display-mode"); - $el.data("restore-display-mode", null); - - if (display === "inline" || display === "inline-block") { - if (display) - $el.css("display", display); - $(el.nextSibling).filter(".htmlwidgets-error").remove(); - } else if (display === "block"){ - $el.css("visibility", "inherit"); - $(el.nextSibling).filter(".htmlwidgets-error").remove(); - } - }, - sizing: {} - }; - - // Called by widget bindings to register a new type of widget. The definition - // object can contain the following properties: - // - name (required) - A string indicating the binding name, which will be - // used by default as the CSS classname to look for. - // - initialize (optional) - A function(el) that will be called once per - // widget element; if a value is returned, it will be passed as the third - // value to renderValue. - // - renderValue (required) - A function(el, data, initValue) that will be - // called with data. Static contexts will cause this to be called once per - // element; Shiny apps will cause this to be called multiple times per - // element, as the data changes. 
- window.HTMLWidgets.widget = function(definition) { - if (!definition.name) { - throw new Error("Widget must have a name"); - } - if (!definition.type) { - throw new Error("Widget must have a type"); - } - // Currently we only support output widgets - if (definition.type !== "output") { - throw new Error("Unrecognized widget type '" + definition.type + "'"); - } - // TODO: Verify that .name is a valid CSS classname - - // Support new-style instance-bound definitions. Old-style class-bound - // definitions have one widget "object" per widget per type/class of - // widget; the renderValue and resize methods on such widget objects - // take el and instance arguments, because the widget object can't - // store them. New-style instance-bound definitions have one widget - // object per widget instance; the definition that's passed in doesn't - // provide renderValue or resize methods at all, just the single method - // factory(el, width, height) - // which returns an object that has renderValue(x) and resize(w, h). - // This enables a far more natural programming style for the widget - // author, who can store per-instance state using either OO-style - // instance fields or functional-style closure variables (I guess this - // is in contrast to what can only be called C-style pseudo-OO which is - // what we required before). - if (definition.factory) { - definition = createLegacyDefinitionAdapter(definition); - } - - if (!definition.renderValue) { - throw new Error("Widget must have a renderValue function"); - } - - // For static rendering (non-Shiny), use a simple widget registration - // scheme. We also use this scheme for Shiny apps/documents that also - // contain static widgets. - window.HTMLWidgets.widgets = window.HTMLWidgets.widgets || []; - // Merge defaults into the definition; don't mutate the original definition. 
- var staticBinding = extend({}, defaults, definition); - overrideMethod(staticBinding, "find", function(superfunc) { - return function(scope) { - var results = superfunc(scope); - // Filter out Shiny outputs, we only want the static kind - return filterByClass(results, "html-widget-output", false); - }; - }); - window.HTMLWidgets.widgets.push(staticBinding); - - if (shinyMode) { - // Shiny is running. Register the definition with an output binding. - // The definition itself will not be the output binding, instead - // we will make an output binding object that delegates to the - // definition. This is because we foolishly used the same method - // name (renderValue) for htmlwidgets definition and Shiny bindings - // but they actually have quite different semantics (the Shiny - // bindings receive data that includes lots of metadata that it - // strips off before calling htmlwidgets renderValue). We can't - // just ignore the difference because in some widgets it's helpful - // to call this.renderValue() from inside of resize(), and if - // we're not delegating, then that call will go to the Shiny - // version instead of the htmlwidgets version. - - // Merge defaults with definition, without mutating either. - var bindingDef = extend({}, defaults, definition); - - // This object will be our actual Shiny binding. - var shinyBinding = new Shiny.OutputBinding(); - - // With a few exceptions, we'll want to simply use the bindingDef's - // version of methods if they are available, otherwise fall back to - // Shiny's defaults. NOTE: If Shiny's output bindings gain additional - // methods in the future, and we want them to be overrideable by - // HTMLWidget binding definitions, then we'll need to add them to this - // list. 
- delegateMethod(shinyBinding, bindingDef, "getId"); - delegateMethod(shinyBinding, bindingDef, "onValueChange"); - delegateMethod(shinyBinding, bindingDef, "onValueError"); - delegateMethod(shinyBinding, bindingDef, "renderError"); - delegateMethod(shinyBinding, bindingDef, "clearError"); - delegateMethod(shinyBinding, bindingDef, "showProgress"); - - // The find, renderValue, and resize are handled differently, because we - // want to actually decorate the behavior of the bindingDef methods. - - shinyBinding.find = function(scope) { - var results = bindingDef.find(scope); - - // Only return elements that are Shiny outputs, not static ones - var dynamicResults = results.filter(".html-widget-output"); - - // It's possible that whatever caused Shiny to think there might be - // new dynamic outputs, also caused there to be new static outputs. - // Since there might be lots of different htmlwidgets bindings, we - // schedule execution for later--no need to staticRender multiple - // times. - if (results.length !== dynamicResults.length) - scheduleStaticRender(); - - return dynamicResults; - }; - - // Wrap renderValue to handle initialization, which unfortunately isn't - // supported natively by Shiny at the time of this writing. 
- - shinyBinding.renderValue = function(el, data) { - Shiny.renderDependencies(data.deps); - // Resolve strings marked as javascript literals to objects - if (!(data.evals instanceof Array)) data.evals = [data.evals]; - for (var i = 0; data.evals && i < data.evals.length; i++) { - window.HTMLWidgets.evaluateStringMember(data.x, data.evals[i]); - } - if (!bindingDef.renderOnNullValue) { - if (data.x === null) { - el.style.visibility = "hidden"; - return; - } else { - el.style.visibility = "inherit"; - } - } - if (!elementData(el, "initialized")) { - initSizing(el); - - elementData(el, "initialized", true); - if (bindingDef.initialize) { - var result = bindingDef.initialize(el, el.offsetWidth, - el.offsetHeight); - elementData(el, "init_result", result); - } - } - bindingDef.renderValue(el, data.x, elementData(el, "init_result")); - evalAndRun(data.jsHooks.render, elementData(el, "init_result"), [el, data.x]); - }; - - // Only override resize if bindingDef implements it - if (bindingDef.resize) { - shinyBinding.resize = function(el, width, height) { - // Shiny can call resize before initialize/renderValue have been - // called, which doesn't make sense for widgets. 
- if (elementData(el, "initialized")) { - bindingDef.resize(el, width, height, elementData(el, "init_result")); - } - }; - } - - Shiny.outputBindings.register(shinyBinding, bindingDef.name); - } - }; - - var scheduleStaticRenderTimerId = null; - function scheduleStaticRender() { - if (!scheduleStaticRenderTimerId) { - scheduleStaticRenderTimerId = setTimeout(function() { - scheduleStaticRenderTimerId = null; - window.HTMLWidgets.staticRender(); - }, 1); - } - } - - // Render static widgets after the document finishes loading - // Statically render all elements that are of this widget's class - window.HTMLWidgets.staticRender = function() { - var bindings = window.HTMLWidgets.widgets || []; - forEach(bindings, function(binding) { - var matches = binding.find(document.documentElement); - forEach(matches, function(el) { - var sizeObj = initSizing(el, binding); - - if (hasClass(el, "html-widget-static-bound")) - return; - el.className = el.className + " html-widget-static-bound"; - - var initResult; - if (binding.initialize) { - initResult = binding.initialize(el, - sizeObj ? sizeObj.getWidth() : el.offsetWidth, - sizeObj ? sizeObj.getHeight() : el.offsetHeight - ); - elementData(el, "init_result", initResult); - } - - if (binding.resize) { - var lastSize = { - w: sizeObj ? sizeObj.getWidth() : el.offsetWidth, - h: sizeObj ? sizeObj.getHeight() : el.offsetHeight - }; - var resizeHandler = function(e) { - var size = { - w: sizeObj ? sizeObj.getWidth() : el.offsetWidth, - h: sizeObj ? sizeObj.getHeight() : el.offsetHeight - }; - if (size.w === 0 && size.h === 0) - return; - if (size.w === lastSize.w && size.h === lastSize.h) - return; - lastSize = size; - binding.resize(el, size.w, size.h, initResult); - }; - - on(window, "resize", resizeHandler); - - // This is needed for cases where we're running in a Shiny - // app, but the widget itself is not a Shiny output, but - // rather a simple static widget. 
One example of this is - // an rmarkdown document that has runtime:shiny and widget - // that isn't in a render function. Shiny only knows to - // call resize handlers for Shiny outputs, not for static - // widgets, so we do it ourselves. - if (window.jQuery) { - window.jQuery(document).on( - "shown.htmlwidgets shown.bs.tab.htmlwidgets shown.bs.collapse.htmlwidgets", - resizeHandler - ); - window.jQuery(document).on( - "hidden.htmlwidgets hidden.bs.tab.htmlwidgets hidden.bs.collapse.htmlwidgets", - resizeHandler - ); - } - - // This is needed for the specific case of ioslides, which - // flips slides between display:none and display:block. - // Ideally we would not have to have ioslide-specific code - // here, but rather have ioslides raise a generic event, - // but the rmarkdown package just went to CRAN so the - // window to getting that fixed may be long. - if (window.addEventListener) { - // It's OK to limit this to window.addEventListener - // browsers because ioslides itself only supports - // such browsers. 
- on(document, "slideenter", resizeHandler); - on(document, "slideleave", resizeHandler); - } - } - - var scriptData = document.querySelector("script[data-for='" + el.id + "'][type='application/json']"); - if (scriptData) { - var data = JSON.parse(scriptData.textContent || scriptData.text); - // Resolve strings marked as javascript literals to objects - if (!(data.evals instanceof Array)) data.evals = [data.evals]; - for (var k = 0; data.evals && k < data.evals.length; k++) { - window.HTMLWidgets.evaluateStringMember(data.x, data.evals[k]); - } - binding.renderValue(el, data.x, initResult); - evalAndRun(data.jsHooks.render, initResult, [el, data.x]); - } - }); - }); - - invokePostRenderHandlers(); - } - - - function has_jQuery3() { - if (!window.jQuery) { - return false; - } - var $version = window.jQuery.fn.jquery; - var $major_version = parseInt($version.split(".")[0]); - return $major_version >= 3; - } - - /* - / Shiny 1.4 bumped jQuery from 1.x to 3.x which means jQuery's - / on-ready handler (i.e., $(fn)) is now asyncronous (i.e., it now - / really means $(setTimeout(fn)). - / https://jquery.com/upgrade-guide/3.0/#breaking-change-document-ready-handlers-are-now-asynchronous - / - / Since Shiny uses $() to schedule initShiny, shiny>=1.4 calls initShiny - / one tick later than it did before, which means staticRender() is - / called renderValue() earlier than (advanced) widget authors might be expecting. - / https://github.com/rstudio/shiny/issues/2630 - / - / For a concrete example, leaflet has some methods (e.g., updateBounds) - / which reference Shiny methods registered in initShiny (e.g., setInputValue). 
- / Since leaflet is privy to this life-cycle, it knows to use setTimeout() to - / delay execution of those methods (until Shiny methods are ready) - / https://github.com/rstudio/leaflet/blob/18ec981/javascript/src/index.js#L266-L268 - / - / Ideally widget authors wouldn't need to use this setTimeout() hack that - / leaflet uses to call Shiny methods on a staticRender(). In the long run, - / the logic initShiny should be broken up so that method registration happens - / right away, but binding happens later. - */ - function maybeStaticRenderLater() { - if (shinyMode && has_jQuery3()) { - window.jQuery(window.HTMLWidgets.staticRender); - } else { - window.HTMLWidgets.staticRender(); - } - } - - if (document.addEventListener) { - document.addEventListener("DOMContentLoaded", function() { - document.removeEventListener("DOMContentLoaded", arguments.callee, false); - maybeStaticRenderLater(); - }, false); - } else if (document.attachEvent) { - document.attachEvent("onreadystatechange", function() { - if (document.readyState === "complete") { - document.detachEvent("onreadystatechange", arguments.callee); - maybeStaticRenderLater(); - } - }); - } - - - window.HTMLWidgets.getAttachmentUrl = function(depname, key) { - // If no key, default to the first item - if (typeof(key) === "undefined") - key = 1; - - var link = document.getElementById(depname + "-" + key + "-attachment"); - if (!link) { - throw new Error("Attachment " + depname + "/" + key + " not found in document"); - } - return link.getAttribute("href"); - }; - - window.HTMLWidgets.dataframeToD3 = function(df) { - var names = []; - var length; - for (var name in df) { - if (df.hasOwnProperty(name)) - names.push(name); - if (typeof(df[name]) !== "object" || typeof(df[name].length) === "undefined") { - throw new Error("All fields must be arrays"); - } else if (typeof(length) !== "undefined" && length !== df[name].length) { - throw new Error("All fields must be arrays of the same length"); - } - length = 
df[name].length; - } - var results = []; - var item; - for (var row = 0; row < length; row++) { - item = {}; - for (var col = 0; col < names.length; col++) { - item[names[col]] = df[names[col]][row]; - } - results.push(item); - } - return results; - }; - - window.HTMLWidgets.transposeArray2D = function(array) { - if (array.length === 0) return array; - var newArray = array[0].map(function(col, i) { - return array.map(function(row) { - return row[i] - }) - }); - return newArray; - }; - // Split value at splitChar, but allow splitChar to be escaped - // using escapeChar. Any other characters escaped by escapeChar - // will be included as usual (including escapeChar itself). - function splitWithEscape(value, splitChar, escapeChar) { - var results = []; - var escapeMode = false; - var currentResult = ""; - for (var pos = 0; pos < value.length; pos++) { - if (!escapeMode) { - if (value[pos] === splitChar) { - results.push(currentResult); - currentResult = ""; - } else if (value[pos] === escapeChar) { - escapeMode = true; - } else { - currentResult += value[pos]; - } - } else { - currentResult += value[pos]; - escapeMode = false; - } - } - if (currentResult !== "") { - results.push(currentResult); - } - return results; - } - // Function authored by Yihui/JJ Allaire - window.HTMLWidgets.evaluateStringMember = function(o, member) { - var parts = splitWithEscape(member, '.', '\\'); - for (var i = 0, l = parts.length; i < l; i++) { - var part = parts[i]; - // part may be a character or 'numeric' member name - if (o !== null && typeof o === "object" && part in o) { - if (i == (l - 1)) { // if we are at the end of the line then evalulate - if (typeof o[part] === "string") - o[part] = tryEval(o[part]); - } else { // otherwise continue to next embedded object - o = o[part]; - } - } - } - }; - - // Retrieve the HTMLWidget instance (i.e. the return value of an - // HTMLWidget binding's initialize() or factory() function) - // associated with an element, or null if none. 
- window.HTMLWidgets.getInstance = function(el) { - return elementData(el, "init_result"); - }; - - // Finds the first element in the scope that matches the selector, - // and returns the HTMLWidget instance (i.e. the return value of - // an HTMLWidget binding's initialize() or factory() function) - // associated with that element, if any. If no element matches the - // selector, or the first matching element has no HTMLWidget - // instance associated with it, then null is returned. - // - // The scope argument is optional, and defaults to window.document. - window.HTMLWidgets.find = function(scope, selector) { - if (arguments.length == 1) { - selector = scope; - scope = document; - } - - var el = scope.querySelector(selector); - if (el === null) { - return null; - } else { - return window.HTMLWidgets.getInstance(el); - } - }; - - // Finds all elements in the scope that match the selector, and - // returns the HTMLWidget instances (i.e. the return values of - // an HTMLWidget binding's initialize() or factory() function) - // associated with the elements, in an array. If elements that - // match the selector don't have an associated HTMLWidget - // instance, the returned array will contain nulls. - // - // The scope argument is optional, and defaults to window.document. - window.HTMLWidgets.findAll = function(scope, selector) { - if (arguments.length == 1) { - selector = scope; - scope = document; - } - - var nodes = scope.querySelectorAll(selector); - var results = []; - for (var i = 0; i < nodes.length; i++) { - results.push(window.HTMLWidgets.getInstance(nodes[i])); - } - return results; - }; - - var postRenderHandlers = []; - function invokePostRenderHandlers() { - while (postRenderHandlers.length) { - var handler = postRenderHandlers.shift(); - if (handler) { - handler(); - } - } - } - - // Register the given callback function to be invoked after the - // next time static widgets are rendered. 
- window.HTMLWidgets.addPostRenderHandler = function(callback) { - postRenderHandlers.push(callback); - }; - - // Takes a new-style instance-bound definition, and returns an - // old-style class-bound definition. This saves us from having - // to rewrite all the logic in this file to accomodate both - // types of definitions. - function createLegacyDefinitionAdapter(defn) { - var result = { - name: defn.name, - type: defn.type, - initialize: function(el, width, height) { - return defn.factory(el, width, height); - }, - renderValue: function(el, x, instance) { - return instance.renderValue(x); - }, - resize: function(el, width, height, instance) { - return instance.resize(width, height); - } - }; - - if (defn.find) - result.find = defn.find; - if (defn.renderError) - result.renderError = defn.renderError; - if (defn.clearError) - result.clearError = defn.clearError; - - return result; - } -})(); - diff --git a/_book/libs/jquery/jquery.min.js b/_book/libs/jquery/jquery.min.js deleted file mode 100644 index b8c4187..0000000 --- a/_book/libs/jquery/jquery.min.js +++ /dev/null @@ -1,4 +0,0 @@ -/*! 
jQuery v2.2.3 | (c) jQuery Foundation | jquery.org/license */ -!function(a,b){"object"==typeof module&&"object"==typeof module.exports?module.exports=a.document?b(a,!0):function(a){if(!a.document)throw new Error("jQuery requires a window with a document");return b(a)}:b(a)}("undefined"!=typeof window?window:this,function(a,b){var c=[],d=a.document,e=c.slice,f=c.concat,g=c.push,h=c.indexOf,i={},j=i.toString,k=i.hasOwnProperty,l={},m="2.2.3",n=function(a,b){return new n.fn.init(a,b)},o=/^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g,p=/^-ms-/,q=/-([\da-z])/gi,r=function(a,b){return b.toUpperCase()};n.fn=n.prototype={jquery:m,constructor:n,selector:"",length:0,toArray:function(){return e.call(this)},get:function(a){return null!=a?0>a?this[a+this.length]:this[a]:e.call(this)},pushStack:function(a){var b=n.merge(this.constructor(),a);return b.prevObject=this,b.context=this.context,b},each:function(a){return n.each(this,a)},map:function(a){return this.pushStack(n.map(this,function(b,c){return a.call(b,c,b)}))},slice:function(){return this.pushStack(e.apply(this,arguments))},first:function(){return this.eq(0)},last:function(){return this.eq(-1)},eq:function(a){var b=this.length,c=+a+(0>a?b:0);return this.pushStack(c>=0&&b>c?[this[c]]:[])},end:function(){return this.prevObject||this.constructor()},push:g,sort:c.sort,splice:c.splice},n.extend=n.fn.extend=function(){var a,b,c,d,e,f,g=arguments[0]||{},h=1,i=arguments.length,j=!1;for("boolean"==typeof g&&(j=g,g=arguments[h]||{},h++),"object"==typeof g||n.isFunction(g)||(g={}),h===i&&(g=this,h--);i>h;h++)if(null!=(a=arguments[h]))for(b in a)c=g[b],d=a[b],g!==d&&(j&&d&&(n.isPlainObject(d)||(e=n.isArray(d)))?(e?(e=!1,f=c&&n.isArray(c)?c:[]):f=c&&n.isPlainObject(c)?c:{},g[b]=n.extend(j,f,d)):void 0!==d&&(g[b]=d));return g},n.extend({expando:"jQuery"+(m+Math.random()).replace(/\D/g,""),isReady:!0,error:function(a){throw new 
Error(a)},noop:function(){},isFunction:function(a){return"function"===n.type(a)},isArray:Array.isArray,isWindow:function(a){return null!=a&&a===a.window},isNumeric:function(a){var b=a&&a.toString();return!n.isArray(a)&&b-parseFloat(b)+1>=0},isPlainObject:function(a){var b;if("object"!==n.type(a)||a.nodeType||n.isWindow(a))return!1;if(a.constructor&&!k.call(a,"constructor")&&!k.call(a.constructor.prototype||{},"isPrototypeOf"))return!1;for(b in a);return void 0===b||k.call(a,b)},isEmptyObject:function(a){var b;for(b in a)return!1;return!0},type:function(a){return null==a?a+"":"object"==typeof a||"function"==typeof a?i[j.call(a)]||"object":typeof a},globalEval:function(a){var b,c=eval;a=n.trim(a),a&&(1===a.indexOf("use strict")?(b=d.createElement("script"),b.text=a,d.head.appendChild(b).parentNode.removeChild(b)):c(a))},camelCase:function(a){return a.replace(p,"ms-").replace(q,r)},nodeName:function(a,b){return a.nodeName&&a.nodeName.toLowerCase()===b.toLowerCase()},each:function(a,b){var c,d=0;if(s(a)){for(c=a.length;c>d;d++)if(b.call(a[d],d,a[d])===!1)break}else for(d in a)if(b.call(a[d],d,a[d])===!1)break;return a},trim:function(a){return null==a?"":(a+"").replace(o,"")},makeArray:function(a,b){var c=b||[];return null!=a&&(s(Object(a))?n.merge(c,"string"==typeof a?[a]:a):g.call(c,a)),c},inArray:function(a,b,c){return null==b?-1:h.call(b,a,c)},merge:function(a,b){for(var c=+b.length,d=0,e=a.length;c>d;d++)a[e++]=b[d];return a.length=e,a},grep:function(a,b,c){for(var d,e=[],f=0,g=a.length,h=!c;g>f;f++)d=!b(a[f],f),d!==h&&e.push(a[f]);return e},map:function(a,b,c){var d,e,g=0,h=[];if(s(a))for(d=a.length;d>g;g++)e=b(a[g],g,c),null!=e&&h.push(e);else for(g in a)e=b(a[g],g,c),null!=e&&h.push(e);return f.apply([],h)},guid:1,proxy:function(a,b){var c,d,f;return"string"==typeof b&&(c=a[b],b=a,a=c),n.isFunction(a)?(d=e.call(arguments,2),f=function(){return a.apply(b||this,d.concat(e.call(arguments)))},f.guid=a.guid=a.guid||n.guid++,f):void 
0},now:Date.now,support:l}),"function"==typeof Symbol&&(n.fn[Symbol.iterator]=c[Symbol.iterator]),n.each("Boolean Number String Function Array Date RegExp Object Error Symbol".split(" "),function(a,b){i["[object "+b+"]"]=b.toLowerCase()});function s(a){var b=!!a&&"length"in a&&a.length,c=n.type(a);return"function"===c||n.isWindow(a)?!1:"array"===c||0===b||"number"==typeof b&&b>0&&b-1 in a}var t=function(a){var b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u="sizzle"+1*new Date,v=a.document,w=0,x=0,y=ga(),z=ga(),A=ga(),B=function(a,b){return a===b&&(l=!0),0},C=1<<31,D={}.hasOwnProperty,E=[],F=E.pop,G=E.push,H=E.push,I=E.slice,J=function(a,b){for(var c=0,d=a.length;d>c;c++)if(a[c]===b)return c;return-1},K="checked|selected|async|autofocus|autoplay|controls|defer|disabled|hidden|ismap|loop|multiple|open|readonly|required|scoped",L="[\\x20\\t\\r\\n\\f]",M="(?:\\\\.|[\\w-]|[^\\x00-\\xa0])+",N="\\["+L+"*("+M+")(?:"+L+"*([*^$|!~]?=)"+L+"*(?:'((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\"|("+M+"))|)"+L+"*\\]",O=":("+M+")(?:\\((('((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\")|((?:\\\\.|[^\\\\()[\\]]|"+N+")*)|.*)\\)|)",P=new RegExp(L+"+","g"),Q=new RegExp("^"+L+"+|((?:^|[^\\\\])(?:\\\\.)*)"+L+"+$","g"),R=new RegExp("^"+L+"*,"+L+"*"),S=new RegExp("^"+L+"*([>+~]|"+L+")"+L+"*"),T=new RegExp("="+L+"*([^\\]'\"]*?)"+L+"*\\]","g"),U=new RegExp(O),V=new RegExp("^"+M+"$"),W={ID:new RegExp("^#("+M+")"),CLASS:new RegExp("^\\.("+M+")"),TAG:new RegExp("^("+M+"|[*])"),ATTR:new RegExp("^"+N),PSEUDO:new RegExp("^"+O),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+L+"*(even|odd|(([+-]|)(\\d*)n|)"+L+"*(?:([+-]|)"+L+"*(\\d+)|))"+L+"*\\)|)","i"),bool:new RegExp("^(?:"+K+")$","i"),needsContext:new RegExp("^"+L+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+L+"*((?:-\\d)?\\d*)"+L+"*\\)|)(?=[^-]|$)","i")},X=/^(?:input|select|textarea|button)$/i,Y=/^h\d$/i,Z=/^[^{]+\{\s*\[native \w/,$=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,_=/[+~]/,aa=/'|\\/g,ba=new 
RegExp("\\\\([\\da-f]{1,6}"+L+"?|("+L+")|.)","ig"),ca=function(a,b,c){var d="0x"+b-65536;return d!==d||c?b:0>d?String.fromCharCode(d+65536):String.fromCharCode(d>>10|55296,1023&d|56320)},da=function(){m()};try{H.apply(E=I.call(v.childNodes),v.childNodes),E[v.childNodes.length].nodeType}catch(ea){H={apply:E.length?function(a,b){G.apply(a,I.call(b))}:function(a,b){var c=a.length,d=0;while(a[c++]=b[d++]);a.length=c-1}}}function fa(a,b,d,e){var f,h,j,k,l,o,r,s,w=b&&b.ownerDocument,x=b?b.nodeType:9;if(d=d||[],"string"!=typeof a||!a||1!==x&&9!==x&&11!==x)return d;if(!e&&((b?b.ownerDocument||b:v)!==n&&m(b),b=b||n,p)){if(11!==x&&(o=$.exec(a)))if(f=o[1]){if(9===x){if(!(j=b.getElementById(f)))return d;if(j.id===f)return d.push(j),d}else if(w&&(j=w.getElementById(f))&&t(b,j)&&j.id===f)return d.push(j),d}else{if(o[2])return H.apply(d,b.getElementsByTagName(a)),d;if((f=o[3])&&c.getElementsByClassName&&b.getElementsByClassName)return H.apply(d,b.getElementsByClassName(f)),d}if(c.qsa&&!A[a+" "]&&(!q||!q.test(a))){if(1!==x)w=b,s=a;else if("object"!==b.nodeName.toLowerCase()){(k=b.getAttribute("id"))?k=k.replace(aa,"\\$&"):b.setAttribute("id",k=u),r=g(a),h=r.length,l=V.test(k)?"#"+k:"[id='"+k+"']";while(h--)r[h]=l+" "+qa(r[h]);s=r.join(","),w=_.test(a)&&oa(b.parentNode)||b}if(s)try{return H.apply(d,w.querySelectorAll(s)),d}catch(y){}finally{k===u&&b.removeAttribute("id")}}}return i(a.replace(Q,"$1"),b,d,e)}function ga(){var a=[];function b(c,e){return a.push(c+" ")>d.cacheLength&&delete b[a.shift()],b[c+" "]=e}return b}function ha(a){return a[u]=!0,a}function ia(a){var b=n.createElement("div");try{return!!a(b)}catch(c){return!1}finally{b.parentNode&&b.parentNode.removeChild(b),b=null}}function ja(a,b){var c=a.split("|"),e=c.length;while(e--)d.attrHandle[c[e]]=b}function ka(a,b){var c=b&&a,d=c&&1===a.nodeType&&1===b.nodeType&&(~b.sourceIndex||C)-(~a.sourceIndex||C);if(d)return d;if(c)while(c=c.nextSibling)if(c===b)return-1;return a?1:-1}function la(a){return function(b){var 
c=b.nodeName.toLowerCase();return"input"===c&&b.type===a}}function ma(a){return function(b){var c=b.nodeName.toLowerCase();return("input"===c||"button"===c)&&b.type===a}}function na(a){return ha(function(b){return b=+b,ha(function(c,d){var e,f=a([],c.length,b),g=f.length;while(g--)c[e=f[g]]&&(c[e]=!(d[e]=c[e]))})})}function oa(a){return a&&"undefined"!=typeof a.getElementsByTagName&&a}c=fa.support={},f=fa.isXML=function(a){var b=a&&(a.ownerDocument||a).documentElement;return b?"HTML"!==b.nodeName:!1},m=fa.setDocument=function(a){var b,e,g=a?a.ownerDocument||a:v;return g!==n&&9===g.nodeType&&g.documentElement?(n=g,o=n.documentElement,p=!f(n),(e=n.defaultView)&&e.top!==e&&(e.addEventListener?e.addEventListener("unload",da,!1):e.attachEvent&&e.attachEvent("onunload",da)),c.attributes=ia(function(a){return a.className="i",!a.getAttribute("className")}),c.getElementsByTagName=ia(function(a){return a.appendChild(n.createComment("")),!a.getElementsByTagName("*").length}),c.getElementsByClassName=Z.test(n.getElementsByClassName),c.getById=ia(function(a){return o.appendChild(a).id=u,!n.getElementsByName||!n.getElementsByName(u).length}),c.getById?(d.find.ID=function(a,b){if("undefined"!=typeof b.getElementById&&p){var c=b.getElementById(a);return c?[c]:[]}},d.filter.ID=function(a){var b=a.replace(ba,ca);return function(a){return a.getAttribute("id")===b}}):(delete d.find.ID,d.filter.ID=function(a){var b=a.replace(ba,ca);return function(a){var c="undefined"!=typeof a.getAttributeNode&&a.getAttributeNode("id");return c&&c.value===b}}),d.find.TAG=c.getElementsByTagName?function(a,b){return"undefined"!=typeof b.getElementsByTagName?b.getElementsByTagName(a):c.qsa?b.querySelectorAll(a):void 0}:function(a,b){var c,d=[],e=0,f=b.getElementsByTagName(a);if("*"===a){while(c=f[e++])1===c.nodeType&&d.push(c);return d}return f},d.find.CLASS=c.getElementsByClassName&&function(a,b){return"undefined"!=typeof b.getElementsByClassName&&p?b.getElementsByClassName(a):void 
0},r=[],q=[],(c.qsa=Z.test(n.querySelectorAll))&&(ia(function(a){o.appendChild(a).innerHTML="",a.querySelectorAll("[msallowcapture^='']").length&&q.push("[*^$]="+L+"*(?:''|\"\")"),a.querySelectorAll("[selected]").length||q.push("\\["+L+"*(?:value|"+K+")"),a.querySelectorAll("[id~="+u+"-]").length||q.push("~="),a.querySelectorAll(":checked").length||q.push(":checked"),a.querySelectorAll("a#"+u+"+*").length||q.push(".#.+[+~]")}),ia(function(a){var b=n.createElement("input");b.setAttribute("type","hidden"),a.appendChild(b).setAttribute("name","D"),a.querySelectorAll("[name=d]").length&&q.push("name"+L+"*[*^$|!~]?="),a.querySelectorAll(":enabled").length||q.push(":enabled",":disabled"),a.querySelectorAll("*,:x"),q.push(",.*:")})),(c.matchesSelector=Z.test(s=o.matches||o.webkitMatchesSelector||o.mozMatchesSelector||o.oMatchesSelector||o.msMatchesSelector))&&ia(function(a){c.disconnectedMatch=s.call(a,"div"),s.call(a,"[s!='']:x"),r.push("!=",O)}),q=q.length&&new RegExp(q.join("|")),r=r.length&&new RegExp(r.join("|")),b=Z.test(o.compareDocumentPosition),t=b||Z.test(o.contains)?function(a,b){var c=9===a.nodeType?a.documentElement:a,d=b&&b.parentNode;return a===d||!(!d||1!==d.nodeType||!(c.contains?c.contains(d):a.compareDocumentPosition&&16&a.compareDocumentPosition(d)))}:function(a,b){if(b)while(b=b.parentNode)if(b===a)return!0;return!1},B=b?function(a,b){if(a===b)return l=!0,0;var d=!a.compareDocumentPosition-!b.compareDocumentPosition;return d?d:(d=(a.ownerDocument||a)===(b.ownerDocument||b)?a.compareDocumentPosition(b):1,1&d||!c.sortDetached&&b.compareDocumentPosition(a)===d?a===n||a.ownerDocument===v&&t(v,a)?-1:b===n||b.ownerDocument===v&&t(v,b)?1:k?J(k,a)-J(k,b):0:4&d?-1:1)}:function(a,b){if(a===b)return l=!0,0;var c,d=0,e=a.parentNode,f=b.parentNode,g=[a],h=[b];if(!e||!f)return a===n?-1:b===n?1:e?-1:f?1:k?J(k,a)-J(k,b):0;if(e===f)return ka(a,b);c=a;while(c=c.parentNode)g.unshift(c);c=b;while(c=c.parentNode)h.unshift(c);while(g[d]===h[d])d++;return 
d?ka(g[d],h[d]):g[d]===v?-1:h[d]===v?1:0},n):n},fa.matches=function(a,b){return fa(a,null,null,b)},fa.matchesSelector=function(a,b){if((a.ownerDocument||a)!==n&&m(a),b=b.replace(T,"='$1']"),c.matchesSelector&&p&&!A[b+" "]&&(!r||!r.test(b))&&(!q||!q.test(b)))try{var d=s.call(a,b);if(d||c.disconnectedMatch||a.document&&11!==a.document.nodeType)return d}catch(e){}return fa(b,n,null,[a]).length>0},fa.contains=function(a,b){return(a.ownerDocument||a)!==n&&m(a),t(a,b)},fa.attr=function(a,b){(a.ownerDocument||a)!==n&&m(a);var e=d.attrHandle[b.toLowerCase()],f=e&&D.call(d.attrHandle,b.toLowerCase())?e(a,b,!p):void 0;return void 0!==f?f:c.attributes||!p?a.getAttribute(b):(f=a.getAttributeNode(b))&&f.specified?f.value:null},fa.error=function(a){throw new Error("Syntax error, unrecognized expression: "+a)},fa.uniqueSort=function(a){var b,d=[],e=0,f=0;if(l=!c.detectDuplicates,k=!c.sortStable&&a.slice(0),a.sort(B),l){while(b=a[f++])b===a[f]&&(e=d.push(f));while(e--)a.splice(d[e],1)}return k=null,a},e=fa.getText=function(a){var b,c="",d=0,f=a.nodeType;if(f){if(1===f||9===f||11===f){if("string"==typeof a.textContent)return a.textContent;for(a=a.firstChild;a;a=a.nextSibling)c+=e(a)}else if(3===f||4===f)return a.nodeValue}else while(b=a[d++])c+=e(b);return c},d=fa.selectors={cacheLength:50,createPseudo:ha,match:W,attrHandle:{},find:{},relative:{">":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(a){return a[1]=a[1].replace(ba,ca),a[3]=(a[3]||a[4]||a[5]||"").replace(ba,ca),"~="===a[2]&&(a[3]=" "+a[3]+" "),a.slice(0,4)},CHILD:function(a){return a[1]=a[1].toLowerCase(),"nth"===a[1].slice(0,3)?(a[3]||fa.error(a[0]),a[4]=+(a[4]?a[5]+(a[6]||1):2*("even"===a[3]||"odd"===a[3])),a[5]=+(a[7]+a[8]||"odd"===a[3])):a[3]&&fa.error(a[0]),a},PSEUDO:function(a){var b,c=!a[6]&&a[2];return 
W.CHILD.test(a[0])?null:(a[3]?a[2]=a[4]||a[5]||"":c&&U.test(c)&&(b=g(c,!0))&&(b=c.indexOf(")",c.length-b)-c.length)&&(a[0]=a[0].slice(0,b),a[2]=c.slice(0,b)),a.slice(0,3))}},filter:{TAG:function(a){var b=a.replace(ba,ca).toLowerCase();return"*"===a?function(){return!0}:function(a){return a.nodeName&&a.nodeName.toLowerCase()===b}},CLASS:function(a){var b=y[a+" "];return b||(b=new RegExp("(^|"+L+")"+a+"("+L+"|$)"))&&y(a,function(a){return b.test("string"==typeof a.className&&a.className||"undefined"!=typeof a.getAttribute&&a.getAttribute("class")||"")})},ATTR:function(a,b,c){return function(d){var e=fa.attr(d,a);return null==e?"!="===b:b?(e+="","="===b?e===c:"!="===b?e!==c:"^="===b?c&&0===e.indexOf(c):"*="===b?c&&e.indexOf(c)>-1:"$="===b?c&&e.slice(-c.length)===c:"~="===b?(" "+e.replace(P," ")+" ").indexOf(c)>-1:"|="===b?e===c||e.slice(0,c.length+1)===c+"-":!1):!0}},CHILD:function(a,b,c,d,e){var f="nth"!==a.slice(0,3),g="last"!==a.slice(-4),h="of-type"===b;return 1===d&&0===e?function(a){return!!a.parentNode}:function(b,c,i){var j,k,l,m,n,o,p=f!==g?"nextSibling":"previousSibling",q=b.parentNode,r=h&&b.nodeName.toLowerCase(),s=!i&&!h,t=!1;if(q){if(f){while(p){m=b;while(m=m[p])if(h?m.nodeName.toLowerCase()===r:1===m.nodeType)return!1;o=p="only"===a&&!o&&"nextSibling"}return!0}if(o=[g?q.firstChild:q.lastChild],g&&s){m=q,l=m[u]||(m[u]={}),k=l[m.uniqueID]||(l[m.uniqueID]={}),j=k[a]||[],n=j[0]===w&&j[1],t=n&&j[2],m=n&&q.childNodes[n];while(m=++n&&m&&m[p]||(t=n=0)||o.pop())if(1===m.nodeType&&++t&&m===b){k[a]=[w,n,t];break}}else if(s&&(m=b,l=m[u]||(m[u]={}),k=l[m.uniqueID]||(l[m.uniqueID]={}),j=k[a]||[],n=j[0]===w&&j[1],t=n),t===!1)while(m=++n&&m&&m[p]||(t=n=0)||o.pop())if((h?m.nodeName.toLowerCase()===r:1===m.nodeType)&&++t&&(s&&(l=m[u]||(m[u]={}),k=l[m.uniqueID]||(l[m.uniqueID]={}),k[a]=[w,t]),m===b))break;return t-=e,t===d||t%d===0&&t/d>=0}}},PSEUDO:function(a,b){var c,e=d.pseudos[a]||d.setFilters[a.toLowerCase()]||fa.error("unsupported pseudo: "+a);return 
e[u]?e(b):e.length>1?(c=[a,a,"",b],d.setFilters.hasOwnProperty(a.toLowerCase())?ha(function(a,c){var d,f=e(a,b),g=f.length;while(g--)d=J(a,f[g]),a[d]=!(c[d]=f[g])}):function(a){return e(a,0,c)}):e}},pseudos:{not:ha(function(a){var b=[],c=[],d=h(a.replace(Q,"$1"));return d[u]?ha(function(a,b,c,e){var f,g=d(a,null,e,[]),h=a.length;while(h--)(f=g[h])&&(a[h]=!(b[h]=f))}):function(a,e,f){return b[0]=a,d(b,null,f,c),b[0]=null,!c.pop()}}),has:ha(function(a){return function(b){return fa(a,b).length>0}}),contains:ha(function(a){return a=a.replace(ba,ca),function(b){return(b.textContent||b.innerText||e(b)).indexOf(a)>-1}}),lang:ha(function(a){return V.test(a||"")||fa.error("unsupported lang: "+a),a=a.replace(ba,ca).toLowerCase(),function(b){var c;do if(c=p?b.lang:b.getAttribute("xml:lang")||b.getAttribute("lang"))return c=c.toLowerCase(),c===a||0===c.indexOf(a+"-");while((b=b.parentNode)&&1===b.nodeType);return!1}}),target:function(b){var c=a.location&&a.location.hash;return c&&c.slice(1)===b.id},root:function(a){return a===o},focus:function(a){return a===n.activeElement&&(!n.hasFocus||n.hasFocus())&&!!(a.type||a.href||~a.tabIndex)},enabled:function(a){return a.disabled===!1},disabled:function(a){return a.disabled===!0},checked:function(a){var b=a.nodeName.toLowerCase();return"input"===b&&!!a.checked||"option"===b&&!!a.selected},selected:function(a){return a.parentNode&&a.parentNode.selectedIndex,a.selected===!0},empty:function(a){for(a=a.firstChild;a;a=a.nextSibling)if(a.nodeType<6)return!1;return!0},parent:function(a){return!d.pseudos.empty(a)},header:function(a){return Y.test(a.nodeName)},input:function(a){return X.test(a.nodeName)},button:function(a){var b=a.nodeName.toLowerCase();return"input"===b&&"button"===a.type||"button"===b},text:function(a){var 
b;return"input"===a.nodeName.toLowerCase()&&"text"===a.type&&(null==(b=a.getAttribute("type"))||"text"===b.toLowerCase())},first:na(function(){return[0]}),last:na(function(a,b){return[b-1]}),eq:na(function(a,b,c){return[0>c?c+b:c]}),even:na(function(a,b){for(var c=0;b>c;c+=2)a.push(c);return a}),odd:na(function(a,b){for(var c=1;b>c;c+=2)a.push(c);return a}),lt:na(function(a,b,c){for(var d=0>c?c+b:c;--d>=0;)a.push(d);return a}),gt:na(function(a,b,c){for(var d=0>c?c+b:c;++db;b++)d+=a[b].value;return d}function ra(a,b,c){var d=b.dir,e=c&&"parentNode"===d,f=x++;return b.first?function(b,c,f){while(b=b[d])if(1===b.nodeType||e)return a(b,c,f)}:function(b,c,g){var h,i,j,k=[w,f];if(g){while(b=b[d])if((1===b.nodeType||e)&&a(b,c,g))return!0}else while(b=b[d])if(1===b.nodeType||e){if(j=b[u]||(b[u]={}),i=j[b.uniqueID]||(j[b.uniqueID]={}),(h=i[d])&&h[0]===w&&h[1]===f)return k[2]=h[2];if(i[d]=k,k[2]=a(b,c,g))return!0}}}function sa(a){return a.length>1?function(b,c,d){var e=a.length;while(e--)if(!a[e](b,c,d))return!1;return!0}:a[0]}function ta(a,b,c){for(var d=0,e=b.length;e>d;d++)fa(a,b[d],c);return c}function ua(a,b,c,d,e){for(var f,g=[],h=0,i=a.length,j=null!=b;i>h;h++)(f=a[h])&&(c&&!c(f,d,e)||(g.push(f),j&&b.push(h)));return g}function va(a,b,c,d,e,f){return d&&!d[u]&&(d=va(d)),e&&!e[u]&&(e=va(e,f)),ha(function(f,g,h,i){var j,k,l,m=[],n=[],o=g.length,p=f||ta(b||"*",h.nodeType?[h]:h,[]),q=!a||!f&&b?p:ua(p,m,a,h,i),r=c?e||(f?a:o||d)?[]:g:q;if(c&&c(q,r,h,i),d){j=ua(r,n),d(j,[],h,i),k=j.length;while(k--)(l=j[k])&&(r[n[k]]=!(q[n[k]]=l))}if(f){if(e||a){if(e){j=[],k=r.length;while(k--)(l=r[k])&&j.push(q[k]=l);e(null,r=[],j,i)}k=r.length;while(k--)(l=r[k])&&(j=e?J(f,l):m[k])>-1&&(f[j]=!(g[j]=l))}}else r=ua(r===g?r.splice(o,r.length):r),e?e(null,g,r,i):H.apply(g,r)})}function wa(a){for(var b,c,e,f=a.length,g=d.relative[a[0].type],h=g||d.relative[" "],i=g?1:0,k=ra(function(a){return a===b},h,!0),l=ra(function(a){return J(b,a)>-1},h,!0),m=[function(a,c,d){var 
e=!g&&(d||c!==j)||((b=c).nodeType?k(a,c,d):l(a,c,d));return b=null,e}];f>i;i++)if(c=d.relative[a[i].type])m=[ra(sa(m),c)];else{if(c=d.filter[a[i].type].apply(null,a[i].matches),c[u]){for(e=++i;f>e;e++)if(d.relative[a[e].type])break;return va(i>1&&sa(m),i>1&&qa(a.slice(0,i-1).concat({value:" "===a[i-2].type?"*":""})).replace(Q,"$1"),c,e>i&&wa(a.slice(i,e)),f>e&&wa(a=a.slice(e)),f>e&&qa(a))}m.push(c)}return sa(m)}function xa(a,b){var c=b.length>0,e=a.length>0,f=function(f,g,h,i,k){var l,o,q,r=0,s="0",t=f&&[],u=[],v=j,x=f||e&&d.find.TAG("*",k),y=w+=null==v?1:Math.random()||.1,z=x.length;for(k&&(j=g===n||g||k);s!==z&&null!=(l=x[s]);s++){if(e&&l){o=0,g||l.ownerDocument===n||(m(l),h=!p);while(q=a[o++])if(q(l,g||n,h)){i.push(l);break}k&&(w=y)}c&&((l=!q&&l)&&r--,f&&t.push(l))}if(r+=s,c&&s!==r){o=0;while(q=b[o++])q(t,u,g,h);if(f){if(r>0)while(s--)t[s]||u[s]||(u[s]=F.call(i));u=ua(u)}H.apply(i,u),k&&!f&&u.length>0&&r+b.length>1&&fa.uniqueSort(i)}return k&&(w=y,j=v),t};return c?ha(f):f}return h=fa.compile=function(a,b){var c,d=[],e=[],f=A[a+" "];if(!f){b||(b=g(a)),c=b.length;while(c--)f=wa(b[c]),f[u]?d.push(f):e.push(f);f=A(a,xa(e,d)),f.selector=a}return f},i=fa.select=function(a,b,e,f){var i,j,k,l,m,n="function"==typeof a&&a,o=!f&&g(a=n.selector||a);if(e=e||[],1===o.length){if(j=o[0]=o[0].slice(0),j.length>2&&"ID"===(k=j[0]).type&&c.getById&&9===b.nodeType&&p&&d.relative[j[1].type]){if(b=(d.find.ID(k.matches[0].replace(ba,ca),b)||[])[0],!b)return e;n&&(b=b.parentNode),a=a.slice(j.shift().value.length)}i=W.needsContext.test(a)?0:j.length;while(i--){if(k=j[i],d.relative[l=k.type])break;if((m=d.find[l])&&(f=m(k.matches[0].replace(ba,ca),_.test(j[0].type)&&oa(b.parentNode)||b))){if(j.splice(i,1),a=f.length&&qa(j),!a)return H.apply(e,f),e;break}}}return(n||h(a,o))(f,b,!p,e,!b||_.test(a)&&oa(b.parentNode)||b),e},c.sortStable=u.split("").sort(B).join("")===u,c.detectDuplicates=!!l,m(),c.sortDetached=ia(function(a){return 
1&a.compareDocumentPosition(n.createElement("div"))}),ia(function(a){return a.innerHTML="","#"===a.firstChild.getAttribute("href")})||ja("type|href|height|width",function(a,b,c){return c?void 0:a.getAttribute(b,"type"===b.toLowerCase()?1:2)}),c.attributes&&ia(function(a){return a.innerHTML="",a.firstChild.setAttribute("value",""),""===a.firstChild.getAttribute("value")})||ja("value",function(a,b,c){return c||"input"!==a.nodeName.toLowerCase()?void 0:a.defaultValue}),ia(function(a){return null==a.getAttribute("disabled")})||ja(K,function(a,b,c){var d;return c?void 0:a[b]===!0?b.toLowerCase():(d=a.getAttributeNode(b))&&d.specified?d.value:null}),fa}(a);n.find=t,n.expr=t.selectors,n.expr[":"]=n.expr.pseudos,n.uniqueSort=n.unique=t.uniqueSort,n.text=t.getText,n.isXMLDoc=t.isXML,n.contains=t.contains;var u=function(a,b,c){var d=[],e=void 0!==c;while((a=a[b])&&9!==a.nodeType)if(1===a.nodeType){if(e&&n(a).is(c))break;d.push(a)}return d},v=function(a,b){for(var c=[];a;a=a.nextSibling)1===a.nodeType&&a!==b&&c.push(a);return c},w=n.expr.match.needsContext,x=/^<([\w-]+)\s*\/?>(?:<\/\1>|)$/,y=/^.[^:#\[\.,]*$/;function z(a,b,c){if(n.isFunction(b))return n.grep(a,function(a,d){return!!b.call(a,d,a)!==c});if(b.nodeType)return n.grep(a,function(a){return a===b!==c});if("string"==typeof b){if(y.test(b))return n.filter(b,a,c);b=n.filter(b,a)}return n.grep(a,function(a){return h.call(b,a)>-1!==c})}n.filter=function(a,b,c){var d=b[0];return c&&(a=":not("+a+")"),1===b.length&&1===d.nodeType?n.find.matchesSelector(d,a)?[d]:[]:n.find.matches(a,n.grep(b,function(a){return 1===a.nodeType}))},n.fn.extend({find:function(a){var b,c=this.length,d=[],e=this;if("string"!=typeof a)return this.pushStack(n(a).filter(function(){for(b=0;c>b;b++)if(n.contains(e[b],this))return!0}));for(b=0;c>b;b++)n.find(a,e[b],d);return d=this.pushStack(c>1?n.unique(d):d),d.selector=this.selector?this.selector+" "+a:a,d},filter:function(a){return this.pushStack(z(this,a||[],!1))},not:function(a){return 
this.pushStack(z(this,a||[],!0))},is:function(a){return!!z(this,"string"==typeof a&&w.test(a)?n(a):a||[],!1).length}});var A,B=/^(?:\s*(<[\w\W]+>)[^>]*|#([\w-]*))$/,C=n.fn.init=function(a,b,c){var e,f;if(!a)return this;if(c=c||A,"string"==typeof a){if(e="<"===a[0]&&">"===a[a.length-1]&&a.length>=3?[null,a,null]:B.exec(a),!e||!e[1]&&b)return!b||b.jquery?(b||c).find(a):this.constructor(b).find(a);if(e[1]){if(b=b instanceof n?b[0]:b,n.merge(this,n.parseHTML(e[1],b&&b.nodeType?b.ownerDocument||b:d,!0)),x.test(e[1])&&n.isPlainObject(b))for(e in b)n.isFunction(this[e])?this[e](b[e]):this.attr(e,b[e]);return this}return f=d.getElementById(e[2]),f&&f.parentNode&&(this.length=1,this[0]=f),this.context=d,this.selector=a,this}return a.nodeType?(this.context=this[0]=a,this.length=1,this):n.isFunction(a)?void 0!==c.ready?c.ready(a):a(n):(void 0!==a.selector&&(this.selector=a.selector,this.context=a.context),n.makeArray(a,this))};C.prototype=n.fn,A=n(d);var D=/^(?:parents|prev(?:Until|All))/,E={children:!0,contents:!0,next:!0,prev:!0};n.fn.extend({has:function(a){var b=n(a,this),c=b.length;return this.filter(function(){for(var a=0;c>a;a++)if(n.contains(this,b[a]))return!0})},closest:function(a,b){for(var c,d=0,e=this.length,f=[],g=w.test(a)||"string"!=typeof a?n(a,b||this.context):0;e>d;d++)for(c=this[d];c&&c!==b;c=c.parentNode)if(c.nodeType<11&&(g?g.index(c)>-1:1===c.nodeType&&n.find.matchesSelector(c,a))){f.push(c);break}return this.pushStack(f.length>1?n.uniqueSort(f):f)},index:function(a){return a?"string"==typeof a?h.call(n(a),this[0]):h.call(this,a.jquery?a[0]:a):this[0]&&this[0].parentNode?this.first().prevAll().length:-1},add:function(a,b){return this.pushStack(n.uniqueSort(n.merge(this.get(),n(a,b))))},addBack:function(a){return this.add(null==a?this.prevObject:this.prevObject.filter(a))}});function F(a,b){while((a=a[b])&&1!==a.nodeType);return a}n.each({parent:function(a){var b=a.parentNode;return b&&11!==b.nodeType?b:null},parents:function(a){return 
u(a,"parentNode")},parentsUntil:function(a,b,c){return u(a,"parentNode",c)},next:function(a){return F(a,"nextSibling")},prev:function(a){return F(a,"previousSibling")},nextAll:function(a){return u(a,"nextSibling")},prevAll:function(a){return u(a,"previousSibling")},nextUntil:function(a,b,c){return u(a,"nextSibling",c)},prevUntil:function(a,b,c){return u(a,"previousSibling",c)},siblings:function(a){return v((a.parentNode||{}).firstChild,a)},children:function(a){return v(a.firstChild)},contents:function(a){return a.contentDocument||n.merge([],a.childNodes)}},function(a,b){n.fn[a]=function(c,d){var e=n.map(this,b,c);return"Until"!==a.slice(-5)&&(d=c),d&&"string"==typeof d&&(e=n.filter(d,e)),this.length>1&&(E[a]||n.uniqueSort(e),D.test(a)&&e.reverse()),this.pushStack(e)}});var G=/\S+/g;function H(a){var b={};return n.each(a.match(G)||[],function(a,c){b[c]=!0}),b}n.Callbacks=function(a){a="string"==typeof a?H(a):n.extend({},a);var b,c,d,e,f=[],g=[],h=-1,i=function(){for(e=a.once,d=b=!0;g.length;h=-1){c=g.shift();while(++h-1)f.splice(c,1),h>=c&&h--}),this},has:function(a){return a?n.inArray(a,f)>-1:f.length>0},empty:function(){return f&&(f=[]),this},disable:function(){return e=g=[],f=c="",this},disabled:function(){return!f},lock:function(){return e=g=[],c||(f=c=""),this},locked:function(){return!!e},fireWith:function(a,c){return e||(c=c||[],c=[a,c.slice?c.slice():c],g.push(c),b||i()),this},fire:function(){return j.fireWith(this,arguments),this},fired:function(){return!!d}};return j},n.extend({Deferred:function(a){var b=[["resolve","done",n.Callbacks("once memory"),"resolved"],["reject","fail",n.Callbacks("once memory"),"rejected"],["notify","progress",n.Callbacks("memory")]],c="pending",d={state:function(){return c},always:function(){return e.done(arguments).fail(arguments),this},then:function(){var a=arguments;return n.Deferred(function(c){n.each(b,function(b,f){var g=n.isFunction(a[b])&&a[b];e[f[1]](function(){var 
a=g&&g.apply(this,arguments);a&&n.isFunction(a.promise)?a.promise().progress(c.notify).done(c.resolve).fail(c.reject):c[f[0]+"With"](this===d?c.promise():this,g?[a]:arguments)})}),a=null}).promise()},promise:function(a){return null!=a?n.extend(a,d):d}},e={};return d.pipe=d.then,n.each(b,function(a,f){var g=f[2],h=f[3];d[f[1]]=g.add,h&&g.add(function(){c=h},b[1^a][2].disable,b[2][2].lock),e[f[0]]=function(){return e[f[0]+"With"](this===e?d:this,arguments),this},e[f[0]+"With"]=g.fireWith}),d.promise(e),a&&a.call(e,e),e},when:function(a){var b=0,c=e.call(arguments),d=c.length,f=1!==d||a&&n.isFunction(a.promise)?d:0,g=1===f?a:n.Deferred(),h=function(a,b,c){return function(d){b[a]=this,c[a]=arguments.length>1?e.call(arguments):d,c===i?g.notifyWith(b,c):--f||g.resolveWith(b,c)}},i,j,k;if(d>1)for(i=new Array(d),j=new Array(d),k=new Array(d);d>b;b++)c[b]&&n.isFunction(c[b].promise)?c[b].promise().progress(h(b,j,i)).done(h(b,k,c)).fail(g.reject):--f;return f||g.resolveWith(k,c),g.promise()}});var I;n.fn.ready=function(a){return n.ready.promise().done(a),this},n.extend({isReady:!1,readyWait:1,holdReady:function(a){a?n.readyWait++:n.ready(!0)},ready:function(a){(a===!0?--n.readyWait:n.isReady)||(n.isReady=!0,a!==!0&&--n.readyWait>0||(I.resolveWith(d,[n]),n.fn.triggerHandler&&(n(d).triggerHandler("ready"),n(d).off("ready"))))}});function J(){d.removeEventListener("DOMContentLoaded",J),a.removeEventListener("load",J),n.ready()}n.ready.promise=function(b){return I||(I=n.Deferred(),"complete"===d.readyState||"loading"!==d.readyState&&!d.documentElement.doScroll?a.setTimeout(n.ready):(d.addEventListener("DOMContentLoaded",J),a.addEventListener("load",J))),I.promise(b)},n.ready.promise();var K=function(a,b,c,d,e,f,g){var h=0,i=a.length,j=null==c;if("object"===n.type(c)){e=!0;for(h in c)K(a,b,h,c[h],!0,f,g)}else if(void 0!==d&&(e=!0,n.isFunction(d)||(g=!0),j&&(g?(b.call(a,d),b=null):(j=b,b=function(a,b,c){return 
j.call(n(a),c)})),b))for(;i>h;h++)b(a[h],c,g?d:d.call(a[h],h,b(a[h],c)));return e?a:j?b.call(a):i?b(a[0],c):f},L=function(a){return 1===a.nodeType||9===a.nodeType||!+a.nodeType};function M(){this.expando=n.expando+M.uid++}M.uid=1,M.prototype={register:function(a,b){var c=b||{};return a.nodeType?a[this.expando]=c:Object.defineProperty(a,this.expando,{value:c,writable:!0,configurable:!0}),a[this.expando]},cache:function(a){if(!L(a))return{};var b=a[this.expando];return b||(b={},L(a)&&(a.nodeType?a[this.expando]=b:Object.defineProperty(a,this.expando,{value:b,configurable:!0}))),b},set:function(a,b,c){var d,e=this.cache(a);if("string"==typeof b)e[b]=c;else for(d in b)e[d]=b[d];return e},get:function(a,b){return void 0===b?this.cache(a):a[this.expando]&&a[this.expando][b]},access:function(a,b,c){var d;return void 0===b||b&&"string"==typeof b&&void 0===c?(d=this.get(a,b),void 0!==d?d:this.get(a,n.camelCase(b))):(this.set(a,b,c),void 0!==c?c:b)},remove:function(a,b){var c,d,e,f=a[this.expando];if(void 0!==f){if(void 0===b)this.register(a);else{n.isArray(b)?d=b.concat(b.map(n.camelCase)):(e=n.camelCase(b),b in f?d=[b,e]:(d=e,d=d in f?[d]:d.match(G)||[])),c=d.length;while(c--)delete f[d[c]]}(void 0===b||n.isEmptyObject(f))&&(a.nodeType?a[this.expando]=void 0:delete a[this.expando])}},hasData:function(a){var b=a[this.expando];return void 0!==b&&!n.isEmptyObject(b)}};var N=new M,O=new M,P=/^(?:\{[\w\W]*\}|\[[\w\W]*\])$/,Q=/[A-Z]/g;function R(a,b,c){var d;if(void 0===c&&1===a.nodeType)if(d="data-"+b.replace(Q,"-$&").toLowerCase(),c=a.getAttribute(d),"string"==typeof c){try{c="true"===c?!0:"false"===c?!1:"null"===c?null:+c+""===c?+c:P.test(c)?n.parseJSON(c):c; -}catch(e){}O.set(a,b,c)}else c=void 0;return c}n.extend({hasData:function(a){return O.hasData(a)||N.hasData(a)},data:function(a,b,c){return O.access(a,b,c)},removeData:function(a,b){O.remove(a,b)},_data:function(a,b,c){return N.access(a,b,c)},_removeData:function(a,b){N.remove(a,b)}}),n.fn.extend({data:function(a,b){var 
c,d,e,f=this[0],g=f&&f.attributes;if(void 0===a){if(this.length&&(e=O.get(f),1===f.nodeType&&!N.get(f,"hasDataAttrs"))){c=g.length;while(c--)g[c]&&(d=g[c].name,0===d.indexOf("data-")&&(d=n.camelCase(d.slice(5)),R(f,d,e[d])));N.set(f,"hasDataAttrs",!0)}return e}return"object"==typeof a?this.each(function(){O.set(this,a)}):K(this,function(b){var c,d;if(f&&void 0===b){if(c=O.get(f,a)||O.get(f,a.replace(Q,"-$&").toLowerCase()),void 0!==c)return c;if(d=n.camelCase(a),c=O.get(f,d),void 0!==c)return c;if(c=R(f,d,void 0),void 0!==c)return c}else d=n.camelCase(a),this.each(function(){var c=O.get(this,d);O.set(this,d,b),a.indexOf("-")>-1&&void 0!==c&&O.set(this,a,b)})},null,b,arguments.length>1,null,!0)},removeData:function(a){return this.each(function(){O.remove(this,a)})}}),n.extend({queue:function(a,b,c){var d;return a?(b=(b||"fx")+"queue",d=N.get(a,b),c&&(!d||n.isArray(c)?d=N.access(a,b,n.makeArray(c)):d.push(c)),d||[]):void 0},dequeue:function(a,b){b=b||"fx";var c=n.queue(a,b),d=c.length,e=c.shift(),f=n._queueHooks(a,b),g=function(){n.dequeue(a,b)};"inprogress"===e&&(e=c.shift(),d--),e&&("fx"===b&&c.unshift("inprogress"),delete f.stop,e.call(a,g,f)),!d&&f&&f.empty.fire()},_queueHooks:function(a,b){var c=b+"queueHooks";return N.get(a,c)||N.access(a,c,{empty:n.Callbacks("once memory").add(function(){N.remove(a,[b+"queue",c])})})}}),n.fn.extend({queue:function(a,b){var c=2;return"string"!=typeof a&&(b=a,a="fx",c--),arguments.length",""],thead:[1,"","
"],col:[2,"","
"],tr:[2,"","
"],td:[3,"","
"],_default:[0,"",""]};$.optgroup=$.option,$.tbody=$.tfoot=$.colgroup=$.caption=$.thead,$.th=$.td;function _(a,b){var c="undefined"!=typeof a.getElementsByTagName?a.getElementsByTagName(b||"*"):"undefined"!=typeof a.querySelectorAll?a.querySelectorAll(b||"*"):[];return void 0===b||b&&n.nodeName(a,b)?n.merge([a],c):c}function aa(a,b){for(var c=0,d=a.length;d>c;c++)N.set(a[c],"globalEval",!b||N.get(b[c],"globalEval"))}var ba=/<|&#?\w+;/;function ca(a,b,c,d,e){for(var f,g,h,i,j,k,l=b.createDocumentFragment(),m=[],o=0,p=a.length;p>o;o++)if(f=a[o],f||0===f)if("object"===n.type(f))n.merge(m,f.nodeType?[f]:f);else if(ba.test(f)){g=g||l.appendChild(b.createElement("div")),h=(Y.exec(f)||["",""])[1].toLowerCase(),i=$[h]||$._default,g.innerHTML=i[1]+n.htmlPrefilter(f)+i[2],k=i[0];while(k--)g=g.lastChild;n.merge(m,g.childNodes),g=l.firstChild,g.textContent=""}else m.push(b.createTextNode(f));l.textContent="",o=0;while(f=m[o++])if(d&&n.inArray(f,d)>-1)e&&e.push(f);else if(j=n.contains(f.ownerDocument,f),g=_(l.appendChild(f),"script"),j&&aa(g),c){k=0;while(f=g[k++])Z.test(f.type||"")&&c.push(f)}return l}!function(){var a=d.createDocumentFragment(),b=a.appendChild(d.createElement("div")),c=d.createElement("input");c.setAttribute("type","radio"),c.setAttribute("checked","checked"),c.setAttribute("name","t"),b.appendChild(c),l.checkClone=b.cloneNode(!0).cloneNode(!0).lastChild.checked,b.innerHTML="",l.noCloneChecked=!!b.cloneNode(!0).lastChild.defaultValue}();var da=/^key/,ea=/^(?:mouse|pointer|contextmenu|drag|drop)|click/,fa=/^([^.]*)(?:\.(.+)|)/;function ga(){return!0}function ha(){return!1}function ia(){try{return d.activeElement}catch(a){}}function ja(a,b,c,d,e,f){var g,h;if("object"==typeof b){"string"!=typeof c&&(d=d||c,c=void 0);for(h in b)ja(a,h,c,d,b[h],f);return a}if(null==d&&null==e?(e=c,d=c=void 0):null==e&&("string"==typeof c?(e=d,d=void 0):(e=d,d=c,c=void 0)),e===!1)e=ha;else if(!e)return a;return 1===f&&(g=e,e=function(a){return 
n().off(a),g.apply(this,arguments)},e.guid=g.guid||(g.guid=n.guid++)),a.each(function(){n.event.add(this,b,e,d,c)})}n.event={global:{},add:function(a,b,c,d,e){var f,g,h,i,j,k,l,m,o,p,q,r=N.get(a);if(r){c.handler&&(f=c,c=f.handler,e=f.selector),c.guid||(c.guid=n.guid++),(i=r.events)||(i=r.events={}),(g=r.handle)||(g=r.handle=function(b){return"undefined"!=typeof n&&n.event.triggered!==b.type?n.event.dispatch.apply(a,arguments):void 0}),b=(b||"").match(G)||[""],j=b.length;while(j--)h=fa.exec(b[j])||[],o=q=h[1],p=(h[2]||"").split(".").sort(),o&&(l=n.event.special[o]||{},o=(e?l.delegateType:l.bindType)||o,l=n.event.special[o]||{},k=n.extend({type:o,origType:q,data:d,handler:c,guid:c.guid,selector:e,needsContext:e&&n.expr.match.needsContext.test(e),namespace:p.join(".")},f),(m=i[o])||(m=i[o]=[],m.delegateCount=0,l.setup&&l.setup.call(a,d,p,g)!==!1||a.addEventListener&&a.addEventListener(o,g)),l.add&&(l.add.call(a,k),k.handler.guid||(k.handler.guid=c.guid)),e?m.splice(m.delegateCount++,0,k):m.push(k),n.event.global[o]=!0)}},remove:function(a,b,c,d,e){var f,g,h,i,j,k,l,m,o,p,q,r=N.hasData(a)&&N.get(a);if(r&&(i=r.events)){b=(b||"").match(G)||[""],j=b.length;while(j--)if(h=fa.exec(b[j])||[],o=q=h[1],p=(h[2]||"").split(".").sort(),o){l=n.event.special[o]||{},o=(d?l.delegateType:l.bindType)||o,m=i[o]||[],h=h[2]&&new RegExp("(^|\\.)"+p.join("\\.(?:.*\\.|)")+"(\\.|$)"),g=f=m.length;while(f--)k=m[f],!e&&q!==k.origType||c&&c.guid!==k.guid||h&&!h.test(k.namespace)||d&&d!==k.selector&&("**"!==d||!k.selector)||(m.splice(f,1),k.selector&&m.delegateCount--,l.remove&&l.remove.call(a,k));g&&!m.length&&(l.teardown&&l.teardown.call(a,p,r.handle)!==!1||n.removeEvent(a,o,r.handle),delete i[o])}else for(o in i)n.event.remove(a,o+b[j],c,d,!0);n.isEmptyObject(i)&&N.remove(a,"handle events")}},dispatch:function(a){a=n.event.fix(a);var 
b,c,d,f,g,h=[],i=e.call(arguments),j=(N.get(this,"events")||{})[a.type]||[],k=n.event.special[a.type]||{};if(i[0]=a,a.delegateTarget=this,!k.preDispatch||k.preDispatch.call(this,a)!==!1){h=n.event.handlers.call(this,a,j),b=0;while((f=h[b++])&&!a.isPropagationStopped()){a.currentTarget=f.elem,c=0;while((g=f.handlers[c++])&&!a.isImmediatePropagationStopped())a.rnamespace&&!a.rnamespace.test(g.namespace)||(a.handleObj=g,a.data=g.data,d=((n.event.special[g.origType]||{}).handle||g.handler).apply(f.elem,i),void 0!==d&&(a.result=d)===!1&&(a.preventDefault(),a.stopPropagation()))}return k.postDispatch&&k.postDispatch.call(this,a),a.result}},handlers:function(a,b){var c,d,e,f,g=[],h=b.delegateCount,i=a.target;if(h&&i.nodeType&&("click"!==a.type||isNaN(a.button)||a.button<1))for(;i!==this;i=i.parentNode||this)if(1===i.nodeType&&(i.disabled!==!0||"click"!==a.type)){for(d=[],c=0;h>c;c++)f=b[c],e=f.selector+" ",void 0===d[e]&&(d[e]=f.needsContext?n(e,this).index(i)>-1:n.find(e,this,null,[i]).length),d[e]&&d.push(f);d.length&&g.push({elem:i,handlers:d})}return h]*)\/>/gi,la=/\s*$/g;function pa(a,b){return n.nodeName(a,"table")&&n.nodeName(11!==b.nodeType?b:b.firstChild,"tr")?a.getElementsByTagName("tbody")[0]||a.appendChild(a.ownerDocument.createElement("tbody")):a}function qa(a){return a.type=(null!==a.getAttribute("type"))+"/"+a.type,a}function ra(a){var b=na.exec(a.type);return b?a.type=b[1]:a.removeAttribute("type"),a}function sa(a,b){var c,d,e,f,g,h,i,j;if(1===b.nodeType){if(N.hasData(a)&&(f=N.access(a),g=N.set(b,f),j=f.events)){delete g.handle,g.events={};for(e in j)for(c=0,d=j[e].length;d>c;c++)n.event.add(b,e,j[e][c])}O.hasData(a)&&(h=O.access(a),i=n.extend({},h),O.set(b,i))}}function ta(a,b){var c=b.nodeName.toLowerCase();"input"===c&&X.test(a.type)?b.checked=a.checked:"input"!==c&&"textarea"!==c||(b.defaultValue=a.defaultValue)}function ua(a,b,c,d){b=f.apply([],b);var e,g,h,i,j,k,m=0,o=a.length,p=o-1,q=b[0],r=n.isFunction(q);if(r||o>1&&"string"==typeof 
q&&!l.checkClone&&ma.test(q))return a.each(function(e){var f=a.eq(e);r&&(b[0]=q.call(this,e,f.html())),ua(f,b,c,d)});if(o&&(e=ca(b,a[0].ownerDocument,!1,a,d),g=e.firstChild,1===e.childNodes.length&&(e=g),g||d)){for(h=n.map(_(e,"script"),qa),i=h.length;o>m;m++)j=e,m!==p&&(j=n.clone(j,!0,!0),i&&n.merge(h,_(j,"script"))),c.call(a[m],j,m);if(i)for(k=h[h.length-1].ownerDocument,n.map(h,ra),m=0;i>m;m++)j=h[m],Z.test(j.type||"")&&!N.access(j,"globalEval")&&n.contains(k,j)&&(j.src?n._evalUrl&&n._evalUrl(j.src):n.globalEval(j.textContent.replace(oa,"")))}return a}function va(a,b,c){for(var d,e=b?n.filter(b,a):a,f=0;null!=(d=e[f]);f++)c||1!==d.nodeType||n.cleanData(_(d)),d.parentNode&&(c&&n.contains(d.ownerDocument,d)&&aa(_(d,"script")),d.parentNode.removeChild(d));return a}n.extend({htmlPrefilter:function(a){return a.replace(ka,"<$1>")},clone:function(a,b,c){var d,e,f,g,h=a.cloneNode(!0),i=n.contains(a.ownerDocument,a);if(!(l.noCloneChecked||1!==a.nodeType&&11!==a.nodeType||n.isXMLDoc(a)))for(g=_(h),f=_(a),d=0,e=f.length;e>d;d++)ta(f[d],g[d]);if(b)if(c)for(f=f||_(a),g=g||_(h),d=0,e=f.length;e>d;d++)sa(f[d],g[d]);else sa(a,h);return g=_(h,"script"),g.length>0&&aa(g,!i&&_(a,"script")),h},cleanData:function(a){for(var b,c,d,e=n.event.special,f=0;void 0!==(c=a[f]);f++)if(L(c)){if(b=c[N.expando]){if(b.events)for(d in b.events)e[d]?n.event.remove(c,d):n.removeEvent(c,d,b.handle);c[N.expando]=void 0}c[O.expando]&&(c[O.expando]=void 0)}}}),n.fn.extend({domManip:ua,detach:function(a){return va(this,a,!0)},remove:function(a){return va(this,a)},text:function(a){return K(this,function(a){return void 0===a?n.text(this):this.empty().each(function(){1!==this.nodeType&&11!==this.nodeType&&9!==this.nodeType||(this.textContent=a)})},null,a,arguments.length)},append:function(){return ua(this,arguments,function(a){if(1===this.nodeType||11===this.nodeType||9===this.nodeType){var b=pa(this,a);b.appendChild(a)}})},prepend:function(){return 
ua(this,arguments,function(a){if(1===this.nodeType||11===this.nodeType||9===this.nodeType){var b=pa(this,a);b.insertBefore(a,b.firstChild)}})},before:function(){return ua(this,arguments,function(a){this.parentNode&&this.parentNode.insertBefore(a,this)})},after:function(){return ua(this,arguments,function(a){this.parentNode&&this.parentNode.insertBefore(a,this.nextSibling)})},empty:function(){for(var a,b=0;null!=(a=this[b]);b++)1===a.nodeType&&(n.cleanData(_(a,!1)),a.textContent="");return this},clone:function(a,b){return a=null==a?!1:a,b=null==b?a:b,this.map(function(){return n.clone(this,a,b)})},html:function(a){return K(this,function(a){var b=this[0]||{},c=0,d=this.length;if(void 0===a&&1===b.nodeType)return b.innerHTML;if("string"==typeof a&&!la.test(a)&&!$[(Y.exec(a)||["",""])[1].toLowerCase()]){a=n.htmlPrefilter(a);try{for(;d>c;c++)b=this[c]||{},1===b.nodeType&&(n.cleanData(_(b,!1)),b.innerHTML=a);b=0}catch(e){}}b&&this.empty().append(a)},null,a,arguments.length)},replaceWith:function(){var a=[];return ua(this,arguments,function(b){var c=this.parentNode;n.inArray(this,a)<0&&(n.cleanData(_(this)),c&&c.replaceChild(b,this))},a)}}),n.each({appendTo:"append",prependTo:"prepend",insertBefore:"before",insertAfter:"after",replaceAll:"replaceWith"},function(a,b){n.fn[a]=function(a){for(var c,d=[],e=n(a),f=e.length-1,h=0;f>=h;h++)c=h===f?this:this.clone(!0),n(e[h])[b](c),g.apply(d,c.get());return this.pushStack(d)}});var wa,xa={HTML:"block",BODY:"block"};function ya(a,b){var c=n(b.createElement(a)).appendTo(b.body),d=n.css(c[0],"display");return c.detach(),d}function za(a){var b=d,c=xa[a];return c||(c=ya(a,b),"none"!==c&&c||(wa=(wa||n("