improved plots

alexhuang1117 · Oct 20, 2017 · 4f4a2bd · 4f4a2bd
1 parent 131258a
commit 4f4a2bd
Show file tree

Hide file tree

Showing 5 changed files with 43 additions and 43 deletions.
diff --git a/GDP and Future Orientation/GDP and Future Orientation.rmd b/GDP and Future Orientation/GDP and Future Orientation.rmd
@@ -138,6 +138,7 @@ Now that we have the FOI index and GPD per capita, PPP value for each country, w
 reg = lm(NY.GDP.PCAP.PP.KD~FOI, data=table)
 summary(reg)
 plot(table$FOI, table$NY.GDP.PCAP.PP.KD, main='GDP vs Future orientation', ylab='GDP per capita PPP', xlab='Future Orientation Index')
+text(table$FOI, table$NY.GDP.PCAP.PP.KD, labels=table$iso2c, cex= 0.7, pos=3)
 abline(reg, col='red')
 ```
 

diff --git a/GDP and Future Orientation/GDP_and_Future_Orientation.md b/GDP and Future Orientation/GDP_and_Future_Orientation.md
@@ -149,7 +149,7 @@ Log in to google using your username and password. code not shown.
 
     ## Loading required package: devtools
 
-    ## Skipping install of 'gtrendsR' from a github remote, the SHA1 (6cc120f4) has not changed since last install.
+    ## Skipping install of 'gtrendsR' from a github remote, the SHA1 (0165d264) has not changed since last install.
     ##   Use `force = TRUE` to force installation
 
     ## Loading required package: gtrendsR
@@ -178,55 +178,55 @@ for(current_country in table[,"iso2c"])
 ```
 
     ## [1] "AR"
-    ## [1] 0.8791045
+    ## [1] 0.8827068
     ## [1] "AU"
-    ## [1] 0.9743291
+    ## [1] 0.9790454
     ## [1] "BD"
-    ## [1] 0.313314
+    ## [1] 0.3104197
     ## [1] "BR"
-    ## [1] 0.9115169
+    ## [1] 0.909732
     ## [1] "CA"
-    ## [1] 0.8142494
+    ## [1] 0.8132992
     ## [1] "CN"
-    ## [1] 0.5427632
+    ## [1] 0.5336948
     ## [1] "CO"
-    ## [1] 0.5757576
+    ## [1] 0.5783866
     ## [1] "DE"
-    ## [1] 1.38796
+    ## [1] 1.378913
     ## [1] "ES"
     ## [1] 0.7142857
     ## [1] "FR"
-    ## [1] 1.055313
+    ## [1] 1.049927
     ## [1] "GB"
-    ## [1] 1.099424
+    ## [1] 1.100865
     ## [1] "IN"
-    ## [1] 0.4487847
+    ## [1] 0.4503883
     ## [1] "IT"
-    ## [1] 0.9098712
+    ## [1] 0.9156118
     ## [1] "JP"
-    ## [1] 1.172336
+    ## [1] 1.174107
     ## [1] "KR"
-    ## [1] 0.5128032
+    ## [1] 0.5088874
     ## [1] "MX"
-    ## [1] 0.6952247
+    ## [1] 0.6951049
     ## [1] "NL"
-    ## [1] 1.154446
+    ## [1] 1.140458
     ## [1] "PH"
-    ## [1] 0.3449782
+    ## [1] 0.3443766
     ## [1] "PL"
-    ## [1] 0.5089202
+    ## [1] 0.5103578
     ## [1] "RU"
-    ## [1] 0.4608896
+    ## [1] 0.4633385
     ## [1] "TH"
-    ## [1] 0.3623932
+    ## [1] 0.3622942
     ## [1] "TR"
-    ## [1] 0.7066116
+    ## [1] 0.7091413
     ## [1] "UA"
-    ## [1] 0.3592233
+    ## [1] 0.3613445
     ## [1] "US"
-    ## [1] 0.8492063
+    ## [1] 0.8498024
     ## [1] "VN"
-    ## [1] 0.4278884
+    ## [1] 0.4226804
 
 ``` r
 #table = table[complete.cases(table),]
@@ -247,21 +247,22 @@ summary(reg)
     ## 
     ## Residuals:
     ##    Min     1Q Median     3Q    Max 
-    ## -18857  -6956  -1396   7893  22765 
+    ## -18848  -6734  -1525   7860  22692 
     ## 
     ## Coefficients:
     ##             Estimate Std. Error t value Pr(>|t|)    
-    ## (Intercept)    -1838       5289  -0.348    0.731    
-    ## FOI            38089       6730   5.659 9.23e-06 ***
+    ## (Intercept)    -1904       5293  -0.360    0.722    
+    ## FOI            38226       6747   5.666 9.08e-06 ***
     ## ---
     ## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
     ## 
     ## Residual standard error: 10010 on 23 degrees of freedom
-    ## Multiple R-squared:  0.582,  Adjusted R-squared:  0.5639 
-    ## F-statistic: 32.03 on 1 and 23 DF,  p-value: 9.233e-06
+    ## Multiple R-squared:  0.5826, Adjusted R-squared:  0.5645 
+    ## F-statistic:  32.1 on 1 and 23 DF,  p-value: 9.083e-06
 
 ``` r
 plot(table$FOI, table$NY.GDP.PCAP.PP.KD, main='GDP vs Future orientation', ylab='GDP per capita PPP', xlab='Future Orientation Index')
+text(table$FOI, table$NY.GDP.PCAP.PP.KD, labels=table$iso2c, cex= 0.7, pos=3)
 abline(reg, col='red')
 ```
 

diff --git a/...rientation_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-8-1.png b/...rientation_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-8-1.png
diff --git a/Power_Law_vs_Lognormal_US_Babynames/Power_Law_vs_Lognormal_US_Babynames.Rmd b/Power_Law_vs_Lognormal_US_Babynames/Power_Law_vs_Lognormal_US_Babynames.Rmd
@@ -1,5 +1,5 @@
 ---
-title: "Exercise04 Solution"
+title: "Power-law and Log-Normal Distributions"
 output: rmarkdown::github_document
 ---
 
@@ -64,7 +64,7 @@ X = table$count
 CCDF = ecdf(X)
 xval = sort(unique(X))
 yval = CCDF(xval)
-plot(xval, 1-yval,log = "xy", main="Baby Name CCDF", ylab="P(X>x)", xlab='babies')
+plot(xval, 1-yval,log = "xy", main="Baby Name CCDF", ylab="P(X>x)", xlab='Number of Babies')
 ```
 
 
@@ -77,9 +77,9 @@ mm$getPars()
 (est=estimate_pars(mm))
 (est=estimate_xmin(mm))
 mm$setXmin(est)
-plot(mm)
+plot(mm, ylab="P(X>x)", xlab='Number of Babies')
 lines(mm, col=2)
-legend("bottomleft", "Power-law", col="red")
+legend("topright", "Power-law", col="red")
 ```
 
 
@@ -90,27 +90,25 @@ We see that at the tail the function drops down quite a bit. Power-law might not
 Using bootstrapping, we can estimate the probability that the data follows a given distribution. Bootstrapping means sampling the data with replacement to form multiple data samples. Each sample will return a parameter for $\alpha$ and $c$. This part takes some time to run.
 ```{r}
 bs=bootstrap(mm, no_of_sims = 100, threads = 4)
-hist(bs$bootstraps[,2], breaks="fd", main = 'Distribution of Xmin')
-hist(bs$bootstraps[,3], breaks="fd", main = 'Distribution of alpha')
-plot(jitter(bs$bootstraps[,2], factor=1.2), bs$bootstraps[,3])
-bs_p=bootstrap_p(mm, no_of_sims = 200, threads=2)
+hist(bs$bootstraps[,2], breaks="fd", main = 'Distribution of Xmin',xlab='Xmin')
+hist(bs$bootstraps[,3], breaks="fd", main = 'Distribution of alpha',xlab='Alpha')
+plot(jitter(bs$bootstraps[,2], factor=1.2), bs$bootstraps[,3], xlab='Xmin', ylab='alpha', main='Bootstrap Results of Power Law Distribution')
+bs_p=bootstrap_p(mm, no_of_sims = 100, threads=4)
 bs_p$p
 
 ```
 The above chunk of code uses bootstrapping to find a distribution of X_min and alpha and plots the distributions. However, it takes too long to run, so it is left commented at the moment. bs_p$p shows the p-value for the hypothesis that the data follows a power-law distribution. A high p-value means the distribution is likely to follow a power-law.
 
 The result might not be a power-law distribution, we can try to fit a log-normal distribution instead. Let's see if the fit is better. Log-normal distribution is defined by the following formula.
-$$f(x) =  c
-                                    \frac{1}{x^{p}}\,
-                                    \exp\!\!\left(-\frac{\ln(x/\mu)^2}{2\sigma^2}\right)$$
+$$f(x) =  c \frac{1}{x^{p}}\, \exp\!\!\left(-\frac{\ln(x/\mu)^2}{2\sigma^2}\right)$$
 
 Here the logarithm of the variable $x$ follows a Gaussian distribution.
 ```{r}
 mm2 = dislnorm$new(X)
 #mm2$setXmin(30145) #If you want to analyze the top 1000
 est2 = estimate_pars(mm2)
 mm2$setPars(est2)
-plot(mm,xlab="Baby name sample",ylab="Baby name counts", main="")
+plot(mm,xlab="Baby name sample",ylab="Baby name counts", main="Power Law vs Log Normal Fit")
 lines(mm, col=2)
 lines(mm2, col=3)
 legend("bottomleft", c("power-law","log-normal"), col=c(2,3), lwd=c(1,1))
@@ -202,5 +200,5 @@ lines(mm, col=2)
 
 
 
-It could be argue that both of these distribution are power-law distributions or another distribution. In real life it is hard to pin point the distribution with 100% certainty.
+It could be argued that both of these distributions are power-law distributions or other distributions. In real life it is hard to pinpoint the distribution with 100% certainty. However, in these cases power-law does seem to be a good fit.
 
diff --git a/..._Babynames_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-4-1.png b/..._Babynames_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-4-1.png