<!DOCTYPE html>
<html >
<head>
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<title>2.2 Random Sampling and the Distribution of Sample Averages | Introduction to Econometrics with R</title>
<meta name="description" content="Beginners with little background in statistics and econometrics often have a hard time understanding the benefits of having programming skills for learning and applying Econometrics. ‘Introduction to Econometrics with R’ is an interactive companion to the well-received textbook ‘Introduction to Econometrics’ by James H. Stock and Mark W. Watson (2015). It gives a gentle introduction to the essentials of R programming and guides students in implementing the empirical applications presented throughout the textbook using the newly aquired skills. This is supported by interactive programming exercises generated with DataCamp Light and integration of interactive visualizations of central concepts which are based on the flexible JavaScript library D3.js.">
<meta name="generator" content="bookdown and GitBook 2.6.7">
<meta property="og:title" content="2.2 Random Sampling and the Distribution of Sample Averages | Introduction to Econometrics with R" />
<meta property="og:type" content="book" />
<meta property="og:url" content="https://www.econometrics-with-r.org/" />
<meta property="og:image" content="https://www.econometrics-with-r.org/images/cover.png" />
<meta property="og:description" content="Beginners with little background in statistics and econometrics often have a hard time understanding the benefits of having programming skills for learning and applying Econometrics. ‘Introduction to Econometrics with R’ is an interactive companion to the well-received textbook ‘Introduction to Econometrics’ by James H. Stock and Mark W. Watson (2015). It gives a gentle introduction to the essentials of R programming and guides students in implementing the empirical applications presented throughout the textbook using the newly aquired skills. This is supported by interactive programming exercises generated with DataCamp Light and integration of interactive visualizations of central concepts which are based on the flexible JavaScript library D3.js." />
<meta name="github-repo" content="mca91/EconometricsWithR" />
<meta name="twitter:card" content="summary" />
<meta name="twitter:title" content="2.2 Random Sampling and the Distribution of Sample Averages | Introduction to Econometrics with R" />
<meta name="twitter:description" content="Beginners with little background in statistics and econometrics often have a hard time understanding the benefits of having programming skills for learning and applying Econometrics. ‘Introduction to Econometrics with R’ is an interactive companion to the well-received textbook ‘Introduction to Econometrics’ by James H. Stock and Mark W. Watson (2015). It gives a gentle introduction to the essentials of R programming and guides students in implementing the empirical applications presented throughout the textbook using the newly aquired skills. This is supported by interactive programming exercises generated with DataCamp Light and integration of interactive visualizations of central concepts which are based on the flexible JavaScript library D3.js." />
<meta name="twitter:image" content="https://www.econometrics-with-r.org/images/cover.png" />
<meta name="author" content="Christoph Hanck, Martin Arnold, Alexander Gerber and Martin Schmelzer">
<meta name="date" content="2019-03-12">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-status-bar-style" content="black">
<link rel="prev" href="2-1-random-variables-and-probability-distributions.html">
<link rel="next" href="2-3-exercises.html">
<script src="libs/jquery-2.2.3/jquery.min.js"></script>
<link href="libs/gitbook-2.6.7/css/style.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-table.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-bookdown.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-highlight.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-search.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-fontsettings.css" rel="stylesheet" />
<script src="libs/htmlwidgets-1.3/htmlwidgets.js"></script>
<script src="libs/plotly-binding-4.8.0/plotly.js"></script>
<script src="libs/typedarray-0.1/typedarray.min.js"></script>
<link href="libs/crosstalk-1.0.0/css/crosstalk.css" rel="stylesheet" />
<script src="libs/crosstalk-1.0.0/js/crosstalk.min.js"></script>
<link href="libs/plotly-htmlwidgets-css-1.39.2/plotly-htmlwidgets.css" rel="stylesheet" />
<script src="libs/plotly-main-1.39.2/plotly-latest.min.js"></script>
<!-- font families -->
<link href="https://fonts.googleapis.com/css?family=PT+Sans|Pacifico|Source+Sans+Pro" rel="stylesheet">
<script src="js/hideOutput.js"></script>
<!-- Mathjax -->
<script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/default.js"></script>
<script type="text/x-mathjax-config">
MathJax.Hub.Config({
extensions: ["tex2jax.js", "TeX/AMSmath.js"],
tex2jax: {inlineMath: [['$','$'], ['\\(','\\)']]},
jax: ["input/TeX","output/CommonHTML"]
});
MathJax.Hub.processSectionDelay = 0;
</script>
<!-- Global site tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-110299877-1"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'UA-110299877-1');
</script>
<!-- open review block -->
<script async defer src="https://hypothes.is/embed.js"></script>
<style type="text/css">
a.sourceLine { display: inline-block; line-height: 1.25; }
a.sourceLine { pointer-events: none; color: inherit; text-decoration: inherit; }
a.sourceLine:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode { white-space: pre; position: relative; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
code.sourceCode { white-space: pre-wrap; }
a.sourceLine { text-indent: -1em; padding-left: 1em; }
}
pre.numberSource a.sourceLine
{ position: relative; left: -4em; }
pre.numberSource a.sourceLine::before
{ content: attr(data-line-number);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; pointer-events: all; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
color: #aaaaaa;
}
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
div.sourceCode
{ background-color: #f8f8f8; }
@media screen {
a.sourceLine::before { text-decoration: underline; }
}
code span.al { color: #ef2929; } /* Alert */
code span.an { color: #8f5902; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #c4a000; } /* Attribute */
code span.bn { color: #0000cf; } /* BaseN */
code span.cf { color: #204a87; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4e9a06; } /* Char */
code span.cn { color: #000000; } /* Constant */
code span.co { color: #8f5902; font-style: italic; } /* Comment */
code span.cv { color: #8f5902; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #8f5902; font-weight: bold; font-style: italic; } /* Documentation */
code span.dt { color: #204a87; } /* DataType */
code span.dv { color: #0000cf; } /* DecVal */
code span.er { color: #a40000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #0000cf; } /* Float */
code span.fu { color: #000000; } /* Function */
code span.im { } /* Import */
code span.in { color: #8f5902; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #204a87; font-weight: bold; } /* Keyword */
code span.op { color: #ce5c00; font-weight: bold; } /* Operator */
code span.ot { color: #8f5902; } /* Other */
code span.pp { color: #8f5902; font-style: italic; } /* Preprocessor */
code span.sc { color: #000000; } /* SpecialChar */
code span.ss { color: #4e9a06; } /* SpecialString */
code span.st { color: #4e9a06; } /* String */
code span.va { color: #000000; } /* Variable */
code span.vs { color: #4e9a06; } /* VerbatimString */
code span.wa { color: #8f5902; font-weight: bold; font-style: italic; } /* Warning */
</style>
<link rel="stylesheet" href="style.css" type="text/css" />
<link rel="stylesheet" href="toc.css" type="text/css" />
</head>
<body>
<div class="book without-animation with-summary font-size-2 font-family-1" data-basepath=".">
<div class="book-summary">
<nav role="navigation">
<ul class="summary">
<li><center><img src="images/logo.png" alt="logo" width="50%" height="50%" style="margin: 15px 0 0 0"></center></li>
<li class="divider"></li>
<li class="chapter" data-level="" data-path="index.html"><a href="index.html"><i class="fa fa-check"></i>Preface</a></li>
<li class="chapter" data-level="1" data-path="1-introduction.html"><a href="1-introduction.html"><i class="fa fa-check"></i><b>1</b> Introduction</a><ul>
<li class="chapter" data-level="1.1" data-path="1-1-a-very-short-introduction-to-r-and-rstudio.html"><a href="1-1-a-very-short-introduction-to-r-and-rstudio.html"><i class="fa fa-check"></i><b>1.1</b> A Very Short Introduction to <tt>R</tt> and <em>RStudio</em></a></li>
</ul></li>
<li class="chapter" data-level="2" data-path="2-pt.html"><a href="2-pt.html"><i class="fa fa-check"></i><b>2</b> Probability Theory</a><ul>
<li class="chapter" data-level="2.1" data-path="2-1-random-variables-and-probability-distributions.html"><a href="2-1-random-variables-and-probability-distributions.html"><i class="fa fa-check"></i><b>2.1</b> Random Variables and Probability Distributions</a><ul>
<li class="chapter" data-level="" data-path="2-1-random-variables-and-probability-distributions.html"><a href="2-1-random-variables-and-probability-distributions.html#probability-distributions-of-discrete-random-variables"><i class="fa fa-check"></i>Probability Distributions of Discrete Random Variables</a></li>
<li class="chapter" data-level="" data-path="2-1-random-variables-and-probability-distributions.html"><a href="2-1-random-variables-and-probability-distributions.html#bernoulli-trials"><i class="fa fa-check"></i>Bernoulli Trials</a></li>
<li class="chapter" data-level="" data-path="2-1-random-variables-and-probability-distributions.html"><a href="2-1-random-variables-and-probability-distributions.html#expected-value-mean-and-variance"><i class="fa fa-check"></i>Expected Value, Mean and Variance</a></li>
<li class="chapter" data-level="" data-path="2-1-random-variables-and-probability-distributions.html"><a href="2-1-random-variables-and-probability-distributions.html#probability-distributions-of-continuous-random-variables"><i class="fa fa-check"></i>Probability Distributions of Continuous Random Variables</a></li>
<li class="chapter" data-level="" data-path="2-1-random-variables-and-probability-distributions.html"><a href="2-1-random-variables-and-probability-distributions.html#the-normal-distribution"><i class="fa fa-check"></i>The Normal Distribution</a></li>
<li class="chapter" data-level="" data-path="2-1-random-variables-and-probability-distributions.html"><a href="2-1-random-variables-and-probability-distributions.html#the-chi-squared-distribution"><i class="fa fa-check"></i>The Chi-Squared Distribution</a></li>
<li class="chapter" data-level="" data-path="2-1-random-variables-and-probability-distributions.html"><a href="2-1-random-variables-and-probability-distributions.html#thetdist"><i class="fa fa-check"></i>The Student t Distribution</a></li>
<li class="chapter" data-level="" data-path="2-1-random-variables-and-probability-distributions.html"><a href="2-1-random-variables-and-probability-distributions.html#the-f-distribution"><i class="fa fa-check"></i>The F Distribution</a></li>
</ul></li>
<li class="chapter" data-level="2.2" data-path="2-2-RSATDOSA.html"><a href="2-2-RSATDOSA.html"><i class="fa fa-check"></i><b>2.2</b> Random Sampling and the Distribution of Sample Averages</a><ul>
<li class="chapter" data-level="" data-path="2-2-RSATDOSA.html"><a href="2-2-RSATDOSA.html#mean-and-variance-of-the-sample-mean"><i class="fa fa-check"></i>Mean and Variance of the Sample Mean</a></li>
<li class="chapter" data-level="" data-path="2-2-RSATDOSA.html"><a href="2-2-RSATDOSA.html#large-sample-approximations-to-sampling-distributions"><i class="fa fa-check"></i>Large Sample Approximations to Sampling Distributions</a></li>
</ul></li>
<li class="chapter" data-level="2.3" data-path="2-3-exercises.html"><a href="2-3-exercises.html"><i class="fa fa-check"></i><b>2.3</b> Exercises</a></li>
</ul></li>
<li class="chapter" data-level="3" data-path="3-arosur.html"><a href="3-arosur.html"><i class="fa fa-check"></i><b>3</b> A Review of Statistics using R</a><ul>
<li class="chapter" data-level="3.1" data-path="3-1-estimation-of-the-population-mean.html"><a href="3-1-estimation-of-the-population-mean.html"><i class="fa fa-check"></i><b>3.1</b> Estimation of the Population Mean</a></li>
<li class="chapter" data-level="3.2" data-path="3-2-potsm.html"><a href="3-2-potsm.html"><i class="fa fa-check"></i><b>3.2</b> Properties of the Sample Mean</a></li>
<li class="chapter" data-level="3.3" data-path="3-3-hypothesis-tests-concerning-the-population-mean.html"><a href="3-3-hypothesis-tests-concerning-the-population-mean.html"><i class="fa fa-check"></i><b>3.3</b> Hypothesis Tests Concerning the Population Mean</a><ul>
<li class="chapter" data-level="" data-path="3-3-hypothesis-tests-concerning-the-population-mean.html"><a href="3-3-hypothesis-tests-concerning-the-population-mean.html#the-p-value"><i class="fa fa-check"></i>The p-Value</a></li>
<li class="chapter" data-level="" data-path="3-3-hypothesis-tests-concerning-the-population-mean.html"><a href="3-3-hypothesis-tests-concerning-the-population-mean.html#calculating-the-p-value-when-the-standard-deviation-is-known"><i class="fa fa-check"></i>Calculating the p-Value when the Standard Deviation is Known</a></li>
<li class="chapter" data-level="" data-path="3-3-hypothesis-tests-concerning-the-population-mean.html"><a href="3-3-hypothesis-tests-concerning-the-population-mean.html#SVSSDASE"><i class="fa fa-check"></i>Sample Variance, Sample Standard Deviation and Standard Error</a></li>
<li class="chapter" data-level="" data-path="3-3-hypothesis-tests-concerning-the-population-mean.html"><a href="3-3-hypothesis-tests-concerning-the-population-mean.html#calculating-the-p-value-when-the-standard-deviation-is-unknown"><i class="fa fa-check"></i>Calculating the p-value When the Standard Deviation is Unknown</a></li>
<li class="chapter" data-level="" data-path="3-3-hypothesis-tests-concerning-the-population-mean.html"><a href="3-3-hypothesis-tests-concerning-the-population-mean.html#the-t-statistic"><i class="fa fa-check"></i>The t-statistic</a></li>
<li class="chapter" data-level="" data-path="3-3-hypothesis-tests-concerning-the-population-mean.html"><a href="3-3-hypothesis-tests-concerning-the-population-mean.html#hypothesis-testing-with-a-prespecified-significance-level"><i class="fa fa-check"></i>Hypothesis Testing with a Prespecified Significance Level</a></li>
<li class="chapter" data-level="" data-path="3-3-hypothesis-tests-concerning-the-population-mean.html"><a href="3-3-hypothesis-tests-concerning-the-population-mean.html#one-sided-alternatives"><i class="fa fa-check"></i>One-sided Alternatives</a></li>
</ul></li>
<li class="chapter" data-level="3.4" data-path="3-4-confidence-intervals-for-the-population-mean.html"><a href="3-4-confidence-intervals-for-the-population-mean.html"><i class="fa fa-check"></i><b>3.4</b> Confidence Intervals for the Population Mean</a></li>
<li class="chapter" data-level="3.5" data-path="3-5-cmfdp.html"><a href="3-5-cmfdp.html"><i class="fa fa-check"></i><b>3.5</b> Comparing Means from Different Populations</a></li>
<li class="chapter" data-level="3.6" data-path="3-6-aattggoe.html"><a href="3-6-aattggoe.html"><i class="fa fa-check"></i><b>3.6</b> An Application to the Gender Gap of Earnings</a></li>
<li class="chapter" data-level="3.7" data-path="3-7-scatterplots-sample-covariance-and-sample-correlation.html"><a href="3-7-scatterplots-sample-covariance-and-sample-correlation.html"><i class="fa fa-check"></i><b>3.7</b> Scatterplots, Sample Covariance and Sample Correlation</a></li>
<li class="chapter" data-level="3.8" data-path="3-8-exercises-1.html"><a href="3-8-exercises-1.html"><i class="fa fa-check"></i><b>3.8</b> Exercises</a></li>
</ul></li>
<li class="chapter" data-level="4" data-path="4-lrwor.html"><a href="4-lrwor.html"><i class="fa fa-check"></i><b>4</b> Linear Regression with One Regressor</a><ul>
<li class="chapter" data-level="4.1" data-path="4-1-simple-linear-regression.html"><a href="4-1-simple-linear-regression.html"><i class="fa fa-check"></i><b>4.1</b> Simple Linear Regression</a></li>
<li class="chapter" data-level="4.2" data-path="4-2-estimating-the-coefficients-of-the-linear-regression-model.html"><a href="4-2-estimating-the-coefficients-of-the-linear-regression-model.html"><i class="fa fa-check"></i><b>4.2</b> Estimating the Coefficients of the Linear Regression Model</a><ul>
<li class="chapter" data-level="" data-path="4-2-estimating-the-coefficients-of-the-linear-regression-model.html"><a href="4-2-estimating-the-coefficients-of-the-linear-regression-model.html#the-ordinary-least-squares-estimator"><i class="fa fa-check"></i>The Ordinary Least Squares Estimator</a></li>
</ul></li>
<li class="chapter" data-level="4.3" data-path="4-3-measures-of-fit.html"><a href="4-3-measures-of-fit.html"><i class="fa fa-check"></i><b>4.3</b> Measures of Fit</a><ul>
<li class="chapter" data-level="" data-path="4-3-measures-of-fit.html"><a href="4-3-measures-of-fit.html#the-coefficient-of-determination"><i class="fa fa-check"></i>The Coefficient of Determination</a></li>
<li class="chapter" data-level="" data-path="4-3-measures-of-fit.html"><a href="4-3-measures-of-fit.html#the-standard-error-of-the-regression"><i class="fa fa-check"></i>The Standard Error of the Regression</a></li>
<li class="chapter" data-level="" data-path="4-3-measures-of-fit.html"><a href="4-3-measures-of-fit.html#application-to-the-test-score-data"><i class="fa fa-check"></i>Application to the Test Score Data</a></li>
</ul></li>
<li class="chapter" data-level="4.4" data-path="4-4-tlsa.html"><a href="4-4-tlsa.html"><i class="fa fa-check"></i><b>4.4</b> The Least Squares Assumptions</a><ul>
<li class="chapter" data-level="" data-path="4-4-tlsa.html"><a href="4-4-tlsa.html#assumption-1-the-error-term-has-conditional-mean-of-zero"><i class="fa fa-check"></i>Assumption 1: The Error Term has Conditional Mean of Zero</a></li>
<li class="chapter" data-level="" data-path="4-4-tlsa.html"><a href="4-4-tlsa.html#assumption-2-independently-and-identically-distributed-data"><i class="fa fa-check"></i>Assumption 2: Independently and Identically Distributed Data</a></li>
<li class="chapter" data-level="" data-path="4-4-tlsa.html"><a href="4-4-tlsa.html#assumption-3-large-outliers-are-unlikely"><i class="fa fa-check"></i>Assumption 3: Large Outliers are Unlikely</a></li>
</ul></li>
<li class="chapter" data-level="4.5" data-path="4-5-tsdotoe.html"><a href="4-5-tsdotoe.html"><i class="fa fa-check"></i><b>4.5</b> The Sampling Distribution of the OLS Estimator</a><ul>
<li class="chapter" data-level="" data-path="4-5-tsdotoe.html"><a href="4-5-tsdotoe.html#simulation-study-1"><i class="fa fa-check"></i>Simulation Study 1</a></li>
<li class="chapter" data-level="" data-path="4-5-tsdotoe.html"><a href="4-5-tsdotoe.html#simulation-study-2"><i class="fa fa-check"></i>Simulation Study 2</a></li>
<li class="chapter" data-level="" data-path="4-5-tsdotoe.html"><a href="4-5-tsdotoe.html#simulation-study-3"><i class="fa fa-check"></i>Simulation Study 3</a></li>
</ul></li>
<li class="chapter" data-level="4.6" data-path="4-6-exercises-2.html"><a href="4-6-exercises-2.html"><i class="fa fa-check"></i><b>4.6</b> Exercises</a></li>
</ul></li>
<li class="chapter" data-level="5" data-path="5-htaciitslrm.html"><a href="5-htaciitslrm.html"><i class="fa fa-check"></i><b>5</b> Hypothesis Tests and Confidence Intervals in the Simple Linear Regression Model</a><ul>
<li class="chapter" data-level="5.1" data-path="5-1-testing-two-sided-hypotheses-concerning-the-slope-coefficient.html"><a href="5-1-testing-two-sided-hypotheses-concerning-the-slope-coefficient.html"><i class="fa fa-check"></i><b>5.1</b> Testing Two-Sided Hypotheses Concerning the Slope Coefficient</a></li>
<li class="chapter" data-level="5.2" data-path="5-2-cifrc.html"><a href="5-2-cifrc.html"><i class="fa fa-check"></i><b>5.2</b> Confidence Intervals for Regression Coefficients</a><ul>
<li class="chapter" data-level="" data-path="5-2-cifrc.html"><a href="5-2-cifrc.html#simulation-study-confidence-intervals"><i class="fa fa-check"></i>Simulation Study: Confidence Intervals</a></li>
</ul></li>
<li class="chapter" data-level="5.3" data-path="5-3-rwxiabv.html"><a href="5-3-rwxiabv.html"><i class="fa fa-check"></i><b>5.3</b> Regression when X is a Binary Variable</a></li>
<li class="chapter" data-level="5.4" data-path="5-4-hah.html"><a href="5-4-hah.html"><i class="fa fa-check"></i><b>5.4</b> Heteroskedasticity and Homoskedasticity</a><ul>
<li class="chapter" data-level="" data-path="5-4-hah.html"><a href="5-4-hah.html#a-real-world-example-for-heteroskedasticity"><i class="fa fa-check"></i>A Real-World Example for Heteroskedasticity</a></li>
<li class="chapter" data-level="" data-path="5-4-hah.html"><a href="5-4-hah.html#should-we-care-about-heteroskedasticity"><i class="fa fa-check"></i>Should We Care About Heteroskedasticity?</a></li>
<li class="chapter" data-level="" data-path="5-4-hah.html"><a href="5-4-hah.html#computation-of-heteroskedasticity-robust-standard-errors"><i class="fa fa-check"></i>Computation of Heteroskedasticity-Robust Standard Errors</a></li>
</ul></li>
<li class="chapter" data-level="5.5" data-path="5-5-the-gauss-markov-theorem.html"><a href="5-5-the-gauss-markov-theorem.html"><i class="fa fa-check"></i><b>5.5</b> The Gauss-Markov Theorem</a><ul>
<li class="chapter" data-level="" data-path="5-5-the-gauss-markov-theorem.html"><a href="5-5-the-gauss-markov-theorem.html#simulation-study-blue-estimator"><i class="fa fa-check"></i>Simulation Study: BLUE Estimator</a></li>
</ul></li>
<li class="chapter" data-level="5.6" data-path="5-6-using-the-t-statistic-in-regression-when-the-sample-size-is-small.html"><a href="5-6-using-the-t-statistic-in-regression-when-the-sample-size-is-small.html"><i class="fa fa-check"></i><b>5.6</b> Using the t-Statistic in Regression When the Sample Size Is Small</a></li>
<li class="chapter" data-level="5.7" data-path="5-7-exercises-3.html"><a href="5-7-exercises-3.html"><i class="fa fa-check"></i><b>5.7</b> Exercises</a></li>
</ul></li>
<li class="chapter" data-level="6" data-path="6-rmwmr.html"><a href="6-rmwmr.html"><i class="fa fa-check"></i><b>6</b> Regression Models with Multiple Regressors</a><ul>
<li class="chapter" data-level="6.1" data-path="6-1-omitted-variable-bias.html"><a href="6-1-omitted-variable-bias.html"><i class="fa fa-check"></i><b>6.1</b> Omitted Variable Bias</a></li>
<li class="chapter" data-level="6.2" data-path="6-2-tmrm.html"><a href="6-2-tmrm.html"><i class="fa fa-check"></i><b>6.2</b> The Multiple Regression Model</a></li>
<li class="chapter" data-level="6.3" data-path="6-3-mofimr.html"><a href="6-3-mofimr.html"><i class="fa fa-check"></i><b>6.3</b> Measures of Fit in Multiple Regression</a></li>
<li class="chapter" data-level="6.4" data-path="6-4-ols-assumptions-in-multiple-regression.html"><a href="6-4-ols-assumptions-in-multiple-regression.html"><i class="fa fa-check"></i><b>6.4</b> OLS Assumptions in Multiple Regression</a><ul>
<li class="chapter" data-level="" data-path="6-4-ols-assumptions-in-multiple-regression.html"><a href="6-4-ols-assumptions-in-multiple-regression.html#multicollinearity"><i class="fa fa-check"></i>Multicollinearity</a></li>
<li class="chapter" data-level="" data-path="6-4-ols-assumptions-in-multiple-regression.html"><a href="6-4-ols-assumptions-in-multiple-regression.html#simulation-study-imperfect-multicollinearity"><i class="fa fa-check"></i>Simulation Study: Imperfect Multicollinearity</a></li>
</ul></li>
<li class="chapter" data-level="6.5" data-path="6-5-the-distribution-of-the-ols-estimators-in-multiple-regression.html"><a href="6-5-the-distribution-of-the-ols-estimators-in-multiple-regression.html"><i class="fa fa-check"></i><b>6.5</b> The Distribution of the OLS Estimators in Multiple Regression</a></li>
<li class="chapter" data-level="6.6" data-path="6-6-exercises-4.html"><a href="6-6-exercises-4.html"><i class="fa fa-check"></i><b>6.6</b> Exercises</a></li>
</ul></li>
<li class="chapter" data-level="7" data-path="7-htaciimr.html"><a href="7-htaciimr.html"><i class="fa fa-check"></i><b>7</b> Hypothesis Tests and Confidence Intervals in Multiple Regression</a><ul>
<li class="chapter" data-level="7.1" data-path="7-1-hypothesis-tests-and-confidence-intervals-for-a-single-coefficient.html"><a href="7-1-hypothesis-tests-and-confidence-intervals-for-a-single-coefficient.html"><i class="fa fa-check"></i><b>7.1</b> Hypothesis Tests and Confidence Intervals for a Single Coefficient</a></li>
<li class="chapter" data-level="7.2" data-path="7-2-an-application-to-test-scores-and-the-student-teacher-ratio.html"><a href="7-2-an-application-to-test-scores-and-the-student-teacher-ratio.html"><i class="fa fa-check"></i><b>7.2</b> An Application to Test Scores and the Student-Teacher Ratio</a><ul>
<li class="chapter" data-level="" data-path="7-2-an-application-to-test-scores-and-the-student-teacher-ratio.html"><a href="7-2-an-application-to-test-scores-and-the-student-teacher-ratio.html#another-augmentation-of-the-model"><i class="fa fa-check"></i>Another Augmentation of the Model</a></li>
</ul></li>
<li class="chapter" data-level="7.3" data-path="7-3-joint-hypothesis-testing-using-the-f-statistic.html"><a href="7-3-joint-hypothesis-testing-using-the-f-statistic.html"><i class="fa fa-check"></i><b>7.3</b> Joint Hypothesis Testing Using the F-Statistic</a></li>
<li class="chapter" data-level="7.4" data-path="7-4-confidence-sets-for-multiple-coefficients.html"><a href="7-4-confidence-sets-for-multiple-coefficients.html"><i class="fa fa-check"></i><b>7.4</b> Confidence Sets for Multiple Coefficients</a></li>
<li class="chapter" data-level="7.5" data-path="7-5-model-specification-for-multiple-regression.html"><a href="7-5-model-specification-for-multiple-regression.html"><i class="fa fa-check"></i><b>7.5</b> Model Specification for Multiple Regression</a><ul>
<li class="chapter" data-level="" data-path="7-5-model-specification-for-multiple-regression.html"><a href="7-5-model-specification-for-multiple-regression.html#model-specification-in-theory-and-in-practice"><i class="fa fa-check"></i>Model Specification in Theory and in Practice</a></li>
</ul></li>
<li class="chapter" data-level="7.6" data-path="7-6-analysis-of-the-test-score-data-set.html"><a href="7-6-analysis-of-the-test-score-data-set.html"><i class="fa fa-check"></i><b>7.6</b> Analysis of the Test Score Data Set</a></li>
<li class="chapter" data-level="7.7" data-path="7-7-exercises-5.html"><a href="7-7-exercises-5.html"><i class="fa fa-check"></i><b>7.7</b> Exercises</a></li>
</ul></li>
<li class="chapter" data-level="8" data-path="8-nrf.html"><a href="8-nrf.html"><i class="fa fa-check"></i><b>8</b> Nonlinear Regression Functions</a><ul>
<li class="chapter" data-level="8.1" data-path="8-1-a-general-strategy-for-modelling-nonlinear-regression-functions.html"><a href="8-1-a-general-strategy-for-modelling-nonlinear-regression-functions.html"><i class="fa fa-check"></i><b>8.1</b> A General Strategy for Modelling Nonlinear Regression Functions</a></li>
<li class="chapter" data-level="8.2" data-path="8-2-nfoasiv.html"><a href="8-2-nfoasiv.html"><i class="fa fa-check"></i><b>8.2</b> Nonlinear Functions of a Single Independent Variable</a><ul>
<li class="chapter" data-level="" data-path="8-2-nfoasiv.html"><a href="8-2-nfoasiv.html#polynomials"><i class="fa fa-check"></i>Polynomials</a></li>
<li class="chapter" data-level="" data-path="8-2-nfoasiv.html"><a href="8-2-nfoasiv.html#logarithms"><i class="fa fa-check"></i>Logarithms</a></li>
</ul></li>
<li class="chapter" data-level="8.3" data-path="8-3-interactions-between-independent-variables.html"><a href="8-3-interactions-between-independent-variables.html"><i class="fa fa-check"></i><b>8.3</b> Interactions Between Independent Variables</a></li>
<li class="chapter" data-level="8.4" data-path="8-4-nonlinear-effects-on-test-scores-of-the-student-teacher-ratio.html"><a href="8-4-nonlinear-effects-on-test-scores-of-the-student-teacher-ratio.html"><i class="fa fa-check"></i><b>8.4</b> Nonlinear Effects on Test Scores of the Student-Teacher Ratio</a></li>
<li class="chapter" data-level="8.5" data-path="8-5-exercises-6.html"><a href="8-5-exercises-6.html"><i class="fa fa-check"></i><b>8.5</b> Exercises</a></li>
</ul></li>
<li class="chapter" data-level="9" data-path="9-asbomr.html"><a href="9-asbomr.html"><i class="fa fa-check"></i><b>9</b> Assessing Studies Based on Multiple Regression</a><ul>
<li class="chapter" data-level="9.1" data-path="9-1-internal-and-external-validity.html"><a href="9-1-internal-and-external-validity.html"><i class="fa fa-check"></i><b>9.1</b> Internal and External Validity</a></li>
<li class="chapter" data-level="9.2" data-path="9-2-ttivomra.html"><a href="9-2-ttivomra.html"><i class="fa fa-check"></i><b>9.2</b> Threats to Internal Validity of Multiple Regression Analysis</a></li>
<li class="chapter" data-level="9.3" data-path="9-3-internal-and-external-validity-when-the-regression-is-used-for-forecasting.html"><a href="9-3-internal-and-external-validity-when-the-regression-is-used-for-forecasting.html"><i class="fa fa-check"></i><b>9.3</b> Internal and External Validity when the Regression is Used for Forecasting</a></li>
<li class="chapter" data-level="9.4" data-path="9-4-etsacs.html"><a href="9-4-etsacs.html"><i class="fa fa-check"></i><b>9.4</b> Example: Test Scores and Class Size</a></li>
<li class="chapter" data-level="9.5" data-path="9-5-exercises-7.html"><a href="9-5-exercises-7.html"><i class="fa fa-check"></i><b>9.5</b> Exercises</a></li>
</ul></li>
<li class="chapter" data-level="10" data-path="10-rwpd.html"><a href="10-rwpd.html"><i class="fa fa-check"></i><b>10</b> Regression with Panel Data</a><ul>
<li class="chapter" data-level="10.1" data-path="10-1-panel-data.html"><a href="10-1-panel-data.html"><i class="fa fa-check"></i><b>10.1</b> Panel Data</a></li>
<li class="chapter" data-level="10.2" data-path="10-2-PDWTTP.html"><a href="10-2-PDWTTP.html"><i class="fa fa-check"></i><b>10.2</b> Panel Data with Two Time Periods: “Before and After” Comparisons</a></li>
<li class="chapter" data-level="10.3" data-path="10-3-fixed-effects-regression.html"><a href="10-3-fixed-effects-regression.html"><i class="fa fa-check"></i><b>10.3</b> Fixed Effects Regression</a><ul>
<li class="chapter" data-level="" data-path="10-3-fixed-effects-regression.html"><a href="10-3-fixed-effects-regression.html#estimation-and-inference"><i class="fa fa-check"></i>Estimation and Inference</a></li>
<li class="chapter" data-level="" data-path="10-3-fixed-effects-regression.html"><a href="10-3-fixed-effects-regression.html#application-to-traffic-deaths"><i class="fa fa-check"></i>Application to Traffic Deaths</a></li>
</ul></li>
<li class="chapter" data-level="10.4" data-path="10-4-regression-with-time-fixed-effects.html"><a href="10-4-regression-with-time-fixed-effects.html"><i class="fa fa-check"></i><b>10.4</b> Regression with Time Fixed Effects</a></li>
<li class="chapter" data-level="10.5" data-path="10-5-tferaaseffer.html"><a href="10-5-tferaaseffer.html"><i class="fa fa-check"></i><b>10.5</b> The Fixed Effects Regression Assumptions and Standard Errors for Fixed Effects Regression</a></li>
<li class="chapter" data-level="10.6" data-path="10-6-drunk-driving-laws-and-traffic-deaths.html"><a href="10-6-drunk-driving-laws-and-traffic-deaths.html"><i class="fa fa-check"></i><b>10.6</b> Drunk Driving Laws and Traffic Deaths</a></li>
<li class="chapter" data-level="10.7" data-path="10-7-exercises-8.html"><a href="10-7-exercises-8.html"><i class="fa fa-check"></i><b>10.7</b> Exercises</a></li>
</ul></li>
<li class="chapter" data-level="11" data-path="11-rwabdv.html"><a href="11-rwabdv.html"><i class="fa fa-check"></i><b>11</b> Regression with a Binary Dependent Variable</a><ul>
<li class="chapter" data-level="11.1" data-path="11-1-binary-dependent-variables-and-the-linear-probability-model.html"><a href="11-1-binary-dependent-variables-and-the-linear-probability-model.html"><i class="fa fa-check"></i><b>11.1</b> Binary Dependent Variables and the Linear Probability Model</a></li>
<li class="chapter" data-level="11.2" data-path="11-2-palr.html"><a href="11-2-palr.html"><i class="fa fa-check"></i><b>11.2</b> Probit and Logit Regression</a><ul>
<li class="chapter" data-level="" data-path="11-2-palr.html"><a href="11-2-palr.html#probit-regression"><i class="fa fa-check"></i>Probit Regression</a></li>
<li class="chapter" data-level="" data-path="11-2-palr.html"><a href="11-2-palr.html#logit-regression"><i class="fa fa-check"></i>Logit Regression</a></li>
</ul></li>
<li class="chapter" data-level="11.3" data-path="11-3-estimation-and-inference-in-the-logit-and-probit-models.html"><a href="11-3-estimation-and-inference-in-the-logit-and-probit-models.html"><i class="fa fa-check"></i><b>11.3</b> Estimation and Inference in the Logit and Probit Models</a></li>
<li class="chapter" data-level="11.4" data-path="11-4-application-to-the-boston-hmda-data.html"><a href="11-4-application-to-the-boston-hmda-data.html"><i class="fa fa-check"></i><b>11.4</b> Application to the Boston HMDA Data</a></li>
<li class="chapter" data-level="11.5" data-path="11-5-exercises-9.html"><a href="11-5-exercises-9.html"><i class="fa fa-check"></i><b>11.5</b> Exercises</a></li>
</ul></li>
<li class="chapter" data-level="12" data-path="12-ivr.html"><a href="12-ivr.html"><i class="fa fa-check"></i><b>12</b> Instrumental Variables Regression</a><ul>
<li class="chapter" data-level="12.1" data-path="12-1-TIVEWASRAASI.html"><a href="12-1-TIVEWASRAASI.html"><i class="fa fa-check"></i><b>12.1</b> The IV Estimator with a Single Regressor and a Single Instrument</a></li>
<li class="chapter" data-level="12.2" data-path="12-2-TGIVRM.html"><a href="12-2-TGIVRM.html"><i class="fa fa-check"></i><b>12.2</b> The General IV Regression Model</a></li>
<li class="chapter" data-level="12.3" data-path="12-3-civ.html"><a href="12-3-civ.html"><i class="fa fa-check"></i><b>12.3</b> Checking Instrument Validity</a></li>
<li class="chapter" data-level="12.4" data-path="12-4-attdfc.html"><a href="12-4-attdfc.html"><i class="fa fa-check"></i><b>12.4</b> Application to the Demand for Cigarettes</a></li>
<li class="chapter" data-level="12.5" data-path="12-5-where-do-valid-instruments-come-from.html"><a href="12-5-where-do-valid-instruments-come-from.html"><i class="fa fa-check"></i><b>12.5</b> Where Do Valid Instruments Come From?</a></li>
<li class="chapter" data-level="12.6" data-path="12-6-exercises-10.html"><a href="12-6-exercises-10.html"><i class="fa fa-check"></i><b>12.6</b> Exercises</a></li>
</ul></li>
<li class="chapter" data-level="13" data-path="13-eaqe.html"><a href="13-eaqe.html"><i class="fa fa-check"></i><b>13</b> Experiments and Quasi-Experiments</a><ul>
<li class="chapter" data-level="13.1" data-path="13-1-poceaie.html"><a href="13-1-poceaie.html"><i class="fa fa-check"></i><b>13.1</b> Potential Outcomes, Causal Effects and Idealized Experiments</a></li>
<li class="chapter" data-level="13.2" data-path="13-2-threats-to-validity-of-experiments.html"><a href="13-2-threats-to-validity-of-experiments.html"><i class="fa fa-check"></i><b>13.2</b> Threats to Validity of Experiments</a></li>
<li class="chapter" data-level="13.3" data-path="13-3-experimental-estimates-of-the-effect-of-class-size-reductions.html"><a href="13-3-experimental-estimates-of-the-effect-of-class-size-reductions.html"><i class="fa fa-check"></i><b>13.3</b> Experimental Estimates of the Effect of Class Size Reductions</a><ul>
<li class="chapter" data-level="" data-path="13-3-experimental-estimates-of-the-effect-of-class-size-reductions.html"><a href="13-3-experimental-estimates-of-the-effect-of-class-size-reductions.html#experimental-design-and-the-data-set"><i class="fa fa-check"></i>Experimental Design and the Data Set</a></li>
<li class="chapter" data-level="" data-path="13-3-experimental-estimates-of-the-effect-of-class-size-reductions.html"><a href="13-3-experimental-estimates-of-the-effect-of-class-size-reductions.html#analysis-of-the-star-data"><i class="fa fa-check"></i>Analysis of the STAR Data</a></li>
</ul></li>
<li class="chapter" data-level="13.4" data-path="13-4-qe.html"><a href="13-4-qe.html"><i class="fa fa-check"></i><b>13.4</b> Quasi Experiments</a><ul>
<li class="chapter" data-level="" data-path="13-4-qe.html"><a href="13-4-qe.html#the-differences-in-differences-estimator"><i class="fa fa-check"></i>The Differences-in-Differences Estimator</a></li>
<li class="chapter" data-level="" data-path="13-4-qe.html"><a href="13-4-qe.html#regression-discontinuity-estimators"><i class="fa fa-check"></i>Regression Discontinuity Estimators</a></li>
</ul></li>
<li class="chapter" data-level="13.5" data-path="13-5-exercises-11.html"><a href="13-5-exercises-11.html"><i class="fa fa-check"></i><b>13.5</b> Exercises</a></li>
</ul></li>
<li class="chapter" data-level="14" data-path="14-ittsraf.html"><a href="14-ittsraf.html"><i class="fa fa-check"></i><b>14</b> Introduction to Time Series Regression and Forecasting</a><ul>
<li class="chapter" data-level="14.1" data-path="14-1-using-regression-models-for-forecasting.html"><a href="14-1-using-regression-models-for-forecasting.html"><i class="fa fa-check"></i><b>14.1</b> Using Regression Models for Forecasting</a></li>
<li class="chapter" data-level="14.2" data-path="14-2-tsdasc.html"><a href="14-2-tsdasc.html"><i class="fa fa-check"></i><b>14.2</b> Time Series Data and Serial Correlation</a><ul>
<li class="chapter" data-level="" data-path="14-2-tsdasc.html"><a href="14-2-tsdasc.html#notation-lags-differences-logarithms-and-growth-rates"><i class="fa fa-check"></i>Notation, Lags, Differences, Logarithms and Growth Rates</a></li>
</ul></li>
<li class="chapter" data-level="14.3" data-path="14-3-autoregressions.html"><a href="14-3-autoregressions.html"><i class="fa fa-check"></i><b>14.3</b> Autoregressions</a><ul>
<li><a href="14-3-autoregressions.html#autoregressive-models-of-order-p">Autoregressive Models of Order <span class="math inline">\(p\)</span></a></li>
</ul></li>
<li class="chapter" data-level="14.4" data-path="14-4-cybtmpi.html"><a href="14-4-cybtmpi.html"><i class="fa fa-check"></i><b>14.4</b> Can You Beat the Market? (Part I)</a></li>
<li class="chapter" data-level="14.5" data-path="14-5-apatadlm.html"><a href="14-5-apatadlm.html"><i class="fa fa-check"></i><b>14.5</b> Additional Predictors and The ADL Model</a><ul>
<li class="chapter" data-level="" data-path="14-5-apatadlm.html"><a href="14-5-apatadlm.html#forecast-uncertainty-and-forecast-intervals"><i class="fa fa-check"></i>Forecast Uncertainty and Forecast Intervals</a></li>
</ul></li>
<li class="chapter" data-level="14.6" data-path="14-6-llsuic.html"><a href="14-6-llsuic.html"><i class="fa fa-check"></i><b>14.6</b> Lag Length Selection Using Information Criteria</a></li>
<li class="chapter" data-level="14.7" data-path="14-7-nit.html"><a href="14-7-nit.html"><i class="fa fa-check"></i><b>14.7</b> Nonstationarity I: Trends</a></li>
<li class="chapter" data-level="14.8" data-path="14-8-niib.html"><a href="14-8-niib.html"><i class="fa fa-check"></i><b>14.8</b> Nonstationarity II: Breaks</a></li>
<li class="chapter" data-level="14.9" data-path="14-9-can-you-beat-the-market-part-ii.html"><a href="14-9-can-you-beat-the-market-part-ii.html"><i class="fa fa-check"></i><b>14.9</b> Can You Beat the Market? (Part II)</a></li>
</ul></li>
<li class="chapter" data-level="15" data-path="15-eodce.html"><a href="15-eodce.html"><i class="fa fa-check"></i><b>15</b> Estimation of Dynamic Causal Effects</a><ul>
<li class="chapter" data-level="15.1" data-path="15-1-the-orange-juice-data.html"><a href="15-1-the-orange-juice-data.html"><i class="fa fa-check"></i><b>15.1</b> The Orange Juice Data</a></li>
<li class="chapter" data-level="15.2" data-path="15-2-dynamic-causal-effects.html"><a href="15-2-dynamic-causal-effects.html"><i class="fa fa-check"></i><b>15.2</b> Dynamic Causal Effects</a></li>
<li class="chapter" data-level="15.3" data-path="15-3-dynamic-multipliers-and-cumulative-dynamic-multipliers.html"><a href="15-3-dynamic-multipliers-and-cumulative-dynamic-multipliers.html"><i class="fa fa-check"></i><b>15.3</b> Dynamic Multipliers and Cumulative Dynamic Multipliers</a></li>
<li class="chapter" data-level="15.4" data-path="15-4-hac-standard-errors.html"><a href="15-4-hac-standard-errors.html"><i class="fa fa-check"></i><b>15.4</b> HAC Standard Errors</a></li>
<li class="chapter" data-level="15.5" data-path="15-5-estimation-of-dynamic-causal-effects-with-strictly-exogeneous-regressors.html"><a href="15-5-estimation-of-dynamic-causal-effects-with-strictly-exogeneous-regressors.html"><i class="fa fa-check"></i><b>15.5</b> Estimation of Dynamic Causal Effects with Strictly Exogeneous Regressors</a></li>
<li class="chapter" data-level="15.6" data-path="15-6-orange-juice-prices-and-cold-weather.html"><a href="15-6-orange-juice-prices-and-cold-weather.html"><i class="fa fa-check"></i><b>15.6</b> Orange Juice Prices and Cold Weather</a></li>
</ul></li>
<li class="chapter" data-level="16" data-path="16-atitsr.html"><a href="16-atitsr.html"><i class="fa fa-check"></i><b>16</b> Additional Topics in Time Series Regression</a><ul>
<li class="chapter" data-level="16.1" data-path="16-1-vector-autoregressions.html"><a href="16-1-vector-autoregressions.html"><i class="fa fa-check"></i><b>16.1</b> Vector Autoregressions</a></li>
<li class="chapter" data-level="16.2" data-path="16-2-ooiatdfglsurt.html"><a href="16-2-ooiatdfglsurt.html"><i class="fa fa-check"></i><b>16.2</b> Orders of Integration and the DF-GLS Unit Root Test</a></li>
<li class="chapter" data-level="16.3" data-path="16-3-cointegration.html"><a href="16-3-cointegration.html"><i class="fa fa-check"></i><b>16.3</b> Cointegration</a></li>
<li class="chapter" data-level="16.4" data-path="16-4-volatility-clustering-and-autoregressive-conditional-heteroskedasticity.html"><a href="16-4-volatility-clustering-and-autoregressive-conditional-heteroskedasticity.html"><i class="fa fa-check"></i><b>16.4</b> Volatility Clustering and Autoregressive Conditional Heteroskedasticity</a><ul>
<li class="chapter" data-level="" data-path="16-4-volatility-clustering-and-autoregressive-conditional-heteroskedasticity.html"><a href="16-4-volatility-clustering-and-autoregressive-conditional-heteroskedasticity.html#arch-and-garch-models"><i class="fa fa-check"></i>ARCH and GARCH Models</a></li>
<li class="chapter" data-level="" data-path="16-4-volatility-clustering-and-autoregressive-conditional-heteroskedasticity.html"><a href="16-4-volatility-clustering-and-autoregressive-conditional-heteroskedasticity.html#application-to-stock-price-volatility"><i class="fa fa-check"></i>Application to Stock Price Volatility</a></li>
<li class="chapter" data-level="" data-path="16-4-volatility-clustering-and-autoregressive-conditional-heteroskedasticity.html"><a href="16-4-volatility-clustering-and-autoregressive-conditional-heteroskedasticity.html#summary-8"><i class="fa fa-check"></i>Summary</a></li>
</ul></li>
<li class="chapter" data-level="16.5" data-path="16-5-exercises-12.html"><a href="16-5-exercises-12.html"><i class="fa fa-check"></i><b>16.5</b> Exercises</a></li>
</ul></li>
<li class="chapter" data-level="" data-path="references.html"><a href="references.html"><i class="fa fa-check"></i>References</a></li>
<li class="divider"></li>
<li><a href="https://github.com/rstudio/bookdown" target="blank">Published with bookdown</a></li>
</ul>
</nav>
</div>
<div class="book-body">
<div class="body-inner">
<div class="book-header" role="navigation">
<h1>
<i class="fa fa-circle-o-notch fa-spin"></i><a href="./">Introduction to Econometrics with R</a>
</h1>
</div>
<div class="page-wrapper" tabindex="-1" role="main">
<div class="page-inner">
<section class="normal" id="section-">
<div class="rmdreview">
This book is in <b>Open Review</b>. We want your feedback to make the book better for you and other students. You may annotate some text by <span style="background-color: #3297FD; color: white">selecting it with the cursor</span> and then clicking the <i class="h-icon-annotate"></i> on the pop-up menu. You can also see the annotations of others: click the <i class="h-icon-chevron-left"></i> in the upper right hand corner of the page <i class="fa fa-arrow-circle-right fa-rotate-315" aria-hidden="true"></i>
</div>
<div id="RSATDOSA" class="section level2">
<h2><span class="header-section-number">2.2</span> Random Sampling and the Distribution of Sample Averages</h2>
<p>To clarify the basic idea of random sampling, let us jump back to the dice rolling example:</p>
<p>Suppose we are rolling the dice <span class="math inline">\(n\)</span> times. This means we are interested in the outcomes of the random draws <span class="math inline">\(Y_i, \ i=1,\dots,n\)</span>, which are characterized by the same distribution. Since these outcomes are selected randomly, they are <em>random variables</em> themselves and their realizations will differ each time we draw a sample, i.e., each time we roll the dice <span class="math inline">\(n\)</span> times. Furthermore, each observation is randomly drawn from the same population, that is, the numbers from <span class="math inline">\(1\)</span> to <span class="math inline">\(6\)</span>, and their individual distribution is the same. Hence <span class="math inline">\(Y_1,\dots,Y_n\)</span> are identically distributed.</p>
<p>Moreover, we know that the value of any of the <span class="math inline">\(Y_i\)</span> does not provide any information on the remainder of the outcomes. In our example, rolling a six as the first observation in our sample does not alter the distributions of <span class="math inline">\(Y_2,\dots,Y_n\)</span>: all numbers are equally likely to occur. This means that all <span class="math inline">\(Y_i\)</span> are also independently distributed. Thus <span class="math inline">\(Y_1,\dots,Y_n\)</span> are independently and identically distributed (<em>i.i.d.</em>).
The dice example uses the simplest sampling scheme of all, which is why it is called <em>simple random sampling</em>. This concept is summarized in Key Concept 2.5.</p>
<div id="KC2.5" class="keyconcept">
<h3 class="right">
Key Concept 2.5
</h3>
<h3 class="left">
Simple Random Sampling and i.i.d. Random Variables
</h3>
<p>
In simple random sampling, <span class="math inline">\(n\)</span> objects are drawn at random from a population. Each object is equally likely to end up in the sample. We denote the value of the random variable <span class="math inline">\(Y\)</span> for the <span class="math inline">\(i^{th}\)</span> randomly drawn object as <span class="math inline">\(Y_i\)</span>. Since all objects are equally likely to be drawn and the distribution of <span class="math inline">\(Y_i\)</span> is the same for all <span class="math inline">\(i\)</span>, the <span class="math inline">\(Y_1, \dots, Y_n\)</span> are independently and identically distributed (i.i.d.). This means the distribution of <span class="math inline">\(Y_i\)</span> is the same for all <span class="math inline">\(i=1,\dots,n\)</span> and <span class="math inline">\(Y_1\)</span> is distributed independently of <span class="math inline">\(Y_2, \dots, Y_n\)</span> and <span class="math inline">\(Y_2\)</span> is distributed independently of <span class="math inline">\(Y_1, Y_3, \dots, Y_n\)</span> and so forth.
</p>
</div>
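<p>We can let <tt>R</tt> perform such a simple random sample for us using the <tt>sample()</tt> function. The following sketch (the sample size <tt>n</tt> and the object name <tt>draws</tt> are arbitrary choices for illustration) mimics rolling the dice <span class="math inline">\(n\)</span> times; executing it repeatedly yields different realizations of the same i.i.d. draws.</p>
<pre class="sourceCode r"><code class="sourceCode r"># number of dice rolls
n <- 5
# draw n times with replacement from {1,...,6}, i.e., a simple random sample
draws <- sample(1:6, size = n, replace = TRUE)
draws</code></pre>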
<p>What happens if we consider functions of the sample data? Consider the example of rolling the dice twice in a row once again. A sample now consists of two independent random draws from the set <span class="math inline">\(\{1,2,3,4,5,6\}\)</span>. It is apparent that any function of these two random variables, e.g., their sum, is also random. Convince yourself by executing the code below several times.</p>
<pre class="sourceCode r"><code class="sourceCode r"><span class="kw">sum</span>(<span class="kw">sample</span>(<span class="dv">1</span><span class="op">:</span><span class="dv">6</span>, <span class="dv">2</span>, <span class="dt">replace =</span> T))</code></pre>
<pre><code>## [1] 6</code></pre>
<p>Clearly, this sum, let us call it <span class="math inline">\(S\)</span>, is a random variable as it depends on randomly drawn summands. For this example, we can completely enumerate all outcomes and hence write down the theoretical probability distribution of our function of the sample data, <span class="math inline">\(S\)</span>:</p>
<p>We face <span class="math inline">\(6^2=36\)</span> possible pairs. Those pairs are</p>
<p><span class="math display">\[\begin{align*}
&(1,1) (1,2) (1,3) (1,4) (1,5) (1,6) \\
&(2,1) (2,2) (2,3) (2,4) (2,5) (2,6) \\
&(3,1) (3,2) (3,3) (3,4) (3,5) (3,6) \\
&(4,1) (4,2) (4,3) (4,4) (4,5) (4,6) \\
&(5,1) (5,2) (5,3) (5,4) (5,5) (5,6) \\
&(6,1) (6,2) (6,3) (6,4) (6,5) (6,6)
\end{align*}\]</span></p>
<p>Thus, possible outcomes for <span class="math inline">\(S\)</span> are</p>
<p><span class="math display">\[ \left\{ 2,3,4,5,6,7,8,9,10,11,12 \right\} . \]</span>
Enumeration of outcomes yields</p>
<p><span class="math display">\[\begin{align}
P(S) =
\begin{cases}
1/36, \ & S = 2 \\
2/36, \ & S = 3 \\
3/36, \ & S = 4 \\
4/36, \ & S = 5 \\
5/36, \ & S = 6 \\
6/36, \ & S = 7 \\
5/36, \ & S = 8 \\
4/36, \ & S = 9 \\
3/36, \ & S = 10 \\
2/36, \ & S = 11 \\
1/36, \ & S = 12
\end{cases}
\end{align}\]</span></p>
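<p>We can let <tt>R</tt> do this enumeration for us. The short sketch below uses the base functions <tt>outer()</tt> and <tt>table()</tt> to generate all <span class="math inline">\(36\)</span> pairs, sum them and tabulate the relative frequencies, reproducing the distribution stated above.</p>
<pre class="sourceCode r"><code class="sourceCode r"># compute all 36 possible sums of two dice rolls
sums <- outer(1:6, 1:6, FUN = "+")
# tabulate relative frequencies to obtain the distribution of S
table(sums) / 36</code></pre>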
<p>We can also compute <span class="math inline">\(E(S)\)</span> and <span class="math inline">\(\text{Var}(S)\)</span> as stated in Key Concept 2.1 and Key Concept 2.2.</p>
<pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Vector of outcomes</span>
S <-<span class="st"> </span><span class="dv">2</span><span class="op">:</span><span class="dv">12</span>
<span class="co"># Vector of probabilities</span>
PS <-<span class="st"> </span><span class="kw">c</span>(<span class="dv">1</span><span class="op">:</span><span class="dv">6</span>, <span class="dv">5</span><span class="op">:</span><span class="dv">1</span>) <span class="op">/</span><span class="st"> </span><span class="dv">36</span>
<span class="co"># Expectation of S</span>
ES <-<span class="st"> </span><span class="kw">sum</span>(S <span class="op">*</span><span class="st"> </span>PS)
ES</code></pre>
<pre><code>## [1] 7</code></pre>
<pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Variance of S</span>
VarS <-<span class="st"> </span><span class="kw">sum</span>(
(S <span class="op">-</span><span class="st"> </span>ES)<span class="op">^</span><span class="dv">2</span> <span class="op">*</span><span class="st"> </span>PS
)
VarS</code></pre>
<pre><code>## [1] 5.833333</code></pre>
<p>Alternatively, the <tt>%*%</tt> operator can be used to compute the expectation as the scalar product of the two vectors.</p>
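<p>For instance (note that <tt>%*%</tt> returns a <span class="math inline">\(1 \times 1\)</span> matrix here):</p>
<pre class="sourceCode r"><code class="sourceCode r"># expectation of S as the scalar product of outcomes and probabilities
S %*% PS</code></pre>
<pre><code>##      [,1]
## [1,]    7</code></pre>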
<p>So the distribution of <span class="math inline">\(S\)</span> is known. It is also evident that its distribution differs considerably from the marginal distribution, i.e., the distribution of a single dice roll’s outcome, <span class="math inline">\(D\)</span>. Let us visualize this using bar plots.</p>
<div class="unfolded">
<pre class="sourceCode r"><code class="sourceCode r"><span class="co"># divide the plotting area into one row with two columns</span>
<span class="kw">par</span>(<span class="dt">mfrow =</span> <span class="kw">c</span>(<span class="dv">1</span>, <span class="dv">2</span>))
<span class="co"># plot the distribution of S</span>
<span class="kw">barplot</span>(PS,
<span class="dt">ylim =</span> <span class="kw">c</span>(<span class="dv">0</span>, <span class="fl">0.2</span>),
<span class="dt">xlab =</span> <span class="st">"S"</span>,
<span class="dt">ylab =</span> <span class="st">"Probability"</span>,
<span class="dt">col =</span> <span class="st">"steelblue"</span>,
<span class="dt">space =</span> <span class="dv">0</span>,
<span class="dt">main =</span> <span class="st">"Sum of Two Dice Rolls"</span>)
<span class="co"># plot the distribution of D </span>
probability <-<span class="st"> </span><span class="kw">rep</span>(<span class="dv">1</span><span class="op">/</span><span class="dv">6</span>, <span class="dv">6</span>)
<span class="kw">names</span>(probability) <-<span class="st"> </span><span class="dv">1</span><span class="op">:</span><span class="dv">6</span>
<span class="kw">barplot</span>(probability,
<span class="dt">ylim =</span> <span class="kw">c</span>(<span class="dv">0</span>, <span class="fl">0.2</span>),
<span class="dt">xlab =</span> <span class="st">"D"</span>,
<span class="dt">col =</span> <span class="st">"steelblue"</span>,
<span class="dt">space =</span> <span class="dv">0</span>,
<span class="dt">main =</span> <span class="st">"Outcome of a Single Dice Roll"</span>)</code></pre>
<p><img src="ITER_files/figure-html/unnamed-chunk-59-1.png" width="80%" style="display: block; margin: auto;" /></p>
</div>
<p>Many econometric procedures deal with averages of sampled data. It is typically assumed that observations are drawn randomly from a larger, unknown population. As demonstrated for the function <span class="math inline">\(S\)</span> above, computing the average of a random sample means that the average itself is a random variable. This random variable in turn has a probability distribution, called the sampling distribution. Knowledge about the sampling distribution of the average is therefore crucial for understanding the performance of econometric procedures.</p>
<p>The <em>sample average</em> of a sample of <span class="math inline">\(n\)</span> observations <span class="math inline">\(Y_1, \dots, Y_n\)</span> is</p>
<p><span class="math display">\[ \overline{Y} = \frac{1}{n} \sum_{i=1}^n Y_i = \frac{1}{n} (Y_1 + Y_2 + \cdots + Y_n). \]</span>
<span class="math inline">\(\overline{Y}\)</span> is also called the sample mean.</p>
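<p>In <tt>R</tt>, the sample mean is computed with the function <tt>mean()</tt>, which is equivalent to summing the observations and dividing by their number. A minimal example:</p>
<pre class="sourceCode r"><code class="sourceCode r"># the sample mean of four observations
y <- c(2, 4, 6, 8)
mean(y) # 5
sum(y) / length(y) # 5</code></pre>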
<div id="mean-and-variance-of-the-sample-mean" class="section level3 unnumbered">
<h3>Mean and Variance of the Sample Mean</h3>
<p>Suppose that all observations <span class="math inline">\(Y_1,\dots,Y_n\)</span> are i.i.d. and denote by <span class="math inline">\(\mu_Y\)</span> and <span class="math inline">\(\sigma_Y^2\)</span> the mean and the variance of the <span class="math inline">\(Y_i\)</span>, respectively. Then we have that</p>
<p><span class="math display">\[ E(\overline{Y}) = E\left(\frac{1}{n} \sum_{i=1}^n Y_i \right) = \frac{1}{n} E\left(\sum_{i=1}^n Y_i\right) = \frac{1}{n} \sum_{i=1}^n E\left(Y_i\right) = \frac{1}{n} \cdot n \cdot \mu_Y = \mu_Y \]</span>
and</p>
<p><span class="math display">\[\begin{align*}
\text{Var}(\overline{Y}) =& \text{Var}\left(\frac{1}{n} \sum_{i=1}^n Y_i \right) \\
=& \frac{1}{n^2} \sum_{i=1}^n \text{Var}(Y_i) + \frac{1}{n^2} \sum_{i=1}^n \sum_{j=1, j\neq i}^n \text{cov}(Y_i,Y_j) \\
=& \frac{\sigma^2_Y}{n} \\
=& \sigma_{\overline{Y}}^2.
\end{align*}\]</span></p>
<p>The second summand vanishes since <span class="math inline">\(\text{cov}(Y_i,Y_j)=0\)</span> for <span class="math inline">\(i\neq j\)</span> due to independence. Consequently, the standard deviation of the sample mean is given by <span class="math display">\[\sigma_{\overline{Y}} = \frac{\sigma_Y}{\sqrt{n}}.\]</span></p>
<p>It is worthwhile to mention that these results hold irrespective of the underlying distribution of the <span class="math inline">\(Y_i\)</span>.</p>
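<p>A quick way to see this distribution-free property at work is a small simulation sketch (not part of the book's examples): sample means of uniformly distributed observations, for which <span class="math inline">\(\mu_Y = 0.5\)</span> and <span class="math inline">\(\sigma_Y^2 = 1/12\)</span>, should have mean <span class="math inline">\(0.5\)</span> and variance <span class="math inline">\((1/12)/n\)</span>.</p>
<pre class="sourceCode r"><code class="sourceCode r"># simulate 10000 sample means of n = 25 draws from U(0,1)
set.seed(1)
unif.avgs <- replicate(10000, mean(runif(25)))
# compare the simulated moments to the theoretical values
mean(unif.avgs) # should be close to mu_Y = 0.5
var(unif.avgs) # should be close to sigma_Y^2 / n = (1/12) / 25</code></pre>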
<div id="the-sampling-distribution-of-overliney-when-y-is-normally-distributed" class="section level4 unnumbered">
<h4>The Sampling Distribution of <span class="math inline">\(\overline{Y}\)</span> when <span class="math inline">\(Y\)</span> Is Normally Distributed</h4>
<p>If the <span class="math inline">\(Y_1,\dots,Y_n\)</span> are i.i.d. draws from a normal distribution with mean <span class="math inline">\(\mu_Y\)</span> and variance <span class="math inline">\(\sigma_Y^2\)</span>, the following holds for their sample average <span class="math inline">\(\overline{Y}\)</span>:</p>
<p><span class="math display">\[ \overline{Y} \sim \mathcal{N}(\mu_Y, \sigma_Y^2/n) \tag{2.4} \]</span></p>
<p>For example, if a sample <span class="math inline">\(Y_i\)</span> with <span class="math inline">\(i=1,\dots,10\)</span> is drawn from a standard normal distribution with mean <span class="math inline">\(\mu_Y = 0\)</span> and variance <span class="math inline">\(\sigma_Y^2=1\)</span> we have</p>
<p><span class="math display">\[ \overline{Y} \sim \mathcal{N}(0,0.1).\]</span></p>
<p>We can use <tt>R</tt>’s random number generation facilities to verify this result. The basic idea is to simulate outcomes of the true distribution of <span class="math inline">\(\overline{Y}\)</span> by repeatedly drawing random samples of 10 observations from the <span class="math inline">\(\mathcal{N}(0,1)\)</span> distribution and computing their respective averages. If we do this for a large number of repetitions, the simulated data set of averages should quite accurately reflect the theoretical distribution of <span class="math inline">\(\overline{Y}\)</span> if the theoretical result holds.</p>
<p>The approach sketched above is an example of what is commonly known as <em>Monte Carlo Simulation</em> or <em>Monte Carlo Experiment</em>. To perform this simulation in <tt>R</tt>, we proceed as follows:</p>
<ol style="list-style-type: decimal">
<li>Choose a sample size <tt>n</tt> and the number of samples to be drawn, <tt>reps</tt>.</li>
<li>Use the function <tt>replicate()</tt> in conjunction with <tt>rnorm()</tt> to draw <tt>n</tt> observations from the standard normal distribution <tt>reps</tt> times. <strong>Note</strong>: the outcome of <tt>replicate()</tt> is a matrix with dimensions <tt>n</tt> <span class="math inline">\(\times\)</span> <tt>reps</tt>. It contains the drawn samples as <em>columns</em>.</li>
<li>Compute sample means using <tt>colMeans()</tt>. This function computes the mean of each column, i.e., of each sample, and returns a vector.</li>
</ol>
<pre class="sourceCode r"><code class="sourceCode r"><span class="co"># set sample size and number of samples</span>
n <-<span class="st"> </span><span class="dv">10</span>
reps <-<span class="st"> </span><span class="dv">10000</span>
<span class="co"># perform random sampling</span>
samples <-<span class="st"> </span><span class="kw">replicate</span>(reps, <span class="kw">rnorm</span>(n)) <span class="co"># 10 x 10000 sample matrix</span>
<span class="co"># compute sample means</span>
sample.avgs <-<span class="st"> </span><span class="kw">colMeans</span>(samples)</code></pre>
<p>We then end up with a vector of sample averages. You can check that <tt>sample.avgs</tt> is indeed a vector:</p>
<pre class="sourceCode r"><code class="sourceCode r"><span class="co"># check that 'sample.avgs' is a vector</span>
<span class="kw">is.vector</span>(sample.avgs) </code></pre>
<pre><code>## [1] TRUE</code></pre>
<pre class="sourceCode r"><code class="sourceCode r"><span class="co"># print the first few entries to the console</span>
<span class="kw">head</span>(sample.avgs)</code></pre>
<pre><code>## [1] -0.12406767 -0.10649421 -0.01033423 -0.39905236 -0.41897968 -0.90883537</code></pre>
<p>A straightforward approach to examine the distribution of univariate numerical data is to plot it as a histogram and compare it to some known or assumed distribution. By default, <tt>hist()</tt> will give us a frequency histogram, i.e., a bar chart where observations are grouped into ranges, also called bins. The ordinate reports the number of observations falling into each of the bins. Instead, we want it to report density estimates for comparison purposes. This is achieved by setting the argument <tt>freq = FALSE</tt>. The number of bins is adjusted by the argument <tt>breaks</tt>.</p>
<p>Using <tt>curve()</tt>, we overlay the histogram with a red line, the theoretical density of a <span class="math inline">\(\mathcal{N}(0, 0.1)\)</span> random variable. Remember to use the argument <tt>add = TRUE</tt> to add the curve to the current plot. Otherwise <tt>R</tt> will open a new graphic device and discard the previous plot!<a href="#fn3" class="footnote-ref" id="fnref3"><sup>3</sup></a></p>
<div class="unfolded">
<pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Plot the density histogram</span>
<span class="kw">hist</span>(sample.avgs,
<span class="dt">ylim =</span> <span class="kw">c</span>(<span class="dv">0</span>, <span class="fl">1.4</span>),
<span class="dt">col =</span> <span class="st">"steelblue"</span> ,
<span class="dt">freq =</span> F,
<span class="dt">breaks =</span> <span class="dv">20</span>)
<span class="co"># overlay the theoretical distribution of sample averages on top of the histogram</span>
<span class="kw">curve</span>(<span class="kw">dnorm</span>(x, <span class="dt">sd =</span> <span class="dv">1</span><span class="op">/</span><span class="kw">sqrt</span>(n)),
<span class="dt">col =</span> <span class="st">"red"</span>,
<span class="dt">lwd =</span> <span class="st">"2"</span>,
<span class="dt">add =</span> T)</code></pre>
<p><img src="ITER_files/figure-html/unnamed-chunk-62-1.png" width="80%" style="display: block; margin: auto;" /></p>
</div>
<p>The sampling distribution of <span class="math inline">\(\overline{Y}\)</span> is indeed very close to that of a <span class="math inline">\(\mathcal{N}(0, 0.1)\)</span> distribution so the Monte Carlo Simulation supports the theoretical claim.</p>
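<p>Beyond the visual comparison, one may also check the first two moments of the simulated averages against their theoretical counterparts (a quick sketch, assuming <tt>sample.avgs</tt> from the chunk above is still available):</p>
<pre class="sourceCode r"><code class="sourceCode r"># compare the simulated mean and variance of the sample averages
# to the theoretical values 0 and 1/n = 0.1
mean(sample.avgs)
var(sample.avgs)</code></pre>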
<p>Let us discuss another example where using simple random sampling in a simulation setup helps to verify a well known result. As discussed before, the <a href="#chisquare">Chi-squared</a> distribution with <span class="math inline">\(M\)</span> degrees of freedom arises as the distribution of the sum of <span class="math inline">\(M\)</span> independent squared standard normal distributed random variables.</p>
<p>To visualize the claim stated in equation (<a href="#mjx-eqn-2.3">2.3</a>), we proceed similarly as in the example before:</p>
<ol style="list-style-type: decimal">
<li>Choose the degrees of freedom, <tt>DF</tt>, and the number of samples to be drawn, <tt>reps</tt>.</li>
<li>Draw <tt>reps</tt> random samples of size <tt>DF</tt> from the standard normal distribution using <tt>replicate()</tt>.</li>
<li>For each sample, square the outcomes and sum them up column-wise. Store the results.</li>
</ol>
<p>Again, we produce a density estimate for the distribution underlying our simulated data using a density histogram and overlay it with a line graph of the theoretical density function of the <span class="math inline">\(\chi^2_3\)</span> distribution.</p>
<div class="unfolded">
<pre class="sourceCode r"><code class="sourceCode r"><span class="co"># number of repetitions</span>
reps <-<span class="st"> </span><span class="dv">10000</span>
<span class="co"># set the degrees of freedom of the chi-squared distribution</span>
DF <-<span class="st"> </span><span class="dv">3</span>
<span class="co"># sample 10000 column vectors of 3 N(0,1) random variables each</span>
Z <-<span class="st"> </span><span class="kw">replicate</span>(reps, <span class="kw">rnorm</span>(DF))
<span class="co"># column sums of squares</span>
X <-<span class="st"> </span><span class="kw">colSums</span>(Z<span class="op">^</span><span class="dv">2</span>)
<span class="co"># histogram of column sums of squares</span>
<span class="kw">hist</span>(X,
<span class="dt">freq =</span> F,
<span class="dt">col =</span> <span class="st">"steelblue"</span>,
<span class="dt">breaks =</span> <span class="dv">40</span>,
<span class="dt">ylab =</span> <span class="st">"Density"</span>,
<span class="dt">main =</span> <span class="st">""</span>)
<span class="co"># add theoretical density</span>
<span class="kw">curve</span>(<span class="kw">dchisq</span>(x, <span class="dt">df =</span> DF),
<span class="dt">type =</span> <span class="st">'l'</span>,
<span class="dt">lwd =</span> <span class="dv">2</span>,
<span class="dt">col =</span> <span class="st">"red"</span>,
<span class="dt">add =</span> T)</code></pre>
<p><img src="ITER_files/figure-html/unnamed-chunk-63-1.png" width="80%" style="display: block; margin: auto;" /></p>
</div>
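<p>A complementary numerical check (a sketch reusing the simulated <tt>X</tt> from above): a <span class="math inline">\(\chi^2_M\)</span> distributed random variable has mean <span class="math inline">\(M\)</span> and variance <span class="math inline">\(2M\)</span>, so the simulated sums of squares should roughly match these moments.</p>
<pre class="sourceCode r"><code class="sourceCode r"># the chi-squared distribution with DF = 3 degrees of freedom
# has mean DF = 3 and variance 2 * DF = 6
mean(X) # should be close to 3
var(X) # should be close to 6</code></pre>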
</div>
</div>
<div id="large-sample-approximations-to-sampling-distributions" class="section level3 unnumbered">
<h3>Large Sample Approximations to Sampling Distributions</h3>
<p>Sampling distributions as considered in the last section play an important role in the development of econometric methods. There are two main approaches in characterizing sampling distributions: an “exact” approach and an “approximate” approach.</p>
<p>The exact approach aims to find a general formula for the sampling distribution that holds for any sample size <span class="math inline">\(n\)</span>. We call this the <em>exact distribution</em> or <em>finite-sample distribution</em>. In the previous examples of dice rolling and normal variates, we have dealt with functions of random variables whose sample distributions are <em>exactly known</em> in the sense that we can write them down as analytic expressions. However, this is not always possible. For <span class="math inline">\(\overline{Y}\)</span>, result (<a href="#mjx-eqn-2.4">2.4</a>) tells us that normality of the <span class="math inline">\(Y_i\)</span> implies normality of <span class="math inline">\(\overline{Y}\)</span> (we demonstrated this for the special case of <span class="math inline">\(Y_i \overset{i.i.d.}{\sim} \mathcal{N}(0,1)\)</span> with <span class="math inline">\(n=10\)</span> using a simulation study that involves simple random sampling). Unfortunately, the <em>exact</em> distribution of <span class="math inline">\(\overline{Y}\)</span> is generally unknown and often hard to derive (or even intractable) if we drop the assumption that the <span class="math inline">\(Y_i\)</span> have a normal distribution.</p>
<p>Therefore, as can be guessed from its name, the “approximate” approach aims to find an approximation to the sampling distribution that requires the sample size <span class="math inline">\(n\)</span> to be large. A distribution that is used as a large-sample approximation to the sampling distribution is also called the <em>asymptotic distribution</em>. This is due to the fact that the asymptotic distribution <em>is</em> the sampling distribution for <span class="math inline">\(n \rightarrow \infty\)</span>, i.e., the approximation becomes exact as the sample size goes to infinity. However, the difference between the sampling distribution and the asymptotic distribution is negligible for moderate or even small sample sizes so that approximations using the asymptotic distribution are useful.</p>
<p>In this section we will discuss two well known results that are used to approximate sampling distributions and thus constitute key tools in econometric theory: the <em>law of large numbers</em> and the <em>central limit theorem</em>. The law of large numbers states that in large samples, <span class="math inline">\(\overline{Y}\)</span> is close to <span class="math inline">\(\mu_Y\)</span> with high probability. The central limit theorem says that the sampling distribution of the standardized sample average, that is, <span class="math inline">\((\overline{Y} - \mu_Y)/\sigma_{\overline{Y}}\)</span> is asymptotically normally distributed. It is particularly interesting that both results do not depend on the distribution of <span class="math inline">\(Y\)</span>. In other words, being unable to describe the complicated sampling distribution of <span class="math inline">\(\overline{Y}\)</span> if <span class="math inline">\(Y\)</span> is not normal, approximations of the latter using the central limit theorem simplify the development and applicability of econometric procedures enormously. This is a key component underlying the theory of statistical inference for regression models. Both results are summarized in Key Concept 2.6 and Key Concept 2.7.</p>
<div id="KC2.6" class="keyconcept">
<h3 class="right">
Key Concept 2.6
</h3>
<h3 class="left">
Convergence in Probability, Consistency and the Law of Large Numbers
</h3>
<p>
<p>The sample average <span class="math inline">\(\overline{Y}\)</span> converges in probability to <span class="math inline">\(\mu_Y\)</span>: <span class="math inline">\(\overline{Y}\)</span> is <em>consistent</em> for <span class="math inline">\(\mu_Y\)</span> if the probability that <span class="math inline">\(\overline{Y}\)</span> is in the range <span class="math inline">\((\mu_Y - \epsilon)\)</span> to <span class="math inline">\((\mu_Y + \epsilon)\)</span> becomes arbitrarily close to <span class="math inline">\(1\)</span> as <span class="math inline">\(n\)</span> increases, for any constant <span class="math inline">\(\epsilon > 0\)</span>. We write this as</p>
<p><span class="math display">\[ P(\mu_Y-\epsilon \leq \overline{Y} \leq \mu_Y + \epsilon) \rightarrow 1, \, \epsilon > 0 \text{ as } n\rightarrow\infty. \]</span></p>
<p>Consider the independently and identically distributed random variables <span class="math inline">\(Y_i, i=1,\dots,n\)</span> with expectation <span class="math inline">\(E(Y_i)=\mu_Y\)</span> and variance <span class="math inline">\(\text{Var}(Y_i)=\sigma^2_Y\)</span>. Under the condition that <span class="math inline">\(\sigma^2_Y< \infty\)</span>, that is, large outliers are unlikely, the law of large numbers thus states that</p>
<p><span class="math display">\[ \overline{Y} \xrightarrow[]{p} \mu_Y. \]</span></p>
The following application simulates a large number of coin tosses (you may set the number of trials using the slider) with a fair coin and computes the fraction of heads observed for each additional toss. The result is a random path that, as stated by the law of large numbers, shows a tendency to approach the value of <span class="math inline">\(0.5\)</span> as <span class="math inline">\(n\)</span> grows.
<iframe height="570" width="800" frameborder="0" scrolling="no" src="CoinTossingWLLN.html"></iframe>
</p>
</div>
<p>The core statement of the law of large numbers is that under quite general conditions, the probability of obtaining a sample average <span class="math inline">\(\overline{Y}\)</span> that is close to <span class="math inline">\(\mu_Y\)</span> is high if we have a large sample size.</p>
<p>Consider the example of repeatedly tossing a coin where <span class="math inline">\(Y_i\)</span> is the result of the <span class="math inline">\(i^{th}\)</span> coin toss. <span class="math inline">\(Y_i\)</span> is a Bernoulli distributed random variable with <span class="math inline">\(p\)</span> the probability of observing heads,
<span class="math display">\[ P(Y_i) = \begin{cases} p, & Y_i = 1 \\ 1-p, & Y_i = 0 \end{cases} \]</span>
where <span class="math inline">\(p = 0.5\)</span> as we assume a fair coin. Since <span class="math inline">\(E(Y_i) = 1 \cdot p + 0 \cdot (1-p) = p\)</span>, it follows that</p>
<p><span class="math display">\[ \mu_Y = p = 0.5. \]</span>
Let <span class="math inline">\(R_n\)</span> denote the proportion of heads in the first <span class="math inline">\(n\)</span> tosses,</p>
<p><span class="math display">\[ R_n = \frac{1}{n} \sum_{i=1}^n Y_i. \tag{2.5}\]</span></p>
<p>According to the law of large numbers, the observed proportion of heads converges in probability to <span class="math inline">\(\mu_Y = 0.5\)</span>, the probability of tossing heads in a <em>single</em> coin toss, <span class="math display">\[ R_n \xrightarrow[]{p} \mu_Y=0.5 \ \ \text{as} \ \ n \rightarrow \infty.\]</span> This result is the same as the one illustrated by the interactive application in Key Concept 2.6. We now show how to replicate this using <tt>R</tt>.</p>
<p>The procedure is as follows:</p>
<ol style="list-style-type: decimal">
<li>Sample <tt>N</tt> observations from the Bernoulli distribution, e.g., using <tt>sample()</tt>.</li>
<li>Calculate the proportion of heads <span class="math inline">\(R_n\)</span> as in (<a href="#mjx-eqn-2.5">2.5</a>). A way to achieve this is to call <tt>cumsum()</tt> on the vector of observations <tt>Y</tt> to obtain its cumulative sum and then divide by the respective number of observations.</li>
</ol>
<p>We continue by plotting the path and also add a dashed line for the benchmark probability <span class="math inline">\(p = 0.5\)</span>.</p>
<pre class="sourceCode r"><code class="sourceCode r"><span class="co"># set seed</span>
<span class="kw">set.seed</span>(<span class="dv">1</span>)
<span class="co"># set number of coin tosses and simulate</span>
N <-<span class="st"> </span><span class="dv">30000</span>
Y <-<span class="st"> </span><span class="kw">sample</span>(<span class="dv">0</span><span class="op">:</span><span class="dv">1</span>, N, <span class="dt">replace =</span> T)
<span class="co"># Calculate R_n for 1:N</span>
S <-<span class="st"> </span><span class="kw">cumsum</span>(Y)
R <-<span class="st"> </span>S<span class="op">/</span>(<span class="dv">1</span><span class="op">:</span>N)
<span class="co"># Plot the path.</span>
<span class="kw">plot</span>(R,
<span class="dt">ylim =</span> <span class="kw">c</span>(<span class="fl">0.3</span>, <span class="fl">0.7</span>),
<span class="dt">type =</span> <span class="st">"l"</span>,
<span class="dt">col =</span> <span class="st">"steelblue"</span>,
<span class="dt">lwd =</span> <span class="dv">2</span>,
<span class="dt">xlab =</span> <span class="st">"n"</span>,
<span class="dt">ylab =</span> <span class="st">"R_n"</span>,
<span class="dt">main =</span> <span class="st">"Converging Share of Heads in Repeated Coin Tossing"</span>)
<span class="co"># Add a dashed line for R_n = 0.5</span>
<span class="kw">lines</span>(<span class="kw">c</span>(<span class="dv">0</span>, N),
<span class="kw">c</span>(<span class="fl">0.5</span>, <span class="fl">0.5</span>),
<span class="dt">col =</span> <span class="st">"darkred"</span>,
<span class="dt">lty =</span> <span class="dv">2</span>,
<span class="dt">lwd =</span> <span class="dv">1</span>)</code></pre>
<p><img src="ITER_files/figure-html/unnamed-chunk-66-1.png" width="80%" style="display: block; margin: auto;" /></p>
<p>There are several things to be said about this plot.</p>
<ul>
<li><p>The blue graph shows the observed proportion of heads when tossing a coin <span class="math inline">\(n\)</span> times.</p></li>
<li><p>Since the <span class="math inline">\(Y_i\)</span> are random variables, <span class="math inline">\(R_n\)</span> is a random variate, too. The path depicted is only one of many possible realizations of <span class="math inline">\(R_n\)</span> as it is determined by the <span class="math inline">\(30000\)</span> observations sampled from the Bernoulli distribution. Thus, the code chunk above produces a different path every time you execute it (try this below!).</p></li>
<li><p>If the number of coin tosses <span class="math inline">\(n\)</span> is small, the proportion of heads may be anything but close to its theoretical value, <span class="math inline">\(\mu_Y = 0.5\)</span>. However, as more and more observations are included in the sample, we find that the path stabilizes in the neighborhood of <span class="math inline">\(0.5\)</span>. The average of multiple trials shows a clear tendency to converge to its expected value as the sample size increases, just as claimed by the law of large numbers.</p></li>
</ul>
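<p>The phrase “close to <span class="math inline">\(\mu_Y\)</span> with high probability” can also be quantified directly. The following simulation sketch (not part of the book's code) estimates <span class="math inline">\(P(|\overline{Y} - 0.5| \leq 0.01)\)</span> for a fair coin and increasing sample sizes:</p>
<pre class="sourceCode r"><code class="sourceCode r"># estimate P(|Ybar - 0.5| <= 0.01) for growing sample sizes
set.seed(1)
eps <- 0.01
for (n in c(100, 1000, 10000)) {
  avgs <- replicate(2000, mean(sample(0:1, n, replace = TRUE)))
  cat("n =", n, ":", mean(abs(avgs - 0.5) <= eps), "\n")
}</code></pre>
<p>The estimated probabilities increase towards <span class="math inline">\(1\)</span> as <span class="math inline">\(n\)</span> grows, just as the law of large numbers asserts.</p>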
<div id="KC2.7" class="keyconcept">
<h3 class="right">
Key Concept 2.7
</h3>
<h3 class="left">
The Central Limit Theorem
</h3>
<p>
<p>Suppose that <span class="math inline">\(Y_1,\dots,Y_n\)</span> are independently and identically distributed random variables with expectation <span class="math inline">\(E(Y_i)=\mu_Y\)</span> and variance <span class="math inline">\(\text{Var}(Y_i)=\sigma^2_Y\)</span> where <span class="math inline">\(0<\sigma^2_Y<\infty\)</span>.<br>
The Central Limit Theorem (CLT) states that, if the sample size <span class="math inline">\(n\)</span> goes to infinity, the distribution of the standardized sample average
<span class="math display">\[ \frac{\overline{Y} - \mu_Y}{\sigma_{\overline{Y}}} = \frac{\overline{Y} - \mu_Y}{\sigma_Y/\sqrt{n}} \ \]</span>
becomes arbitrarily well approximated by the standard normal distribution.</p>
<p>The application below demonstrates the CLT for the sample average of normally distributed random variables with mean <span class="math inline">\(5\)</span> and variance <span class="math inline">\(25^2\)</span>. You may check the following properties:</p>
<ul>
<li>The distribution of the sample average is normal.</li>
<li>As the sample size increases, the distribution of <span class="math inline">\(\overline{Y}\)</span> tightens around the true mean of <span class="math inline">\(5\)</span>.</li>
<li>The distribution of the standardized sample average is close to the standard normal distribution for large <span class="math inline">\(n\)</span>.</li>
</ul>
<iframe height="620" width="800" frameborder="0" scrolling="no" src="normaldisthistWLLN.html">
</iframe>
</p>
</div>
<p>According to the CLT, the distribution of the sample mean <span class="math inline">\(\overline{Y}\)</span> of the Bernoulli distributed random variables <span class="math inline">\(Y_i\)</span>, <span class="math inline">\(i=1,...,n\)</span>, is well approximated by the normal distribution with mean <span class="math inline">\(\mu_Y=p=0.5\)</span> and variance <span class="math inline">\(\sigma^2_{\overline{Y}} = p(1-p)/n = 0.25/n\)</span> for large <span class="math inline">\(n\)</span>. Consequently, for the standardized sample mean we conclude that <span class="math display">\[ \frac{\overline{Y} - 0.5}{0.5/\sqrt{n}} \tag{2.6}\]</span> should be well approximated by the standard normal distribution <span class="math inline">\(\mathcal{N}(0,1)\)</span>. We employ another simulation study to demonstrate this graphically. The idea is as follows.</p>
<p>Draw a large number of random samples, <span class="math inline">\(10000\)</span> say, of size <span class="math inline">\(n\)</span> from the Bernoulli distribution and compute the sample averages. Standardize the averages as shown in (<a href="#mjx-eqn-2.6">2.6</a>). Next, visualize the distribution of the generated standardized sample averages by means of a histogram and compare it to the standard normal distribution. Repeat this for different sample sizes <span class="math inline">\(n\)</span> to see how increasing the sample size impacts the simulated distribution of the averages.</p>
<p>In <tt>R</tt>, realize this as follows:</p>
<ol style="list-style-type: decimal">
<li><p>We start by defining that the next four subsequently generated figures shall be drawn in a <span class="math inline">\(2\times2\)</span> array such that they can be easily compared. This is done by calling <code>par(mfrow = c(2, 2))</code> before generating the figures.</p></li>
<li><p>We define the number of repetitions <tt>reps</tt> as <span class="math inline">\(10000\)</span> and create a vector of sample sizes named <tt>sample.sizes</tt>. We consider samples of sizes <span class="math inline">\(5\)</span>, <span class="math inline">\(20\)</span>, <span class="math inline">\(75\)</span> and <span class="math inline">\(100\)</span>.</p></li>
<li><p>Next, we combine two <tt>for()</tt> loops to simulate the data and plot the distributions. The inner loop generates <span class="math inline">\(10000\)</span> random samples, each consisting of <tt>n</tt> observations that are drawn from the Bernoulli distribution, and computes the standardized averages. The outer loop executes the inner loop for the different sample sizes <tt>n</tt> and produces a plot for each iteration.</p></li>
</ol>
<pre class="sourceCode r"><code class="sourceCode r"><span class="co"># subdivide the plot panel into a 2-by-2 array</span>
<span class="kw">par</span>(<span class="dt">mfrow =</span> <span class="kw">c</span>(<span class="dv">2</span>, <span class="dv">2</span>))
<span class="co"># set the number of repetitions and the sample sizes</span>
reps <-<span class="st"> </span><span class="dv">10000</span>
sample.sizes <-<span class="st"> </span><span class="kw">c</span>(<span class="dv">5</span>, <span class="dv">20</span>, <span class="dv">75</span>, <span class="dv">100</span>)
<span class="co"># set seed for reproducibility</span>
<span class="kw">set.seed</span>(<span class="dv">123</span>)
<span class="co"># vector of break numbers for histograms</span>
b <-<span class="st"> </span><span class="kw">c</span>(<span class="dv">5</span>, <span class="dv">10</span>, <span class="dv">15</span>, <span class="dv">30</span>)
<span class="co"># outer loop (loop over the sample sizes)</span>
<span class="cf">for</span> (n <span class="cf">in</span> sample.sizes) {
  samplemean <-<span class="st"> </span><span class="kw">rep</span>(<span class="dv">0</span>, reps) <span class="co"># initialize the vector of sample means</span>
  stdsamplemean <-<span class="st"> </span><span class="kw">rep</span>(<span class="dv">0</span>, reps) <span class="co"># initialize the vector of standardized sample means</span>
<span class="co"># inner loop (loop over repetitions) </span>
<span class="cf">for</span> (i <span class="cf">in</span> <span class="dv">1</span><span class="op">:</span>reps) {
x <-<span class="st"> </span><span class="kw">rbinom</span>(n, <span class="dv">1</span>, <span class="fl">0.5</span>)
samplemean[i] <-<span class="st"> </span><span class="kw">mean</span>(x)
stdsamplemean[i] <-<span class="st"> </span><span class="kw">sqrt</span>(n)<span class="op">*</span>(<span class="kw">mean</span>(x) <span class="op">-</span><span class="st"> </span><span class="fl">0.5</span>)<span class="op">/</span><span class="fl">0.5</span>
}
<span class="co"># plot histogram and overlay the N(0,1) density in every iteration </span>
<span class="kw">hist</span>(stdsamplemean,
<span class="dt">col =</span> <span class="st">"steelblue"</span>,
<span class="dt">freq =</span> <span class="ot">FALSE</span>,
<span class="dt">breaks =</span> <span class="dv">40</span>,
<span class="dt">xlim =</span> <span class="kw">c</span>(<span class="op">-</span><span class="dv">3</span>, <span class="dv">3</span>),
<span class="dt">ylim =</span> <span class="kw">c</span>(<span class="dv">0</span>, <span class="fl">0.8</span>),
<span class="dt">xlab =</span> <span class="kw">paste</span>(<span class="st">"n ="</span>, n),
<span class="dt">main =</span> <span class="st">""</span>)
<span class="kw">curve</span>(<span class="kw">dnorm</span>(x),
<span class="dt">lwd =</span> <span class="dv">2</span>,
<span class="dt">col =</span> <span class="st">"darkred"</span>,
<span class="dt">add =</span> <span class="ot">TRUE</span>)
} </code></pre>
<p><img src="ITER_files/figure-html/unnamed-chunk-69-1.png" width="80%" style="display: block; margin: auto;" /></p>
<p>We see that the simulated sampling distribution of the standardized average tends to deviate strongly from the standard normal distribution if the sample size is small, e.g., for <span class="math inline">\(n=5\)</span> and <span class="math inline">\(n=20\)</span>. However, as <span class="math inline">\(n\)</span> grows, the histograms approach the standard normal distribution. The approximation works quite well, see <span class="math inline">\(n=100\)</span>.</p>
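<p>As a simple numerical complement to the visual impression (a sketch, assuming <tt>stdsamplemean</tt> still contains the results for <span class="math inline">\(n=100\)</span> from the last iteration of the outer loop), empirical quantiles of the standardized averages may be compared to those of the standard normal distribution:</p>
<pre class="sourceCode r"><code class="sourceCode r"># empirical quantiles of the standardized sample means for n = 100
quantile(stdsamplemean, probs = c(0.05, 0.5, 0.95))
# corresponding quantiles of the standard normal distribution
qnorm(c(0.05, 0.5, 0.95))</code></pre>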
<iframe src="DCL/playground.html" frameborder="0" scrolling="no" style="width:100%;height:340px">
</iframe>
</div>
</div>
<div class="footnotes">
<hr />
<ol start="3">
<li id="fn3"><p><em>Hint:</em> <tt>T</tt> and <tt>F</tt> are alternatives for <tt>TRUE</tt> and <tt>FALSE</tt>.<a href="2-2-RSATDOSA.html#fnref3" class="footnote-back">↩</a></p></li>
</ol>
</div>
</section>
</div>
</div>
</div>
<a href="2-1-random-variables-and-probability-distributions.html" class="navigation navigation-prev " aria-label="Previous page"><i class="fa fa-angle-left"></i></a>
<a href="2-3-exercises.html" class="navigation navigation-next " aria-label="Next page"><i class="fa fa-angle-right"></i></a>
</div>
</div>
<script src="libs/gitbook-2.6.7/js/app.min.js"></script>
<script src="libs/gitbook-2.6.7/js/lunr.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-search.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-sharing.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-fontsettings.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-bookdown.js"></script>
<script src="libs/gitbook-2.6.7/js/jquery.highlight.js"></script>
<script>
gitbook.require(["gitbook"], function(gitbook) {
gitbook.start({
"sharing": {
"github": true,
"facebook": true,
"twitter": true,
"google": false,
"linkedin": true,
"weibo": false,
"instapaper": false,
"vk": false,
"all": ["facebook", "google", "twitter", "linkedin", "weibo", "instapaper"]
},
"fontsettings": {
"theme": "white",
"family": "serif",
"size": 2
},
"edit": {
"link": "https://github.com/mca91/EconometricsWithR/edit/master/02-ch2.Rmd",
"text": "Edit"
},
"history": {
"link": null,
"text": null
},
"download": ["ITER.pdf"],
"toc": {
"collapse": "subsection",
"scroll_highlight": true
}
});
});
</script>
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
(function () {
var script = document.createElement("script");
script.type = "text/javascript";
var src = "true";
if (src === "" || src === "true") src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-MML-AM_CHTML";
if (location.protocol !== "file:" && /^https?:/.test(src))
src = src.replace(/^https?:/, '');
script.src = src;
document.getElementsByTagName("head")[0].appendChild(script);
})();
</script>
</body>
</html>