docs/comparingcombining-scrnaseq-datasets.html

<!DOCTYPE html>
<html lang="en">
<!-- lang="en" declares the document language (all visible text on this page is
     English) so screen readers choose the right pronunciation rules and
     translation/search tools can classify the page. -->

<head>

  <meta charset="UTF-8">
  <!-- Core document metadata. The charset declaration above stays first so the
       encoding is known before any text content is parsed. -->
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <title>Analysis of single cell RNA-seq data</title>
  <meta name="description" content="Analysis of single cell RNA-seq data">
  <meta name="generator" content="bookdown 0.6 and GitBook 2.6.7">

  <!-- Open Graph basics; og:url, og:description and og:image are declared in
       the "for Facebook" block further down in this head. -->
  <meta property="og:title" content="Analysis of single cell RNA-seq data" />
  <meta property="og:type" content="book" />

  <!-- twitter:card and twitter:title are declared only once, in the
       "for Twitter" block further down in this head. Repeating them here with a
       different card type ("summary" vs "summary_large_image") would leave it
       up to the crawler which of the two conflicting values is used. -->

  <meta name="author" content="Vladimir Kiselev (wikiselev), Tallulah Andrews, Jennifer Westoby (Jenni_Westoby), Davis McCarthy (davisjmcc), Maren Büttner (marenbuettner) and Martin Hemberg (m_hemberg)">
  <meta name="date" content="2018-02-03">

  <!-- Mobile rendering: responsive viewport plus iOS home-screen web-app hints. -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <meta name="apple-mobile-web-app-capable" content="yes">
  <meta name="apple-mobile-web-app-status-bar-style" content="black">
  
  
<!-- Sequential navigation hints: the chapters before and after this one. -->
<link href="biological-analysis.html" rel="prev">
<link href="seurat-chapter.html" rel="next">

<!-- jQuery is loaded synchronously and ahead of the stylesheets, in the same
     position as before, so scripts later in the page can rely on it. -->
<script src="libs/jquery-2.2.3/jquery.min.js"></script>

<!-- GitBook theme followed by its plugin stylesheets. The order is kept as-is
     because later sheets override earlier ones at equal specificity. -->
<link rel="stylesheet" href="libs/gitbook-2.6.7/css/style.css">
<link rel="stylesheet" href="libs/gitbook-2.6.7/css/plugin-bookdown.css">
<link rel="stylesheet" href="libs/gitbook-2.6.7/css/plugin-highlight.css">
<link rel="stylesheet" href="libs/gitbook-2.6.7/css/plugin-search.css">
<link rel="stylesheet" href="libs/gitbook-2.6.7/css/plugin-fontsettings.css">


<!-- for Facebook -->  
<meta property="og:url" content="http://hemberg-lab.github.io/scRNA.seq.course/" />
<meta property="og:description" content="In this course we will be surveying the existing problems as well as the computational and statistical frameworks available for the analysis of scRNA-seq. The course is taught through the University of Cambridge Bioinformatics training unit, but the material found on these pages is meant to be used by anyone interested in learning about computational analysis of scRNA-seq data." />
<meta property="og:image" content="http://hemberg-lab.github.io/scRNA.seq.course/figures/RNA-Seq_workflow-5.pdf.jpg" />

<!-- for Twitter -->          
<meta name="twitter:card" content="summary_large_image" />
<meta name="twitter:title" content="Analysis of single-cell RNA-seq data" />
<meta name="twitter:description" content="In this course we will be surveying the existing problems as well as the computational and statistical frameworks available for the analysis of scRNA-seq. The course is taught through the University of Cambridge Bioinformatics training unit, but the material found on these pages is meant to be used by anyone interested in learning about computational analysis of scRNA-seq data." />
<meta name="twitter:image" content="http://hemberg-lab.github.io/scRNA.seq.course/figures/RNA-Seq_workflow-5.pdf.jpg" />

<!-- Google Analytics (analytics.js) -->
<script>
  // Asynchronous analytics.js loader. It stores the global name 'ga' in
  // window.GoogleAnalyticsObject, defines window.ga as a stub that queues its
  // arguments in ga.q until the library arrives, timestamps the load in ga.l,
  // and inserts an async <script> element before the first script on the page.
  //
  // The library URL is an absolute https:// address rather than a
  // protocol-relative one, so it still resolves when the page is opened from
  // file:// and is never requested over plain http.
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  // Queue tracker creation for this property; 'auto' is the third argument
  // (the cookie-domain setting, per the analytics.js documentation).
  ga('create', 'UA-71525309-1', 'auto');
  // Queue a pageview hit for the current page.
  ga('send', 'pageview');

</script>


<style>
/* Layout and colours for highlighted code listings. Rule order matches the
   original sheet so the cascade is unchanged. */

/* Let wide listings scroll horizontally instead of overflowing. */
div.sourceCode {
  overflow-x: auto;
}

/* Reset spacing and borders on the table-based (line-numbered) layout. */
table.sourceCode,
tr.sourceCode,
td.lineNumbers,
td.sourceCode {
  margin: 0;
  padding: 0;
  vertical-align: baseline;
  border: none;
}
table.sourceCode {
  width: 100%;
  line-height: 100%;
}

/* Line-number gutter: right-aligned grey digits with a separator rule. */
td.lineNumbers {
  text-align: right;
  padding-right: 4px;
  padding-left: 4px;
  color: #aaa;
  border-right: 1px solid #aaa;
}
td.sourceCode {
  padding-left: 5px;
}

/* Token colours, one rule per token class. */
/* Keyword */
code > span.kw { color: #007020; font-weight: bold; }
/* DataType */
code > span.dt { color: #902000; }
/* DecVal */
code > span.dv { color: #40a070; }
/* BaseN */
code > span.bn { color: #40a070; }
/* Float */
code > span.fl { color: #40a070; }
/* Char */
code > span.ch { color: #4070a0; }
/* String */
code > span.st { color: #4070a0; }
/* Comment */
code > span.co { color: #60a0b0; font-style: italic; }
/* Other */
code > span.ot { color: #007020; }
/* Alert */
code > span.al { color: #f00; font-weight: bold; }
/* Function */
code > span.fu { color: #06287e; }
/* Error */
code > span.er { color: #f00; font-weight: bold; }
/* Warning */
code > span.wa { color: #60a0b0; font-weight: bold; font-style: italic; }
/* Constant */
code > span.cn { color: #800; }
/* SpecialChar */
code > span.sc { color: #4070a0; }
/* VerbatimString */
code > span.vs { color: #4070a0; }
/* SpecialString */
code > span.ss { color: #b68; }
/* Import (no styling) */
code > span.im { }
/* Variable */
code > span.va { color: #19177c; }
/* ControlFlow */
code > span.cf { color: #007020; font-weight: bold; }
/* Operator */
code > span.op { color: #666; }
/* BuiltIn (no styling) */
code > span.bu { }
/* Extension (no styling) */
code > span.ex { }
/* Preprocessor */
code > span.pp { color: #bc7a00; }
/* Attribute */
code > span.at { color: #7d9029; }
/* Documentation */
code > span.do { color: #ba2121; font-style: italic; }
/* Annotation */
code > span.an { color: #60a0b0; font-weight: bold; font-style: italic; }
/* CommentVar */
code > span.cv { color: #60a0b0; font-weight: bold; font-style: italic; }
/* Information */
code > span.in { color: #60a0b0; font-weight: bold; font-style: italic; }
</style>

<!-- Site stylesheet, linked after the theme sheets and the inline highlighting
     rules so its declarations win at equal specificity. -->
<link href="style.css" rel="stylesheet">
</head>

<body>


  <div class="book without-animation with-summary font-size-2 font-family-1" data-basepath=".">

    <div class="book-summary">
      <nav role="navigation">

<ul class="summary">
<li><a href="index.html">Table of Contents</a></li>

<li class="divider"></li>
<li class="chapter" data-level="1" data-path="index.html"><a href="index.html"><i class="fa fa-check"></i><b>1</b> About the course</a><ul>
<li class="chapter" data-level="1.1" data-path="index.html"><a href="index.html#video"><i class="fa fa-check"></i><b>1.1</b> Video</a></li>
<li class="chapter" data-level="1.2" data-path="index.html"><a href="index.html#registration"><i class="fa fa-check"></i><b>1.2</b> Registration</a></li>
<li class="chapter" data-level="1.3" data-path="index.html"><a href="index.html#github"><i class="fa fa-check"></i><b>1.3</b> GitHub</a></li>
<li class="chapter" data-level="1.4" data-path="index.html"><a href="index.html#docker-image-rstudio"><i class="fa fa-check"></i><b>1.4</b> Docker image (RStudio)</a></li>
<li class="chapter" data-level="1.5" data-path="index.html"><a href="index.html#manual-installation"><i class="fa fa-check"></i><b>1.5</b> Manual installation</a></li>
<li class="chapter" data-level="1.6" data-path="index.html"><a href="index.html#license"><i class="fa fa-check"></i><b>1.6</b> License</a></li>
<li class="chapter" data-level="1.7" data-path="index.html"><a href="index.html#prerequisites"><i class="fa fa-check"></i><b>1.7</b> Prerequisites</a></li>
<li class="chapter" data-level="1.8" data-path="index.html"><a href="index.html#contact"><i class="fa fa-check"></i><b>1.8</b> Contact</a></li>
</ul></li>
<li class="chapter" data-level="2" data-path="introduction-to-single-cell-rna-seq.html"><a href="introduction-to-single-cell-rna-seq.html"><i class="fa fa-check"></i><b>2</b> Introduction to single-cell RNA-seq</a><ul>
<li class="chapter" data-level="2.1" data-path="introduction-to-single-cell-rna-seq.html"><a href="introduction-to-single-cell-rna-seq.html#bulk-rna-seq"><i class="fa fa-check"></i><b>2.1</b> Bulk RNA-seq</a></li>
<li class="chapter" data-level="2.2" data-path="introduction-to-single-cell-rna-seq.html"><a href="introduction-to-single-cell-rna-seq.html#scrna-seq"><i class="fa fa-check"></i><b>2.2</b> scRNA-seq</a></li>
<li class="chapter" data-level="2.3" data-path="introduction-to-single-cell-rna-seq.html"><a href="introduction-to-single-cell-rna-seq.html#workflow"><i class="fa fa-check"></i><b>2.3</b> Workflow</a></li>
<li class="chapter" data-level="2.4" data-path="introduction-to-single-cell-rna-seq.html"><a href="introduction-to-single-cell-rna-seq.html#computational-analysis"><i class="fa fa-check"></i><b>2.4</b> Computational Analysis</a></li>
<li class="chapter" data-level="2.5" data-path="introduction-to-single-cell-rna-seq.html"><a href="introduction-to-single-cell-rna-seq.html#challenges"><i class="fa fa-check"></i><b>2.5</b> Challenges</a></li>
<li class="chapter" data-level="2.6" data-path="introduction-to-single-cell-rna-seq.html"><a href="introduction-to-single-cell-rna-seq.html#experimental-methods"><i class="fa fa-check"></i><b>2.6</b> Experimental methods</a></li>
<li class="chapter" data-level="2.7" data-path="introduction-to-single-cell-rna-seq.html"><a href="introduction-to-single-cell-rna-seq.html#what-platform-to-use-for-my-experiment"><i class="fa fa-check"></i><b>2.7</b> What platform to use for my experiment?</a></li>
</ul></li>
<li class="chapter" data-level="3" data-path="processing-raw-scrna-seq-data.html"><a href="processing-raw-scrna-seq-data.html"><i class="fa fa-check"></i><b>3</b> Processing Raw scRNA-seq Data</a><ul>
<li class="chapter" data-level="3.1" data-path="processing-raw-scrna-seq-data.html"><a href="processing-raw-scrna-seq-data.html#fastqc"><i class="fa fa-check"></i><b>3.1</b> FastQC</a><ul>
<li class="chapter" data-level="3.1.1" data-path="processing-raw-scrna-seq-data.html"><a href="processing-raw-scrna-seq-data.html#solution-and-downloading-the-report"><i class="fa fa-check"></i><b>3.1.1</b> Solution and Downloading the Report</a></li>
</ul></li>
<li class="chapter" data-level="3.2" data-path="processing-raw-scrna-seq-data.html"><a href="processing-raw-scrna-seq-data.html#trimming-reads"><i class="fa fa-check"></i><b>3.2</b> Trimming Reads</a><ul>
<li class="chapter" data-level="3.2.1" data-path="processing-raw-scrna-seq-data.html"><a href="processing-raw-scrna-seq-data.html#solution"><i class="fa fa-check"></i><b>3.2.1</b> Solution</a></li>
</ul></li>
<li class="chapter" data-level="3.3" data-path="processing-raw-scrna-seq-data.html"><a href="processing-raw-scrna-seq-data.html#file-formats"><i class="fa fa-check"></i><b>3.3</b> File formats</a><ul>
<li class="chapter" data-level="3.3.1" data-path="processing-raw-scrna-seq-data.html"><a href="processing-raw-scrna-seq-data.html#fastq"><i class="fa fa-check"></i><b>3.3.1</b> FastQ</a></li>
<li class="chapter" data-level="3.3.2" data-path="processing-raw-scrna-seq-data.html"><a href="processing-raw-scrna-seq-data.html#bam"><i class="fa fa-check"></i><b>3.3.2</b> BAM</a></li>
<li class="chapter" data-level="3.3.3" data-path="processing-raw-scrna-seq-data.html"><a href="processing-raw-scrna-seq-data.html#cram"><i class="fa fa-check"></i><b>3.3.3</b> CRAM</a></li>
<li class="chapter" data-level="3.3.4" data-path="processing-raw-scrna-seq-data.html"><a href="processing-raw-scrna-seq-data.html#mannually-inspecting-files"><i class="fa fa-check"></i><b>3.3.4</b> Manually Inspecting files</a></li>
<li class="chapter" data-level="3.3.5" data-path="processing-raw-scrna-seq-data.html"><a href="processing-raw-scrna-seq-data.html#genome-fasta-gtf"><i class="fa fa-check"></i><b>3.3.5</b> Genome (FASTA, GTF)</a></li>
</ul></li>
<li class="chapter" data-level="3.4" data-path="processing-raw-scrna-seq-data.html"><a href="processing-raw-scrna-seq-data.html#demultiplexing"><i class="fa fa-check"></i><b>3.4</b> Demultiplexing</a><ul>
<li class="chapter" data-level="3.4.1" data-path="processing-raw-scrna-seq-data.html"><a href="processing-raw-scrna-seq-data.html#identifying-cell-containing-dropletsmicrowells"><i class="fa fa-check"></i><b>3.4.1</b> Identifying cell-containing droplets/microwells</a></li>
</ul></li>
<li class="chapter" data-level="3.5" data-path="processing-raw-scrna-seq-data.html"><a href="processing-raw-scrna-seq-data.html#using-star-to-align-reads"><i class="fa fa-check"></i><b>3.5</b> Using STAR to Align Reads</a><ul>
<li class="chapter" data-level="3.5.1" data-path="processing-raw-scrna-seq-data.html"><a href="processing-raw-scrna-seq-data.html#solution-for-star-alignment"><i class="fa fa-check"></i><b>3.5.1</b> Solution for STAR Alignment</a></li>
</ul></li>
<li class="chapter" data-level="3.6" data-path="processing-raw-scrna-seq-data.html"><a href="processing-raw-scrna-seq-data.html#kallisto-and-pseudo-alignment"><i class="fa fa-check"></i><b>3.6</b> Kallisto and Pseudo-Alignment</a><ul>
<li class="chapter" data-level="3.6.1" data-path="processing-raw-scrna-seq-data.html"><a href="processing-raw-scrna-seq-data.html#what-is-a-k-mer"><i class="fa fa-check"></i><b>3.6.1</b> What is a k-mer?</a></li>
<li class="chapter" data-level="3.6.2" data-path="processing-raw-scrna-seq-data.html"><a href="processing-raw-scrna-seq-data.html#why-map-k-mers-rather-than-reads"><i class="fa fa-check"></i><b>3.6.2</b> Why map k-mers rather than reads?</a></li>
<li class="chapter" data-level="3.6.3" data-path="processing-raw-scrna-seq-data.html"><a href="processing-raw-scrna-seq-data.html#kallistos-pseudo-mode"><i class="fa fa-check"></i><b>3.6.3</b> Kallisto’s pseudo mode</a></li>
<li class="chapter" data-level="3.6.4" data-path="processing-raw-scrna-seq-data.html"><a href="processing-raw-scrna-seq-data.html#solution-to-kallisto-pseudo-alignment"><i class="fa fa-check"></i><b>3.6.4</b> Solution to Kallisto Pseudo-Alignment</a></li>
<li class="chapter" data-level="3.6.5" data-path="processing-raw-scrna-seq-data.html"><a href="processing-raw-scrna-seq-data.html#understanding-the-output-of-kallisto-pseudo-alignment"><i class="fa fa-check"></i><b>3.6.5</b> Understanding the Output of Kallisto Pseudo-Alignment</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="4" data-path="construction-of-expression-matrix.html"><a href="construction-of-expression-matrix.html"><i class="fa fa-check"></i><b>4</b> Construction of expression matrix</a><ul>
<li class="chapter" data-level="4.1" data-path="construction-of-expression-matrix.html"><a href="construction-of-expression-matrix.html#reads-qc"><i class="fa fa-check"></i><b>4.1</b> Reads QC</a></li>
<li class="chapter" data-level="4.2" data-path="construction-of-expression-matrix.html"><a href="construction-of-expression-matrix.html#reads-alignment"><i class="fa fa-check"></i><b>4.2</b> Reads alignment</a></li>
<li class="chapter" data-level="4.3" data-path="construction-of-expression-matrix.html"><a href="construction-of-expression-matrix.html#alignment-example"><i class="fa fa-check"></i><b>4.3</b> Alignment example</a></li>
<li class="chapter" data-level="4.4" data-path="construction-of-expression-matrix.html"><a href="construction-of-expression-matrix.html#mapping-qc"><i class="fa fa-check"></i><b>4.4</b> Mapping QC</a></li>
<li class="chapter" data-level="4.5" data-path="construction-of-expression-matrix.html"><a href="construction-of-expression-matrix.html#reads-quantification"><i class="fa fa-check"></i><b>4.5</b> Reads quantification</a></li>
<li class="chapter" data-level="4.6" data-path="construction-of-expression-matrix.html"><a href="construction-of-expression-matrix.html#umichapter"><i class="fa fa-check"></i><b>4.6</b> Unique Molecular Identifiers (UMIs)</a><ul>
<li class="chapter" data-level="4.6.1" data-path="construction-of-expression-matrix.html"><a href="construction-of-expression-matrix.html#introduction"><i class="fa fa-check"></i><b>4.6.1</b> Introduction</a></li>
<li class="chapter" data-level="4.6.2" data-path="construction-of-expression-matrix.html"><a href="construction-of-expression-matrix.html#mapping-barcodes"><i class="fa fa-check"></i><b>4.6.2</b> Mapping Barcodes</a></li>
<li class="chapter" data-level="4.6.3" data-path="construction-of-expression-matrix.html"><a href="construction-of-expression-matrix.html#counting-barcodes"><i class="fa fa-check"></i><b>4.6.3</b> Counting Barcodes</a></li>
<li class="chapter" data-level="4.6.4" data-path="construction-of-expression-matrix.html"><a href="construction-of-expression-matrix.html#correcting-for-errors"><i class="fa fa-check"></i><b>4.6.4</b> Correcting for Errors</a></li>
<li class="chapter" data-level="4.6.5" data-path="construction-of-expression-matrix.html"><a href="construction-of-expression-matrix.html#downstream-analysis"><i class="fa fa-check"></i><b>4.6.5</b> Downstream Analysis</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="5" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html"><i class="fa fa-check"></i><b>5</b> Introduction to R/Bioconductor</a><ul>
<li class="chapter" data-level="5.1" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#installing-packages"><i class="fa fa-check"></i><b>5.1</b> Installing packages</a><ul>
<li class="chapter" data-level="5.1.1" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#cran"><i class="fa fa-check"></i><b>5.1.1</b> CRAN</a></li>
<li class="chapter" data-level="5.1.2" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#github-1"><i class="fa fa-check"></i><b>5.1.2</b> Github</a></li>
<li class="chapter" data-level="5.1.3" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#bioconductor"><i class="fa fa-check"></i><b>5.1.3</b> Bioconductor</a></li>
<li class="chapter" data-level="5.1.4" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#source"><i class="fa fa-check"></i><b>5.1.4</b> Source</a></li>
</ul></li>
<li class="chapter" data-level="5.2" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#installation-instructions"><i class="fa fa-check"></i><b>5.2</b> Installation instructions:</a></li>
<li class="chapter" data-level="5.3" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#data-typesclasses"><i class="fa fa-check"></i><b>5.3</b> Data-types/classes</a><ul>
<li class="chapter" data-level="5.3.1" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#numeric"><i class="fa fa-check"></i><b>5.3.1</b> Numeric</a></li>
<li class="chapter" data-level="5.3.2" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#characterstring"><i class="fa fa-check"></i><b>5.3.2</b> Character/String</a></li>
<li class="chapter" data-level="5.3.3" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#logical"><i class="fa fa-check"></i><b>5.3.3</b> Logical</a></li>
<li class="chapter" data-level="5.3.4" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#factors"><i class="fa fa-check"></i><b>5.3.4</b> Factors</a></li>
<li class="chapter" data-level="5.3.5" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#checking-classtype"><i class="fa fa-check"></i><b>5.3.5</b> Checking class/type</a></li>
</ul></li>
<li class="chapter" data-level="5.4" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#basic-data-structures"><i class="fa fa-check"></i><b>5.4</b> Basic data structures</a></li>
<li class="chapter" data-level="5.5" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#more-information"><i class="fa fa-check"></i><b>5.5</b> More information</a></li>
<li class="chapter" data-level="5.6" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#data-types"><i class="fa fa-check"></i><b>5.6</b> Data Types</a><ul>
<li class="chapter" data-level="5.6.1" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#what-is-tidy-data"><i class="fa fa-check"></i><b>5.6.1</b> What is Tidy Data?</a></li>
<li class="chapter" data-level="5.6.2" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#what-is-rich-data"><i class="fa fa-check"></i><b>5.6.2</b> What is Rich Data?</a></li>
<li class="chapter" data-level="5.6.3" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#what-is-bioconductor"><i class="fa fa-check"></i><b>5.6.3</b> What is Bioconductor?</a></li>
<li class="chapter" data-level="5.6.4" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#singlecellexperiment-class"><i class="fa fa-check"></i><b>5.6.4</b> <code>SingleCellExperiment</code> class</a></li>
<li class="chapter" data-level="5.6.5" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#scater-package"><i class="fa fa-check"></i><b>5.6.5</b> <code>scater</code> package</a></li>
</ul></li>
<li class="chapter" data-level="5.7" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#bioconductor-singlecellexperiment-and-scater"><i class="fa fa-check"></i><b>5.7</b> Bioconductor, <code>SingleCellExperiment</code> and <code>scater</code></a><ul>
<li class="chapter" data-level="5.7.1" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#bioconductor-1"><i class="fa fa-check"></i><b>5.7.1</b> Bioconductor</a></li>
<li class="chapter" data-level="5.7.2" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#singlecellexperiment-class-1"><i class="fa fa-check"></i><b>5.7.2</b> <code>SingleCellExperiment</code> class</a></li>
<li class="chapter" data-level="5.7.3" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#scater-package-1"><i class="fa fa-check"></i><b>5.7.3</b> <code>scater</code> package</a></li>
</ul></li>
<li class="chapter" data-level="5.8" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#an-introduction-to-ggplot2"><i class="fa fa-check"></i><b>5.8</b> An Introduction to ggplot2</a><ul>
<li class="chapter" data-level="5.8.1" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#what-is-ggplot2"><i class="fa fa-check"></i><b>5.8.1</b> What is ggplot2?</a></li>
<li class="chapter" data-level="5.8.2" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#principles-of-ggplot2"><i class="fa fa-check"></i><b>5.8.2</b> Principles of ggplot2</a></li>
<li class="chapter" data-level="5.8.3" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#using-the-aes-mapping-function"><i class="fa fa-check"></i><b>5.8.3</b> Using the <code>aes</code> mapping function</a></li>
<li class="chapter" data-level="5.8.4" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#geoms"><i class="fa fa-check"></i><b>5.8.4</b> Geoms</a></li>
<li class="chapter" data-level="5.8.5" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#plotting-data-from-more-than-2-cells"><i class="fa fa-check"></i><b>5.8.5</b> Plotting data from more than 2 cells</a></li>
<li class="chapter" data-level="5.8.6" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#plotting-heatmaps"><i class="fa fa-check"></i><b>5.8.6</b> Plotting heatmaps</a></li>
<li class="chapter" data-level="5.8.7" data-path="introduction-to-rbioconductor.html"><a href="introduction-to-rbioconductor.html#principle-component-analysis"><i class="fa fa-check"></i><b>5.8.7</b> Principal Component Analysis</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="6" data-path="tabula-muris.html"><a href="tabula-muris.html"><i class="fa fa-check"></i><b>6</b> Tabula Muris</a><ul>
<li class="chapter" data-level="6.1" data-path="tabula-muris.html"><a href="tabula-muris.html#introduction-1"><i class="fa fa-check"></i><b>6.1</b> Introduction</a></li>
<li class="chapter" data-level="6.2" data-path="tabula-muris.html"><a href="tabula-muris.html#downloading-the-data"><i class="fa fa-check"></i><b>6.2</b> Downloading the data</a></li>
<li class="chapter" data-level="6.3" data-path="tabula-muris.html"><a href="tabula-muris.html#reading-the-data-smartseq2"><i class="fa fa-check"></i><b>6.3</b> Reading the data (Smartseq2)</a></li>
<li class="chapter" data-level="6.4" data-path="tabula-muris.html"><a href="tabula-muris.html#building-a-scater-object"><i class="fa fa-check"></i><b>6.4</b> Building a scater object</a></li>
<li class="chapter" data-level="6.5" data-path="tabula-muris.html"><a href="tabula-muris.html#reading-the-data-10x"><i class="fa fa-check"></i><b>6.5</b> Reading the data (10X)</a></li>
<li class="chapter" data-level="6.6" data-path="tabula-muris.html"><a href="tabula-muris.html#building-a-scater-object-1"><i class="fa fa-check"></i><b>6.6</b> Building a scater object</a></li>
<li class="chapter" data-level="6.7" data-path="tabula-muris.html"><a href="tabula-muris.html#advanced-exercise"><i class="fa fa-check"></i><b>6.7</b> Advanced Exercise</a></li>
</ul></li>
<li class="chapter" data-level="7" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html"><i class="fa fa-check"></i><b>7</b> Cleaning the Expression Matrix</a><ul>
<li class="chapter" data-level="7.1" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#exprs-qc"><i class="fa fa-check"></i><b>7.1</b> Expression QC (UMI)</a><ul>
<li class="chapter" data-level="7.1.1" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#introduction-2"><i class="fa fa-check"></i><b>7.1.1</b> Introduction</a></li>
<li class="chapter" data-level="7.1.2" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#tung-dataset"><i class="fa fa-check"></i><b>7.1.2</b> Tung dataset</a></li>
<li class="chapter" data-level="7.1.3" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#cell-qc"><i class="fa fa-check"></i><b>7.1.3</b> Cell QC</a></li>
<li class="chapter" data-level="7.1.4" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#cell-filtering"><i class="fa fa-check"></i><b>7.1.4</b> Cell filtering</a></li>
<li class="chapter" data-level="7.1.5" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#compare-filterings"><i class="fa fa-check"></i><b>7.1.5</b> Compare filterings</a></li>
<li class="chapter" data-level="7.1.6" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#gene-analysis"><i class="fa fa-check"></i><b>7.1.6</b> Gene analysis</a></li>
<li class="chapter" data-level="7.1.7" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#save-the-data"><i class="fa fa-check"></i><b>7.1.7</b> Save the data</a></li>
<li class="chapter" data-level="7.1.8" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#big-exercise"><i class="fa fa-check"></i><b>7.1.8</b> Big Exercise</a></li>
<li class="chapter" data-level="7.1.9" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#sessioninfo"><i class="fa fa-check"></i><b>7.1.9</b> sessionInfo()</a></li>
</ul></li>
<li class="chapter" data-level="7.2" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#expression-qc-reads"><i class="fa fa-check"></i><b>7.2</b> Expression QC (Reads)</a></li>
<li class="chapter" data-level="7.3" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#data-visualization"><i class="fa fa-check"></i><b>7.3</b> Data visualization</a><ul>
<li class="chapter" data-level="7.3.1" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#introduction-3"><i class="fa fa-check"></i><b>7.3.1</b> Introduction</a></li>
<li class="chapter" data-level="7.3.2" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#visual-pca"><i class="fa fa-check"></i><b>7.3.2</b> PCA plot</a></li>
<li class="chapter" data-level="7.3.3" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#visual-tsne"><i class="fa fa-check"></i><b>7.3.3</b> tSNE map</a></li>
<li class="chapter" data-level="7.3.4" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#big-exercise-1"><i class="fa fa-check"></i><b>7.3.4</b> Big Exercise</a></li>
<li class="chapter" data-level="7.3.5" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#sessioninfo-1"><i class="fa fa-check"></i><b>7.3.5</b> sessionInfo()</a></li>
</ul></li>
<li class="chapter" data-level="7.4" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#data-visualization-reads"><i class="fa fa-check"></i><b>7.4</b> Data visualization (Reads)</a></li>
<li class="chapter" data-level="7.5" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#identifying-confounding-factors"><i class="fa fa-check"></i><b>7.5</b> Identifying confounding factors</a><ul>
<li class="chapter" data-level="7.5.1" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#introduction-4"><i class="fa fa-check"></i><b>7.5.1</b> Introduction</a></li>
<li class="chapter" data-level="7.5.2" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#correlations-with-pcs"><i class="fa fa-check"></i><b>7.5.2</b> Correlations with PCs</a></li>
<li class="chapter" data-level="7.5.3" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#explanatory-variables"><i class="fa fa-check"></i><b>7.5.3</b> Explanatory variables</a></li>
<li class="chapter" data-level="7.5.4" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#other-confounders"><i class="fa fa-check"></i><b>7.5.4</b> Other confounders</a></li>
<li class="chapter" data-level="7.5.5" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#exercise"><i class="fa fa-check"></i><b>7.5.5</b> Exercise</a></li>
<li class="chapter" data-level="7.5.6" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#sessioninfo-2"><i class="fa fa-check"></i><b>7.5.6</b> sessionInfo()</a></li>
</ul></li>
<li class="chapter" data-level="7.6" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#identifying-confounding-factors-reads"><i class="fa fa-check"></i><b>7.6</b> Identifying confounding factors (Reads)</a></li>
<li class="chapter" data-level="7.7" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#normalization-theory"><i class="fa fa-check"></i><b>7.7</b> Normalization theory</a><ul>
<li class="chapter" data-level="7.7.1" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#introduction-5"><i class="fa fa-check"></i><b>7.7.1</b> Introduction</a></li>
<li class="chapter" data-level="7.7.2" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#library-size-1"><i class="fa fa-check"></i><b>7.7.2</b> Library size</a></li>
<li class="chapter" data-level="7.7.3" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#normalisations"><i class="fa fa-check"></i><b>7.7.3</b> Normalisations</a></li>
<li class="chapter" data-level="7.7.4" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#effectiveness"><i class="fa fa-check"></i><b>7.7.4</b> Effectiveness</a></li>
</ul></li>
<li class="chapter" data-level="7.8" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#normalization-practice-umi"><i class="fa fa-check"></i><b>7.8</b> Normalization practice (UMI)</a><ul>
<li class="chapter" data-level="7.8.1" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#raw"><i class="fa fa-check"></i><b>7.8.1</b> Raw</a></li>
<li class="chapter" data-level="7.8.2" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#cpm-1"><i class="fa fa-check"></i><b>7.8.2</b> CPM</a></li>
<li class="chapter" data-level="7.8.3" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#size-factor-rle"><i class="fa fa-check"></i><b>7.8.3</b> Size-factor (RLE)</a></li>
<li class="chapter" data-level="7.8.4" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#upperquantile"><i class="fa fa-check"></i><b>7.8.4</b> Upperquantile</a></li>
<li class="chapter" data-level="7.8.5" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#tmm-1"><i class="fa fa-check"></i><b>7.8.5</b> TMM</a></li>
<li class="chapter" data-level="7.8.6" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#scran-1"><i class="fa fa-check"></i><b>7.8.6</b> scran</a></li>
<li class="chapter" data-level="7.8.7" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#downsampling-1"><i class="fa fa-check"></i><b>7.8.7</b> Downsampling</a></li>
<li class="chapter" data-level="7.8.8" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#normalisation-for-genetranscript-length"><i class="fa fa-check"></i><b>7.8.8</b> Normalisation for gene/transcript length</a></li>
<li class="chapter" data-level="7.8.9" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#exercise-1"><i class="fa fa-check"></i><b>7.8.9</b> Exercise</a></li>
<li class="chapter" data-level="7.8.10" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#sessioninfo-3"><i class="fa fa-check"></i><b>7.8.10</b> sessionInfo()</a></li>
</ul></li>
<li class="chapter" data-level="7.9" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#normalization-practice-reads"><i class="fa fa-check"></i><b>7.9</b> Normalization practice (Reads)</a></li>
<li class="chapter" data-level="7.10" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#dealing-with-confounders"><i class="fa fa-check"></i><b>7.10</b> Dealing with confounders</a><ul>
<li class="chapter" data-level="7.10.1" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#introduction-6"><i class="fa fa-check"></i><b>7.10.1</b> Introduction</a></li>
<li class="chapter" data-level="7.10.2" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#remove-unwanted-variation"><i class="fa fa-check"></i><b>7.10.2</b> Remove Unwanted Variation</a></li>
<li class="chapter" data-level="7.10.3" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#combat"><i class="fa fa-check"></i><b>7.10.3</b> Combat</a></li>
<li class="chapter" data-level="7.10.4" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#mnncorrect"><i class="fa fa-check"></i><b>7.10.4</b> mnnCorrect</a></li>
<li class="chapter" data-level="7.10.5" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#glm"><i class="fa fa-check"></i><b>7.10.5</b> GLM</a></li>
<li class="chapter" data-level="7.10.6" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#how-to-evaluate-and-compare-confounder-removal-strategies"><i class="fa fa-check"></i><b>7.10.6</b> How to evaluate and compare confounder removal strategies</a></li>
<li class="chapter" data-level="7.10.7" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#big-exercise-2"><i class="fa fa-check"></i><b>7.10.7</b> Big Exercise</a></li>
<li class="chapter" data-level="7.10.8" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#sessioninfo-4"><i class="fa fa-check"></i><b>7.10.8</b> sessionInfo()</a></li>
</ul></li>
<li class="chapter" data-level="7.11" data-path="cleaning-the-expression-matrix.html"><a href="cleaning-the-expression-matrix.html#dealing-with-confounders-reads"><i class="fa fa-check"></i><b>7.11</b> Dealing with confounders (Reads)</a></li>
</ul></li>
<li class="chapter" data-level="8" data-path="biological-analysis.html"><a href="biological-analysis.html"><i class="fa fa-check"></i><b>8</b> Biological Analysis</a><ul>
<li class="chapter" data-level="8.1" data-path="biological-analysis.html"><a href="biological-analysis.html#clustering-introduction"><i class="fa fa-check"></i><b>8.1</b> Clustering Introduction</a><ul>
<li class="chapter" data-level="8.1.1" data-path="biological-analysis.html"><a href="biological-analysis.html#introduction-7"><i class="fa fa-check"></i><b>8.1.1</b> Introduction</a></li>
<li class="chapter" data-level="8.1.2" data-path="biological-analysis.html"><a href="biological-analysis.html#dimensionality-reductions"><i class="fa fa-check"></i><b>8.1.2</b> Dimensionality reductions</a></li>
<li class="chapter" data-level="8.1.3" data-path="biological-analysis.html"><a href="biological-analysis.html#clustering-methods"><i class="fa fa-check"></i><b>8.1.3</b> Clustering methods</a></li>
<li class="chapter" data-level="8.1.4" data-path="biological-analysis.html"><a href="biological-analysis.html#challenges-in-clustering"><i class="fa fa-check"></i><b>8.1.4</b> Challenges in clustering</a></li>
<li class="chapter" data-level="8.1.5" data-path="biological-analysis.html"><a href="biological-analysis.html#tools-for-scrna-seq-data"><i class="fa fa-check"></i><b>8.1.5</b> Tools for scRNA-seq data</a></li>
<li class="chapter" data-level="8.1.6" data-path="biological-analysis.html"><a href="biological-analysis.html#comparing-clustering"><i class="fa fa-check"></i><b>8.1.6</b> Comparing clustering</a></li>
</ul></li>
<li class="chapter" data-level="8.2" data-path="biological-analysis.html"><a href="biological-analysis.html#clust-methods"><i class="fa fa-check"></i><b>8.2</b> Clustering example</a><ul>
<li class="chapter" data-level="8.2.1" data-path="biological-analysis.html"><a href="biological-analysis.html#deng-dataset"><i class="fa fa-check"></i><b>8.2.1</b> Deng dataset</a></li>
<li class="chapter" data-level="8.2.2" data-path="biological-analysis.html"><a href="biological-analysis.html#sc3-1"><i class="fa fa-check"></i><b>8.2.2</b> SC3</a></li>
<li class="chapter" data-level="8.2.3" data-path="biological-analysis.html"><a href="biological-analysis.html#pcareduce-1"><i class="fa fa-check"></i><b>8.2.3</b> pcaReduce</a></li>
<li class="chapter" data-level="8.2.4" data-path="biological-analysis.html"><a href="biological-analysis.html#tsne-kmeans"><i class="fa fa-check"></i><b>8.2.4</b> tSNE + kmeans</a></li>
<li class="chapter" data-level="8.2.5" data-path="biological-analysis.html"><a href="biological-analysis.html#snn-cliq-1"><i class="fa fa-check"></i><b>8.2.5</b> SNN-Cliq</a></li>
<li class="chapter" data-level="8.2.6" data-path="biological-analysis.html"><a href="biological-analysis.html#sincera-1"><i class="fa fa-check"></i><b>8.2.6</b> SINCERA</a></li>
<li class="chapter" data-level="8.2.7" data-path="biological-analysis.html"><a href="biological-analysis.html#sessioninfo-5"><i class="fa fa-check"></i><b>8.2.7</b> sessionInfo()</a></li>
</ul></li>
<li class="chapter" data-level="8.3" data-path="biological-analysis.html"><a href="biological-analysis.html#feature-selection"><i class="fa fa-check"></i><b>8.3</b> Feature Selection</a><ul>
<li class="chapter" data-level="8.3.1" data-path="biological-analysis.html"><a href="biological-analysis.html#identifying-genes-vs-a-null-model"><i class="fa fa-check"></i><b>8.3.1</b> Identifying Genes vs a Null Model</a></li>
<li class="chapter" data-level="8.3.2" data-path="biological-analysis.html"><a href="biological-analysis.html#correlated-expression"><i class="fa fa-check"></i><b>8.3.2</b> Correlated Expression</a></li>
<li class="chapter" data-level="8.3.3" data-path="biological-analysis.html"><a href="biological-analysis.html#comparing-methods"><i class="fa fa-check"></i><b>8.3.3</b> Comparing Methods</a></li>
<li class="chapter" data-level="8.3.4" data-path="biological-analysis.html"><a href="biological-analysis.html#sessioninfo-6"><i class="fa fa-check"></i><b>8.3.4</b> sessionInfo()</a></li>
</ul></li>
<li class="chapter" data-level="8.4" data-path="biological-analysis.html"><a href="biological-analysis.html#pseudotime-analysis"><i class="fa fa-check"></i><b>8.4</b> Pseudotime analysis</a><ul>
<li class="chapter" data-level="8.4.1" data-path="biological-analysis.html"><a href="biological-analysis.html#tscan"><i class="fa fa-check"></i><b>8.4.1</b> TSCAN</a></li>
<li class="chapter" data-level="8.4.2" data-path="biological-analysis.html"><a href="biological-analysis.html#monocle"><i class="fa fa-check"></i><b>8.4.2</b> monocle</a></li>
<li class="chapter" data-level="8.4.3" data-path="biological-analysis.html"><a href="biological-analysis.html#diffusion-maps"><i class="fa fa-check"></i><b>8.4.3</b> Diffusion maps</a></li>
<li class="chapter" data-level="8.4.4" data-path="biological-analysis.html"><a href="biological-analysis.html#slicer"><i class="fa fa-check"></i><b>8.4.4</b> SLICER</a></li>
<li class="chapter" data-level="8.4.5" data-path="biological-analysis.html"><a href="biological-analysis.html#comparison-of-the-methods"><i class="fa fa-check"></i><b>8.4.5</b> Comparison of the methods</a></li>
<li class="chapter" data-level="8.4.6" data-path="biological-analysis.html"><a href="biological-analysis.html#expression-of-genes-through-time"><i class="fa fa-check"></i><b>8.4.6</b> Expression of genes through time</a></li>
<li class="chapter" data-level="8.4.7" data-path="biological-analysis.html"><a href="biological-analysis.html#sessioninfo-7"><i class="fa fa-check"></i><b>8.4.7</b> sessionInfo()</a></li>
</ul></li>
<li class="chapter" data-level="8.5" data-path="biological-analysis.html"><a href="biological-analysis.html#imputation"><i class="fa fa-check"></i><b>8.5</b> Imputation</a><ul>
<li class="chapter" data-level="8.5.1" data-path="biological-analysis.html"><a href="biological-analysis.html#scimpute"><i class="fa fa-check"></i><b>8.5.1</b> scImpute</a></li>
<li class="chapter" data-level="8.5.2" data-path="biological-analysis.html"><a href="biological-analysis.html#magic"><i class="fa fa-check"></i><b>8.5.2</b> MAGIC</a></li>
<li class="chapter" data-level="8.5.3" data-path="biological-analysis.html"><a href="biological-analysis.html#sessioninfo-8"><i class="fa fa-check"></i><b>8.5.3</b> sessionInfo()</a></li>
</ul></li>
<li class="chapter" data-level="8.6" data-path="biological-analysis.html"><a href="biological-analysis.html#dechapter"><i class="fa fa-check"></i><b>8.6</b> Differential Expression (DE) analysis</a><ul>
<li class="chapter" data-level="8.6.1" data-path="biological-analysis.html"><a href="biological-analysis.html#bulk-rna-seq-1"><i class="fa fa-check"></i><b>8.6.1</b> Bulk RNA-seq</a></li>
<li class="chapter" data-level="8.6.2" data-path="biological-analysis.html"><a href="biological-analysis.html#single-cell-rna-seq"><i class="fa fa-check"></i><b>8.6.2</b> Single cell RNA-seq</a></li>
<li class="chapter" data-level="8.6.3" data-path="biological-analysis.html"><a href="biological-analysis.html#differences-in-distribution"><i class="fa fa-check"></i><b>8.6.3</b> Differences in Distribution</a></li>
<li class="chapter" data-level="8.6.4" data-path="biological-analysis.html"><a href="biological-analysis.html#models-of-single-cell-rnaseq-data"><i class="fa fa-check"></i><b>8.6.4</b> Models of single-cell RNASeq data</a></li>
</ul></li>
<li class="chapter" data-level="8.7" data-path="biological-analysis.html"><a href="biological-analysis.html#de-in-a-real-dataset"><i class="fa fa-check"></i><b>8.7</b> DE in a real dataset</a><ul>
<li class="chapter" data-level="8.7.1" data-path="biological-analysis.html"><a href="biological-analysis.html#introduction-8"><i class="fa fa-check"></i><b>8.7.1</b> Introduction</a></li>
<li class="chapter" data-level="8.7.2" data-path="biological-analysis.html"><a href="biological-analysis.html#kolmogorov-smirnov-test"><i class="fa fa-check"></i><b>8.7.2</b> Kolmogorov-Smirnov test</a></li>
<li class="chapter" data-level="8.7.3" data-path="biological-analysis.html"><a href="biological-analysis.html#wilcoxmann-whitney-u-test"><i class="fa fa-check"></i><b>8.7.3</b> Wilcox/Mann-Whitney-U Test</a></li>
<li class="chapter" data-level="8.7.4" data-path="biological-analysis.html"><a href="biological-analysis.html#edger"><i class="fa fa-check"></i><b>8.7.4</b> edgeR</a></li>
<li class="chapter" data-level="8.7.5" data-path="biological-analysis.html"><a href="biological-analysis.html#monocle-1"><i class="fa fa-check"></i><b>8.7.5</b> Monocle</a></li>
<li class="chapter" data-level="8.7.6" data-path="biological-analysis.html"><a href="biological-analysis.html#mast"><i class="fa fa-check"></i><b>8.7.6</b> MAST</a></li>
<li class="chapter" data-level="8.7.7" data-path="biological-analysis.html"><a href="biological-analysis.html#slow-methods-1h-to-run"><i class="fa fa-check"></i><b>8.7.7</b> Slow Methods (&gt;1h to run)</a></li>
<li class="chapter" data-level="8.7.8" data-path="biological-analysis.html"><a href="biological-analysis.html#bpsc"><i class="fa fa-check"></i><b>8.7.8</b> BPSC</a></li>
<li class="chapter" data-level="8.7.9" data-path="biological-analysis.html"><a href="biological-analysis.html#scde"><i class="fa fa-check"></i><b>8.7.9</b> SCDE</a></li>
<li class="chapter" data-level="8.7.10" data-path="biological-analysis.html"><a href="biological-analysis.html#sessioninfo-9"><i class="fa fa-check"></i><b>8.7.10</b> sessionInfo()</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="9" data-path="comparingcombining-scrnaseq-datasets.html"><a href="comparingcombining-scrnaseq-datasets.html"><i class="fa fa-check"></i><b>9</b> Comparing/Combining scRNASeq datasets</a><ul>
<li class="chapter" data-level="9.1" data-path="comparingcombining-scrnaseq-datasets.html"><a href="comparingcombining-scrnaseq-datasets.html#introduction-9"><i class="fa fa-check"></i><b>9.1</b> Introduction</a></li>
<li class="chapter" data-level="9.2" data-path="comparingcombining-scrnaseq-datasets.html"><a href="comparingcombining-scrnaseq-datasets.html#datasets"><i class="fa fa-check"></i><b>9.2</b> Datasets</a></li>
<li class="chapter" data-level="9.3" data-path="comparingcombining-scrnaseq-datasets.html"><a href="comparingcombining-scrnaseq-datasets.html#projecting-cells-onto-annotated-cell-types-scmap"><i class="fa fa-check"></i><b>9.3</b> Projecting cells onto annotated cell-types (scmap)</a><ul>
<li class="chapter" data-level="9.3.1" data-path="comparingcombining-scrnaseq-datasets.html"><a href="comparingcombining-scrnaseq-datasets.html#cell-to-cell-mapping"><i class="fa fa-check"></i><b>9.3.1</b> Cell-to-Cell mapping</a></li>
</ul></li>
<li class="chapter" data-level="9.4" data-path="comparingcombining-scrnaseq-datasets.html"><a href="comparingcombining-scrnaseq-datasets.html#metaneighbour"><i class="fa fa-check"></i><b>9.4</b> Metaneighbour</a><ul>
<li class="chapter" data-level="9.4.1" data-path="comparingcombining-scrnaseq-datasets.html"><a href="comparingcombining-scrnaseq-datasets.html#prepare-data"><i class="fa fa-check"></i><b>9.4.1</b> Prepare Data</a></li>
</ul></li>
<li class="chapter" data-level="9.5" data-path="comparingcombining-scrnaseq-datasets.html"><a href="comparingcombining-scrnaseq-datasets.html#mnncorrect-1"><i class="fa fa-check"></i><b>9.5</b> mnnCorrect</a></li>
<li class="chapter" data-level="9.6" data-path="comparingcombining-scrnaseq-datasets.html"><a href="comparingcombining-scrnaseq-datasets.html#cannonical-correlation-analysis-seurat"><i class="fa fa-check"></i><b>9.6</b> Canonical Correlation Analysis (Seurat)</a><ul>
<li class="chapter" data-level="9.6.1" data-path="comparingcombining-scrnaseq-datasets.html"><a href="comparingcombining-scrnaseq-datasets.html#sessioninfo-10"><i class="fa fa-check"></i><b>9.6.1</b> sessionInfo()</a></li>
</ul></li>
<li class="chapter" data-level="9.7" data-path="comparingcombining-scrnaseq-datasets.html"><a href="comparingcombining-scrnaseq-datasets.html#search-scrna-seq-data"><i class="fa fa-check"></i><b>9.7</b> Search scRNA-Seq data</a><ul>
<li class="chapter" data-level="9.7.1" data-path="comparingcombining-scrnaseq-datasets.html"><a href="comparingcombining-scrnaseq-datasets.html#about"><i class="fa fa-check"></i><b>9.7.1</b> About</a></li>
<li class="chapter" data-level="9.7.2" data-path="comparingcombining-scrnaseq-datasets.html"><a href="comparingcombining-scrnaseq-datasets.html#dataset"><i class="fa fa-check"></i><b>9.7.2</b> Dataset</a></li>
<li class="chapter" data-level="9.7.3" data-path="comparingcombining-scrnaseq-datasets.html"><a href="comparingcombining-scrnaseq-datasets.html#gene-index"><i class="fa fa-check"></i><b>9.7.3</b> Gene Index</a></li>
<li class="chapter" data-level="9.7.4" data-path="comparingcombining-scrnaseq-datasets.html"><a href="comparingcombining-scrnaseq-datasets.html#marker-genes"><i class="fa fa-check"></i><b>9.7.4</b> Marker genes</a></li>
<li class="chapter" data-level="9.7.5" data-path="comparingcombining-scrnaseq-datasets.html"><a href="comparingcombining-scrnaseq-datasets.html#search-cells-by-a-gene-list"><i class="fa fa-check"></i><b>9.7.5</b> Search cells by a gene list</a></li>
<li class="chapter" data-level="9.7.6" data-path="comparingcombining-scrnaseq-datasets.html"><a href="comparingcombining-scrnaseq-datasets.html#sessioninfo-11"><i class="fa fa-check"></i><b>9.7.6</b> sessionInfo()</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="10" data-path="seurat-chapter.html"><a href="seurat-chapter.html"><i class="fa fa-check"></i><b>10</b> Seurat</a><ul>
<li class="chapter" data-level="10.1" data-path="seurat-chapter.html"><a href="seurat-chapter.html#seurat-object-class"><i class="fa fa-check"></i><b>10.1</b> <code>Seurat</code> object class</a></li>
<li class="chapter" data-level="10.2" data-path="seurat-chapter.html"><a href="seurat-chapter.html#expression-qc"><i class="fa fa-check"></i><b>10.2</b> Expression QC</a></li>
<li class="chapter" data-level="10.3" data-path="seurat-chapter.html"><a href="seurat-chapter.html#normalization"><i class="fa fa-check"></i><b>10.3</b> Normalization</a></li>
<li class="chapter" data-level="10.4" data-path="seurat-chapter.html"><a href="seurat-chapter.html#highly-variable-genes-1"><i class="fa fa-check"></i><b>10.4</b> Highly variable genes</a></li>
<li class="chapter" data-level="10.5" data-path="seurat-chapter.html"><a href="seurat-chapter.html#dealing-with-confounders-1"><i class="fa fa-check"></i><b>10.5</b> Dealing with confounders</a></li>
<li class="chapter" data-level="10.6" data-path="seurat-chapter.html"><a href="seurat-chapter.html#linear-dimensionality-reduction"><i class="fa fa-check"></i><b>10.6</b> Linear dimensionality reduction</a></li>
<li class="chapter" data-level="10.7" data-path="seurat-chapter.html"><a href="seurat-chapter.html#significant-pcs"><i class="fa fa-check"></i><b>10.7</b> Significant PCs</a></li>
<li class="chapter" data-level="10.8" data-path="seurat-chapter.html"><a href="seurat-chapter.html#clustering-cells"><i class="fa fa-check"></i><b>10.8</b> Clustering cells</a></li>
<li class="chapter" data-level="10.9" data-path="seurat-chapter.html"><a href="seurat-chapter.html#marker-genes-1"><i class="fa fa-check"></i><b>10.9</b> Marker genes</a></li>
<li class="chapter" data-level="10.10" data-path="seurat-chapter.html"><a href="seurat-chapter.html#sessioninfo-12"><i class="fa fa-check"></i><b>10.10</b> sessionInfo()</a></li>
</ul></li>
<li class="chapter" data-level="11" data-path="ideal-scrnaseq-pipeline-as-of-oct-2017.html"><a href="ideal-scrnaseq-pipeline-as-of-oct-2017.html"><i class="fa fa-check"></i><b>11</b> “Ideal” scRNAseq pipeline (as of Oct 2017)</a><ul>
<li class="chapter" data-level="11.1" data-path="ideal-scrnaseq-pipeline-as-of-oct-2017.html"><a href="ideal-scrnaseq-pipeline-as-of-oct-2017.html#experimental-design"><i class="fa fa-check"></i><b>11.1</b> Experimental Design</a></li>
<li class="chapter" data-level="11.2" data-path="ideal-scrnaseq-pipeline-as-of-oct-2017.html"><a href="ideal-scrnaseq-pipeline-as-of-oct-2017.html#processing-reads"><i class="fa fa-check"></i><b>11.2</b> Processing Reads</a></li>
<li class="chapter" data-level="11.3" data-path="ideal-scrnaseq-pipeline-as-of-oct-2017.html"><a href="ideal-scrnaseq-pipeline-as-of-oct-2017.html#preparing-expression-matrix"><i class="fa fa-check"></i><b>11.3</b> Preparing Expression Matrix</a></li>
<li class="chapter" data-level="11.4" data-path="ideal-scrnaseq-pipeline-as-of-oct-2017.html"><a href="ideal-scrnaseq-pipeline-as-of-oct-2017.html#biological-interpretation"><i class="fa fa-check"></i><b>11.4</b> Biological Interpretation</a></li>
</ul></li>
<li class="chapter" data-level="12" data-path="advanced-exercises.html"><a href="advanced-exercises.html"><i class="fa fa-check"></i><b>12</b> Advanced exercises</a></li>
<li class="chapter" data-level="13" data-path="resources.html"><a href="resources.html"><i class="fa fa-check"></i><b>13</b> Resources</a><ul>
<li class="chapter" data-level="13.1" data-path="resources.html"><a href="resources.html#scrna-seq-protocols"><i class="fa fa-check"></i><b>13.1</b> scRNA-seq protocols</a></li>
<li class="chapter" data-level="13.2" data-path="resources.html"><a href="resources.html#external-rna-control-consortium-ercc"><i class="fa fa-check"></i><b>13.2</b> External RNA Control Consortium (ERCC)</a></li>
<li class="chapter" data-level="13.3" data-path="resources.html"><a href="resources.html#scrna-seq-analysis-tools"><i class="fa fa-check"></i><b>13.3</b> scRNA-seq analysis tools</a></li>
<li class="chapter" data-level="13.4" data-path="resources.html"><a href="resources.html#scrna-seq-public-datasets"><i class="fa fa-check"></i><b>13.4</b> scRNA-seq public datasets</a></li>
</ul></li>
<li class="chapter" data-level="14" data-path="references.html"><a href="references.html"><i class="fa fa-check"></i><b>14</b> References</a></li>
<li class="divider"></li>
<li><a href="http://www.sanger.ac.uk/science/groups/hemberg-group" target="_blank" rel="noopener noreferrer">Hemberg Lab</a></li>

</ul>

      </nav>
    </div>

    <div class="book-body">
      <div class="body-inner">
        <div class="book-header" role="navigation">
          <h1>
            <i class="fa fa-circle-o-notch fa-spin"></i><a href="./">Analysis of single cell RNA-seq data</a>
          </h1>
        </div>

        <div class="page-wrapper" tabindex="-1" role="main">
          <div class="page-inner">

            <section class="normal" id="section-">
<div id="comparingcombining-scrnaseq-datasets" class="section level1">
<h1><span class="header-section-number">9</span> Comparing/Combining scRNASeq datasets</h1>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(scater)
<span class="kw">library</span>(SingleCellExperiment)</code></pre></div>
<div id="introduction-9" class="section level2">
<h2><span class="header-section-number">9.1</span> Introduction</h2>
<p>As more and more scRNA-seq datasets become available, carrying out comparisons between them is key. There are two main approaches to comparing scRNASeq datasets. The first approach is “label-centric” which is focused on trying to identify equivalent cell-types/states across datasets by comparing individual cells or groups of cells. The other approach is “cross-dataset normalization” which attempts to computationally remove experiment-specific technical/biological effects so that data from multiple experiments can be combined and jointly analyzed.</p>
<p>The label-centric approach can be used with datasets with high-confidence cell-annotations, e.g. the Human Cell Atlas (HCA) <span class="citation">(Regev et al. <a href="#ref-Regev2017-mw">2017</a>)</span> or the Tabula Muris <span class="citation">(<span class="citeproc-not-found" data-reference-id="Quake2017"><strong>???</strong></span>)</span> once they are completed, to project cells or clusters from a new sample onto this reference to consider tissue composition and/or identify cells with novel/unknown identity. Conceptually, such projections are similar to the popular BLAST method <span class="citation">(Altschul et al. <a href="#ref-Altschul1990-ts">1990</a>)</span>, which makes it possible to quickly find the closest match in a database for a newly identified nucleotide or amino acid sequence. The label-centric approach can also be used to compare datasets of similar biological origin collected by different labs to ensure that the annotation and the analysis are consistent.</p>
<div class="figure" style="text-align: center"><span id="fig:unnamed-chunk-3"></span>
<img src="figures/CourseCompareTypes.png" alt="Label-centric dataset comparison can be used to compare the annotations of two different samples."  />
<p class="caption">
Figure 2.4: Label-centric dataset comparison can be used to compare the annotations of two different samples.
</p>
</div>
<div class="figure" style="text-align: center"><span id="fig:unnamed-chunk-4"></span>
<img src="figures/CourseAtlasAssignment.png" alt="Label-centric dataset comparison can project cells from a new experiment onto an annotated reference."  />
<p class="caption">
Figure 2.5: Label-centric dataset comparison can project cells from a new experiment onto an annotated reference.
</p>
</div>
<p>The cross-dataset normalization approach can also be used to compare datasets of similar biological origin. Unlike the label-centric approach, it enables the joint analysis of multiple datasets to facilitate the identification of rare cell-types which may be too sparsely sampled in each individual dataset to be reliably detected. However, cross-dataset normalization is not applicable to very large and diverse references since it assumes a significant portion of the biological variability in each of the datasets overlaps with others.</p>
<div class="figure" style="text-align: center"><span id="fig:unnamed-chunk-5"></span>
<img src="figures/CourseCrossNorm.png" alt="Cross-dataset normalization enables joint-analysis of 2+ scRNASeq datasets."  />
<p class="caption">
Figure 2.6: Cross-dataset normalization enables joint-analysis of 2+ scRNASeq datasets.
</p>
</div>
</div>
<div id="datasets" class="section level2">
<h2><span class="header-section-number">9.2</span> Datasets</h2>
<p>We will be running these methods on two human pancreas datasets: <span class="citation">(Muraro et al. <a href="#ref-Muraro2016-yk">2016</a>)</span> and <span class="citation">(Segerstolpe et al. <a href="#ref-Segerstolpe2016-wc">2016</a>)</span>. Since the pancreas has been widely studied, these datasets are well annotated.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">muraro &lt;-<span class="st"> </span><span class="kw">readRDS</span>(<span class="st">&quot;pancreas/muraro.rds&quot;</span>)
segerstolpe &lt;-<span class="st"> </span><span class="kw">readRDS</span>(<span class="st">&quot;pancreas/segerstolpe.rds&quot;</span>)</code></pre></div>
<p>This data has already been formatted for scmap. Cell type labels must be stored in the <code>cell_type1</code> column of the <code>colData</code> slots, and gene ids that are consistent across both datasets must be stored in the <code>feature_symbol</code> column of the <code>rowData</code> slots.</p>
<p>First, let’s check that our gene-ids match across both datasets:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">sum</span>(<span class="kw">rowData</span>(muraro)<span class="op">$</span>feature_symbol <span class="op">%in%</span><span class="st"> </span><span class="kw">rowData</span>(segerstolpe)<span class="op">$</span>feature_symbol)<span class="op">/</span><span class="kw">nrow</span>(muraro)</code></pre></div>
<pre><code>## [1] 0.9599519</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">sum</span>(<span class="kw">rowData</span>(segerstolpe)<span class="op">$</span>feature_symbol <span class="op">%in%</span><span class="st"> </span><span class="kw">rowData</span>(muraro)<span class="op">$</span>feature_symbol)<span class="op">/</span><span class="kw">nrow</span>(segerstolpe)</code></pre></div>
<pre><code>## [1] 0.719334</code></pre>
<p>Here we can see that 96% of the genes present in muraro match genes in segerstolpe and 72% of genes in segerstolpe match genes in muraro. This is as expected because the segerstolpe dataset was more deeply sequenced than the muraro dataset. However, it highlights some of the difficulties in comparing scRNASeq datasets.</p>
<p>We can confirm this by checking the overall size of these two datasets.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">dim</span>(muraro)</code></pre></div>
<pre><code>## [1] 19127  2126</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">dim</span>(segerstolpe)</code></pre></div>
<pre><code>## [1] 25525  3514</code></pre>
<p>In addition, we can check the cell-type annotations for each of these datasets using the command below:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">summary</span>(<span class="kw">factor</span>(<span class="kw">colData</span>(muraro)<span class="op">$</span>cell_type1))</code></pre></div>
<pre><code>##      acinar       alpha        beta       delta      ductal endothelial 
##         219         812         448         193         245          21 
##     epsilon       gamma mesenchymal     unclear 
##           3         101          80           4</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">summary</span>(<span class="kw">factor</span>(<span class="kw">colData</span>(segerstolpe)<span class="op">$</span>cell_type1))</code></pre></div>
<pre><code>##                 acinar                  alpha                   beta 
##                    185                    886                    270 
##          co-expression                  delta                 ductal 
##                     39                    114                    386 
##            endothelial                epsilon                  gamma 
##                     16                      7                    197 
##                   mast           MHC class II         not applicable 
##                      7                      5                   1305 
##                    PSC           unclassified unclassified endocrine 
##                     54                      2                     41</code></pre>
<p>Here we can see that even though both datasets considered the same biological tissue, they have been annotated with slightly different sets of cell-types. If you are familiar with pancreas biology you might recognize that the pancreatic stellate cells (PSCs) in segerstolpe are a type of mesenchymal stem cell which would fall under the “mesenchymal” type in muraro. However, it isn’t clear whether these two annotations should be considered synonymous or not. We can use label-centric comparison methods to determine if these two cell-type annotations are indeed equivalent.</p>
<p>Alternatively, we might be interested in understanding the function of those cells that were “unclassified endocrine” or were deemed too poor quality (“not applicable”) for the original clustering in each dataset by leveraging information across datasets. Either we could attempt to infer which of the existing annotations they most likely belong to using label-centric approaches or we could try to uncover a novel cell-type among them (or a sub-type within the existing annotations) using cross-dataset normalization.</p>
<p>To simplify our demonstration analyses we will remove the small classes of unassigned cells, and the poor quality cells. We will retain the “unclassified endocrine” to see if any of these methods can elucidate what cell-type they belong to.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">segerstolpe &lt;-<span class="st"> </span>segerstolpe[,<span class="kw">colData</span>(segerstolpe)<span class="op">$</span>cell_type1 <span class="op">!=</span><span class="st"> &quot;unclassified&quot;</span>]
segerstolpe &lt;-<span class="st"> </span>segerstolpe[,<span class="kw">colData</span>(segerstolpe)<span class="op">$</span>cell_type1 <span class="op">!=</span><span class="st"> &quot;not applicable&quot;</span>,]
muraro &lt;-<span class="st"> </span>muraro[,<span class="kw">colData</span>(muraro)<span class="op">$</span>cell_type1 <span class="op">!=</span><span class="st"> &quot;unclear&quot;</span>]</code></pre></div>
</div>
<div id="projecting-cells-onto-annotated-cell-types-scmap" class="section level2">
<h2><span class="header-section-number">9.3</span> Projecting cells onto annotated cell-types (scmap)</h2>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(scmap)
<span class="kw">set.seed</span>(<span class="dv">1234567</span>)</code></pre></div>
<p>We recently developed <code>scmap</code> <span class="citation">(Kiselev and Hemberg <a href="#ref-Kiselev2017-nb">2017</a>)</span> - a method for projecting cells from a scRNA-seq experiment onto the cell-types identified in other experiments. Additionally, a cloud version of <code>scmap</code> can be run for free, without restrictions, from <a href="http://www.hemberg-lab.cloud/scmap" class="uri">http://www.hemberg-lab.cloud/scmap</a>.</p>
<div id="feature-selection-1" class="section level4">
<h4><span class="header-section-number">9.3.0.1</span> Feature Selection</h4>
<p>Once we have a <code>SingleCellExperiment</code> object we can run <code>scmap</code>. First we have to build the “index” of our reference clusters. Since we want to know whether PSCs and mesenchymal cells are synonymous we will project each dataset to the other so we will build an index for each dataset. This requires first selecting the most informative features for the reference dataset.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">muraro &lt;-<span class="st"> </span><span class="kw">selectFeatures</span>(muraro, <span class="dt">suppress_plot =</span> <span class="ot">FALSE</span>)</code></pre></div>
<pre><code>## Warning in linearModel(object, n_features): Your object does not contain
## counts() slot. Dropouts were calculated using logcounts() slot...</code></pre>
<p><img src="31-projection_files/figure-html/unnamed-chunk-12-1.png" width="672" style="display: block; margin: auto;" /></p>
<p>Genes highlighted with the red colour will be used in the further analysis (projection).</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">segerstolpe &lt;-<span class="st"> </span><span class="kw">selectFeatures</span>(segerstolpe, <span class="dt">suppress_plot =</span> <span class="ot">FALSE</span>)</code></pre></div>
<p><img src="31-projection_files/figure-html/unnamed-chunk-13-1.png" width="672" style="display: block; margin: auto;" /> From the y-axis of these plots we can see that scmap uses a dropout-based feature selection method.</p>
<p>Now calculate the cell-type index:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">muraro &lt;-<span class="st"> </span><span class="kw">indexCluster</span>(muraro)
segerstolpe &lt;-<span class="st"> </span><span class="kw">indexCluster</span>(segerstolpe)</code></pre></div>
<p>We can also visualize the index:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">heatmap</span>(<span class="kw">as.matrix</span>(<span class="kw">metadata</span>(muraro)<span class="op">$</span>scmap_cluster_index))</code></pre></div>
<p><img src="31-projection_files/figure-html/unnamed-chunk-15-1.png" width="672" style="display: block; margin: auto;" /></p>
<p>You may want to adjust your features using the <code>setFeatures</code> function if features are too heavily concentrated in only a few cell-types. In this case the dropout-based features look good so we will just use them.</p>
<p><strong>Exercise</strong> Using the rowData of each dataset, how many genes were selected as features in both datasets? What does this tell you about these datasets?</p>
<p><strong>Answer</strong></p>
</div>
<div id="projecting" class="section level4">
<h4><span class="header-section-number">9.3.0.2</span> Projecting</h4>
<p>scmap computes the distance from each cell to each cell-type in the reference index, then applies an empirically derived threshold to determine which cells are assigned to the closest reference cell-type and which are unassigned. To account for differences in sequencing depth distance is calculated using the spearman correlation and cosine distance and only cells with a consistent assignment with both distances are returned as assigned.</p>
<p>We will project the <code>segerstolpe</code> dataset to <code>muraro</code> dataset:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">seger_to_muraro &lt;-<span class="st"> </span><span class="kw">scmapCluster</span>(
  <span class="dt">projection =</span> segerstolpe,
  <span class="dt">index_list =</span> <span class="kw">list</span>(
    <span class="dt">muraro =</span> <span class="kw">metadata</span>(muraro)<span class="op">$</span>scmap_cluster_index
  )
)</code></pre></div>
<p>and <code>muraro</code> onto <code>segerstolpe</code></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">muraro_to_seger &lt;-<span class="st"> </span><span class="kw">scmapCluster</span>(
  <span class="dt">projection =</span> muraro,
  <span class="dt">index_list =</span> <span class="kw">list</span>(
    <span class="dt">seger =</span> <span class="kw">metadata</span>(segerstolpe)<span class="op">$</span>scmap_cluster_index
  )
)</code></pre></div>
<p>Note that in each case we are projecting to a single dataset but that this could be extended to any number of datasets for which we have computed indices.</p>
<p>Now let’s compare the original cell-type labels with the projected labels:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">table</span>(<span class="kw">colData</span>(muraro)<span class="op">$</span>cell_type1, muraro_to_seger<span class="op">$</span>scmap_cluster_labs)</code></pre></div>
<pre><code>##              
##               acinar alpha beta co-expression delta ductal endothelial
##   acinar         211     0    0             0     0      0           0
##   alpha            1   763    0            18     0      2           0
##   beta             2     1  397             7     2      2           0
##   delta            0     0    2             1   173      0           0
##   ductal           7     0    0             0     0    208           0
##   endothelial      0     0    0             0     0      0          15
##   epsilon          0     0    0             0     0      0           0
##   gamma            2     0    0             0     0      0           0
##   mesenchymal      0     0    0             0     0      1           0
##              
##               epsilon gamma MHC class II PSC unassigned
##   acinar            0     0            0   0          8
##   alpha             0     2            0   0         26
##   beta              0     5            1   2         29
##   delta             0     0            0   0         17
##   ductal            0     0            5   3         22
##   endothelial       0     0            0   1          5
##   epsilon           3     0            0   0          0
##   gamma             0    95            0   0          4
##   mesenchymal       0     0            0  77          2</code></pre>
<p>Here we can see that cell-types do map to their equivalents in segerstolpe, and importantly we see that 77 of the 80 “mesenchymal” cells were assigned to the “PSC” class (one was assigned to “ductal” and two were left unassigned).</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">table</span>(<span class="kw">colData</span>(segerstolpe)<span class="op">$</span>cell_type1, seger_to_muraro<span class="op">$</span>scmap_cluster_labs)</code></pre></div>
<pre><code>##                         
##                          acinar alpha beta delta ductal endothelial
##   acinar                    181     0    0     0      4           0
##   alpha                       0   869    1     0      0           0
##   beta                        0     0  260     0      0           0
##   co-expression               0     7   31     0      0           0
##   delta                       0     0    1   111      0           0
##   ductal                      0     0    0     0    383           0
##   endothelial                 0     0    0     0      0          14
##   epsilon                     0     0    0     0      0           0
##   gamma                       0     2    0     0      0           0
##   mast                        0     0    0     0      0           0
##   MHC class II                0     0    0     0      0           0
##   PSC                         0     0    1     0      0           0
##   unclassified endocrine      0     0    0     0      0           0
##                         
##                          epsilon gamma mesenchymal unassigned
##   acinar                       0     0           0          0
##   alpha                        0     0           0         16
##   beta                         0     0           0         10
##   co-expression                0     0           0          1
##   delta                        0     0           0          2
##   ductal                       0     0           0          3
##   endothelial                  0     0           0          2
##   epsilon                      6     0           0          1
##   gamma                        0   192           0          3
##   mast                         0     0           0          7
##   MHC class II                 0     0           0          5
##   PSC                          0     0          53          0
##   unclassified endocrine       0     0           0         41</code></pre>
<p>Again we see cell-types match each other and that all but one of the “PSCs” match the “mesenchymal” cells providing strong evidence that these two annotations should be considered synonymous.</p>
<p>We can also visualize these tables using a <a href="https://developers.google.com/chart/interactive/docs/gallery/sankey">Sankey diagram</a>:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">plot</span>(<span class="kw">getSankey</span>(<span class="kw">colData</span>(muraro)<span class="op">$</span>cell_type1,  muraro_to_seger<span class="op">$</span>scmap_cluster_labs[,<span class="dv">1</span>], <span class="dt">plot_height=</span><span class="dv">400</span>))</code></pre></div>
<!-- Sankey generated in R 3.4.3 by googleVis 0.6.2 package -->
<!-- Sat Feb  3 15:39:10 2018 -->
<!-- jsHeader -->
<script type="text/javascript">
 
// jsData 
function gvisDataSankeyID7ae6f87c0b2 () {
  // Builds the DataTable behind the Sankey diagram. Each row is
  // [source label, target label, number of cells]. Source labels carry a
  // trailing space and target labels a leading space, so a label that occurs
  // on both sides (e.g. "alpha") is still two distinct strings; the padding
  // must be kept exactly as written.
  var flows = [
    ["alpha ", " alpha", 763],
    ["beta ", " beta", 397],
    ["acinar ", " acinar", 211],
    ["ductal ", " ductal", 208],
    ["delta ", " delta", 173],
    ["gamma ", " gamma", 95],
    ["mesenchymal ", " PSC", 77],
    ["endothelial ", " endothelial", 15],
    ["epsilon ", " epsilon", 3],
    ["acinar ", " unassigned", 8],
    ["alpha ", " ductal", 2],
    ["alpha ", " unassigned", 26],
    ["alpha ", " acinar", 1],
    ["alpha ", " co-expression", 18],
    ["alpha ", " gamma", 2],
    ["beta ", " unassigned", 29],
    ["beta ", " gamma", 5],
    ["beta ", " MHC class II", 1],
    ["beta ", " alpha", 1],
    ["beta ", " co-expression", 7],
    ["beta ", " acinar", 2],
    ["beta ", " PSC", 2],
    ["beta ", " ductal", 2],
    ["beta ", " delta", 2],
    ["delta ", " beta", 2],
    ["delta ", " unassigned", 17],
    ["delta ", " co-expression", 1],
    ["ductal ", " acinar", 7],
    ["ductal ", " PSC", 3],
    ["ductal ", " MHC class II", 5],
    ["ductal ", " unassigned", 22],
    ["endothelial ", " PSC", 1],
    ["endothelial ", " unassigned", 5],
    ["gamma ", " acinar", 2],
    ["gamma ", " unassigned", 4],
    ["mesenchymal ", " ductal", 1],
    ["mesenchymal ", " unassigned", 2]
  ];

  var table = new google.visualization.DataTable();
  table.addColumn('string', 'From');
  table.addColumn('string', 'To');
  table.addColumn('number', '# of cells');
  table.addRows(flows);
  return table;
}
 
// jsDrawChart
function drawChartSankeyID7ae6f87c0b2() {
  // Draws the Sankey diagram into the element with id 'SankeyID7ae6f87c0b2'.
  // Called from the shared callback queue after the visualization library
  // has been loaded.
  var table = gvisDataSankeyID7ae6f87c0b2();

  // Label styling applied to every node in the diagram.
  var nodeLabel = {
    fontName: 'Arial',
    fontSize: 11,
    color: '#000000',
    bold: true,
    italic: false
  };

  var options = {
    width: 400,
    height: 400,
    sankey: {
      node: {
        label: nodeLabel,
        colors: '#FFFFFF',
        nodePadding: 12
      },
      iterations: 0
    }
  };

  var container = document.getElementById('SankeyID7ae6f87c0b2');
  var chart = new google.visualization.Sankey(container);
  chart.draw(table, options);
}
  
 
// jsDisplayChart
(function () {
  // Registers this chart with the page-wide googleVis bookkeeping: the list
  // of packages to load and the queue of draw callbacks. Both live on
  // `window` so that every chart on the page shares them.
  var packages = window.__gvisPackages = window.__gvisPackages || [];
  var drawQueue = window.__gvisCallbacks = window.__gvisCallbacks || [];
  var chartid = "sankey";

  // Linear scan rather than Array.indexOf, which not all browsers support.
  var alreadyListed = false;
  for (var idx = 0; !alreadyListed && idx < packages.length; idx++) {
    if (packages[idx] === chartid) {
      alreadyListed = true;
    }
  }
  if (!alreadyListed) {
    packages.push(chartid);
  }

  // Queue this chart's draw function on the shared callback list.
  drawQueue.push(drawChartSankeyID7ae6f87c0b2);
})();
function displayChartSankeyID7ae6f87c0b2() {
  // Schedules loading of the visualization packages and, once they are
  // loaded, runs every queued draw callback. Any load already pending on
  // window.__gvisLoad is cancelled first, so only the most recently
  // scheduled one fires.
  var packages = window.__gvisPackages = window.__gvisPackages || [];
  var drawQueue = window.__gvisCallbacks = window.__gvisCallbacks || [];

  window.clearTimeout(window.__gvisLoad);

  // The timeout is set to 100 because otherwise the container div we are
  // targeting might not be part of the document yet.
  window.__gvisLoad = setTimeout(function () {
    var countAtRequest = packages.length;

    function onPackagesLoaded() {
      // Race condition where another setTimeout call snuck in after us; if
      // that call added a package, we must not shift its callback.
      if (countAtRequest != packages.length) {
        return;
      }
      for (; drawQueue.length > 0;) {
        var draw = drawQueue.shift();
        draw();
      }
    }

    google.load("visualization", "1", { packages: packages, callback: onPackagesLoaded });
  }, 100);
}
 
// jsFooter
</script>
<!-- jsChart -->
<script type="text/javascript" src="https://www.google.com/jsapi?callback=displayChartSankeyID7ae6f87c0b2"></script>
<!-- divChart -->
<div id="SankeyID7ae6f87c0b2" style="width: 400px; height: 400px;">

</div>
<p><strong>Exercise</strong> How many of the previously unclassified cells would we be able to assign to cell-types using scmap?</p>
<p><strong>Answer</strong></p>
</div>
<div id="cell-to-cell-mapping" class="section level3">
<h3><span class="header-section-number">9.3.1</span> Cell-to-Cell mapping</h3>
<p>scmap can also project each cell in one dataset to its approximate closest neighbouring cell in the reference dataset. This uses a highly optimized search algorithm allowing it to be scaled to very large references (in theory 100,000-millions of cells). However, this process is stochastic so we must fix the random seed to ensure we can reproduce our results.</p>
<p>We have already performed feature selection for this dataset so we can go straight to building the index.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">set.seed</span>(<span class="dv">193047</span>)
segerstolpe &lt;-<span class="st"> </span><span class="kw">indexCell</span>(segerstolpe)</code></pre></div>
<pre><code>## Parameter M was not provided, will use M = n_features / 10 (if n_features &lt;= 1000), where n_features is the number of selected features, and M = 100 otherwise.</code></pre>
<pre><code>## Parameter k was not provided, will use k = sqrt(number_of_cells)</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">muraro &lt;-<span class="st"> </span><span class="kw">indexCell</span>(muraro)</code></pre></div>
<pre><code>## Parameter M was not provided, will use M = n_features / 10 (if n_features &lt;= 1000), where n_features is the number of selected features, and M = 100 otherwise.
## Parameter k was not provided, will use k = sqrt(number_of_cells)</code></pre>
<p>In this case the index is a series of clusterings of each cell using different sets of features; parameters k and M are the number of clusters and the number of features used in each of these subclusterings. New cells are assigned to the nearest cluster in each subclustering to generate a unique pattern of cluster assignments. We then find the cell in the reference dataset with the same or most similar pattern of cluster assignments.</p>
<p>We can examine the cluster assignment patterns for the reference datasets using:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">metadata</span>(muraro)<span class="op">$</span>scmap_cell_index<span class="op">$</span>subclusters[<span class="dv">1</span><span class="op">:</span><span class="dv">5</span>,<span class="dv">1</span><span class="op">:</span><span class="dv">5</span>]</code></pre></div>
<pre><code>##      D28.1_1 D28.1_13 D28.1_15 D28.1_17 D28.1_2
## [1,]       4       42       27       43      10
## [2,]       5        8        2       33      37
## [3,]      11       32       35       17      26
## [4,]       2        4       32        2      18
## [5,]      31       18       21       40       1</code></pre>
<p>To project and find the <code>w</code> nearest neighbours we use a similar command as before:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">muraro_to_seger &lt;-<span class="st"> </span><span class="kw">scmapCell</span>(
  <span class="dt">projection =</span> muraro,
  <span class="dt">index_list =</span> <span class="kw">list</span>(
    <span class="dt">seger =</span> <span class="kw">metadata</span>(segerstolpe)<span class="op">$</span>scmap_cell_index
  ),
  <span class="dt">w =</span> <span class="dv">5</span>
)</code></pre></div>
<p>We can again look at the results:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">muraro_to_seger<span class="op">$</span>seger[[<span class="dv">1</span>]][,<span class="dv">1</span><span class="op">:</span><span class="dv">5</span>]</code></pre></div>
<pre><code>##      D28.1_1 D28.1_13 D28.1_15 D28.1_17 D28.1_2
## [1,]    2201     1288     1117     1623    1078
## [2,]    1229     1724     2104     1448    1593
## [3,]    1793     1854     2201     2039    1553
## [4,]    1882     1737     1081     1202    1890
## [5,]    1731      976     1903     1834    1437</code></pre>
<p>This shows the column number of the 5 nearest neighbours in segerstolpe to each of the cells in muraro. We could then calculate a pseudotime estimate, branch assignment, or other cell-level data by selecting the appropriate data from the colData of the segerstolpe data set. As a demonstration we will find the cell-type of the nearest neighbour of each cell.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">cell_type_NN &lt;-<span class="st"> </span><span class="kw">colData</span>(segerstolpe)<span class="op">$</span>cell_type1[muraro_to_seger<span class="op">$</span>seger[[<span class="dv">1</span>]][<span class="dv">1</span>,]]
<span class="kw">head</span>(cell_type_NN)</code></pre></div>
<pre><code>## [1] &quot;alpha&quot;       &quot;ductal&quot;      &quot;alpha&quot;       &quot;alpha&quot;       &quot;endothelial&quot;
## [6] &quot;endothelial&quot;</code></pre>
</div>
</div>
<div id="metaneighbour" class="section level2">
<h2><span class="header-section-number">9.4</span> Metaneighbour</h2>
<p><a href="https://www.biorxiv.org/content/early/2017/06/16/150524">Metaneighbour</a> is specifically designed to ask whether cell-type labels are consistent across datasets. It comes in two versions. First is a fully supervised method which assumes cell-types are known in all datasets and calculates how “good” those cell-type labels are. (The precise meaning of “good” will be described below). Alternatively, metaneighbour can estimate how similar all cell-types are to each other both within and across datasets. We will only be using the unsupervised version as it has much more general applicability and is easier to interpret the results of.</p>
<p>Metaneighbour compares cell-types across datasets by building a cell-cell Spearman correlation network. The method then tries to predict the label of each cell through weighted “votes” of its nearest-neighbours. It then scores the overall similarity between two clusters as the AUROC for assigning cells of typeA to typeB based on these weighted votes. An AUROC of 1 would indicate all the cells of typeA were assigned to typeB before any other cells were, and an AUROC of 0.5 is what you would get if cells were being randomly assigned.</p>
<p>Metaneighbour is just a couple of R functions, not a complete package, so we have to load them using <code>source</code>:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">source</span>(<span class="st">&quot;2017-08-28-runMN-US.R&quot;</span>)</code></pre></div>
<div id="prepare-data" class="section level3">
<h3><span class="header-section-number">9.4.1</span> Prepare Data</h3>
<p>Metaneighbour requires all datasets to be combined into a single expression matrix prior to running:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">is.common &lt;-<span class="st"> </span><span class="kw">rowData</span>(muraro)<span class="op">$</span>feature_symbol <span class="op">%in%</span><span class="st"> </span><span class="kw">rowData</span>(segerstolpe)<span class="op">$</span>feature_symbol
muraro &lt;-<span class="st"> </span>muraro[is.common,]
segerstolpe &lt;-<span class="st"> </span>segerstolpe[<span class="kw">match</span>(<span class="kw">rowData</span>(muraro)<span class="op">$</span>feature_symbol, <span class="kw">rowData</span>(segerstolpe)<span class="op">$</span>feature_symbol),]
<span class="kw">rownames</span>(segerstolpe) &lt;-<span class="st"> </span><span class="kw">rowData</span>(segerstolpe)<span class="op">$</span>feature_symbol
<span class="kw">rownames</span>(muraro) &lt;-<span class="st"> </span><span class="kw">rowData</span>(muraro)<span class="op">$</span>feature_symbol
<span class="kw">identical</span>(<span class="kw">rownames</span>(segerstolpe), <span class="kw">rownames</span>(muraro))</code></pre></div>
<pre><code>## [1] TRUE</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">combined_logcounts &lt;-<span class="st"> </span><span class="kw">cbind</span>(<span class="kw">logcounts</span>(muraro), <span class="kw">logcounts</span>(segerstolpe))
dataset_labels &lt;-<span class="st"> </span><span class="kw">rep</span>(<span class="kw">c</span>(<span class="st">&quot;m&quot;</span>, <span class="st">&quot;s&quot;</span>), <span class="dt">times=</span><span class="kw">c</span>(<span class="kw">ncol</span>(muraro), <span class="kw">ncol</span>(segerstolpe)))
cell_type_labels &lt;-<span class="st"> </span><span class="kw">c</span>(<span class="kw">colData</span>(muraro)<span class="op">$</span>cell_type1, <span class="kw">colData</span>(segerstolpe)<span class="op">$</span>cell_type1)

pheno &lt;-<span class="st"> </span><span class="kw">data.frame</span>(<span class="dt">Sample_ID =</span> <span class="kw">colnames</span>(combined_logcounts),
                <span class="dt">Study_ID=</span>dataset_labels,
                <span class="dt">Celltype=</span><span class="kw">paste</span>(cell_type_labels, dataset_labels, <span class="dt">sep=</span><span class="st">&quot;-&quot;</span>))
<span class="kw">rownames</span>(pheno) &lt;-<span class="st"> </span><span class="kw">colnames</span>(combined_logcounts)</code></pre></div>
<p>Metaneighbor includes a feature selection method to identify highly variable genes.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">var.genes =<span class="st"> </span><span class="kw">get_variable_genes</span>(combined_logcounts, pheno)</code></pre></div>
<p>Since Metaneighbor is much slower than <code>scmap</code>, we will down sample these datasets.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">subset &lt;-<span class="st"> </span><span class="kw">sample</span>(<span class="dv">1</span><span class="op">:</span><span class="kw">nrow</span>(pheno), <span class="dv">2000</span>)
combined_logcounts &lt;-<span class="st"> </span>combined_logcounts[,subset]
pheno &lt;-<span class="st"> </span>pheno[subset,]
cell_type_labels &lt;-<span class="st"> </span>cell_type_labels[subset]
dataset_labels &lt;-<span class="st"> </span>dataset_labels[subset]</code></pre></div>
<p>Now we are ready to run Metaneighbor. First we will run the unsupervised version that will let us see which cell-types are most similar across the two datasets.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">unsup &lt;-<span class="st"> </span><span class="kw">run_MetaNeighbor_US</span>(var.genes, combined_logcounts, <span class="kw">unique</span>(pheno<span class="op">$</span>Celltype), pheno)
<span class="kw">heatmap</span>(unsup)</code></pre></div>
<p><img src="31-projection_files/figure-html/unnamed-chunk-32-1.png" width="672" style="display: block; margin: auto;" /></p>
</div>
</div>
<div id="mnncorrect-1" class="section level2">
<h2><span class="header-section-number">9.5</span> mnnCorrect</h2>
<p><a href="https://www.biorxiv.org/content/early/2017/07/18/165118">mnnCorrect</a> corrects datasets to facilitate joint analysis. In order to account for differences in composition between two replicates or two different experiments it first matches individual cells across experiments to find the overlapping biological structure. Using that overlap it learns which dimensions of expression correspond to the biological state and which dimensions correspond to batch/experiment effect; mnnCorrect assumes these dimensions are orthogonal to each other in high dimensional expression space. Finally it removes the batch/experiment effects from the entire expression matrix to return the corrected matrix.</p>
<p>To match individual cells to each other across datasets, mnnCorrect uses the cosine distance to avoid library-size effect, then identifies mutual nearest neighbours (<code>k</code> determines the neighbourhood size) across datasets. Only overlapping biological groups should have mutual nearest neighbours (see panel b below). However, this assumes that k is set to approximately the size of the smallest biological group in the datasets, but a k that is too low will identify too few mutual nearest-neighbour pairs to get a good estimate of the batch effect we want to remove.</p>
<p>Learning the biological/technical effects is done with either singular value decomposition, similar to the RUV we encountered in the batch-correction section, or with principal component analysis with the optimized irlba package, which should be faster than SVD. The parameter <code>svd.dim</code> specifies how many dimensions should be kept to summarize the biological structure of the data; we will set it to three as we found three major groups using Metaneighbor above. These estimates may be further adjusted by smoothing (<code>sigma</code>) and/or variance adjustment (<code>var.adj</code>).</p>
<p>mnnCorrect also assumes you’ve already subset your expression matrices so that they contain identical genes in the same order; fortunately we have already done this for our datasets when we set up our data for Metaneighbor.</p>
<div class="figure" style="text-align: center"><span id="fig:unnamed-chunk-33"></span>
<img src="figures/mnnCorrectDiagramCropped.png" alt="mnnCorrect batch/dataset effect correction. From Haghverdi et al. 2017"  />
<p class="caption">
Figure 9.1: mnnCorrect batch/dataset effect correction. From Haghverdi et al. 2017
</p>
</div>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">require</span>(<span class="st">&quot;scran&quot;</span>)</code></pre></div>
<pre><code>## Loading required package: scran</code></pre>
<pre><code>## Loading required package: BiocParallel</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># mnnCorrect will take several minutes to run</span>
corrected &lt;-<span class="st"> </span><span class="kw">mnnCorrect</span>(<span class="kw">logcounts</span>(muraro), <span class="kw">logcounts</span>(segerstolpe), <span class="dt">k=</span><span class="dv">20</span>, <span class="dt">sigma=</span><span class="dv">1</span>, <span class="dt">pc.approx=</span><span class="ot">TRUE</span>, <span class="dt">subset.row=</span>var.genes, <span class="dt">svd.dim=</span><span class="dv">3</span>)</code></pre></div>
<p>First let’s check that we found a sufficient number of mnn pairs; mnnCorrect returns a list of dataframes with the mnn pairs for each dataset.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">dim</span>(corrected<span class="op">$</span>pairs[[<span class="dv">1</span>]]) <span class="co"># muraro -&gt; others</span></code></pre></div>
<pre><code>## [1] 0 3</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">dim</span>(corrected<span class="op">$</span>pairs[[<span class="dv">2</span>]]) <span class="co"># seger -&gt; others</span></code></pre></div>
<pre><code>## [1] 2533    3</code></pre>
<p>The first and second columns contain the cell column IDs and the third column contains a number indicating which dataset/batch the column 2 cell belongs to. In our case, we are only comparing two datasets so all the mnn pairs have been assigned to the second table and the third column contains only ones</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">head</span>(corrected<span class="op">$</span>pairs[[<span class="dv">2</span>]])</code></pre></div>
<pre><code>## DataFrame with 6 rows and 3 columns
##   current.cell other.cell other.batch
##      &lt;integer&gt;      &lt;Rle&gt;       &lt;Rle&gt;
## 1         1553          5           1
## 2         1078          5           1
## 3         1437          5           1
## 4         1890          5           1
## 5         1569          5           1
## 6          373          5           1</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">total_pairs &lt;-<span class="st"> </span><span class="kw">nrow</span>(corrected<span class="op">$</span>pairs[[<span class="dv">2</span>]])
n_unique_seger &lt;-<span class="st"> </span><span class="kw">length</span>(<span class="kw">unique</span>((corrected<span class="op">$</span>pairs[[<span class="dv">2</span>]][,<span class="dv">1</span>])))
n_unique_muraro &lt;-<span class="st"> </span><span class="kw">length</span>(<span class="kw">unique</span>((corrected<span class="op">$</span>pairs[[<span class="dv">2</span>]][,<span class="dv">2</span>])))</code></pre></div>
<p>mnnCorrect found 2533 sets of mutual nearest-neighbours between <code>n_unique_seger</code> segerstolpe cells and <code>n_unique_muraro</code> muraro cells. This should be a sufficient number of pairs but the low number of unique cells in each dataset suggests we might not have captured the full biological signal in each dataset.</p>
<p><strong>Exercise</strong> Which cell-types had mnns across these datasets? Should we increase/decrease k?</p>
<p><strong>Answer</strong></p>
<p>Now we could create a combined dataset to jointly analyse these data. However, the corrected data is no longer counts and usually will contain negative expression values thus some analysis tools may no longer be appropriate. For simplicity let’s just plot a joint TSNE.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">require</span>(<span class="st">&quot;Rtsne&quot;</span>)</code></pre></div>
<pre><code>## Loading required package: Rtsne</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">joint_expression_matrix &lt;-<span class="st"> </span><span class="kw">cbind</span>(corrected<span class="op">$</span>corrected[[<span class="dv">1</span>]], corrected<span class="op">$</span>corrected[[<span class="dv">2</span>]])

<span class="co"># Tsne will take some time to run on the full dataset</span>
joint_tsne &lt;-<span class="st"> </span><span class="kw">Rtsne</span>(<span class="kw">t</span>(joint_expression_matrix[<span class="kw">rownames</span>(joint_expression_matrix) <span class="op">%in%</span><span class="st"> </span>var.genes,]), <span class="dt">initial_dims=</span><span class="dv">10</span>, <span class="dt">theta=</span><span class="fl">0.75</span>,
                        <span class="dt">check_duplicates=</span><span class="ot">FALSE</span>, <span class="dt">max_iter=</span><span class="dv">200</span>, <span class="dt">stop_lying_iter=</span><span class="dv">50</span>, <span class="dt">mom_switch_iter=</span><span class="dv">50</span>)
dataset_labels &lt;-<span class="st"> </span><span class="kw">factor</span>(<span class="kw">rep</span>(<span class="kw">c</span>(<span class="st">&quot;m&quot;</span>, <span class="st">&quot;s&quot;</span>), <span class="dt">times=</span><span class="kw">c</span>(<span class="kw">ncol</span>(muraro), <span class="kw">ncol</span>(segerstolpe))))
cell_type_labels &lt;-<span class="st"> </span><span class="kw">factor</span>(<span class="kw">c</span>(<span class="kw">colData</span>(muraro)<span class="op">$</span>cell_type1, <span class="kw">colData</span>(segerstolpe)<span class="op">$</span>cell_type1))
<span class="kw">plot</span>(joint_tsne<span class="op">$</span>Y[,<span class="dv">1</span>], joint_tsne<span class="op">$</span>Y[,<span class="dv">2</span>], <span class="dt">pch=</span><span class="kw">c</span>(<span class="dv">16</span>,<span class="dv">1</span>)[dataset_labels], <span class="dt">col=</span><span class="kw">rainbow</span>(<span class="kw">length</span>(<span class="kw">levels</span>(cell_type_labels)))[cell_type_labels])</code></pre></div>
<p><img src="31-projection_files/figure-html/unnamed-chunk-38-1.png" width="672" style="display: block; margin: auto;" /></p>
</div>
<div id="cannonical-correlation-analysis-seurat" class="section level2">
<h2><span class="header-section-number">9.6</span> Canonical Correlation Analysis (Seurat)</h2>
<p>The Seurat package contains another correction method for combining multiple datasets, called <a href="https://www.biorxiv.org/content/early/2017/07/18/164889">CCA</a>. However, unlike mnnCorrect it doesn’t correct the expression matrix itself directly. Instead Seurat finds a lower dimensional subspace for each dataset then corrects these subspaces. Also different from mnnCorrect, Seurat only combines a single pair of datasets at a time.</p>
<p>Seurat uses gene-gene correlations to identify the biological structure in the dataset with a method called canonical correlation analysis (CCA). Seurat learns the shared structure to the gene-gene correlations and then evaluates how well each cell fits this structure. Cells which are much better described by a dataset-specific dimensionality reduction method than by the shared correlation structure are assumed to represent dataset-specific cell-types/states and are discarded before aligning the two datasets. Finally the two datasets are aligned using ‘warping’ algorithms which normalize the low-dimensional representations of each dataset in a way that is robust to differences in population density.</p>
<p>Note: because Seurat uses up a lot of library space you will have to restart your R-session to load it, and the plots/output won’t be automatically generated on this page.</p>
<p>Reload the data:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">muraro &lt;-<span class="st"> </span><span class="kw">readRDS</span>(<span class="st">&quot;pancreas/muraro.rds&quot;</span>)
segerstolpe &lt;-<span class="st"> </span><span class="kw">readRDS</span>(<span class="st">&quot;pancreas/segerstolpe.rds&quot;</span>)
segerstolpe &lt;-<span class="st"> </span>segerstolpe[,<span class="kw">colData</span>(segerstolpe)<span class="op">$</span>cell_type1 <span class="op">!=</span><span class="st"> &quot;unclassified&quot;</span>]
segerstolpe &lt;-<span class="st"> </span>segerstolpe[,<span class="kw">colData</span>(segerstolpe)<span class="op">$</span>cell_type1 <span class="op">!=</span><span class="st"> &quot;not applicable&quot;</span>,]
muraro &lt;-<span class="st"> </span>muraro[,<span class="kw">colData</span>(muraro)<span class="op">$</span>cell_type1 <span class="op">!=</span><span class="st"> &quot;unclear&quot;</span>]
is.common &lt;-<span class="st"> </span><span class="kw">rowData</span>(muraro)<span class="op">$</span>feature_symbol <span class="op">%in%</span><span class="st"> </span><span class="kw">rowData</span>(segerstolpe)<span class="op">$</span>feature_symbol
muraro &lt;-<span class="st"> </span>muraro[is.common,]
segerstolpe &lt;-<span class="st"> </span>segerstolpe[<span class="kw">match</span>(<span class="kw">rowData</span>(muraro)<span class="op">$</span>feature_symbol, <span class="kw">rowData</span>(segerstolpe)<span class="op">$</span>feature_symbol),]
<span class="kw">rownames</span>(segerstolpe) &lt;-<span class="st"> </span><span class="kw">rowData</span>(segerstolpe)<span class="op">$</span>feature_symbol
<span class="kw">rownames</span>(muraro) &lt;-<span class="st"> </span><span class="kw">rowData</span>(muraro)<span class="op">$</span>feature_symbol
<span class="kw">identical</span>(<span class="kw">rownames</span>(segerstolpe), <span class="kw">rownames</span>(muraro))</code></pre></div>
<p>First we will reformat our data into Seurat objects:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">require</span>(<span class="st">&quot;Seurat&quot;</span>)
<span class="kw">set.seed</span>(<span class="dv">4719364</span>)
muraro_seurat &lt;-<span class="st"> </span><span class="kw">CreateSeuratObject</span>(<span class="dt">raw.data=</span><span class="kw">assays</span>(muraro)[[<span class="st">&quot;normcounts&quot;</span>]]) <span class="co"># raw counts aren&#39;t available for muraro</span>
muraro_seurat<span class="op">@</span>meta.data[, <span class="st">&quot;dataset&quot;</span>] &lt;-<span class="st"> </span><span class="dv">1</span>
muraro_seurat<span class="op">@</span>meta.data[, <span class="st">&quot;celltype&quot;</span>] &lt;-<span class="st"> </span><span class="kw">paste</span>(<span class="st">&quot;m&quot;</span>,<span class="kw">colData</span>(muraro)<span class="op">$</span>cell_type1, <span class="dt">sep=</span><span class="st">&quot;-&quot;</span>)

seger_seurat &lt;-<span class="st"> </span><span class="kw">CreateSeuratObject</span>(<span class="dt">raw.data=</span><span class="kw">assays</span>(segerstolpe)[[<span class="st">&quot;counts&quot;</span>]])
seger_seurat<span class="op">@</span>meta.data[, <span class="st">&quot;dataset&quot;</span>] &lt;-<span class="st"> </span><span class="dv">2</span>
seger_seurat<span class="op">@</span>meta.data[, <span class="st">&quot;celltype&quot;</span>] &lt;-<span class="st"> </span><span class="kw">paste</span>(<span class="st">&quot;s&quot;</span>,<span class="kw">colData</span>(segerstolpe)<span class="op">$</span>cell_type1, <span class="dt">sep=</span><span class="st">&quot;-&quot;</span>)</code></pre></div>
<p>Next we must normalize, scale and identify highly variable genes for each dataset:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">muraro_seurat &lt;-<span class="st"> </span><span class="kw">NormalizeData</span>(<span class="dt">object=</span>muraro_seurat)
muraro_seurat &lt;-<span class="st"> </span><span class="kw">ScaleData</span>(<span class="dt">object=</span>muraro_seurat)
muraro_seurat &lt;-<span class="st"> </span><span class="kw">FindVariableGenes</span>(<span class="dt">object=</span>muraro_seurat, <span class="dt">do.plot=</span><span class="ot">TRUE</span>)

seger_seurat &lt;-<span class="st"> </span><span class="kw">NormalizeData</span>(<span class="dt">object=</span>seger_seurat)
seger_seurat &lt;-<span class="st"> </span><span class="kw">ScaleData</span>(<span class="dt">object=</span>seger_seurat)
seger_seurat &lt;-<span class="st"> </span><span class="kw">FindVariableGenes</span>(<span class="dt">object=</span>seger_seurat, <span class="dt">do.plot=</span><span class="ot">TRUE</span>)</code></pre></div>
<div class="figure" style="text-align: center"><span id="fig:unnamed-chunk-42"></span>
<img src="figures/muraro_seurat_hvg.png" alt="muraro variable genes"  />
<p class="caption">
Figure 9.2: muraro variable genes
</p>
</div>
<div class="figure" style="text-align: center"><span id="fig:unnamed-chunk-43"></span>
<img src="figures/seger_seurat_hvg.png" alt="segerstolpe variable genes"  />
<p class="caption">
Figure 9.3: segerstolpe variable genes
</p>
</div>
<p>Even though Seurat corrects for the relationship between dispersion and mean expression, it doesn’t use the corrected value when ranking features. Compare the results of the command below with the results in the plots above:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">head</span>(muraro_seurat<span class="op">@</span>hvg.info, <span class="dv">50</span>)
<span class="kw">head</span>(seger_seurat<span class="op">@</span>hvg.info, <span class="dv">50</span>)</code></pre></div>
<p>But we will follow their example and use the top 2000 most dispersed genes without correcting for mean expression from each dataset anyway.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">gene.use &lt;-<span class="st"> </span><span class="kw">union</span>(<span class="kw">rownames</span>(<span class="dt">x =</span> <span class="kw">head</span>(<span class="dt">x =</span> muraro_seurat<span class="op">@</span>hvg.info, <span class="dt">n =</span> <span class="dv">2000</span>)),
                  <span class="kw">rownames</span>(<span class="dt">x =</span> <span class="kw">head</span>(<span class="dt">x =</span> seger_seurat<span class="op">@</span>hvg.info, <span class="dt">n =</span> <span class="dv">2000</span>)))</code></pre></div>
<p><strong>Exercise</strong> Find the features we would use if we selected the top 2000 most dispersed after scaling by mean. (Hint: consider the <code>order</code> function)</p>
<p><strong>Answer</strong></p>
<p>Now we will run CCA to find the shared correlation structure for these two datasets:</p>
<p>Note to speed up the calculations we will be using only the top 5 dimensions but ideally you would consider many more and then select the top most informative ones using <code>DimHeatmap</code>.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">merged_seurat &lt;-<span class="st"> </span><span class="kw">RunCCA</span>(<span class="dt">object=</span>muraro_seurat, <span class="dt">object2=</span>seger_seurat, <span class="dt">genes.use=</span>gene.use, <span class="dt">add.cell.id1=</span><span class="st">&quot;m&quot;</span>, <span class="dt">add.cell.id2=</span><span class="st">&quot;s&quot;</span>, <span class="dt">num.cc =</span> <span class="dv">5</span>)
<span class="kw">DimPlot</span>(<span class="dt">object =</span> merged_seurat, <span class="dt">reduction.use =</span> <span class="st">&quot;cca&quot;</span>, <span class="dt">group.by =</span> <span class="st">&quot;dataset&quot;</span>, <span class="dt">pt.size =</span> <span class="fl">0.5</span>) <span class="co"># Before correcting</span></code></pre></div>
<div class="figure" style="text-align: center"><span id="fig:unnamed-chunk-48"></span>
<img src="figures/cca_before.png" alt="Before Aligning"  />
<p class="caption">
Figure 9.4: Before Aligning
</p>
</div>
<p>To identify dataset specific cell-types we compare how well cells are ‘explained’ by CCA vs dataset-specific principal component analysis.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">merged_seurat &lt;-<span class="st"> </span><span class="kw">CalcVarExpRatio</span>(<span class="dt">object =</span> merged_seurat, <span class="dt">reduction.type =</span> <span class="st">&quot;pca&quot;</span>, <span class="dt">grouping.var =</span> <span class="st">&quot;dataset&quot;</span>, <span class="dt">dims.use =</span> <span class="dv">1</span><span class="op">:</span><span class="dv">5</span>)
merged.all &lt;-<span class="st"> </span>merged_seurat
merged_seurat &lt;-<span class="st"> </span><span class="kw">SubsetData</span>(<span class="dt">object=</span>merged_seurat, <span class="dt">subset.name=</span><span class="st">&quot;var.ratio.pca&quot;</span>, <span class="dt">accept.low =</span> <span class="fl">0.5</span>) <span class="co"># CCA &gt; 1/2 as good as PCA</span>
merged.discard &lt;-<span class="st"> </span><span class="kw">SubsetData</span>(<span class="dt">object=</span>merged.all, <span class="dt">subset.name=</span><span class="st">&quot;var.ratio.pca&quot;</span>, <span class="dt">accept.high =</span> <span class="fl">0.5</span>)

<span class="kw">summary</span>(<span class="kw">factor</span>(merged.discard<span class="op">@</span>meta.data<span class="op">$</span>celltype)) <span class="co"># check the cell-type of the discarded cells.</span></code></pre></div>
<p>Here we can see that despite both datasets containing endothelial cells, almost all of them have been discarded as “dataset-specific”. Now we can align the datasets:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">merged_seurat &lt;-<span class="st"> </span><span class="kw">AlignSubspace</span>(<span class="dt">object =</span> merged_seurat, <span class="dt">reduction.type =</span> <span class="st">&quot;cca&quot;</span>, <span class="dt">grouping.var =</span> <span class="st">&quot;dataset&quot;</span>, <span class="dt">dims.align =</span> <span class="dv">1</span><span class="op">:</span><span class="dv">5</span>)
<span class="kw">DimPlot</span>(<span class="dt">object =</span> merged_seurat, <span class="dt">reduction.use =</span> <span class="st">&quot;cca.aligned&quot;</span>, <span class="dt">group.by =</span> <span class="st">&quot;dataset&quot;</span>, <span class="dt">pt.size =</span> <span class="fl">0.5</span>) <span class="co"># After aligning subspaces</span></code></pre></div>
<div class="figure" style="text-align: center"><span id="fig:unnamed-chunk-51"></span>
<img src="figures/cca_after.png" alt="After Aligning"  />
<p class="caption">
Figure 9.5: After Aligning
</p>
</div>
<p><strong>Exercise</strong> Compare the results if you instead use the features selected after scaling dispersions.</p>
<p><strong>Answer</strong></p>
<div class="figure" style="text-align: center"><span id="fig:unnamed-chunk-53"></span>
<img src="figures/cca_after2.png" alt="After Aligning"  />
<p class="caption">
Figure 9.6: After Aligning
</p>
</div>
<p><strong>Advanced Exercise</strong> Use the clustering methods we previously covered on the combined datasets. Do you identify any novel cell-types?</p>
<div id="sessioninfo-10" class="section level3">
<h3><span class="header-section-number">9.6.1</span> sessionInfo()</h3>
<pre><code>## R version 3.4.3 (2017-11-30)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Debian GNU/Linux 9 (stretch)
## 
## Matrix products: default
## BLAS: /usr/lib/openblas-base/libblas.so.3
## LAPACK: /usr/lib/libopenblasp-r0.2.19.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    parallel  methods   stats     graphics  grDevices utils    
## [8] datasets  base     
## 
## other attached packages:
##  [1] Rtsne_0.13                 scran_1.6.7               
##  [3] BiocParallel_1.12.0        bindrcpp_0.2              
##  [5] scmap_1.1.5                scater_1.6.2              
##  [7] SingleCellExperiment_1.0.0 SummarizedExperiment_1.8.1
##  [9] DelayedArray_0.4.1         matrixStats_0.53.0        
## [11] GenomicRanges_1.30.1       GenomeInfoDb_1.14.0       
## [13] IRanges_2.12.0             S4Vectors_0.16.0          
## [15] ggplot2_2.2.1              Biobase_2.38.0            
## [17] BiocGenerics_0.24.0        googleVis_0.6.2           
## [19] knitr_1.19                
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-6           bit64_0.9-7            progress_1.1.2        
##  [4] httr_1.3.1             rprojroot_1.3-2        dynamicTreeCut_1.63-1 
##  [7] tools_3.4.3            backports_1.1.2        irlba_2.3.2           
## [10] DT_0.4                 R6_2.2.2               vipor_0.4.5           
## [13] DBI_0.7                lazyeval_0.2.1         colorspace_1.3-2      
## [16] gridExtra_2.3          prettyunits_1.0.2      bit_1.1-12            
## [19] compiler_3.4.3         labeling_0.3           bookdown_0.6          
## [22] scales_0.5.0           randomForest_4.6-12    proxy_0.4-21          
## [25] stringr_1.2.0          digest_0.6.15          rmarkdown_1.8         
## [28] XVector_0.18.0         pkgconfig_2.0.1        htmltools_0.3.6       
## [31] limma_3.34.6           highr_0.6              htmlwidgets_1.0       
## [34] rlang_0.1.6            RSQLite_2.0            FNN_1.1               
## [37] shiny_1.0.5            bindr_0.1              zoo_1.8-1             
## [40] jsonlite_1.5           dplyr_0.7.4            RCurl_1.95-4.10       
## [43] magrittr_1.5           GenomeInfoDbData_1.0.0 Matrix_1.2-7.1        
## [46] Rcpp_0.12.15           ggbeeswarm_0.6.0       munsell_0.4.3         
## [49] viridis_0.4.1          stringi_1.1.6          yaml_2.1.16           
## [52] edgeR_3.20.7           zlibbioc_1.24.0        rhdf5_2.22.0          
## [55] plyr_1.8.4             grid_3.4.3             blob_1.1.0            
## [58] shinydashboard_0.6.1   lattice_0.20-34        locfit_1.5-9.1        
## [61] pillar_1.1.0           igraph_1.1.2           rjson_0.2.15          
## [64] reshape2_1.4.3         codetools_0.2-15       biomaRt_2.34.2        
## [67] XML_3.98-1.9           glue_1.2.0             evaluate_0.10.1       
## [70] data.table_1.10.4-3    httpuv_1.3.5           gtable_0.2.0          
## [73] assertthat_0.2.0       xfun_0.1               mime_0.5              
## [76] xtable_1.8-2           e1071_1.6-8            class_7.3-14          
## [79] viridisLite_0.2.0      tibble_1.4.2           AnnotationDbi_1.40.0  
## [82] beeswarm_0.2.3         memoise_1.1.0          tximport_1.6.0        
## [85] statmod_1.4.30</code></pre>

</div>
</div>
<div id="search-scrna-seq-data" class="section level2">
<h2><span class="header-section-number">9.7</span> Search scRNA-Seq data</h2>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(scfind)
<span class="kw">library</span>(SingleCellExperiment)
<span class="kw">set.seed</span>(<span class="dv">1234567</span>)</code></pre></div>
<div id="about" class="section level3">
<h3><span class="header-section-number">9.7.1</span> About</h3>
<p><code>scfind</code> is a tool that allows one to search single cell RNA-Seq collections (Atlas) using lists of genes, e.g. searching for cells and cell-types where a specific set of genes are expressed. <code>scfind</code> is a <a href="http://bioconductor.org/packages/scfind">Bioconductor package</a>. Cloud implementation of <code>scfind</code> with a large collection of datasets is available on our <a href="http://www.hemberg-lab.cloud/scfind">website</a>.</p>
<div class="figure" style="text-align: center"><span id="fig:unnamed-chunk-3"></span>
<img src="figures/scfind.png" alt="scfind can be used to search large collection of scRNA-seq data by a list of gene IDs." width="80%" />
<p class="caption">
Figure 2.4: scfind can be used to search large collection of scRNA-seq data by a list of gene IDs.
</p>
</div>
</div>
<div id="dataset" class="section level3">
<h3><span class="header-section-number">9.7.2</span> Dataset</h3>
<p>We will run <code>scfind</code> on the same human pancreas dataset as in the previous chapter. <code>scfind</code> also operates on <code>SingleCellExperiment</code> class:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">muraro &lt;-<span class="st"> </span><span class="kw">readRDS</span>(<span class="st">&quot;pancreas/muraro.rds&quot;</span>)</code></pre></div>
</div>
<div id="gene-index" class="section level3">
<h3><span class="header-section-number">9.7.3</span> Gene Index</h3>
<p>Now we need to create a gene index using our dataset:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">cellIndex &lt;-<span class="st"> </span><span class="kw">buildCellIndex</span>(muraro)</code></pre></div>
<p>The gene index contains, for each gene, the indexes of the cells where it is expressed. This is similar to sparsification of the expression matrix. In addition to this, the index is also compressed in a way that it can be accessed very quickly. We estimated that one can achieve a compression factor of 5-10 with this method.</p>
<p>By default the <code>cell_type1</code> column of the <code>colData</code> slot of the <code>SingleCellExperiment</code> object is used to define cell types, however it can also be defined manually using the <code>cell_type_column</code> argument of the <code>buildCellIndex</code> function (check <code>?buildCellIndex</code>).</p>
</div>
<div id="marker-genes" class="section level3">
<h3><span class="header-section-number">9.7.4</span> Marker genes</h3>
<p>Now let’s define lists of cell type specific marker genes. We will use the marker genes identified in the original publication, namely in Figure 1:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># these genes are taken from fig. 1</span>
muraro_alpha &lt;-<span class="st"> </span><span class="kw">c</span>(<span class="st">&quot;GCG&quot;</span>, <span class="st">&quot;LOXL4&quot;</span>, <span class="st">&quot;PLCE1&quot;</span>, <span class="st">&quot;IRX2&quot;</span>, <span class="st">&quot;GC&quot;</span>, <span class="st">&quot;KLHL41&quot;</span>, 
                  <span class="st">&quot;CRYBA2&quot;</span>, <span class="st">&quot;TTR&quot;</span>, <span class="st">&quot;TM4SF4&quot;</span>, <span class="st">&quot;RGS4&quot;</span>)
muraro_beta &lt;-<span class="st"> </span><span class="kw">c</span>(<span class="st">&quot;INS&quot;</span>, <span class="st">&quot;IAPP&quot;</span>, <span class="st">&quot;MAFA&quot;</span>, <span class="st">&quot;NPTX2&quot;</span>, <span class="st">&quot;DLK1&quot;</span>, <span class="st">&quot;ADCYAP1&quot;</span>, 
                 <span class="st">&quot;PFKFB2&quot;</span>, <span class="st">&quot;PDX1&quot;</span>, <span class="st">&quot;TGFBR3&quot;</span>, <span class="st">&quot;SYT13&quot;</span>)
muraro_gamma &lt;-<span class="st"> </span><span class="kw">c</span>(<span class="st">&quot;PPY&quot;</span>, <span class="st">&quot;SERTM1&quot;</span>, <span class="st">&quot;CARTPT&quot;</span>, <span class="st">&quot;SLITRK6&quot;</span>, <span class="st">&quot;ETV1&quot;</span>, 
                  <span class="st">&quot;THSD7A&quot;</span>, <span class="st">&quot;AQP3&quot;</span>, <span class="st">&quot;ENTPD2&quot;</span>, <span class="st">&quot;PTGFR&quot;</span>, <span class="st">&quot;CHN2&quot;</span>)
muraro_delta &lt;-<span class="st"> </span><span class="kw">c</span>(<span class="st">&quot;SST&quot;</span>, <span class="st">&quot;PRG4&quot;</span>, <span class="st">&quot;LEPR&quot;</span>, <span class="st">&quot;RBP4&quot;</span>, <span class="st">&quot;BCHE&quot;</span>, <span class="st">&quot;HHEX&quot;</span>, 
                  <span class="st">&quot;FRZB&quot;</span>, <span class="st">&quot;PCSK1&quot;</span>, <span class="st">&quot;RGS2&quot;</span>, <span class="st">&quot;GABRG2&quot;</span>)</code></pre></div>
</div>
<div id="search-cells-by-a-gene-list" class="section level3">
<h3><span class="header-section-number">9.7.5</span> Search cells by a gene list</h3>
<p>The <code>findCell</code> function returns a list of p-values corresponding to all cell types in a given dataset. It also outputs a list of cells in which genes from the given gene list are co-expressed. We will run it on all lists of marker genes defined above:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">res &lt;-<span class="st"> </span><span class="kw">findCell</span>(cellIndex, muraro_alpha)
<span class="kw">barplot</span>(<span class="op">-</span><span class="kw">log10</span>(res<span class="op">$</span>p_values), <span class="dt">ylab =</span> <span class="st">&quot;-log10(pval)&quot;</span>, <span class="dt">las =</span> <span class="dv">2</span>)</code></pre></div>
<p><img src="32-search_files/figure-html/unnamed-chunk-7-1.png" width="672" style="display: block; margin: auto;" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">head</span>(res<span class="op">$</span>common_exprs_cells)</code></pre></div>
<pre><code>##   cell_id cell_type
## 1       1     alpha
## 2       3     alpha
## 3       7     alpha
## 4       9     alpha
## 5      15     alpha
## 6      20     alpha</code></pre>
<p><strong>Exercise 1</strong></p>
<p>Perform a search by <em>beta</em>, <em>delta</em> and <em>gamma</em> gene lists and explore the results.</p>
<p><img src="32-search_files/figure-html/unnamed-chunk-8-1.png" width="672" style="display: block; margin: auto;" /></p>
<pre><code>##   cell_id cell_type
## 1      71     alpha
## 2      72      beta
## 3      92      beta
## 4      96      beta
## 5      98      beta
## 6     102      beta</code></pre>
<p><img src="32-search_files/figure-html/unnamed-chunk-8-2.png" width="672" style="display: block; margin: auto;" /></p>
<pre><code>##   cell_id cell_type
## 1      40     delta
## 2     212     delta
## 3     225     delta
## 4     253     delta
## 5     330     delta
## 6     400     delta</code></pre>
<p><img src="32-search_files/figure-html/unnamed-chunk-8-3.png" width="672" style="display: block; margin: auto;" /></p>
<pre><code>##   cell_id cell_type
## 1      53     alpha
## 2     102      beta
## 3     255     gamma
## 4     305     gamma
## 5     525     gamma
## 6     662     gamma</code></pre>
<p><strong>Exercise 2</strong></p>
<p>Load the <code>segerstolpe</code> dataset and search it using the <em>alpha</em>, <em>beta</em>, <em>delta</em> and <em>gamma</em> gene lists identified in the <code>muraro</code> dataset.</p>
<p><img src="32-search_files/figure-html/unnamed-chunk-9-1.png" width="672" style="display: block; margin: auto;" /></p>
<pre><code>##   cell_id cell_type
## 1      18     alpha
## 2      20     alpha
## 3      24     alpha
## 4      32     alpha
## 5      43     alpha
## 6      48     alpha</code></pre>
<p><img src="32-search_files/figure-html/unnamed-chunk-9-2.png" width="672" style="display: block; margin: auto;" /></p>
<pre><code>##   cell_id     cell_type
## 1      15 co-expression
## 2      58          beta
## 3     300          beta
## 4     390 co-expression
## 5     504 co-expression
## 6     506          beta</code></pre>
<p><img src="32-search_files/figure-html/unnamed-chunk-9-3.png" width="672" style="display: block; margin: auto;" /></p>
<pre><code>##   cell_id     cell_type
## 1     170         delta
## 2     715         delta
## 3    1039 co-expression
## 4    1133         delta
## 5    1719         delta
## 6    1721         delta</code></pre>
<p><img src="32-search_files/figure-html/unnamed-chunk-9-4.png" width="672" style="display: block; margin: auto;" /></p>
<pre><code>##   cell_id cell_type
## 1      47     gamma
## 2     458     gamma
## 3     476     gamma
## 4     600     gamma
## 5     606     gamma
## 6     622     gamma</code></pre>
</div>
<div id="sessioninfo-11" class="section level3">
<h3><span class="header-section-number">9.7.6</span> sessionInfo()</h3>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">sessionInfo</span>()</code></pre></div>
<pre><code>## R version 3.4.3 (2017-11-30)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Debian GNU/Linux 9 (stretch)
## 
## Matrix products: default
## BLAS: /usr/lib/openblas-base/libblas.so.3
## LAPACK: /usr/lib/libopenblasp-r0.2.19.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    methods   stats     graphics  grDevices utils    
## [8] datasets  base     
## 
## other attached packages:
##  [1] SingleCellExperiment_1.0.0 SummarizedExperiment_1.8.1
##  [3] DelayedArray_0.4.1         matrixStats_0.53.0        
##  [5] Biobase_2.38.0             GenomicRanges_1.30.1      
##  [7] GenomeInfoDb_1.14.0        IRanges_2.12.0            
##  [9] S4Vectors_0.16.0           BiocGenerics_0.24.0       
## [11] scfind_1.0.0               knitr_1.19                
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.15           highr_0.6              plyr_1.8.4            
##  [4] pillar_1.1.0           compiler_3.4.3         XVector_0.18.0        
##  [7] bindr_0.1              bitops_1.0-6           tools_3.4.3           
## [10] zlibbioc_1.24.0        digest_0.6.15          bit_1.1-12            
## [13] tibble_1.4.2           evaluate_0.10.1        lattice_0.20-34       
## [16] pkgconfig_2.0.1        rlang_0.1.6            Matrix_1.2-7.1        
## [19] yaml_2.1.16            xfun_0.1               bindrcpp_0.2          
## [22] GenomeInfoDbData_1.0.0 stringr_1.2.0          dplyr_0.7.4           
## [25] rprojroot_1.3-2        grid_3.4.3             glue_1.2.0            
## [28] R6_2.2.2               hash_2.2.6             rmarkdown_1.8         
## [31] bookdown_0.6           reshape2_1.4.3         magrittr_1.5          
## [34] backports_1.1.2        htmltools_0.3.6        assertthat_0.2.0      
## [37] stringi_1.1.6          RCurl_1.95-4.10</code></pre>

<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">set.seed</span>(<span class="dv">1234567</span>)</code></pre></div>
</div>
</div>
</div>
<h3> References</h3>
<div id="refs" class="references">
<div id="ref-Regev2017-mw">
<p>Regev, Aviv, Sarah Teichmann, Eric S Lander, Ido Amit, Christophe Benoist, Ewan Birney, Bernd Bodenmiller, et al. 2017. “The Human Cell Atlas.” <em>bioRxiv</em>, May, 121202.</p>
</div>
<div id="ref-Altschul1990-ts">
<p>Altschul, Stephen F., Warren Gish, Webb Miller, Eugene W. Myers, and David J. Lipman. 1990. “Basic Local Alignment Search Tool.” <em>Journal of Molecular Biology</em> 215 (3). Elsevier BV: 403–10. doi:<a href="https://doi.org/10.1016/s0022-2836(05)80360-2">10.1016/s0022-2836(05)80360-2</a>.</p>
</div>
<div id="ref-Muraro2016-yk">
<p>Muraro, Mauro J., Gitanjali Dharmadhikari, Dominic Grün, Nathalie Groen, Tim Dielen, Erik Jansen, Leon van Gurp, et al. 2016. “A Single-Cell Transcriptome Atlas of the Human Pancreas.” <em>Cell Systems</em> 3 (4). Elsevier BV: 385–394.e3. doi:<a href="https://doi.org/10.1016/j.cels.2016.09.002">10.1016/j.cels.2016.09.002</a>.</p>
</div>
<div id="ref-Segerstolpe2016-wc">
<p>Segerstolpe, Åsa, Athanasia Palasantza, Pernilla Eliasson, Eva-Marie Andersson, Anne-Christine Andréasson, Xiaoyan Sun, Simone Picelli, et al. 2016. “Single-Cell Transcriptome Profiling of Human Pancreatic Islets in Health and Type 2 Diabetes.” <em>Cell Metabolism</em> 24 (4). Elsevier BV: 593–607. doi:<a href="https://doi.org/10.1016/j.cmet.2016.08.020">10.1016/j.cmet.2016.08.020</a>.</p>
</div>
<div id="ref-Kiselev2017-nb">
<p>Kiselev, Vladimir Yu, and Martin Hemberg. 2017. “Scmap - a Tool for Unsupervised Projection of Single Cell RNA-seq Data.” <em>bioRxiv</em>, July, 150292.</p>
</div>
</div>
            </section>

          </div>
        </div>
      </div>
<a href="biological-analysis.html" class="navigation navigation-prev " aria-label="Previous page"><i class="fa fa-angle-left"></i></a>
<a href="seurat-chapter.html" class="navigation navigation-next " aria-label="Next page"><i class="fa fa-angle-right"></i></a>
    </div>
  </div>
<script src="libs/gitbook-2.6.7/js/app.min.js"></script>
<script src="libs/gitbook-2.6.7/js/lunr.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-search.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-sharing.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-fontsettings.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-bookdown.js"></script>
<script src="libs/gitbook-2.6.7/js/jquery.highlight.js"></script>
<script>
// Start the GitBook front-end for this page with the book's UI configuration.
gitbook.require(["gitbook"], function(gitbook) {
  gitbook.start({
    // One boolean flag per sharing service, plus "all" listing service ids.
    "sharing": {
      "github": false,
      "facebook": true,
      "twitter": true,
      "google": false,
      "weibo": false,
      // Spelled to match the "instapaper" id used in "all" below.
      "instapaper": false,
      "vk": false,
      "all": ["facebook", "google", "twitter", "weibo", "instapaper"]
    },
    // Initial reader font settings.
    "fontsettings": {
      "theme": "white",
      "family": "sans",
      "size": 2
    },
    // No edit link is configured for this book.
    "edit": {
      "link": null,
      "text": null
    },
    "download": ["scRNA-seq-course.pdf"],
    "toc": {
      "collapse": "section"
    },
    "search": true
  });
});
</script>

<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  // Load MathJax at runtime by appending a <script> element to <head>.
  (function () {
    var script = document.createElement("script");
    script.type = "text/javascript";
    var src = "";
    // An empty (or "true") value selects the default MathJax CDN build.
    // NOTE(review): confirm cdn.bootcss.com is still a trusted, maintained host for MathJax.
    if (src === "" || src === "true") src = "https://cdn.bootcss.com/mathjax/2.7.1/MathJax.js?config=TeX-MML-AM_CHTML";
    // Keep the explicit https: scheme. Rewriting it to a protocol-relative URL
    // would fetch executable script over plain HTTP whenever this page is itself
    // served over http:, and https: also works when the page is opened from file:.
    script.src = src;
    document.getElementsByTagName("head")[0].appendChild(script);
  })();
</script>
</body>

</html>