Commit

thesis summary

glouppe committed Jun 7, 2015
1 parent 7714b68 commit d96a196
Showing 5 changed files with 386 additions and 3 deletions.
10 changes: 8 additions & 2 deletions tex/Makefile
@@ -1,4 +1,10 @@
-thesis.pdf: bibliography.bib thesis.tex classicthesis-config.tex chapters/*.tex frontback/*.tex
+summary.pdf: summary.tex classicthesis-config.tex summary/*.tex frontback/*.tex
+	pdflatex -shell-escape summary
+	bibtex summary
+	pdflatex -shell-escape summary
+	pdflatex -shell-escape summary
+
+thesis.pdf: bibliography.bib thesis.tex classicthesis-config.tex chapters/*.tex frontback/*.tex
	pdflatex -shell-escape thesis
	bibtex thesis
	pdflatex -shell-escape thesis
@@ -9,6 +15,6 @@ partial:
	pdflatex -shell-escape thesis

clean:
-	rm -f *.lot *.lof *.lol *.toc *.log *.out *.aux *.blg *.bbl thesis.pdf chapters/*.aux frontback/*.aux
+	rm -f *.lot *.lof *.lol *.toc *.log *.out *.aux *.blg *.bbl thesis.pdf chapters/*.aux frontback/*.aux

rebuild: clean thesis.pdf
29 changes: 29 additions & 0 deletions tex/bibliography.bib
@@ -1,5 +1,34 @@
@comment{This file has been generated by Pybliographer}

@mastersthesis{louppe:2010b,
author = {Louppe, G.},
title = {Collaborative filtering: Scalable approaches using restricted Boltzmann machines},
affiliation = {Universit{\'e} de Li{\`e}ge - ULg \textgreater D{\'e}p. d'{\'e}lectric., {\'e}lectron. et informat. (Inst.Montefiore) \textgreater Syst{\`e}mes et mod{\'e}lisation \textgreater},
year = {2010},
pages = {95},
language = {en},
keywords = {Ing{\'e}nierie, informatique \& technologie => Sciences informatiques; collaborative filtering; restricted boltzmann machines; machine learning},
abstract = {[en] Parallel to the growth of electronic commerce, recommender systems have become a very active area of research, both in the industry and in the academic world. The goal of these systems is to make automatic but personal recommendations when customers are overwhelmed with thousands of possibilities and do not know what to look for. \\\\In that context, the object of this work is threefold. The first part consists in a survey of recommendation algorithms and emphasizes a class of algorithms known as collaborative filtering algorithms. The second part consists in studying in more depth a specific model of neural networks known as restricted Boltzmann machines. That model is then experimentally and extensively examined on a recommendation problem. The third part of this work focuses on how restricted Boltzmann machines can be made more scalable. Three different and original approaches are proposed and studied. \\\\In the first approach, we revisit the learning and test algorithms of restricted Boltzmann machines in the context of shared-memory architectures. In the second approach, we propose to reformulate these algorithms as MapReduce tasks. Finally, in the third method, ensembles of RBMs are investigated. The best and most promising results are obtained with the MapReduce approach.},
school = {Universit{\'e} de Li{\`e}ge, Li{\`e}ge, Belgique},
degree = {Master en sciences informatiques, {\`a} finalit{\'e} approfondie},
}

@phdthesis{louppe:2014phd,
author = {Louppe, G.},
title = {Understanding Random Forests: From Theory to Practice},
affiliation = {Universit{\'e} de Li{\`e}ge \textgreater D{\'e}p. d'{\'e}lectric., {\'e}lectron. et informat. (Inst.Montefiore) \textgreater Syst{\`e}mes et mod{\'e}lisation \textgreater},
year = {2014},
pages = {225},
language = {en},
keywords = {Ing{\'e}nierie, informatique \& technologie => Sciences informatiques; machine learning; random forest; variable importances},
abstract = {[en] Data analysis and machine learning have become an integral part of the modern scientific methodology, offering automated procedures for the prediction of a phenomenon based on past observations, unraveling underlying patterns in data and providing insights about the problem. Yet, one should avoid using machine learning as a black-box tool, and rather consider it as a methodology, with a rational thought process that is entirely dependent on the problem under study. In particular, the use of algorithms should ideally require a reasonable understanding of their mechanisms, properties and limitations, in order to better apprehend and interpret their results.\\\\Accordingly, the goal of this thesis is to provide an in-depth analysis of random forests, consistently calling into question each and every part of the algorithm, in order to shed new light on its learning capabilities, inner workings and interpretability. The first part of this work studies the induction of decision trees and the construction of ensembles of randomized trees, motivating their design and purpose whenever possible. Our contributions follow with an original complexity analysis of random forests, showing their good computational performance and scalability, along with an in-depth discussion of their implementation details, as contributed within Scikit-Learn.\\\\In the second part of this work, we analyze and discuss the interpretability of random forests in the eyes of variable importance measures. The core of our contributions rests in the theoretical characterization of the Mean Decrease of Impurity variable importance measure, from which we prove and derive some of its properties in the case of multiway totally randomized trees and in asymptotic conditions. As a consequence of this work, our analysis demonstrates that variable importances as computed from non-totally randomized trees (e.g., standard Random Forest) suffer from a combination of defects, due to masking effects, misestimations of node impurity or due to the binary structure of decision trees.\\\\Finally, the last part of this dissertation addresses limitations of random forests in the context of large datasets. Through extensive experiments, we show that subsampling both samples and features simultaneously provides on-par performance while lowering at the same time the memory requirements. Overall, this paradigm highlights an intriguing practical fact: there is often no need to build single models over immensely large datasets. Good performance can often be achieved by building models on (very) small random parts of the data and then combining them all in an ensemble, thereby avoiding all practical burdens of making large data fit into memory.},
school = {Universit{\'e} de Li{\`e}ge, Li{\`e}ge, Belgique},
degree = {Docteur en Sciences (informatiques)},
institution = {Fonds de la Recherche Scientifique (Communaut{\'e} fran{\c{c}}aise de Belgique) - F.R.S.-FNRS},

}


@Article{miyakawa:1989,
Author = {Miyakawa, Masahiro},
2 changes: 1 addition & 1 deletion tex/classicthesis-config.tex
@@ -32,7 +32,7 @@
% ********************************************************************
\usepackage{ifthen}
\newboolean{enable-backrefs} % enable backrefs in the bibliography
-\setboolean{enable-backrefs}{true} % true false
+\setboolean{enable-backrefs}{false} % true false
% ****************************************************************************************************


88 changes: 88 additions & 0 deletions tex/summary.tex
@@ -0,0 +1,88 @@
\documentclass[oneside,openright,titlepage,numbers=noenddot,headinclude,%
footinclude=true,cleardoublepage=empty,abstractoff,BCOR=5mm,%
paper=a4,fontsize=11pt,ngerman,american]{scrreprt}

% Custom config ===============================================================

% Classic thesis
\usepackage{amssymb}
\input{classicthesis-config}

% Theorems and definitions
\usepackage{amsthm}
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{definition}{Definition}

\newtheorem{algorithm}{Algorithm}
\usepackage{algpseudocode}

% Counters
\renewcommand{\labelenumi}{{\color{halfgray}(\alph{enumi})}}
\renewcommand{\labelenumii}{\color{halfgray}{\roman{enumii}.}}
\renewcommand{\labelitemi}{{\color{halfgray}-}}%\raisebox{0.3ex}{\tiny$\blacksquare$}}}

\numberwithin{theorem}{chapter}
\numberwithin{definition}{chapter}
\numberwithin{algorithm}{chapter}
\numberwithin{figure}{chapter}
\numberwithin{table}{chapter}
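% Hypothetical usage sketch (editor's addition, not part of the commit): the
% amsthm environments above share one counter, reset per chapter, so inside
% chapter 2 the following would render as Theorem 2.1 and Lemma 2.2:
%   \begin{theorem}\label{thm:example}An example statement.\end{theorem}
%   \begin{lemma}A follow-up result, numbered from the shared counter.\end{lemma}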

% Maths
\DeclareMathOperator*{\argmin}{arg\,min}
\DeclareMathOperator*{\argmax}{arg\,max}

\numberwithin{equation}{chapter}
\allowdisplaybreaks
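% Hypothetical usage sketch (editor's addition): the starred operators set
% their subscripts underneath in display mode, e.g.
%   \[ \theta^\star = \argmin_{\theta \in \Theta} L(\theta) \]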

% Shaded boxes
\usepackage{framed}
\newenvironment{remark}[1]{%
\definecolor{shadecolor}{gray}{0.9}%
\begin{shaded}{\color{Maroon}\noindent\textsc{#1}}\\%
}{%
\end{shaded}%
}
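% Hypothetical usage sketch (editor's addition): the mandatory argument is the
% box title, set in Maroon small caps on the light gray shadecolor background:
%   \begin{remark}{A remark title}
%   Body text of the shaded box.
%   \end{remark}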

% Code snippets
\usepackage{minted}
\definecolor{rulecolor}{rgb}{0.80,0.80,0.80}
\definecolor{bgcolor}{rgb}{1.0,1.0,1.0}
\newminted{python}{bgcolor=bgcolor}
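% Hypothetical usage sketch (editor's addition): \newminted{python}{...}
% defines a pythoncode environment for highlighted listings; minted needs the
% -shell-escape flag already used in the Makefile (X and y are placeholders):
%   \begin{pythoncode}
%   from sklearn.ensemble import RandomForestClassifier
%   clf = RandomForestClassifier(n_estimators=100).fit(X, y)
%   \end{pythoncode}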

% Todo
\newcommand{\todo}[1]{\textcolor{red}{[TODO] #1}}
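% Hypothetical usage sketch (editor's addition): \todo{check this} prints
% "[TODO] check this" in red at the call site.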

% PS pictures
\usepackage{pstricks,auto-pst-pdf}

% Landscape tables
\usepackage{rotating}

% Checkmarks
\usepackage{pifont}% http://ctan.org/pkg/pifont
\newcommand{\cmark}{\ding{51}}%
\newcommand{\xmark}{\ding{55}}%
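% Hypothetical usage sketch (editor's addition): \cmark and \xmark print a
% Zapf Dingbats check mark and cross, e.g. in feature-comparison tables.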

% Wide tables
\usepackage{ltablex}


% -----------------------------------------------------------------------------

\begin{document}
\frenchspacing
\raggedbottom
\selectlanguage{american}
\pagenumbering{roman}
\pagestyle{plain}


\pagenumbering{arabic}

\include{summary/summary}


\end{document}
260 changes: 260 additions & 0 deletions tex/summary/summary.tex (diff not rendered)
