solution_template.tex

%
% 6.006 problem set 1 solutions template
%
\documentclass[12pt,twoside]{article}

\usepackage{amsmath}
\usepackage{color}
\usepackage{clrscode}

\input{macros}

\setlength{\oddsidemargin}{0pt}
\setlength{\evensidemargin}{0pt}
\setlength{\textwidth}{6.5in}
\setlength{\topmargin}{0in}
\setlength{\textheight}{8.5in}

\newcommand{\theproblemsetnum}{1}
\newcommand{\releasedate}{Thursday, September 17}
\newcommand{\partaduedate}{February 18th, 2016}
\newcommand{\tabUnit}{3ex}
\newcommand{\tabT}{\hspace*{\tabUnit}}

\title{6.006 PSET 1}

\begin{document}

\handout{Problem Set \theproblemsetnum}{February 18, 2016}

\textbf{All parts are due {\bf \partaduedate} at {\bf 11:59PM}}.

\setlength{\parindent}{0pt}

\medskip

\hrulefill

\medskip

{\bf Name:} Alex List

\medskip

{\bf Collaborators:} /

\medskip

\hrulefill

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% See below for common and useful latex constructs. %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Some useful commands:
%$f(x) = \Theta(x)$
%$T(x, y) \leq \log(x) + 2^y + \binom{2n}{n}$
% {\tt code\_function}


% You can create unnumbered lists as follows:
%\begin{itemize}
%    \item First item in a list
%        \begin{itemize}
%            \item First item in a list
%                \begin{itemize}
%                    \item First item in a list
%                    \item Second item in a list
%                \end{itemize}
%            \item Second item in a list
%        \end{itemize}
%    \item Second item in a list
%\end{itemize}

% You can create numbered lists as follows:
%\begin{enumerate}
%    \item First item in a list
%    \item Second item in a list
%    \item Third item in a list
%\end{enumerate}

% You can write aligned equations as follows:
%\begin{align}
%    \begin{split}
%        (x+y)^3 &= (x+y)^2(x+y) \\
%                &= (x^2+2xy+y^2)(x+y) \\
%                &= (x^3+2x^2y+xy^2) + (x^2y+2xy^2+y^3) \\
%                &= x^3+3x^2y+3xy^2+y^3
%    \end{split}
%\end{align}

% You can create grids/matrices as follows:
%\begin{align}
%    A =
%    \begin{bmatrix}
%        A_{11} & A_{21} \\
%        A_{21} & A_{22}
%    \end{bmatrix}
%\end{align}

\begin{problems}

\section*{Part A}

\problem  % Problem 1

\begin{problemparts}
\problempart % Problem 1a)
$f2 = log(4n^{n^{4}})=n^{4}log(n)$

$f2>f1 \rightarrow f1 = O(f2)$

$f3 = 4log(4n)log(n) \simeq log(n)log(n) = log^{2}(n)$

$ \lim n \rightarrow \inf f3/f2 = \lim n \rightarrow \inf log^{2}(n) / n^{4}log(n) = \lim n \rightarrow \inf log(n) / n^{4} = 0 $

$\rightarrow f3 = O(f2) $

$ \lim n \rightarrow \inf f3/f4 = \lim n \rightarrow \inf log^{2}(n) / log^{4}(n) = 0 \rightarrow f3 = O(f4) $


$ \lim n \rightarrow \inf f5/f4 = \lim n \rightarrow \inf log^{4}(log(n)) / log^{4}(n) = 0 \rightarrow f5 = O(f4) $

$ \rightarrow f1 < f2 > f3 < f4 > f5 $

Comparing $f1 \& f3$

$ \lim n \rightarrow \inf f3/f1 = \lim n \rightarrow \inf log^{2}(n) / n^{4} = 0 \rightarrow f3 = O(f1) $

$ \rightarrow f3 < f1 < f2 \& f5 < f4 > f3 $

Comparing $f2 \& f5$

$ \lim n \rightarrow \inf f5/f2 = \lim n \rightarrow \inf log^{4}(log(n))/n^{4}log(n) = \lim n \rightarrow \inf log(log(n))/nlog(n)  = 0 \rightarrow f5 = O(f2) $

$ \rightarrow f3 < f1 < f2 \& f2 > f5 < f4 > f3 $

Comparing $f3 \& f5$

$ \lim n \rightarrow \inf f5/f3 = \lim n \rightarrow \inf log^{4}(log(n))/log^2(n) = \lim n \rightarrow \inf log(log(n))/log(n)  = 0 \rightarrow f5 = O(f3) $

$ \rightarrow f5 < f3 < f1 < f2 \& f4 > f3 \& f4 > f5 $

Comparing $f4 \& f1$

$ \lim n \rightarrow \inf f4/f1 = \lim n \rightarrow \inf log^{4}(n) / n^{4} = 0 \rightarrow f4 = O(f1) $

$ \rightarrow f5 < f3 < f4 < f1 < f2$

$f5, f3, f4, f1, f2$

\problempart  % Problem 1b)
Transform each function with logarithm and compare:

$f'1 = log(4^{4^{n}}) = 4^{n}log(4)$

$f'2 = log(4^{4^{n + 1}}) = 4^{n+1}log(4)$

$f'3 = log(5^{4^{n}}) = 4^{n}log(5)$

$f'4 = log(5^{4n}) = 4nlog(5)$

$f'5 = log(5^{5n})= 5nlog(5)$

By inspection:

$ f4 = O(f5), f1 = O(f2), f4 = O(f3), f1 = O(f3), f3 = O(f2), f5 = O(f3), f5 = O(f1), f4 = O(f1) $

$ f4 < f5, f1 < f2, f1 < f3, f3 < f2, f4 < f3, f5 < f3, f5 < f1, f4 < f1$

$ \rightarrow f4 < f5 < f1 < f3 < f2$

$ f4, f5, f1, f3, f2$

\problempart  Simply first then compare % Problem 1c)

$ f1 = \frac{n!}{4!(n-4)!} = O(n^{4}) $

$f2$ via Sterling's approximation can be $ln$'d $ln(f2) = \ln(n!) = n\ln(n) - n +O(\ln(n)) \simeq O(nln(n))$ Compare $ln(f2)$ as $O(nlnn)$ %$x! \simeq \sqrt{2 \pi x}\left(\frac{x}{e}\right)^x$

$ f2 = \frac{n!}{(n/4)!(n-n/4)!} = \frac{n!}{(n/4)!(3n/4)!}  = \frac{(n)(n-1)(n-2)*...*(3n/4)}{(n/4)(n/4 -1)(n/4 - 2)*...*(1)} = O(n^{n/4})$ Yet probably faster

$ f3 = 4n! = O(n^n)$

$ f4 = 4^{n/4} $

$ f5 = (n/4)^{n/4} $

Visually:

$ f2 < f1 < f3, f5 = O(f3), f1 = O(f5), f2 = O(f4) $

$ \rightarrow f2 < f1 < f5 < f3, f2 = O(f4) $

I'll compare f4 and f3. I wasn't sure, so I use L'Hospital's rule

$ \lim n \rightarrow \inf \frac{d}{dn} \frac{f4}{f3} = \lim n \rightarrow \inf \frac{d}{dn} \frac{4^{n/4}}{n^n} = \lim n \rightarrow \inf \frac{2^{n/2 -1}log(2)}{n^n(log(n) + 1)} = 0 $

$ \rightarrow f4 = O(f3) $

I still need to compare f4 and f5. I'll use L'Hospital's rule again.

$ \lim n \rightarrow \inf \frac{d}{dn} \frac{f5}{f4} = \lim n \rightarrow \inf \frac{d}{dn} \frac{(n/4)^{n/4}}{4^{n/4}} = \lim n \rightarrow \inf \frac{2^{-n/2 -2}n^{n/4}log(n/4 +1)}{2^{n/2 -1}log(2)} = 0 $

$ \rightarrow f5 = O(f4) $

Because the $2^{-n/2 -2}$ term goes to $0$.

$ \rightarrow f2 < f1 < f5 < f4 < f3 $

$ f2, f1, f5, f4, f3 $

\end{problemparts}

\problem  % Problem 2

\begin{problemparts}
\problempart
\begin{enumerate}
\item $\theta(n)$ Branch factor 1. N iterations of c work.
\item $O(n^2)$ Because summation over work on all levels from c to nc is $\frac{n*(n+1)}{2} = \frac{n^2 + n}{2} = O(n^2)$
\item $\theta(log(n))$ Because $log(n)$ levels with c work.
\item $O(n)$ Because with branching factor 2, work $c$ per node, bottom row does most work, $c*2^{height = log(n)} = cn = O(n)$
\item $\theta(nlog(n))$ Because there are $log(n)$ rows of $cn$ work per row.
\item $O(n^{log(3)})$ Because with branching factor 3, the bottom row does most work– $3^{height = log(n)}$ nodes of $c$ work $= c*n^{log(3)/log(2)} = O(n^{log(3)})$
\end{enumerate}
\problempart
\begin{enumerate}
\item $T(n) = T(n/2) + c$ Binary search has one subproblem, with half the nodes to visit. $O(log(n))$ tota.

%problem 1.2b.2
\item $T(n) = T(n/2) + c*log(n)$ For $nxn$ matrix, Instead of doing $c$ work for 1 comparison, I search the n-width row, $log(n)$ work per subproblem.
\end{enumerate}∫
\end{problemparts}

\problem  % Problem 3
\begin{problemparts}
\problempart The nieve solution is $O(n^4)$ with 4-nested loops. It holds a variable named $maxGain$ while (for abuy1 in A for first buy date (for asell1 $>$ abuy1 in A for first sell date (for abuy2 $>=$ asell1 in A for second buy date (for asell2 $>$ abuy2 in A for second sell date ( maxGain = A[asell1] - A[abuy1] + A[asell2] - A[abuy2] if A[asell1] - A[abuy1] + A[asell2] - A[abuy2] $>$ maxGain))))

\begin{verbatim}
ans = 0
for b0 in range(n):
  for s0 in range(b0,n):
    for b1 in range(s0,n):
      for s1 in range(b1,n):
        ans = max(ans, A[asell1] - A[abuy1] + A[asell2] - A[abuy2])
return ans
\end{verbatim}

\problempart
\end{problemparts}

\section*{Part B}
\problem  % Problem 4
\begin{problemparts}
\emph{Submit your implemented python script.}
\problempart
\problempart
\problempart
\problempart
Part A: $O(n)$
\begin{enumerate}
\item Create a new dictionary to maps words to array containing the word's count for each word list $O(m)$
  \begin{enumerate}
  \item Iterate through each word list $O(1)$

  \item Iterate through each word in list $O(n)$

  \item Add frequency of word in current iterating list's $wordList$ to the $word$'s dictionary entry, at the column for the current iterating list $O(1)$
  \end{enumerate}
  \item Get dot-product over dictionary's frequency values for each word $O(m)$
\end{enumerate}

Part B: $O(n)$
\begin{enumerate}
\item Create a new dictionary to maps words to array containing the word's count for each word list $O(m)$
  \begin{enumerate}
  \item Iterate through each word list $O(1)$

  \item Iterate through each word in list $O(n)$

  \item Add frequency of $word + nextWord$ in current iterating list's $wordList$ to the $word + nextWord$'s dictionary entry, at the column for the current iterating list $O(1)$
  \end{enumerate}
  \item Get dot-product over dictionary's frequency values for each word pair $O(m + n)$ $m$ if all pairs the same, up to $n$ if all pairs different
\end{enumerate}

Part c: $O(n + mlogm)$ $mlogn$ may be greater than n, for example if no words occur twice.

\begin{enumerate}
\item get the frequency of the words for each list $O(n)$

\item Sort lists of words by frequency $O(mlogm)$

\item Truncate sorted word lists to 50 words per list $O(1)$

\item Create a new dictionary to maps words to array containing the word's count for each word list $O(k)$
  \begin{enumerate}
  \item Iterate through each truncated word list $O(1)$

  \item Iterate through each word in list $O(k)$

  \item Add frequency of word in current iterating list's $wordList$ to the word's dictionary entry, at the column for the current iterating list $O(1)$
  \end{enumerate}
\item Get dot-product over dictionary's frequency values for each word $O(k)$
\end{enumerate}

\problempart

henry\_iv\_1 with
\begin{verbatim}
tempest doc_dist: 0.3929, pairs: 1.1059, dist_50: 0.3681
pirates doc_dist: 0.5333, pairs: 1.2576, dist_50: 0.5073
henry_iv_2 doc_dist: 0.3024, pairs: 0.9143, dist_50: 0.2901
\end{verbatim}

Conclusions:
$doc\_dist$ is the base case of accuracy. $doc\_dist\_50$ is almost as accurate, for a potential constant factor improvement in runtime. Pair distance is not a comparison method consistent with $doc\_dist$.

\end{problemparts}
\end{problems}

\end{document}