-
Notifications
You must be signed in to change notification settings - Fork 0
/
solution_template.tex
executable file
·321 lines (217 loc) · 9.65 KB
/
solution_template.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
%
% 6.006 problem set 1 solutions template
%
\documentclass[12pt,twoside]{article}
\usepackage{amsmath}
\usepackage{color}
\usepackage{clrscode}
\input{macros}
\setlength{\oddsidemargin}{0pt}
\setlength{\evensidemargin}{0pt}
\setlength{\textwidth}{6.5in}
\setlength{\topmargin}{0in}
\setlength{\textheight}{8.5in}
\newcommand{\theproblemsetnum}{1}
\newcommand{\releasedate}{Thursday, September 17}
\newcommand{\partaduedate}{February 18th, 2016}
\newcommand{\tabUnit}{3ex}
\newcommand{\tabT}{\hspace*{\tabUnit}}
\title{6.006 PSET 1}
\begin{document}
\handout{Problem Set \theproblemsetnum}{February 18, 2016}
\textbf{All parts are due {\bf \partaduedate} at {\bf 11:59PM}}.
\setlength{\parindent}{0pt}
\medskip
\hrulefill
\medskip
{\bf Name:} Alex List
\medskip
{\bf Collaborators:} /
\medskip
\hrulefill
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% See below for common and useful latex constructs. %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Some useful commands:
%$f(x) = \Theta(x)$
%$T(x, y) \leq \log(x) + 2^y + \binom{2n}{n}$
% {\tt code\_function}
% You can create unnumbered lists as follows:
%\begin{itemize}
% \item First item in a list
% \begin{itemize}
% \item First item in a list
% \begin{itemize}
% \item First item in a list
% \item Second item in a list
% \end{itemize}
% \item Second item in a list
% \end{itemize}
% \item Second item in a list
%\end{itemize}
% You can create numbered lists as follows:
%\begin{enumerate}
% \item First item in a list
% \item Second item in a list
% \item Third item in a list
%\end{enumerate}
% You can write aligned equations as follows:
%\begin{align}
% \begin{split}
% (x+y)^3 &= (x+y)^2(x+y) \\
% &= (x^2+2xy+y^2)(x+y) \\
% &= (x^3+2x^2y+xy^2) + (x^2y+2xy^2+y^3) \\
% &= x^3+3x^2y+3xy^2+y^3
% \end{split}
%\end{align}
% You can create grids/matrices as follows:
%\begin{align}
% A =
% \begin{bmatrix}
% A_{11} & A_{21} \\
% A_{21} & A_{22}
% \end{bmatrix}
%\end{align}
\begin{problems}
\section*{Part A}
\problem % Problem 1
\begin{problemparts}
\problempart % Problem 1a)
$f2 = log(4n^{n^{4}})=n^{4}log(n)$
$f2>f1 \rightarrow f1 = O(f2)$
$f3 = 4log(4n)log(n) \simeq log(n)log(n) = log^{2}(n)$
$ \lim n \rightarrow \inf f3/f2 = \lim n \rightarrow \inf log^{2}(n) / n^{4}log(n) = \lim n \rightarrow \inf log(n) / n^{4} = 0 $
$\rightarrow f3 = O(f2) $
$ \lim n \rightarrow \inf f3/f4 = \lim n \rightarrow \inf log^{2}(n) / log^{4}(n) = 0 \rightarrow f3 = O(f4) $
$ \lim n \rightarrow \inf f5/f4 = \lim n \rightarrow \inf log^{4}(log(n)) / log^{4}(n) = 0 \rightarrow f5 = O(f4) $
$ \rightarrow f1 < f2 > f3 < f4 > f5 $
Comparing $f1 \& f3$
$ \lim n \rightarrow \inf f3/f1 = \lim n \rightarrow \inf log^{2}(n) / n^{4} = 0 \rightarrow f3 = O(f1) $
$ \rightarrow f3 < f1 < f2 \& f5 < f4 > f3 $
Comparing $f2 \& f5$
$ \lim n \rightarrow \inf f5/f2 = \lim n \rightarrow \inf log^{4}(log(n))/n^{4}log(n) = \lim n \rightarrow \inf log(log(n))/nlog(n) = 0 \rightarrow f5 = O(f2) $
$ \rightarrow f3 < f1 < f2 \& f2 > f5 < f4 > f3 $
Comparing $f3 \& f5$
$ \lim n \rightarrow \inf f5/f3 = \lim n \rightarrow \inf log^{4}(log(n))/log^2(n) = \lim n \rightarrow \inf log(log(n))/log(n) = 0 \rightarrow f5 = O(f3) $
$ \rightarrow f5 < f3 < f1 < f2 \& f4 > f3 \& f4 > f5 $
Comparing $f4 \& f1$
$ \lim n \rightarrow \inf f4/f1 = \lim n \rightarrow \inf log^{4}(n) / n^{4} = 0 \rightarrow f4 = O(f1) $
$ \rightarrow f5 < f3 < f4 < f1 < f2$
$f5, f3, f4, f1, f2$
\problempart % Problem 1b)
Transform each function with logarithm and compare:
$f'1 = log(4^{4^{n}}) = 4^{n}log(4)$
$f'2 = log(4^{4^{n + 1}}) = 4^{n+1}log(4)$
$f'3 = log(5^{4^{n}}) = 4^{n}log(5)$
$f'4 = log(5^{4n}) = 4nlog(5)$
$f'5 = log(5^{5n})= 5nlog(5)$
By inspection:
$ f4 = O(f5), f1 = O(f2), f4 = O(f3), f1 = O(f3), f3 = O(f2), f5 = O(f3), f5 = O(f1), f4 = O(f1) $
$ f4 < f5, f1 < f2, f1 < f3, f3 < f2, f4 < f3, f5 < f3, f5 < f1, f4 < f1$
$ \rightarrow f4 < f5 < f1 < f3 < f2$
$ f4, f5, f1, f3, f2$
\problempart Simply first then compare % Problem 1c)
$ f1 = \frac{n!}{4!(n-4)!} = O(n^{4}) $
$f2$ via Sterling's approximation can be $ln$'d $ln(f2) = \ln(n!) = n\ln(n) - n +O(\ln(n)) \simeq O(nln(n))$ Compare $ln(f2)$ as $O(nlnn)$ %$x! \simeq \sqrt{2 \pi x}\left(\frac{x}{e}\right)^x$
$ f2 = \frac{n!}{(n/4)!(n-n/4)!} = \frac{n!}{(n/4)!(3n/4)!} = \frac{(n)(n-1)(n-2)*...*(3n/4)}{(n/4)(n/4 -1)(n/4 - 2)*...*(1)} = O(n^{n/4})$ Yet probably faster
$ f3 = 4n! = O(n^n)$
$ f4 = 4^{n/4} $
$ f5 = (n/4)^{n/4} $
Visually:
$ f2 < f1 < f3, f5 = O(f3), f1 = O(f5), f2 = O(f4) $
$ \rightarrow f2 < f1 < f5 < f3, f2 = O(f4) $
I'll compare f4 and f3. I wasn't sure, so I use L'Hospital's rule
$ \lim n \rightarrow \inf \frac{d}{dn} \frac{f4}{f3} = \lim n \rightarrow \inf \frac{d}{dn} \frac{4^{n/4}}{n^n} = \lim n \rightarrow \inf \frac{2^{n/2 -1}log(2)}{n^n(log(n) + 1)} = 0 $
$ \rightarrow f4 = O(f3) $
I still need to compare f4 and f5. I'll use L'Hospital's rule again.
$ \lim n \rightarrow \inf \frac{d}{dn} \frac{f5}{f4} = \lim n \rightarrow \inf \frac{d}{dn} \frac{(n/4)^{n/4}}{4^{n/4}} = \lim n \rightarrow \inf \frac{2^{-n/2 -2}n^{n/4}log(n/4 +1)}{2^{n/2 -1}log(2)} = 0 $
$ \rightarrow f5 = O(f4) $
Because the $2^{-n/2 -2}$ term goes to $0$.
$ \rightarrow f2 < f1 < f5 < f4 < f3 $
$ f2, f1, f5, f4, f3 $
\end{problemparts}
\problem % Problem 2
\begin{problemparts}
\problempart
\begin{enumerate}
\item $\theta(n)$ Branch factor 1. N iterations of c work.
\item $O(n^2)$ Because summation over work on all levels from c to nc is $\frac{n*(n+1)}{2} = \frac{n^2 + n}{2} = O(n^2)$
\item $\theta(log(n))$ Because $log(n)$ levels with c work.
\item $O(n)$ Because with branching factor 2, work $c$ per node, bottom row does most work, $c*2^{height = log(n)} = cn = O(n)$
\item $\theta(nlog(n))$ Because there are $log(n)$ rows of $cn$ work per row.
\item $O(n^{log(3)})$ Because with branching factor 3, the bottom row does most work– $3^{height = log(n)}$ nodes of $c$ work $= c*n^{log(3)/log(2)} = O(n^{log(3)})$
\end{enumerate}
\problempart
\begin{enumerate}
\item $T(n) = T(n/2) + c$ Binary search has one subproblem, with half the nodes to visit. $O(log(n))$ tota.
%problem 1.2b.2
\item $T(n) = T(n/2) + c*log(n)$ For $nxn$ matrix, Instead of doing $c$ work for 1 comparison, I search the n-width row, $log(n)$ work per subproblem.
\end{enumerate}∫
\end{problemparts}
\problem % Problem 3
\begin{problemparts}
\problempart The nieve solution is $O(n^4)$ with 4-nested loops. It holds a variable named $maxGain$ while (for abuy1 in A for first buy date (for asell1 $>$ abuy1 in A for first sell date (for abuy2 $>=$ asell1 in A for second buy date (for asell2 $>$ abuy2 in A for second sell date ( maxGain = A[asell1] - A[abuy1] + A[asell2] - A[abuy2] if A[asell1] - A[abuy1] + A[asell2] - A[abuy2] $>$ maxGain))))
\begin{verbatim}
ans = 0
for b0 in range(n):
for s0 in range(b0,n):
for b1 in range(s0,n):
for s1 in range(b1,n):
ans = max(ans, A[asell1] - A[abuy1] + A[asell2] - A[abuy2])
return ans
\end{verbatim}
\problempart
\end{problemparts}
\section*{Part B}
\problem % Problem 4
\begin{problemparts}
\emph{Submit your implemented python script.}
\problempart
\problempart
\problempart
\problempart
Part A: $O(n)$
\begin{enumerate}
\item Create a new dictionary to maps words to array containing the word's count for each word list $O(m)$
\begin{enumerate}
\item Iterate through each word list $O(1)$
\item Iterate through each word in list $O(n)$
\item Add frequency of word in current iterating list's $wordList$ to the $word$'s dictionary entry, at the column for the current iterating list $O(1)$
\end{enumerate}
\item Get dot-product over dictionary's frequency values for each word $O(m)$
\end{enumerate}
Part B: $O(n)$
\begin{enumerate}
\item Create a new dictionary to maps words to array containing the word's count for each word list $O(m)$
\begin{enumerate}
\item Iterate through each word list $O(1)$
\item Iterate through each word in list $O(n)$
\item Add frequency of $word + nextWord$ in current iterating list's $wordList$ to the $word + nextWord$'s dictionary entry, at the column for the current iterating list $O(1)$
\end{enumerate}
\item Get dot-product over dictionary's frequency values for each word pair $O(m + n)$ $m$ if all pairs the same, up to $n$ if all pairs different
\end{enumerate}
Part c: $O(n + mlogm)$ $mlogn$ may be greater than n, for example if no words occur twice.
\begin{enumerate}
\item get the frequency of the words for each list $O(n)$
\item Sort lists of words by frequency $O(mlogm)$
\item Truncate sorted word lists to 50 words per list $O(1)$
\item Create a new dictionary to maps words to array containing the word's count for each word list $O(k)$
\begin{enumerate}
\item Iterate through each truncated word list $O(1)$
\item Iterate through each word in list $O(k)$
\item Add frequency of word in current iterating list's $wordList$ to the word's dictionary entry, at the column for the current iterating list $O(1)$
\end{enumerate}
\item Get dot-product over dictionary's frequency values for each word $O(k)$
\end{enumerate}
\problempart
henry\_iv\_1 with
\begin{verbatim}
tempest doc_dist: 0.3929, pairs: 1.1059, dist_50: 0.3681
pirates doc_dist: 0.5333, pairs: 1.2576, dist_50: 0.5073
henry_iv_2 doc_dist: 0.3024, pairs: 0.9143, dist_50: 0.2901
\end{verbatim}
Conclusions:
$doc\_dist$ is the base case of accuracy. $doc\_dist\_50$ is almost as accurate, for a potential constant factor improvement in runtime. Pair distance is not a comparison method consistent with $doc\_dist$.
\end{problemparts}
\end{problems}
\end{document}