Skip to content

Commit

Permalink
dij - done
Browse files Browse the repository at this point in the history
  • Loading branch information
Rohit Raj committed Dec 7, 2016
1 parent 0b2118d commit e1d1995
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 43 deletions.
56 changes: 28 additions & 28 deletions Task 1/task 2/cacm_query_bm25.txt
Original file line number Diff line number Diff line change
Expand Up @@ -961,8 +961,8 @@
10 Q0 2114 61 8.933420942120813 BM25
10 Q0 1811 62 8.824168421586908 BM25
10 Q0 1928 63 8.790059370535904 BM25
10 Q0 1325 64 8.761176858216748 BM25
10 Q0 2515 65 8.761176858216748 BM25
10 Q0 2515 64 8.761176858216748 BM25
10 Q0 1325 65 8.761176858216748 BM25
10 Q0 2316 66 8.744195515636914 BM25
10 Q0 1957 67 8.679543530353403 BM25
10 Q0 2967 68 8.654579458955302 BM25
Expand Down Expand Up @@ -1088,8 +1088,8 @@
11 Q0 1962 88 3.672411789203252 BM25
11 Q0 1188 89 3.6695177190295745 BM25
11 Q0 1871 90 3.6662320349286235 BM25
11 Q0 1553 91 3.6662320349286235 BM25
11 Q0 1720 92 3.6662320349286235 BM25
11 Q0 1720 91 3.6662320349286235 BM25
11 Q0 1553 92 3.6662320349286235 BM25
11 Q0 1183 93 3.660268120686652 BM25
11 Q0 2928 94 3.6592133590237266 BM25
11 Q0 3092 95 3.656089102603547 BM25
Expand Down Expand Up @@ -1189,9 +1189,9 @@
12 Q0 2868 89 3.4281452423950998 BM25
12 Q0 1264 90 3.3666483235833664 BM25
12 Q0 0825 91 3.315259365322609 BM25
12 Q0 1735 92 3.284791809829198 BM25
12 Q0 1657 92 3.284791809829198 BM25
12 Q0 0698 93 3.284791809829198 BM25
12 Q0 1657 94 3.284791809829198 BM25
12 Q0 1735 94 3.284791809829198 BM25
12 Q0 1698 95 3.284616411148752 BM25
12 Q0 3014 96 3.2532359567167526 BM25
12 Q0 2681 97 3.1883210953608865 BM25
Expand Down Expand Up @@ -1823,8 +1823,8 @@
19 Q0 0141 23 4.201575113323372 BM25
19 Q0 3059 24 4.196158412636333 BM25
19 Q0 2401 25 4.194464849479076 BM25
19 Q0 0392 26 4.191401811764946 BM25
19 Q0 1302 27 4.191401811764946 BM25
19 Q0 1302 26 4.191401811764946 BM25
19 Q0 0392 27 4.191401811764946 BM25
19 Q0 2896 28 4.161182311510507 BM25
19 Q0 1795 29 4.14203215570717 BM25
19 Q0 2895 30 4.132221017675189 BM25
Expand All @@ -1849,8 +1849,8 @@
19 Q0 2740 49 3.790165006301353 BM25
19 Q0 1536 50 3.7652255025826333 BM25
19 Q0 0320 51 3.735916484810322 BM25
19 Q0 1325 52 3.7012230540103737 BM25
19 Q0 2515 53 3.7012230540103737 BM25
19 Q0 2515 52 3.7012230540103737 BM25
19 Q0 1325 53 3.7012230540103737 BM25
19 Q0 3073 54 3.670554564661063 BM25
19 Q0 1603 55 3.661715609655276 BM25
19 Q0 2967 56 3.643994845360717 BM25
Expand Down Expand Up @@ -1888,8 +1888,8 @@
19 Q0 2674 88 3.0266995321967283 BM25
19 Q0 2902 89 3.0244787013928565 BM25
19 Q0 2007 90 3.0231448287825318 BM25
19 Q0 1952 91 3.0127602341381743 BM25
19 Q0 1342 92 3.0127602341381743 BM25
19 Q0 1342 91 3.0127602341381743 BM25
19 Q0 1952 92 3.0127602341381743 BM25
19 Q0 1660 93 3.0127602341381743 BM25
19 Q0 2904 94 3.0125187047373783 BM25
19 Q0 2273 95 3.0072236650958737 BM25
Expand Down Expand Up @@ -2373,9 +2373,9 @@
24 Q0 3127 73 3.5024363109781658 BM25
24 Q0 2580 74 3.499443145499547 BM25
24 Q0 2307 75 3.499443145499547 BM25
24 Q0 1383 76 3.4983114188007494 BM25
24 Q0 1631 77 3.4983114188007494 BM25
24 Q0 2836 78 3.4983114188007494 BM25
24 Q0 2836 76 3.4983114188007494 BM25
24 Q0 1383 77 3.4983114188007494 BM25
24 Q0 1631 78 3.4983114188007494 BM25
24 Q0 1308 79 3.4964589123312764 BM25
24 Q0 1458 80 3.493483558319704 BM25
24 Q0 1008 81 3.49083739465026 BM25
Expand All @@ -2384,20 +2384,20 @@
24 Q0 1554 84 3.4651096391728364 BM25
24 Q0 1385 85 3.458453165780282 BM25
24 Q0 2953 86 3.457878811336067 BM25
24 Q0 1707 87 3.443572144892962 BM25
24 Q0 2900 88 3.443572144892962 BM25
24 Q0 2900 87 3.443572144892962 BM25
24 Q0 1707 88 3.443572144892962 BM25
24 Q0 1809 89 3.440027232426237 BM25
24 Q0 2185 90 3.4364948419874826 BM25
24 Q0 0095 91 3.4364948419874826 BM25
24 Q0 0298 92 3.432974885422526 BM25
24 Q0 2844 93 3.432974885422526 BM25
24 Q0 2844 92 3.432974885422526 BM25
24 Q0 0298 93 3.432974885422526 BM25
24 Q0 0278 94 3.4301703143561015 BM25
24 Q0 2795 95 3.4218391245733057 BM25
24 Q0 2717 96 3.4190774144088585 BM25
24 Q0 2062 97 3.4086761245654276 BM25
24 Q0 3072 98 3.4026654299226142 BM25
24 Q0 1145 99 3.3891915766566902 BM25
24 Q0 2019 100 3.388308455844605 BM25
24 Q0 2000 100 3.388308455844605 BM25
25 Q0 2318 1 12.025830790163194 BM25
25 Q0 2812 2 11.161505283035435 BM25
25 Q0 2268 3 10.810061287446935 BM25
Expand Down Expand Up @@ -4945,16 +4945,16 @@
50 Q0 2882 45 6.297088229624087 BM25
50 Q0 1236 46 6.283117023274475 BM25
50 Q0 2846 47 6.268359385178184 BM25
50 Q0 0275 48 6.213277712687243 BM25
50 Q0 0651 49 6.213277712687243 BM25
50 Q0 0651 48 6.213277712687243 BM25
50 Q0 0275 49 6.213277712687243 BM25
50 Q0 2070 50 6.203232221148504 BM25
50 Q0 1675 51 6.185956589628219 BM25
50 Q0 2561 52 6.165243679188006 BM25
50 Q0 0239 53 6.153747945988366 BM25
50 Q0 2160 54 6.130320411126592 BM25
50 Q0 0292 55 6.124634602874985 BM25
50 Q0 1711 56 6.1071980064242455 BM25
50 Q0 2795 57 6.1071980064242455 BM25
50 Q0 2795 56 6.1071980064242455 BM25
50 Q0 1711 57 6.1071980064242455 BM25
50 Q0 0440 58 6.103583613411731 BM25
50 Q0 1527 59 6.087055076562048 BM25
50 Q0 1725 60 6.080064185523548 BM25
Expand Down Expand Up @@ -5202,8 +5202,8 @@
53 Q0 2210 2 19.47349358908773 BM25
53 Q0 2229 3 19.180231035227695 BM25
53 Q0 2986 4 18.882197903160876 BM25
53 Q0 0514 5 18.75519095243858 BM25
53 Q0 0569 6 18.75519095243858 BM25
53 Q0 0569 5 18.75519095243858 BM25
53 Q0 0514 6 18.75519095243858 BM25
53 Q0 0139 7 18.739734546300696 BM25
53 Q0 2391 8 18.418407765313 BM25
53 Q0 0679 9 17.50500974676375 BM25
Expand Down Expand Up @@ -5272,8 +5272,8 @@
53 Q0 1961 72 7.630529580441003 BM25
53 Q0 1649 73 7.508601277590296 BM25
53 Q0 3204 74 7.491941766249837 BM25
53 Q0 1365 75 7.4898856414567625 BM25
53 Q0 1543 76 7.4898856414567625 BM25
53 Q0 1543 75 7.4898856414567625 BM25
53 Q0 1365 76 7.4898856414567625 BM25
53 Q0 2149 77 7.471609825980542 BM25
53 Q0 2220 78 7.455073894865669 BM25
53 Q0 2710 79 7.449058474509419 BM25
Expand Down
72 changes: 57 additions & 15 deletions Task 1/task 2/task2.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
# Module-level lookup tables shared by the loading and scoring functions.
# NOTE(review): dl_map is presumably filled by make_dl_map (document -> length)
# and query_map by load_queries -- confirm against those functions.
dl_map = {}
query_map = {}

# doc id -> running sum of squared term weights, accumulated in
# load_inverted_index; calculate_dij divides by its square root.
score_square_denom = {}
# doc id -> {term: term frequency}; the per-document view of the inverted
# index, built in load_inverted_index.
non_inverted_index = {}
# doc id -> {term: normalised weight}; filled by calculate_dij.
doc_term_score = {}

# NOTE(review): these look like the usual BM25 tuning parameters (k1, k2, b);
# their use is not visible in this part of the file -- confirm.
k1 = 1.2
k2 = 100.0
b = 0.75
Expand Down Expand Up @@ -43,18 +47,51 @@ def clean_text(text):
return text

def load_inverted_index():
    """Read ``inverted_index.txt`` and populate the module-level index tables.

    Every non-blank line is stripped of the characters ``( , ) > -`` and split
    on whitespace; the first token is the term and the remaining tokens are
    read as alternating document-id / term-frequency pairs.

    For each term the function

    * resets ``inverted_index[term]`` and stores the raw (string) frequency
      of every posting under its document id,
    * passes ``(doc, int(tf))`` to ``make_dl_map``,
    * adds the squared weight ``((log(tf) + 1) * idf) ** 2`` to
      ``score_square_denom[doc]``,
    * records the integer frequency in ``non_inverted_index[doc][term]``.

    ``idf`` is ``log(3204 / df)`` where ``df`` is the number of documents in
    the term's posting list, i.e. the same value ``calculate_dij`` derives
    from ``len(inverted_index[term])``.

    Raises:
        IOError/OSError: if ``inverted_index.txt`` cannot be opened.
        IndexError: if a document id is not followed by a frequency token.
        ValueError: if a frequency token is not an integer, or is not
            positive (``math.log`` rejects it).
    """
    # Presumably the number of documents in the collection -- TODO confirm.
    # Kept as a float so the ratio below is never truncated by integer
    # division.
    total_docs = 3204.0

    # The context manager guarantees the file is closed, even on error.
    with open("inverted_index.txt", "r") as file_h:
        for entry in file_h:
            data = re.sub("[(,)>-]", "", entry).split()
            if not data:
                # Blank line: nothing to index.
                continue

            term = data[0]
            postings = {}
            inverted_index[term] = postings

            # Tokens 1, 3, 5, ... are document ids; each is followed by its
            # term frequency.
            for x in range(1, len(data), 2):
                doc = data[x]
                postings[doc] = data[x + 1]
                make_dl_map(doc, int(data[x + 1]))

            if not postings:
                continue

            # Document frequency is the number of postings, not the number of
            # tokens on the line.
            idf = math.log(total_docs / len(postings))

            for doc, raw_tf in postings.items():
                fik = int(raw_tf)

                # Accumulate the squared term weight used later as the
                # normalisation denominator for this document.
                norm_denom = math.pow((math.log(fik) + 1) * idf, 2)
                score_square_denom[doc] = score_square_denom.get(doc, 0) + norm_denom

                # Build the document -> {term: frequency} view.
                non_inverted_index.setdefault(doc, {})[term] = fik


def calculate_dij():
    """Fill ``doc_term_score`` with a normalised weight per (document, term).

    For every document in ``non_inverted_index`` and every term it contains,
    the weight is::

        ((log(fik) + 1) * idf) / sqrt(score_square_denom[doc])

    where ``fik`` is the stored term frequency and
    ``idf = log(3204 / len(inverted_index[term]))``.

    A document whose accumulated denominator is zero gets a weight of ``0.0``
    for each of its terms instead of raising ``ZeroDivisionError``.

    Raises:
        KeyError: if a document is missing from ``score_square_denom`` or a
            term is missing from ``inverted_index``.
        ValueError: if a stored frequency is not positive.
    """
    # Presumably the number of documents in the collection -- TODO confirm.
    # Float so the ratio is never truncated by integer division.
    total_docs = 3204.0

    # idf depends only on the term, so compute it once per term rather than
    # once per (document, term) pair.
    idf_by_term = {}

    for doc, term_freqs in non_inverted_index.items():
        norm = math.sqrt(score_square_denom[doc])
        weights = {}
        for term, raw_fik in term_freqs.items():
            if term not in idf_by_term:
                idf_by_term[term] = math.log(total_docs / len(inverted_index[term]))
            fik = float(raw_fik)
            score_numer = (math.log(fik) + 1) * idf_by_term[term]
            # Guard the division: a zero norm would otherwise crash the run.
            weights[term] = score_numer / norm if norm else 0.0
        doc_term_score[doc] = weights


def load_queries():
with open("cacm.queries.txt") as query_list:
Expand Down Expand Up @@ -126,8 +163,12 @@ def write_results_to_file(score_map):
file_h.close()


def main():
def bm25():
load_inverted_index()

calculate_dij()
print(doc_term_score['0001'])

load_queries()

avdl = calculate_avdl()
Expand All @@ -137,8 +178,9 @@ def main():
write_results_to_file(score_map)


main()

bm25()





Expand Down

0 comments on commit e1d1995

Please sign in to comment.