Skip to content

Commit

Permalink
make it faster
Browse files (browse the repository at this point in the history)
  • Loading branch information
Ryoichi Ando committed Aug 24, 2021
1 parent 136a1ce commit 1afc3c8
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 24 deletions.
38 changes: 19 additions & 19 deletions main.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,7 +8,7 @@
# Server Mode:
# > docker run -u $UID:$GID -v ${PWD}:/root -p 3600:3600 -ti --rm webpapers --server papers
#
import os, sys, configparser, subprocess, json, argparse, latexcodec, time, signal ,logging
import os, sys, configparser, subprocess, json, argparse, latexcodec, time, signal ,logging, json
import shutil, pikepdf, pdfdump, base64, nltk, secrets, re
from psutil import virtual_memory
from PIL import Image
Expand Down Expand Up @@ -722,8 +722,6 @@ def merge_files( key_from, key_to ):
file.write(data)
#
# Build paper references
data_0_js = 'data_0 = [];\n'
data_1_js = 'data_1 = [];\n'
data_map = {}
#
# Add search index
Expand All @@ -750,8 +748,8 @@ def merge_files( key_from, key_to ):
if w not in word_dictionary:
word_dictionary.add(w)
#
data_0_js = 'data_0 = [\n'
data_1_js = 'data_1 = [\n'
data_array = []
data_index = []
idx = 0
_print( 'Analyzing...' )
for dir,paper in tqdm(database.items()):
Expand Down Expand Up @@ -807,12 +805,15 @@ def merge_files( key_from, key_to ):
# console.log(int32View[i]);
# }
#
data_0_js += "[{}],\n".format(
','.join(['['+','.join([ str(y[0]) for y in x ])+']' for x in indices])
)
data_1_js += "[{}],\n".format(
','.join(['['+','.join([ str(y[1]) for y in x ])+']' for x in indices])
)
data_index.append(len(data_array))
data_array.append(len(indices))
for x in indices:
data_array.append(len(x))
for x in indices:
data_array.extend([ y[0] for y in x ])
for x in indices:
data_array.extend([ y[1] for y in x ])
#
data_map[dir] = idx
idx += 1
#
Expand All @@ -823,17 +824,16 @@ def merge_files( key_from, key_to ):
file.write(additional_words_data)
#
# Write word table
data_0_js += '];\n'
data_1_js += '];\n'
#
data_1_js += 'const data_map = {{ {} }};\n'.format(','.join([ f"'{x}' : {y}" for x,y in data_map.items()]) )
data_1_js += 'const word_table = {{\n{}\n}};\n'.format(',\n'.join([ f"'{x}' : {y}" for x,y in registered_words.items() ]))
data_1_js += 'let data_words = {};\n'
data_js = ''
data_js += 'const data_array = {};\n'.format(json.dumps(data_array))
data_js += 'const data_index = {};\n'.format(json.dumps(data_index))
data_js += 'const data_map = {{ {} }};\n'.format(','.join([ f"'{x}' : {y}" for x,y in data_map.items()]) )
data_js += 'const word_table = {{\n{}\n}};\n'.format(',\n'.join([ f"'{x}' : {y}" for x,y in registered_words.items() ]))
data_js += 'let data_words = {};\n'
#
# Generate Javascript file
with open(root+'/data.js','w') as file:
file.write(data_0_js)
file.write(data_1_js)
file.write(data_js)
#
papers_js = '''
const papers = {0};
Expand Down
32 changes: 27 additions & 5 deletions resources/search.js
Original file line number | Diff line number | Diff line change
Expand Up @@ -239,15 +239,37 @@ function search ( keywords, add_year, add_paper, add_snippet, param=null, import
//
let entries = [];
if( keywords_dict['word'].length ) {
for( let i=0; i<data_0[paper_idx].length; ++i ) {
let min = data_0[paper_idx][i].length;
let head = data_index[paper_idx];
const num_lines = data_array[head++];
const num_words = [];
for( let i=0; i<num_lines; ++i ) {
num_words.push(data_array[head++]);
}
data_0 = [];
for( let i=0; i<num_lines; ++i ) {
let line_0 = [];
for( let j=0; j<num_words[i]; ++j ) {
line_0.push(data_array[head++]);
}
data_0.push(line_0);
}
data_1 = [];
for( let i=0; i<num_lines; ++i ) {
let line_1 = [];
for( let j=0; j<num_words[i]; ++j ) {
line_1.push(data_array[head++]);
}
data_1.push(line_1);
}
for( let i=0; i<data_0.length; ++i ) {
let min = num_words[i];
let max = 0;
let positions = [];
for( const idx of keywords_dict['word'] ) {
let pos = -1;
for( let j=0; j<data_0[paper_idx][i].length; ++j ) {
if( data_0[paper_idx][i][j] == idx ) {
pos = data_1[paper_idx][i][j];
for( let j=0; j<data_0[i].length; ++j ) {
if( data_0[i][j] == idx ) {
pos = data_1[i][j];
break;
}
}
Expand Down

0 comments on commit 1afc3c8

Please sign in to comment.