Skip to content

Commit

Permalink
013: Fixed issues-folder creation, stream chunking
Browse files Browse the repository at this point in the history
  • Loading branch information
HHansi committed Mar 17, 2020
1 parent 050080b commit 68d819b
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 2 deletions.
5 changes: 4 additions & 1 deletion data_analysis/data_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from nltk.corpus import stopwords

from data_analysis.groundtruth_processor import extract_gt_tokens, generate_gt_string
from utils.file_utils import delete_create_folder
from utils.file_utils import delete_create_folder, create_folder_if_not_exist

en_stopwords = stopwords.words('english')

Expand Down Expand Up @@ -90,6 +90,9 @@ def preprocessing_flow(text):
# Pre-process the text contents of the given input file and save the result to the given output file.
# output_file_path format - [ID, timestamp, text], without column names
def preprocess_bulk(input_file_path, output_file_path):
# create the folder for the output file if it does not exist
create_folder_if_not_exist(output_file_path, is_file_path=True)

input_file = open(input_file_path, encoding='utf-8')
input_reader = csv.reader(input_file, delimiter='\t')

Expand Down
2 changes: 1 addition & 1 deletion webed/stream_chunker.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def filter_documents_by_time_bulk(from_time_str, to_time_str, time_duration, inp
to_time = datetime.strptime(to_time_str, '%Y_%m_%d_%H_%M_%S')

from_time_temp = from_time
while from_time_temp != to_time:
while from_time_temp > to_time:
to_time_temp = from_time_temp + timedelta(seconds=(60 * (time_duration - 1)) + 59)
to_time_str = to_time_temp.strftime('%Y_%m_%d_%H_%M_%S')

Expand Down

0 comments on commit 68d819b

Please sign in to comment.