Skip to content

Commit

Permalink
Idaho Camera Traps
Browse files Browse the repository at this point in the history
  • Loading branch information
agentmorris committed Jul 29, 2021
1 parent bc2e1fe commit de6d313
Showing 1 changed file with 91 additions and 17 deletions.
108 changes: 91 additions & 17 deletions data_management/importers/idaho-camera-traps.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from multiprocessing.pool import Pool as Pool
from multiprocessing.pool import ThreadPool as ThreadPool
n_threads = 14
n_threads_file_copy = 1
n_threads_file_copy = 20

input_base = r'i:\idfg-images'
output_base = r'h:\idaho-camera-traps'
Expand Down Expand Up @@ -535,7 +535,7 @@ def csv_to_sequences(csv_file):
sequence_ids = set()

# sequences = sequences_by_file[0]
for sequences in tqdm(sequences_by_file):
for i_sequences,sequences in enumerate(tqdm(sequences_by_file)):

assert len(sequences) > 0
csv_source = sequences[0]['csv_source']
Expand All @@ -544,11 +544,16 @@ def csv_to_sequences(csv_file):
assert os.path.isfile(csv_file_absolute)

# sequence = sequences[0]
for sequence in sequences:
for i_sequence,sequence in enumerate(sequences):

assert sequence['csv_source'] == csv_source
sequence_id = sequence['sequence_id']
assert sequence_id not in sequence_ids
if sequence_id in sequence_ids:
print('Warning: duplicate sequence for {}, creating new sequence'.format(sequence_id))
sequence['sequence_id'] = sequence['sequence_id'] + '_' + str(i_sequences) + '_' + str(i_sequence)
sequence_id = sequence['sequence_id']
assert sequence_id not in sequence_ids

sequence_ids.add(sequence_id)

species_present = sequence['species_present']
Expand Down Expand Up @@ -807,7 +812,7 @@ def csv_to_sequences(csv_file):

info = {}
info['contributor'] = 'Idaho Department of Fish and Game'
info['description'] = 'Idaho Camera traps'
info['description'] = 'Idaho Camera traps'
info['version'] = '2021.07.19'

output_data = {}
Expand Down Expand Up @@ -1084,8 +1089,10 @@ def csv_to_sequences(csv_file):
ann['id'] = annotation_id_mappings[ann['id']]
ann['image_id'] = image_id_mappings[ann['image_id']]

print('Applied mappings')


#%% Write new dictionaries
#%% Write new dictionaries (modified strings, original files)

output_data = {}
output_data['images'] = images
Expand All @@ -1097,7 +1104,7 @@ def csv_to_sequences(csv_file):
json.dump(output_data,f,indent=2)


#%% Validate .json file (original files)
#%% Validate .json file (modified strings, original files)

from data_management.databases import sanity_check_json_db

Expand Down Expand Up @@ -1199,19 +1206,37 @@ def process_image(im):
#%% Copy images to final output folder (execution)

# Copy every image to its final output location, then write the updated
# database. Runs serially when n_threads_file_copy == 1 (easier to debug),
# otherwise fans out over a thread pool.
#
# Depends on names defined earlier in this script: images, process_image,
# n_threads_file_copy, ThreadPool, output_json, d.

if n_threads_file_copy == 1:
    # Serial path; handy for debugging (e.g. im = images[0])
    for im in tqdm(images):
        process_image(im)
else:
    # NB: map(), not imap() — imap() is lazy and would return immediately
    # without copying anything unless the iterator were consumed.
    pool = ThreadPool(n_threads_file_copy)
    try:
        pool.map(process_image, images)
    finally:
        # Shut the pool down explicitly; the original leaked the worker threads
        pool.close()
        pool.join()

print('Finished copying, writing .json output')

# Write output .json
with open(output_json, 'w') as f:
    json.dump(d, f, indent=1)


#%% Make sure the right number of images got there

from pathlib import Path

# Recursively enumerate every file under the output image folder, storing
# paths relative to output_image_base, and snapshot the list to a .json file
# next to the other outputs.
all_output_files_list = os.path.join(output_base, 'all_output_files.json')
all_output_files = [
    os.path.relpath(str(p), output_image_base)
    for p in Path(output_image_base).rglob('*.*')
]

with open(all_output_files_list, 'w') as f:
    json.dump(all_output_files, f, indent=1)

print('Enumerated {} output files (of {} images)'.format(
    len(all_output_files), len(images)))


#%% Validate .json file (final filenames)

from data_management.databases import sanity_check_json_db
Expand All @@ -1230,7 +1255,7 @@ def process_image(im):
from visualization import visualize_db

viz_options = visualize_db.DbVizOptions()
viz_options.num_to_visualize = None
viz_options.num_to_visualize = 1500
viz_options.trim_to_images_with_bboxes = False
viz_options.add_search_links = False
viz_options.sort_by_filename = False
Expand All @@ -1241,20 +1266,69 @@ def process_image(im):
viz_options.classes_to_include = ['bear','mountain lion']
# viz_options.classes_to_include = ['horse']
# viz_options.classes_to_include = [viz_options.multiple_categories_tag]
# viz_options.classes_to_include = ['domestic dog']
# viz_options.classes_to_include = ['human','vehicle','domestic dog']

html_output_file, _ = visualize_db.process_images(db_path=output_json,
output_dir=os.path.join(
output_base,'final-preview'),
output_base,'final-preview-01'),
image_base_dir=output_image_base_public,
options=viz_options)
os.startfile(html_output_file)

#%% Find a thumbnail

#%% Create zipfiles

## List public files

# Enumerate the publicly-releasable image files, caching the result in a
# .json file so re-runs of this cell don't repeat the (slow) recursive glob.
from pathlib import Path
all_public_output_files = []
all_public_output_files_list = os.path.join(output_base,'all_public_output_files.json')

if not os.path.isfile(all_public_output_files_list):
    # First run: walk the public folder and cache the file list
    for path in Path(output_image_base_public).rglob('*.*'):
        path = str(path)
        # NOTE(review): relpath is taken against output_image_base (not
        # output_image_base_public), presumably so each name keeps its
        # 'public/...' prefix — the zip cell below asserts exactly that.
        # TODO confirm output_image_base_public sits under output_image_base.
        path = os.path.relpath(path,output_image_base)
        all_public_output_files.append(path)
    with open(all_public_output_files_list,'w') as f:
        json.dump(all_public_output_files,f,indent=1)
else:
    # Cached list exists; reuse it rather than re-enumerating
    with open(all_public_output_files_list,'r') as f:
        all_public_output_files = json.load(f)

print('Enumerated {} public output files (of {} total)'.format(len(all_public_output_files),len(all_output_files)))

#%% Split into chunks of approximately-equal size

# Number of zip archives to produce
n_parts = 6

# np.array_split yields n_parts chunks whose lengths differ by at most one;
# convert the resulting numpy arrays back to plain Python lists
file_lists = np.array_split(all_public_output_files,n_parts)
file_lists = [list(x) for x in file_lists]

# Sanity check: the split neither lost nor duplicated any filenames
assert sum([len(l) for l in file_lists]) == len(all_public_output_files)

# Re-load the final database from disk (d is reused by the zip cell below)
with open(output_json,'r') as f:
    d = json.load(f)

#%%
#%% Create a zipfile for each chunk

# NOTE(review): 'categories' is not used inside this cell; presumably kept
# for a later cell or interactive inspection — confirm before removing.
categories = d['categories']
from zipfile import ZipFile
import zipfile
import os

# Write one uncompressed .zip archive per chunk of the public file list.
# i_file_list = 0; file_list = file_lists[i_file_list]
for i_file_list,file_list in enumerate(file_lists):

    print('Processing archive {}'.format(i_file_list))
    # Hard-coded output drive for the archive files
    zipfile_name = os.path.join('k:\\idaho-camera-traps-images.part{}.zip'.format(i_file_list))

    with ZipFile(zipfile_name, 'w') as zipObj:

        # filename_relative is relative to output_image_base, e.g. 'public/...'
        for filename_relative in tqdm(file_list):

            assert filename_relative.startswith('public')
            filename_absolute = os.path.join(output_image_base,filename_relative)
            # ZIP_STORED = no compression (images are presumably already
            # compressed, so deflating them would cost time for little gain).
            # The backslash-to-slash replacement normalizes the Windows path
            # built by os.path.join — TODO confirm it's needed on this setup.
            zipObj.write(filename_absolute.replace('\\','/'), filename_relative, compress_type=zipfile.ZIP_STORED)

        # ...for each filename

# with ZipFile()

# ...for each list of files

0 comments on commit de6d313

Please sign in to comment.