Skip to content

Commit

Permalink
Flag to ignore too small datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
GreenWizard2015 committed Jun 28, 2024
1 parent bcb125f commit a3ed9c0
Showing 1 changed file with 16 additions and 2 deletions.
18 changes: 16 additions & 2 deletions scripts/preprocess-remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,10 @@ def dropPadding(idx, padding):
print('Frames before: {}. Frames after: {}'.format(len(idx), len(res)))
return res

def processFolder(folder, timeDelta, testRatio, framesPerChunk, testPadding, skippedFrames):
def processFolder(
folder, timeDelta, testRatio, framesPerChunk, testPadding, skippedFrames,
minFrames
):
print('Processing', folder)
dataset = loadNpz(folder)
for k, v in dataset.items():
Expand Down Expand Up @@ -164,6 +167,11 @@ def saveSubset(filename, idx):
for fn in files:
os.remove(os.path.join(folder, fn))
print('Removed', len(files), 'files')

totalFrames = len(testing) + len(training)
if totalFrames < minFrames:
print('Not enough frames: %d < %d' % (totalFrames, minFrames))
return 0, 0
# save training and testing sets
saveSubset('train.npz', training)
saveSubset('test.npz', testing)
Expand Down Expand Up @@ -199,7 +207,8 @@ def main(args):
testFramesN, trainFramesN = processFolder(
path,
args.time_delta, args.test_ratio, args.frames_per_chunk,
args.test_padding, args.skipped_frames
args.test_padding, args.skipped_frames,
minFrames=args.min_frames
)
testFrames += testFramesN
trainFrames += trainFramesN
Expand Down Expand Up @@ -233,6 +242,11 @@ def main(args):
'--skipped-frames', type=str, default='train', choices=['train', 'test', 'drop'],
help='What to do with skipped frames ("train", "test", or "drop")'
)
parser.add_argument(
'--min-frames', type=int, default=0,
help='Minimum total number of frames (training + testing) a folder must have to be saved'
)

args = parser.parse_args()
main(args)
pass

0 comments on commit a3ed9c0

Please sign in to comment.