Skip to content

Commit

Permalink
ignore blacklisted
Browse files Browse the repository at this point in the history
  • Loading branch information
GreenWizard2015 committed Jun 28, 2024
1 parent a6272ea commit 0a4cb47
Showing 1 changed file with 22 additions and 0 deletions.
22 changes: 22 additions & 0 deletions scripts/preprocess-remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,17 @@ def saveSubset(filename, idx):
return len(testing), len(training)

def main(args):
# Load the optional blacklist (a JSON file); each entry is joined with '/' into a dataset key
blacklisted = []
if args.blacklist is not None:
with open(args.blacklist, 'r') as f:
blacklisted = json.load(f)
pass
blacklisted = set([
'/'.join(item)
for item in blacklisted
])
print(blacklisted)
stats = {
'placeId': [],
'userId': [],
Expand All @@ -204,6 +215,13 @@ def main(args):
if not (sid in stats['screenId']):
stats['screenId'].append(sid)
path = os.path.join(folder, placeId, userId, screenId)
# Skip this dataset if its key is blacklisted.
# The key is "userId/placeId/placeId/screenId": placeId appears twice because the real screen id is "placeId/screenId".
uuid = '/'.join([userId, placeId, placeId, screenId])
print(uuid)
if uuid in blacklisted:
print('Skipping blacklisted dataset:', path)
continue
testFramesN, trainFramesN = processFolder(
path,
args.time_delta, args.test_ratio, args.frames_per_chunk,
Expand Down Expand Up @@ -246,6 +264,10 @@ def main(args):
'--min-frames', type=int, default=0,
help='Minimum number of frames in a chunk'
)
parser.add_argument(
'--blacklist', type=str, default=None,
help='Path to the blacklist file'
)

args = parser.parse_args()
main(args)
Expand Down

0 comments on commit 0a4cb47

Please sign in to comment.