from .dojo_test_case import DojoTestCase
from dojo.models import Finding, User, Product, Endpoint, Endpoint_Status, Test, Engagement
from dojo.models import System_Settings
from django.conf import settings
from crum import impersonate
import unittest
import logging
logger = logging.getLogger(__name__)
deduplicationLogger = logging.getLogger("dojo.specific-loggers.deduplication")
# things to consider:
# - cross scanner deduplication is still flaky: if some scanners don't provide a severity but another does, the hash_code will be different so no deduplication happens.
#   so I couldn't create any good tests
# - hash_code is only calculated once and never changed. should we add a feature to run dedupe when somebody modifies a finding? bulk edit action to trigger dedupe?
#   -> this is handled by the dedupe.py script, which currently suffers from stability issues
# - deduplication is using the default ordering for findings, so most of the time this means a new finding will be marked as duplicate of the most recent existing finding
#   that matches the criteria. I think it would be better to consider the oldest existing findings first? Otherwise we have the chance that an old finding becomes
#   marked as duplicate of a newer one at some point.
# - legacy: if file_path and line are both empty and there are no endpoints, no dedupe will happen. Is this desirable or a BUG?
#   -> this is just one of the many limitations of the legacy algorithm.
#      For non-standard parsers, it's advised to use the deduplication configuration to finely tune which fields should be used
# - DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL_OR_HASH_CODE should:
#   - try to match on unique_id first before falling back to hash_code. Currently it just takes the first finding it can find
#     that matches either the hash_code or the unique_id (see the sketch below).
#     -> that is an interesting improvement to consider
#   - if the unique_id does NOT match, the finding is still considered for dedupe if the hash_code matches. We may need to forbid this, as the unique_id should be leading for the same test_type
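

# A minimal illustration (not DefectDojo code) of the "unique_id first, then hash_code" matching
# order suggested above for DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL_OR_HASH_CODE. The helper name and its
# 'candidates' argument are hypothetical and exist only to make the intended ordering explicit;
# it is not used by the tests below.
def _sketch_match_unique_id_then_hash_code(new_finding, candidates):
    # prefer an exact unique_id_from_tool match among the candidate findings
    for existing in candidates:
        if existing.unique_id_from_tool and existing.unique_id_from_tool == new_finding.unique_id_from_tool:
            return existing
    # fall back to hash_code only when no unique_id match was found
    for existing in candidates:
        if existing.hash_code and existing.hash_code == new_finding.hash_code:
            return existing
    return None
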
# test data summary
# product 1: Python How-to
# engagement 2: April monthly engagement (dedupe_inside: True)
# test 13: ZAP Scan (algo=hash_code, dynamic=True)
# no findings
# endpoints
# 2: ftp://localhost/
# 1: http://127.0.0.1/endpoint/420/edit/
# 3: ssh:127.0.1
# endpoint statuses
# 1: dojo.Endpoint.None dojo.Finding.None 1 2020-07-01 00:00:00+00:00 2020-07-01 17:45:39.791907+00:00 False None None False False False ftp://localhost/ High Impact Test Finding
# product 2: Security How-to
# engagement 1: 1st Quarter Engagement (dedupe_inside: True)
# test 3: ZAP Scan (algo=hash_code, dynamic=True)
# findings:
# 2 : "High Impact Test Fin": High : act: True : ver: True : mit: False: dup: False: dup_id: None: hash_code: 5d368a051fdec959e08315a32ef633ba5711bed6e8e75319ddee2cab4d4608c7: eps: 0: notes: []: uid: None
# 3 : "High Impact Test Fin": High : act: True : ver: True : mit: False: dup: True : dup_id: 2 : hash_code: 5d368a051fdec959e08315a32ef633ba5711bed6e8e75319ddee2cab4d4608c7: eps: 0: notes: []: uid: None
# 4 : "High Impact Test Fin": High : act: True : ver: True : mit: False: dup: True : dup_id: 2 : hash_code: 5d368a051fdec959e08315a32ef633ba5711bed6e8e75319ddee2cab4d4608c7: eps: 0: notes: []: uid: None
# 5 : "High Impact Test Fin": High : act: True : ver: True : mit: False: dup: True : dup_id: 2 : hash_code: 5d368a051fdec959e08315a32ef633ba5711bed6e8e75319ddee2cab4d4608c7: eps: 0: notes: []: uid: None
# 6 : "High Impact Test Fin": High : act: True : ver: True : mit: False: dup: True : dup_id: 2 : hash_code: 5d368a051fdec959e08315a32ef633ba5711bed6e8e75319ddee2cab4d4608c7: eps: 0: notes: []: uid: None
# 7 : "DUMMY FINDING ": High : act: False: ver: False: mit: False: dup: False: dup_id: None: hash_code: c89d25e445b088ba339908f68e15e3177b78d22f3039d1bfea51c4be251bf4e0: eps: 0: notes: [1]: uid: None
# endpoints
# 2: ftp://localhost/
# 1: http://127.0.0.1/endpoint/420/edit/
# 3: ssh:127.0.1
# endpoint statuses
# 1: dojo.Endpoint.None dojo.Finding.None 1 2020-07-01 00:00:00+00:00 2020-07-01 17:45:39.791907+00:00 False None None False False False ftp://localhost/ High Impact Test Finding
# test 14: ZAP Scan (algo=hash_code, dynamic=True)
# no findings
# endpoints
# 2: ftp://localhost/
# 1: http://127.0.0.1/endpoint/420/edit/
# 3: ssh:127.0.1
# endpoint statuses
# 1: dojo.Endpoint.None dojo.Finding.None 1 2020-07-01 00:00:00+00:00 2020-07-01 17:45:39.791907+00:00 False None None False False False ftp://localhost/ High Impact Test Finding
# engagement 4: April monthly engagement (dedupe_inside: True)
# test 4: ZAP Scan (algo=hash_code, dynamic=True)
# no findings
# endpoints
# 2: ftp://localhost/
# 1: http://127.0.0.1/endpoint/420/edit/
# 3: ssh:127.0.1
# endpoint statuses
# 1: dojo.Endpoint.None dojo.Finding.None 1 2020-07-01 00:00:00+00:00 2020-07-01 17:45:39.791907+00:00 False None None False False False ftp://localhost/ High Impact Test Finding
# engagement 5: April monthly engagement (dedupe_inside: True)
# test 55: Checkmarx Scan detailed (algo=unique_id_from_tool, dynamic=False)
# findings:
# 124 : "Low Impact Test Find": Low : act: True : ver: True : mit: False: dup: False: dup_id: None: hash_code: 9aca00affd340c4da02c934e7e3106a45c6ad0911da479daae421b3b28a2c1aa: eps: 0: notes: []: uid: 12345
# 125 : "Low Impact Test Find": Low : act: True : ver: True : mit: False: dup: True : dup_id: None: hash_code: 9aca00affd340c4da02c934e7e3106a45c6ad0911da479daae421b3b28a2c1aa: eps: 0: notes: []: uid: 12345
# endpoints
# 2: ftp://localhost/
# 1: http://127.0.0.1/endpoint/420/edit/
# 3: ssh:127.0.1
# endpoint statuses
# 1: dojo.Endpoint.None dojo.Finding.None 1 2020-07-01 00:00:00+00:00 2020-07-01 17:45:39.791907+00:00 False None None False False False ftp://localhost/ High Impact Test Finding
# test 66: Checkmarx Scan detailed (algo=unique_id_from_tool, dynamic=False)
# no findings
# endpoints
# 2: ftp://localhost/
# 1: http://127.0.0.1/endpoint/420/edit/
# 3: ssh:127.0.1
# endpoint statuses
# 1: dojo.Endpoint.None dojo.Finding.None 1 2020-07-01 00:00:00+00:00 2020-07-01 17:45:39.791907+00:00 False None None False False False ftp://localhost/ High Impact Test Finding
# test 77: Veracode Scan (algo=unique_id_from_tool_or_hash_code, dynamic=False)
# findings:
# 224 : "UID Impact Test Find": Low : act: True : ver: True : mit: False: dup: False: dup_id: None: hash_code: 6f8d0bf970c14175e597843f4679769a4775742549d90f902ff803de9244c7e1: eps: 0: notes: []: uid: 6789
# 225 : "UID Impact Test Find": Low : act: True : ver: True : mit: False: dup: True : dup_id: 224 : hash_code: 6f8d0bf970c14175e597843f4679769a4775742549d90f902ff803de9244c7e1: eps: 0: notes: []: uid: 6789
# endpoints
# 2: ftp://localhost/
# 1: http://127.0.0.1/endpoint/420/edit/
# 3: ssh:127.0.1
# endpoint statuses
# 1: dojo.Endpoint.None dojo.Finding.None 1 2020-07-01 00:00:00+00:00 2020-07-01 17:45:39.791907+00:00 False None None False False False ftp://localhost/ High Impact Test Finding
# test 88: Veracode Scan (algo=unique_id_from_tool_or_hash_code, dynamic=False)
# no findings
# endpoints
# 2: ftp://localhost/
# 1: http://127.0.0.1/endpoint/420/edit/
# 3: ssh:127.0.1
# endpoint statuses
# 1: dojo.Endpoint.None dojo.Finding.None 1 2020-07-01 00:00:00+00:00 2020-07-01 17:45:39.791907+00:00 False None None False False False ftp://localhost/ High Impact Test Finding
# engagement 6: April monthly engagement (dedupe_inside: True)
# engagement 3: weekly engagement (dedupe_inside: True)
# test 33: Xanitizer Scan Findings Import (algo=legacy, dynamic=False)
# findings:
# 22 : "Low Impact Test Find": Low : act: True : ver: True : mit: False: dup: False: dup_id: None: hash_code: 9aca00affd340c4da02c934e7e3106a45c6ad0911da479daae421b3b28a2c1aa: eps: 0: notes: []: uid: None
# 23 : "Low Impact Test Find": Low : act: True : ver: True : mit: False: dup: True : dup_id: 22 : hash_code: 9aca00affd340c4da02c934e7e3106a45c6ad0911da479daae421b3b28a2c1aa: eps: 0: notes: []: uid: None
# 24 : "Low Impact Test Find": Low : act: True : ver: True : mit: False: dup: True : dup_id: 22 : hash_code: 9aca00affd340c4da02c934e7e3106a45c6ad0911da479daae421b3b28a2c1aa: eps: 0: notes: []: uid: None
# endpoints
# 2: ftp://localhost/
# 1: http://127.0.0.1/endpoint/420/edit/
# 3: ssh:127.0.1
# endpoint statuses
# 1: dojo.Endpoint.None dojo.Finding.None 1 2020-07-01 00:00:00+00:00 2020-07-01 17:45:39.791907+00:00 False None None False False False ftp://localhost/ High Impact Test Finding
# product 3: Security Podcast
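
# For context: the per-scanner deduplication algorithms listed in the summary above are driven by
# the Django settings. The dict below is only an illustrative sketch of that mapping (the actual
# setting name and constants may differ); it is not used by the tests.
EXAMPLE_DEDUPLICATION_ALGORITHM_PER_SCANNER = {
    'ZAP Scan': 'hash_code',
    'Checkmarx Scan detailed': 'unique_id_from_tool',
    'Veracode Scan': 'unique_id_from_tool_or_hash_code',
    'Xanitizer Scan Findings Import': 'legacy',
}
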
class TestDuplicationLogic(DojoTestCase):
fixtures = ['dojo_testdata.json']
def run(self, result=None):
testuser = User.objects.get(username='admin')
testuser.usercontactinfo.block_execution = True
testuser.save()
# unit tests are running without any user, which will result in actions like dedupe happening in the celery process
# this doesn't work in unittests as unittests are using an in memory sqlite database and celery can't see the data
# so we're running the test under the admin user context and set block_execution to True
with impersonate(testuser):
super().run(result)
def setUp(self):
logger.debug('enabling deduplication')
self.enable_dedupe()
self.log_summary()
def tearDown(self):
        # some tests disable dedupe, always re-enable it
self.enable_dedupe()
self.log_summary()
# self.log_summary(test=33)
# self.log_summary(product=2)
# all engagements in the test data have deduplication_on_engagement set to true
    # legacy algo: findings 22, 23, 24 in test 33 are scan_type Generic Findings Import which uses the legacy algo
def test_identical_legacy(self):
# 24 is already a duplicate of 22 let's see what happens if we create an identical finding (but reset status)
# expect: marked as duplicate
finding_new, finding_24 = self.copy_and_reset_finding(id=24)
finding_new.save(dedupe_option=True)
self.assert_finding(finding_new, not_pk=24, duplicate=True, duplicate_finding_id=finding_24.duplicate_finding.id, hash_code=finding_24.hash_code)
def test_identical_ordering_legacy(self):
finding_22 = Finding.objects.get(id=22)
        # 23 is already a duplicate of 22, but let's reset its status. then create a new finding and see if it gets marked as duplicate of 22 or 23
# expect: marked as duplicate of 22 as lowest finding_id should be chosen as original
finding_23 = Finding.objects.get(id=23)
finding_23.duplicate = False
finding_23.duplicate_finding = None
finding_23.active = True
finding_23.save(dedupe_option=False)
self.assert_finding(finding_23, duplicate=False, hash_code=finding_22.hash_code)
# create a copy of 22
finding_new, finding_22 = self.copy_and_reset_finding(id=22)
finding_new.save()
self.assert_finding(finding_new, not_pk=22, duplicate=True, duplicate_finding_id=finding_22.id, hash_code=finding_22.hash_code)
# self.assert_finding(finding_new, not_pk=22, duplicate=True, duplicate_finding_id=finding_23.id, hash_code=finding_22.hash_code)
def test_identical_except_title_legacy(self):
# 24 is already a duplicate of 22, let's see what happens if we create an identical finding with different title (and reset status)
# expect: NOT marked as duplicate as title is part of hash_code calculation
finding_new, finding_4 = self.copy_and_reset_finding(id=4)
finding_new.title = 'the best title'
finding_new.save(dedupe_option=True)
self.assert_finding(finding_new, not_pk=24, duplicate=False, not_hash_code=finding_4.hash_code)
def test_identical_except_description_legacy(self):
# 24 is already a duplicate of 22, let's see what happens if we create an identical finding with different description (and reset status)
# expect: not marked as duplicate as legacy sees description as leading for hash_code
finding_new, finding_24 = self.copy_and_reset_finding(id=24)
finding_new.description = 'useless finding'
finding_new.save(dedupe_option=True)
self.assert_finding(finding_new, not_pk=24, duplicate=False, not_hash_code=finding_24.hash_code)
def test_identical_except_line_legacy(self):
# 24 is already a duplicate of 22, let's see what happens if we create an identical finding with different line (and reset status)
# expect: not marked as duplicate
finding_new, finding_24 = self.copy_and_reset_finding(id=24)
finding_new.line = 666
finding_new.save(dedupe_option=True)
self.assert_finding(finding_new, not_pk=24, duplicate=False, not_hash_code=finding_24.hash_code)
def test_identical_except_filepath_legacy(self):
# 24 is already a duplicate of 22, let's see what happens if we create an identical finding with different file_path (and reset status)
# expect: not marked as duplicate
finding_new, finding_24 = self.copy_and_reset_finding(id=24)
finding_new.file_path = '/dev/null'
finding_22 = Finding.objects.get(id=22)
finding_new.save(dedupe_option=True)
self.assert_finding(finding_new, not_pk=24, duplicate=False, not_hash_code=finding_24.hash_code)
def test_dedupe_inside_engagement_legacy(self):
        # finding 22 is in test 33, engagement 3 (legacy algorithm)
        # make a copy and store it in a new test in a new engagement in the same product
        # should not result in being marked as duplicate as it crosses engagement boundaries
finding_new, finding_22 = self.copy_and_reset_finding(id=22)
        # create new engagement + test in same product
test_new, eng_new = self.create_new_test_and_engagment_from_finding(finding_22)
finding_new.test = test_new
finding_new.save(dedupe_option=True)
self.assert_finding(finding_new, not_pk=22, duplicate=False, hash_code=finding_22.hash_code)
def test_dedupe_not_inside_engagement_legacy(self):
        # finding 22 is in test 33, engagement 3 (legacy algorithm)
        # make a copy and store it in a new test in a new engagement in the same product
        # should result in being marked as duplicate as dedupe inside engagement is set to False
finding_new, finding_22 = self.copy_and_reset_finding(id=22)
        # dedupe_inside_engagement must be false before cloning the engagement
self.set_dedupe_inside_engagement(False)
        # create new engagement + test in same product
test_new, eng_new = self.create_new_test_and_engagment_from_finding(finding_22)
finding_new.test = test_new
finding_new.save(dedupe_option=True)
self.assert_finding(finding_new, not_pk=22, duplicate=True, duplicate_finding_id=22, hash_code=finding_22.hash_code)
    # legacy: if file_path and line are both empty and there are no endpoints, no dedupe will happen. Is this desirable or a BUG?
def test_identical_no_filepath_no_line_no_endpoints_legacy(self):
finding_new, finding_22 = self.copy_and_reset_finding(id=22)
finding_new.file_path = None
finding_new.line = None
finding_new.save(dedupe_option=True)
self.assert_finding(finding_new, not_pk=22, duplicate=False)
def test_identical_legacy_with_identical_endpoints_static(self):
finding_new, finding_24 = self.copy_and_reset_finding_add_endpoints(id=24, static=True, dynamic=False) # has myhost.com, myhost2.com
finding_new.save()
# create an identical copy of the new finding with the same endpoints. it should be marked as duplicate
finding_new2, finding_new = self.copy_and_reset_finding(id=finding_new.id)
finding_new2.save(dedupe_option=False)
ep1 = Endpoint(product=finding_new2.test.engagement.product, finding=finding_new2, host="myhost.com", protocol="https")
ep1.save()
ep2 = Endpoint(product=finding_new2.test.engagement.product, finding=finding_new2, host="myhost2.com", protocol="https")
ep2.save()
finding_new2.endpoints.add(ep1)
finding_new2.endpoints.add(ep2)
finding_new2.save()
self.assert_finding(finding_new2, not_pk=finding_new.pk, duplicate=True, duplicate_finding_id=finding_new.id, hash_code=finding_new.hash_code, not_hash_code=finding_24.hash_code)
def test_identical_legacy_extra_endpoints_static(self):
finding_new, finding_24 = self.copy_and_reset_finding_add_endpoints(id=24, static=True, dynamic=False) # has myhost.com, myhost2.com
finding_new.save()
# create a new finding with 3 endpoints (so 1 extra)
finding_new3, finding_new = self.copy_and_reset_finding(id=finding_new.id)
finding_new3.save(dedupe_option=False)
ep1 = Endpoint(product=finding_new3.test.engagement.product, finding=finding_new3, host="myhost.com", protocol="https")
ep1.save()
ep2 = Endpoint(product=finding_new3.test.engagement.product, finding=finding_new3, host="myhost2.com", protocol="https")
ep2.save()
ep3 = Endpoint(product=finding_new3.test.engagement.product, finding=finding_new3, host="myhost3.com", protocol="https")
ep3.save()
finding_new3.endpoints.add(ep1)
finding_new3.endpoints.add(ep2)
finding_new3.endpoints.add(ep3)
finding_new3.save()
# expect: marked as duplicate as the requirement for static findings is that the new finding has to contain all the endpoints of the existing finding (extra is no problem)
# hash_code not affected by endpoints
self.assert_finding(finding_new3, not_pk=finding_new.pk, duplicate=True, duplicate_finding_id=finding_new.id, hash_code=finding_new.hash_code, not_hash_code=finding_24.hash_code)
def test_identical_legacy_different_endpoints_static(self):
finding_new, finding_24 = self.copy_and_reset_finding_add_endpoints(id=24, static=True, dynamic=False) # has myhost.com, myhost2.com
finding_new.save()
# create an identical copy of the new finding, but with different endpoints
finding_new3, finding_new = self.copy_and_reset_finding(id=finding_new.id)
finding_new3.save(dedupe_option=False)
ep1 = Endpoint(product=finding_new3.test.engagement.product, finding=finding_new3, host="myhost4.com", protocol="https")
ep1.save()
ep2 = Endpoint(product=finding_new3.test.engagement.product, finding=finding_new3, host="myhost2.com", protocol="https")
ep2.save()
finding_new3.endpoints.add(ep1)
finding_new3.endpoints.add(ep2)
finding_new3.save()
# expect: not marked as duplicate as the requirement for static findings is that the new finding has to contain all the endpoints of the existing finding and this is not met
# hash_code not affected by endpoints
self.assert_finding(finding_new3, not_pk=finding_new.pk, duplicate=False, hash_code=finding_new.hash_code, not_hash_code=finding_24.hash_code)
def test_identical_legacy_no_endpoints_static(self):
finding_new, finding_24 = self.copy_and_reset_finding_add_endpoints(id=24, static=True, dynamic=False) # has myhost.com, myhost2.com
finding_new.save()
        # create an identical copy of the new finding, but without any endpoints. should not be marked as duplicate
finding_new3, finding_new = self.copy_and_reset_finding(id=finding_new.id)
finding_new3.save(dedupe_option=False)
finding_new3.save()
# expect not marked as duplicate as the new finding doesn't have endpoints and we don't have filepath/line
self.assert_finding(finding_new3, not_pk=finding_new.pk, duplicate=False, hash_code=finding_new.hash_code, not_hash_code=finding_24.hash_code)
def test_identical_legacy_with_identical_endpoints_dynamic(self):
finding_new, finding_24 = self.copy_and_reset_finding_add_endpoints(id=24, static=True, dynamic=False) # has myhost.com, myhost2.com
finding_new.save()
# create an identical copy of the new finding. it should be marked as duplicate
finding_new2, finding_new = self.copy_and_reset_finding(id=finding_new.id)
finding_new2.save(dedupe_option=False)
ep1 = Endpoint(product=finding_new2.test.engagement.product, finding=finding_new2, host="myhost.com", protocol="https")
ep1.save()
ep2 = Endpoint(product=finding_new2.test.engagement.product, finding=finding_new2, host="myhost2.com", protocol="https")
ep2.save()
finding_new2.endpoints.add(ep1)
finding_new2.endpoints.add(ep2)
finding_new2.save()
self.assert_finding(finding_new2, not_pk=finding_new.pk, duplicate=True, duplicate_finding_id=finding_new.id, hash_code=finding_new.hash_code, not_hash_code=finding_24.hash_code)
def test_identical_legacy_extra_endpoints_dynamic(self):
finding_new, finding_24 = self.copy_and_reset_finding_add_endpoints(id=24)
finding_new.save()
# create an identical copy of the new finding, but with 1 extra endpoint.
finding_new3, finding_new = self.copy_and_reset_finding(id=finding_new.id)
finding_new3.save(dedupe_option=False)
ep1 = Endpoint(product=finding_new3.test.engagement.product, finding=finding_new3, host="myhost.com", protocol="https")
ep1.save()
ep2 = Endpoint(product=finding_new3.test.engagement.product, finding=finding_new3, host="myhost2.com", protocol="https")
ep2.save()
ep3 = Endpoint(product=finding_new3.test.engagement.product, finding=finding_new3, host="myhost3.com", protocol="https")
ep3.save()
finding_new3.endpoints.add(ep1)
finding_new3.endpoints.add(ep2)
finding_new3.endpoints.add(ep3)
finding_new3.save()
# expect: marked as duplicate as hash_code is not affected by endpoints anymore with the legacy algorithm
self.assert_finding(finding_new3, not_pk=finding_new.pk, duplicate=True, hash_code=finding_new.hash_code)
def test_identical_legacy_different_endpoints_dynamic(self):
# this test is using the pattern currently in use in the import / serializers.py.
        # - save the finding first with dedupe_option=False
        # - add endpoints
        # - save the finding again with endpoints attached, dedupe_option=True (default) -> hash_code gets computed
# create a new finding with 3 endpoints (so 1 extra)
# expect: not marked as duplicate as endpoints need to be 100% equal for dynamic findings (host+port)
# hash_code not affected by endpoints
finding_new, finding_24 = self.copy_and_reset_finding_add_endpoints(id=24)
finding_new.save()
        # create an identical copy of the new finding, but with different endpoints (myhost4.com instead of myhost.com). should not be marked as duplicate
finding_new3, finding_new = self.copy_and_reset_finding(id=finding_new.id)
finding_new3.save(dedupe_option=False)
ep1 = Endpoint(product=finding_new3.test.engagement.product, finding=finding_new3, host="myhost4.com", protocol="https")
ep1.save()
ep2 = Endpoint(product=finding_new3.test.engagement.product, finding=finding_new3, host="myhost2.com", protocol="https")
ep2.save()
finding_new3.endpoints.add(ep1)
finding_new3.endpoints.add(ep2)
finding_new3.save()
# expected: hash_code is not affected by endpoints anymore in legacy algorithm
# but not duplicate because the legacy dedupe algo examines not only hash_code but endpoints too
self.assert_finding(finding_new3, not_pk=finding_new.pk, duplicate=False, hash_code=finding_new.hash_code)
def test_identical_legacy_no_endpoints_dynamic(self):
finding_new, finding_24 = self.copy_and_reset_finding_add_endpoints(id=24)
finding_new.save()
# create an identical copy of the new finding, but with no endpoints
finding_new3, finding_new = self.copy_and_reset_finding(id=finding_new.id)
finding_new3.save(dedupe_option=False)
finding_new3.save()
        # expect: hash_code matches, as endpoints don't affect hash_code with the legacy algorithm,
        # but not marked as duplicate because the legacy dedupe algo examines not only hash_code but endpoints too
self.assert_finding(finding_new3, not_pk=finding_new.pk, duplicate=False, hash_code=finding_new.hash_code)
# hash_code based algorithm tests
# existing findings in test 3 are from ZAP scanner, which uses hash_code algorithm with ['title', 'cwe', 'endpoints', 'severity']
def test_identical_hash_code(self):
# 4 is already a duplicate of 2, let's see what happens if we create an identical finding (but reset status)
# 2 has an endpoint ftp://localhost, 4 has no endpoint
# expect: marked as duplicate
finding_new, finding_4 = self.copy_and_reset_finding(id=4)
finding_new.save(dedupe_option=True)
if (settings.DEDUPE_ALGO_ENDPOINT_FIELDS == []):
# expect duplicate, as endpoints shouldn't affect dedupe
self.assert_finding(finding_new, not_pk=4, duplicate=True, duplicate_finding_id=finding_4.duplicate_finding.id, hash_code=finding_4.hash_code)
else:
self.assert_finding(finding_new, not_pk=4, duplicate=False, duplicate_finding_id=None, hash_code=finding_4.hash_code)
finding_new, finding_2 = self.copy_with_endpoints_without_dedupe_and_reset_finding(id=2)
finding_new.save(dedupe_option=True)
self.assert_finding(finding_new, not_pk=2, duplicate=True, duplicate_finding_id=finding_4.duplicate_finding.id, hash_code=finding_2.hash_code)
def test_identical_ordering_hash_code(self):
dedupe_algo_endpoint_fields = settings.DEDUPE_ALGO_ENDPOINT_FIELDS
settings.DEDUPE_ALGO_ENDPOINT_FIELDS = []
finding_2 = Finding.objects.get(id=2)
        # 3 is already a duplicate of 2, but let's reset its status. then create a new finding and see if it gets marked as duplicate of 2 or 3
# expect: marked as duplicate of 2 as lowest finding_id should be chosen as original
finding_3 = Finding.objects.get(id=3)
finding_3.duplicate = False
finding_3.duplicate_finding = None
finding_3.active = True
finding_3.save(dedupe_option=False)
self.assert_finding(finding_3, duplicate=False, hash_code=finding_2.hash_code)
# create a copy of 2
finding_new, finding_2 = self.copy_and_reset_finding(id=2)
finding_new.save()
self.assert_finding(finding_new, not_pk=2, duplicate=True, duplicate_finding_id=finding_2.id, hash_code=finding_2.hash_code)
# self.assert_finding(finding_new, not_pk=2, duplicate=True, duplicate_finding_id=finding_3.id, hash_code=finding_2.hash_code)
# reset for further tests
settings.DEDUPE_ALGO_ENDPOINT_FIELDS = dedupe_algo_endpoint_fields
def test_identical_except_title_hash_code(self):
# 4 is already a duplicate of 2, let's see what happens if we create an identical finding with different title (and reset status)
# expect: NOT marked as duplicate as title is part of hash_code calculation
finding_new, finding_4 = self.copy_and_reset_finding(id=4)
finding_new.title = 'the best title'
finding_new.save(dedupe_option=True)
self.assert_finding(finding_new, not_pk=4, duplicate=False, not_hash_code=finding_4.hash_code)
def test_identical_except_description_hash_code(self):
# 4 is already a duplicate of 2, let's see what happens if we create an identical finding with different description (and reset status)
# 2 has an endpoint ftp://localhost, 4 has no endpoint
# expect: marked as duplicate
finding_new, finding_4 = self.copy_and_reset_finding(id=4)
finding_new.description = 'useless finding'
finding_new.save(dedupe_option=True)
if (settings.DEDUPE_ALGO_ENDPOINT_FIELDS == []):
# expect duplicate, as endpoints shouldn't affect dedupe
self.assert_finding(finding_new, not_pk=4, duplicate=True, duplicate_finding_id=finding_4.duplicate_finding.id, hash_code=finding_4.hash_code)
else:
self.assert_finding(finding_new, not_pk=4, duplicate=False, duplicate_finding_id=None, hash_code=finding_4.hash_code)
finding_new, finding_2 = self.copy_with_endpoints_without_dedupe_and_reset_finding(id=2)
finding_new.save(dedupe_option=True)
self.assert_finding(finding_new, not_pk=2, duplicate=True, duplicate_finding_id=finding_4.duplicate_finding.id, hash_code=finding_2.hash_code)
    # TODO: not useful with ZAP?
def test_identical_except_line_hash_code(self):
# 4 is already a duplicate of 2, let's see what happens if we create an identical finding with different line (and reset status)
# 2 has an endpoint ftp://localhost, 4 has no endpoint
# expect: marked as duplicate
finding_new, finding_4 = self.copy_and_reset_finding(id=4)
finding_new.line = 666
finding_new.save(dedupe_option=True)
if (settings.DEDUPE_ALGO_ENDPOINT_FIELDS == []):
# expect duplicate, as endpoints shouldn't affect dedupe
self.assert_finding(finding_new, not_pk=4, duplicate=True, duplicate_finding_id=finding_4.duplicate_finding.id, hash_code=finding_4.hash_code)
else:
self.assert_finding(finding_new, not_pk=4, duplicate=False, duplicate_finding_id=None, hash_code=finding_4.hash_code)
finding_new, finding_2 = self.copy_with_endpoints_without_dedupe_and_reset_finding(id=2)
finding_new.line = 666
finding_new.save(dedupe_option=True)
self.assert_finding(finding_new, not_pk=2, duplicate=True, duplicate_finding_id=finding_4.duplicate_finding.id, hash_code=finding_2.hash_code)
    # TODO: not useful with ZAP?
def test_identical_except_filepath_hash_code(self):
# 4 is already a duplicate of 2, let's see what happens if we create an identical finding with different file_path (and reset status)
# expect: marked as duplicate
finding_new, finding_4 = self.copy_and_reset_finding(id=4)
finding_new.file_path = '/dev/null'
finding_new.save(dedupe_option=True)
if (settings.DEDUPE_ALGO_ENDPOINT_FIELDS == []):
# expect duplicate, as endpoints shouldn't affect dedupe
self.assert_finding(finding_new, not_pk=4, duplicate=True, duplicate_finding_id=finding_4.duplicate_finding.id, hash_code=finding_4.hash_code)
else:
self.assert_finding(finding_new, not_pk=4, duplicate=False, duplicate_finding_id=None, hash_code=finding_4.hash_code)
finding_new, finding_2 = self.copy_with_endpoints_without_dedupe_and_reset_finding(id=2)
finding_new.file_path = '/dev/null'
finding_new.save(dedupe_option=True)
self.assert_finding(finding_new, not_pk=2, duplicate=True, duplicate_finding_id=finding_4.duplicate_finding.id, hash_code=finding_2.hash_code)
def test_dedupe_inside_engagement_hash_code(self):
# finding 2 in engagement 1
# make a copy and store it in engagement 2, test 4
# should not result in being marked as duplicate as it crosses engagement boundaries
# both test 3 and 4 are ZAP scans (cross scanner dedupe is still not working very well)
finding_new, finding_2 = self.copy_with_endpoints_without_dedupe_and_reset_finding(id=2)
finding_new.test = Test.objects.get(id=4)
finding_new.save(dedupe_option=True)
self.assert_finding(finding_new, not_pk=2, duplicate=False, hash_code=finding_2.hash_code)
def test_dedupe_not_inside_engagement_hash_code(self):
# finding 2 in engagement 1
# make a copy and store it in engagement 2, test 4
# should result in being marked as duplicate as dedupe inside engagement is set to False
# both test 3 and 4 are ZAP scans (cross scanner dedupe is still not working very well)
self.set_dedupe_inside_engagement(False)
finding_new, finding_2 = self.copy_with_endpoints_without_dedupe_and_reset_finding(id=2)
finding_new.test = Test.objects.get(id=4)
finding_new.save(dedupe_option=True)
self.assert_finding(finding_new, not_pk=2, duplicate=True, duplicate_finding_id=2, hash_code=finding_2.hash_code)
    # hash_code: if file_path and line are both empty and there are no endpoints, dedupe should happen (as opposed to legacy dedupe)
@unittest.skip("Test is not valid because finding 2 has an endpoint.")
def test_identical_no_filepath_no_line_no_endpoints_hash_code(self):
finding_new, finding_2 = self.copy_and_reset_finding(id=2)
finding_new.file_path = None
finding_new.line = None
finding_new.save(dedupe_option=True)
self.assert_finding(finding_new, not_pk=2, duplicate=True, duplicate_finding_id=2, hash_code=finding_2.hash_code)
def test_identical_hash_code_with_identical_endpoints(self):
# create an identical copy of the new finding, with the same endpoints
finding_new, finding_2 = self.copy_with_endpoints_without_dedupe_and_reset_finding(id=2) # has ftp://localhost
finding_new.save(dedupe_option=True)
# expect: marked as duplicate of original finding 2 (because finding 4 is a duplicate of finding 2 in sample data), hash_code not affected by endpoints (endpoints are not anymore in ZAP configuration for hash_code)
self.assert_finding(finding_new, not_pk=finding_2.pk, duplicate=True, duplicate_finding_id=2, hash_code=finding_2.hash_code, not_hash_code=None)
def test_dedupe_algo_endpoint_fields_host_port_identical(self):
dedupe_algo_endpoint_fields = settings.DEDUPE_ALGO_ENDPOINT_FIELDS
settings.DEDUPE_ALGO_ENDPOINT_FIELDS = ["host", "port"]
# create an identical copy of the new finding, with the same endpoints but different path
finding_new, finding_2 = self.copy_and_reset_finding(id=2) # finding_2 has host ftp://localhost
finding_new.save()
ep = Endpoint(product=finding_new.test.engagement.product, finding=finding_new, host="localhost", protocol="ftp", path="local")
ep.save()
finding_new.endpoints.add(ep)
finding_new.save()
# expect: marked as duplicate of original finding 2 (because finding 4 is a duplicate of finding 2 in sample data), hash_code not affected by endpoints (endpoints are not anymore in ZAP configuration for hash_code)
self.assert_finding(finding_new, not_pk=finding_2.pk, duplicate=True, duplicate_finding_id=2, hash_code=finding_2.hash_code, not_hash_code=None)
# reset for further tests
settings.DEDUPE_ALGO_ENDPOINT_FIELDS = dedupe_algo_endpoint_fields
def test_dedupe_algo_endpoint_field_path_different(self):
dedupe_algo_endpoint_fields = settings.DEDUPE_ALGO_ENDPOINT_FIELDS
settings.DEDUPE_ALGO_ENDPOINT_FIELDS = ["path"]
# create an identical copy of the new finding, with the same endpoints but different path
finding_new, finding_2 = self.copy_and_reset_finding(id=2) # finding_2 has host ftp://localhost
finding_new.save()
ep = Endpoint(product=finding_new.test.engagement.product, finding=finding_new, host="localhost", protocol="ftp", path="local")
ep.save()
finding_new.endpoints.add(ep)
finding_new.save()
        # expect: NOT marked as duplicate of original finding 2, as the endpoint path differs and path is the only field in DEDUPE_ALGO_ENDPOINT_FIELDS here; hash_code not affected by endpoints
self.assert_finding(finding_new, not_pk=finding_2.pk, duplicate=False, duplicate_finding_id=None, hash_code=finding_2.hash_code, not_hash_code=None)
# reset for further tests
settings.DEDUPE_ALGO_ENDPOINT_FIELDS = dedupe_algo_endpoint_fields
def test_identical_hash_code_with_intersect_endpoints(self):
dedupe_algo_endpoint_fields = settings.DEDUPE_ALGO_ENDPOINT_FIELDS
settings.DEDUPE_ALGO_ENDPOINT_FIELDS = ["host", "port"]
# ep1: https://myhost.com, ep2: https://myhost2.com
finding_new, finding_2 = self.copy_and_reset_finding(id=2)
finding_new.save(dedupe_option=False)
ep1 = Endpoint(product=finding_new.test.engagement.product, finding=finding_new, host="myhost.com", protocol="https")
ep1.save()
ep2 = Endpoint(product=finding_new.test.engagement.product, finding=finding_new, host="myhost2.com", protocol="https")
ep2.save()
finding_new.endpoints.add(ep1)
finding_new.endpoints.add(ep2)
finding_new.save(dedupe_option=True)
        # expect: not marked as duplicate of original finding 2 because the endpoints are different
self.assert_finding(finding_new, not_pk=finding_2.pk, duplicate=False, hash_code=finding_2.hash_code)
        # create a copy of the new finding with 3 endpoints, only one of which (https://myhost2.com) overlaps with the original endpoints
finding_new3, finding_new = self.copy_and_reset_finding(id=finding_new.id)
finding_new3.save(dedupe_option=False)
ep1 = Endpoint(product=finding_new3.test.engagement.product, finding=finding_new3, host="myhost4.com", protocol="https")
ep1.save()
ep2 = Endpoint(product=finding_new3.test.engagement.product, finding=finding_new3, host="myhost2.com", protocol="https")
ep2.save()
ep3 = Endpoint(product=finding_new3.test.engagement.product, finding=finding_new3, host="myhost3.com", protocol="https")
ep3.save()
finding_new3.endpoints.add(ep1)
finding_new3.endpoints.add(ep2)
finding_new3.endpoints.add(ep3)
finding_new3.save()
        # expect: marked as duplicate of finding_new because the endpoints intersect on host+port (myhost2.com)
self.assert_finding(finding_new3, not_pk=finding_new.pk, duplicate=True, duplicate_finding_id=finding_new.id, hash_code=finding_new.hash_code)
        # expect: not marked as duplicate of original finding 2 because the endpoints are different
self.assert_finding(finding_new, not_pk=finding_2.pk, duplicate=False, hash_code=finding_2.hash_code)
# reset for further tests
settings.DEDUPE_ALGO_ENDPOINT_FIELDS = dedupe_algo_endpoint_fields
def test_identical_hash_code_with_different_endpoints(self):
dedupe_algo_endpoint_fields = settings.DEDUPE_ALGO_ENDPOINT_FIELDS
settings.DEDUPE_ALGO_ENDPOINT_FIELDS = ["host", "port"]
# ep1: https://myhost.com, ep2: https://myhost2.com
finding_new, finding_2 = self.copy_and_reset_finding(id=2)
finding_new.save(dedupe_option=False)
ep1 = Endpoint(product=finding_new.test.engagement.product, finding=finding_new, host="myhost.com", protocol="https")
ep1.save()
ep2 = Endpoint(product=finding_new.test.engagement.product, finding=finding_new, host="myhost2.com", protocol="https")
ep2.save()
finding_new.endpoints.add(ep1)
finding_new.endpoints.add(ep2)
finding_new.save(dedupe_option=True)
        # expect: not marked as duplicate of original finding 2 because the endpoints are different
self.assert_finding(finding_new, not_pk=finding_2.pk, duplicate=False, hash_code=finding_2.hash_code)
# create an identical copy of the new finding without original endpoints, but with 3 extra endpoints.
finding_new3, finding_new = self.copy_and_reset_finding(id=finding_new.id)
finding_new3.save(dedupe_option=False)
ep1 = Endpoint(product=finding_new3.test.engagement.product, finding=finding_new3, host="myhost4.com", protocol="https")
ep1.save()
ep2 = Endpoint(product=finding_new3.test.engagement.product, finding=finding_new3, host="myhost2.com", protocol="http")
ep2.save()
ep3 = Endpoint(product=finding_new3.test.engagement.product, finding=finding_new3, host="myhost3.com", protocol="https")
ep3.save()
finding_new3.endpoints.add(ep1)
finding_new3.endpoints.add(ep2)
finding_new3.endpoints.add(ep3)
finding_new3.save()
        # expect: not marked as duplicate of finding_new or of original finding 2 because the endpoints are different
self.assert_finding(finding_new3, not_pk=finding_new.pk, duplicate=False, hash_code=finding_new.hash_code)
self.assert_finding(finding_new3, not_pk=finding_2.pk, duplicate=False, hash_code=finding_2.hash_code)
        # expect: not marked as duplicate of original finding 2 because the endpoints are different
self.assert_finding(finding_new, not_pk=finding_2.pk, duplicate=False, hash_code=finding_2.hash_code)
# reset for further tests
settings.DEDUPE_ALGO_ENDPOINT_FIELDS = dedupe_algo_endpoint_fields
    # unique_id algo uses the id from the tool. hash_code is still calculated, according to the field config for the Checkmarx detailed scan
def test_identical_unique_id(self):
# create identical copy
finding_new, finding_124 = self.copy_and_reset_finding(id=124)
finding_new.save()
# expect duplicate
self.assert_finding(finding_new, not_pk=124, duplicate=True, duplicate_finding_id=124, hash_code=finding_124.hash_code)
def test_different_unique_id_unique_id(self):
# create identical copy
finding_new, finding_124 = self.copy_and_reset_finding(id=124)
finding_new.unique_id_from_tool = '9999'
finding_new.save()
# expect not duplicate, but same hash_code
self.assert_finding(finding_new, not_pk=124, duplicate=False, hash_code=finding_124.hash_code)
def test_identical_ordering_unique_id(self):
# create identical copy
finding_new, finding_125 = self.copy_and_reset_finding(id=125)
finding_new.save()
        # expect duplicate, but of 124 as that is first in the list, but it's newer than 125. feature or BUG?
self.assert_finding(finding_new, not_pk=124, duplicate=True, duplicate_finding_id=124, hash_code=finding_125.hash_code)
def test_title_description_line_filepath_different_unique_id(self):
# create identical copy, change some fields
finding_new, finding_124 = self.copy_and_reset_finding(id=124)
finding_new.title = 'another title'
finding_new.unsaved_vulnerability_ids = ['CVE-2020-12345']
finding_new.cwe = '456'
finding_new.description = 'useless finding'
finding_new.save()
# expect duplicate as we only match on unique id, hash_code also different
self.assert_finding(finding_new, not_pk=124, duplicate=True, duplicate_finding_id=124, not_hash_code=finding_124.hash_code)
def test_title_description_line_filepath_different_and_id_different_unique_id(self):
# create identical copy, change some fields
finding_new, finding_124 = self.copy_and_reset_finding(id=124)
finding_new.title = 'another title'
finding_new.unsaved_vulnerability_ids = ['CVE-2020-12345']
finding_new.cwe = '456'
finding_new.description = 'useless finding'
finding_new.unique_id_from_tool = '9999'
finding_new.save()
# expect not duplicate as we match on unique id, hash_code also different because fields changed
self.assert_finding(finding_new, not_pk=124, duplicate=False, not_hash_code=finding_124.hash_code)
def test_dedupe_not_inside_engagement_unique_id(self):
# create identical copy
finding_new, finding_124 = self.copy_and_reset_finding(id=124)
# first setup some finding with same unique_id in different engagement, but same test_type
finding_22 = Finding.objects.get(id=22)
finding_22.test.test_type = finding_124.test.test_type
finding_22.test.save()
finding_22.unique_id_from_tool = '888'
finding_22.save(dedupe_option=False)
finding_new.unique_id_from_tool = '888'
finding_new.save()
# expect not duplicate as dedupe_inside_engagement is True
self.assert_finding(finding_new, not_pk=124, duplicate=False, hash_code=finding_124.hash_code)
def test_dedupe_inside_engagement_unique_id(self):
# create identical copy
finding_new, finding_124 = self.copy_and_reset_finding(id=124)
# first setup some finding with same unique_id in same engagement, but different test (same test_type)
finding_new.test = Test.objects.get(id=66)
finding_new.save()
# print(finding_new.pk)
# print(finding_new.hash_code)
# print(finding_new.duplicate)
# expect duplicate as dedupe_inside_engagement is True and the other test is in the same engagement
self.assert_finding(finding_new, not_pk=124, duplicate=True, duplicate_finding_id=124, hash_code=finding_124.hash_code)
def test_dedupe_inside_engagement_unique_id2(self):
# create identical copy
finding_new, finding_124 = self.copy_and_reset_finding(id=124)
# first setup some finding with same unique_id in different engagement, but same test_type
self.set_dedupe_inside_engagement(False)
finding_22 = Finding.objects.get(id=22)
finding_22.test.test_type = finding_124.test.test_type
finding_22.test.save()
finding_22.unique_id_from_tool = '888'
finding_22.save(dedupe_option=False)
finding_new.unique_id_from_tool = '888'
finding_new.save()
# expect duplicate as dedupe_inside_engagement is false
self.assert_finding(finding_new, not_pk=124, duplicate=True, duplicate_finding_id=finding_22.id, hash_code=finding_124.hash_code)
def test_dedupe_same_id_different_test_type_unique_id(self):
# create identical copy
finding_new, finding_124 = self.copy_and_reset_finding(id=124)
        # first setup some finding from a different test_type, but with the same unique_id_from_tool
finding_22 = Finding.objects.get(id=22)
finding_22.unique_id_from_tool = '888'
finding_new.unique_id_from_tool = '888'
# and we need to look in another engagement this time for finding_22
self.set_dedupe_inside_engagement(False)
finding_22.save(dedupe_option=False)
finding_new.save()
        # expect not duplicate as the matching finding is from another test_type, hash_code is the same as original
self.assert_finding(finding_new, not_pk=124, duplicate=False, hash_code=finding_124.hash_code)
def test_identical_different_endpoints_unique_id(self):
# create identical copy
finding_new, finding_124 = self.copy_and_reset_finding(id=124)
finding_new.save(dedupe_option=False)
ep1 = Endpoint(product=finding_new.test.engagement.product, finding=finding_new, host="myhost.com", protocol="https")
ep1.save()
finding_new.endpoints.add(ep1)
finding_new.save()
# expect duplicate, as endpoints shouldn't affect dedupe and hash_code due to unique_id
self.assert_finding(finding_new, not_pk=124, duplicate=True, duplicate_finding_id=124, hash_code=finding_124.hash_code)
# algo unique_id_or_hash_code Veracode scan
def test_identical_unique_id_or_hash_code(self):
# create identical copy
finding_new, finding_224 = self.copy_and_reset_finding(id=224)
finding_new.save()
# expect duplicate as uid matches
self.assert_finding(finding_new, not_pk=224, duplicate=True, duplicate_finding_id=224, hash_code=finding_224.hash_code)
# existing BUG? finding gets matched on hash code, while there is also an existing finding with matching unique_id_from_tool
def test_identical_unique_id_or_hash_code_bug(self):
# create identical copy
finding_124 = Finding.objects.get(id=124)
finding_new, finding_224 = self.copy_and_reset_finding(id=224)
finding_new.title = finding_124.title # use title from 124 to get matching hashcode
finding_new.save()
# marked as duplicate of 124 as that has the same hashcode and is earlier in the list of findings ordered by id
self.assert_finding(finding_new, not_pk=224, duplicate=True, duplicate_finding_id=124, hash_code=finding_124.hash_code)
def test_different_unique_id_unique_id_or_hash_code(self):
# create identical copy
finding_new, finding_224 = self.copy_and_reset_finding(id=224)
finding_new.unique_id_from_tool = '9999'
finding_new.save()
# expect duplicate, uid mismatch, but same hash_code
self.assert_finding(finding_new, not_pk=224, duplicate=True, duplicate_finding_id=finding_224.id, hash_code=finding_224.hash_code)
        # but if we change the title and thus the hash_code, it should no longer match
finding_new, finding_224 = self.copy_and_reset_finding(id=224)
finding_new.unique_id_from_tool = '9999'
finding_new.title = 'no no no no no no'
finding_new.save()
        # expect: not a duplicate; uid mismatch and the hash_code differs as well
self.assert_finding(finding_new, not_pk=224, duplicate=False, not_hash_code=finding_224.hash_code)
def test_identical_ordering_unique_id_or_hash_code(self):
# create identical copy
finding_new, finding_225 = self.copy_and_reset_finding(id=225)
finding_new.save()
        # expect duplicate, but of 224 as that is first in the list, but it's newer than 225. feature or BUG?
self.assert_finding(finding_new, not_pk=224, duplicate=True, duplicate_finding_id=224, hash_code=finding_225.hash_code)
def test_title_description_line_filepath_different_unique_id_or_hash_code(self):
# create identical copy, change some fields
finding_new, finding_224 = self.copy_and_reset_finding(id=224)
finding_new.title = 'another title'
finding_new.unsaved_vulnerability_ids = ['CVE-2020-12345']
finding_new.cwe = '456'
finding_new.description = 'useless finding'
finding_new.save()
# expect duplicate as we only match on unique id, hash_code also different
self.assert_finding(finding_new, not_pk=224, duplicate=True, duplicate_finding_id=224, not_hash_code=finding_224.hash_code)
def test_title_description_line_filepath_different_and_id_different_unique_id_or_hash_code(self):
# create identical copy, change some fields
finding_new, finding_224 = self.copy_and_reset_finding(id=224)
finding_new.title = 'another title'
finding_new.unsaved_vulnerability_ids = ['CVE-2020-12345']
finding_new.cwe = '456'
finding_new.description = 'useless finding'
finding_new.unique_id_from_tool = '9999'
finding_new.save()
# expect not duplicate as we match on unique id, hash_code also different because fields changed
self.assert_finding(finding_new, not_pk=224, duplicate=False, not_hash_code=finding_224.hash_code)
def test_dedupe_not_inside_engagement_same_hash_unique_id_or_hash_code(self):
# create identical copy
finding_new, finding_224 = self.copy_and_reset_finding(id=224)
# first setup some finding with same unique_id in different engagement, but same test_type, same hash
finding_22 = Finding.objects.get(id=22)
finding_22.test.test_type = finding_224.test.test_type
finding_22.test.save()
finding_22.unique_id_from_tool = '888'
finding_22.save(dedupe_option=False)
finding_new.unique_id_from_tool = '888'
finding_new.save()
# should become duplicate of finding 22 because of the uid match, but existing BUG makes it duplicate of 224 due to hashcode match
self.assert_finding(finding_new, not_pk=224, duplicate=True, duplicate_finding_id=224, hash_code=finding_224.hash_code)
def test_dedupe_not_inside_engagement_same_hash_unique_id_or_hash_code2(self):
# create identical copy
finding_new, finding_224 = self.copy_and_reset_finding(id=224)
# first setup some finding with same unique_id in different engagement, different test_type, same hash_code
finding_22 = Finding.objects.get(id=22)
finding_22.test.test_type = finding_224.test.test_type
finding_22.test.save()
finding_22.unique_id_from_tool = '333'
finding_22.save(dedupe_option=False)
finding_new.hash_code = finding_22.hash_code # sneaky copy of hash_code to be able to test this case icm with the bug in previous test case above
finding_new.unique_id_from_tool = '333'
finding_new.save()
# expect not duplicate as dedupe_inside_engagement is True and 22 is in another engagement
# but existing BUG? it is marked as duplicate of 124 which has the same hash and same engagement, but different unique_id_from_tool at same test_type
self.assert_finding(finding_new, not_pk=22, duplicate=True, duplicate_finding_id=124, hash_code=finding_22.hash_code)
def test_dedupe_inside_engagement_unique_id_or_hash_code(self):
# create identical copy
finding_new, finding_224 = self.copy_and_reset_finding(id=224)
# first setup some finding with same unique_id in same engagement, but different test (same test_type)
finding_new.test = Test.objects.get(id=66)
finding_new.save()
# expect duplicate as dedupe_inside_engagement is True and the other test is in the same engagement
self.assert_finding(finding_new, not_pk=224, duplicate=True, duplicate_finding_id=224, hash_code=finding_224.hash_code)
def test_dedupe_inside_engagement_unique_id_or_hash_code2(self):
# create identical copy
finding_new, finding_224 = self.copy_and_reset_finding(id=224)
# first setup some finding with same unique_id in different engagement, but same scan_type
self.set_dedupe_inside_engagement(False)
finding_22 = Finding.objects.get(id=22)
finding_22.test.test_type = finding_224.test.test_type
finding_22.test.scan_type = finding_224.test.scan_type
finding_22.test.save()
finding_22.unique_id_from_tool = '888'
finding_22.save(dedupe_option=False)
finding_new.unique_id_from_tool = '888'
finding_new.title = 'hack to work around bug that matches on hash_code first' # arrange different hash_code
finding_new.save()
# expect duplicate as dedupe_inside_engagement is false
self.assert_finding(finding_new, not_pk=224, duplicate=True, duplicate_finding_id=finding_22.id, not_hash_code=finding_22.hash_code)
def test_dedupe_same_id_different_test_type_unique_id_or_hash_code(self):
# create identical copy
finding_new, finding_224 = self.copy_and_reset_finding(id=224)
        # first setup some finding from a different test_type, but with the same unique_id_from_tool
finding_22 = Finding.objects.get(id=22)
finding_22.unique_id_from_tool = '888'
finding_new.unique_id_from_tool = '888'
# and we need to look in another engagement this time for finding_22
self.set_dedupe_inside_engagement(False)
finding_22.save(dedupe_option=False)
finding_new.title = 'title to change hash_code'
finding_new.save()
        # expect not duplicate as the matching finding is from another test_type, hash_code is also different
self.assert_finding(finding_new, not_pk=224, duplicate=False, not_hash_code=finding_224.hash_code)
        # same scenario, but an identical hash_code to 224 leads to being marked as duplicate of 224
finding_new, finding_224 = self.copy_and_reset_finding(id=224)
        # first setup some finding from a different test_type, but with the same unique_id_from_tool
finding_22 = Finding.objects.get(id=22)
finding_22.unique_id_from_tool = '888'
finding_new.unique_id_from_tool = '888'
# and we need to look in another engagement this time for finding_22
self.set_dedupe_inside_engagement(False)
finding_22.save(dedupe_option=False)
finding_new.save()
        # expect duplicate of 224: the unique_id match is from another test_type, but the hash_code matches 224 so it falls back to the hash_code match
self.assert_finding(finding_new, not_pk=224, duplicate=True, duplicate_finding_id=224, hash_code=finding_224.hash_code)
def test_identical_different_endpoints_unique_id_or_hash_code(self):
# create identical copy, so unique id is the same
finding_new, finding_224 = self.copy_and_reset_finding(id=224)
finding_new.save(dedupe_option=False)
ep1 = Endpoint(product=finding_new.test.engagement.product, finding=finding_new, host="myhost.com", protocol="https")
ep1.save()
finding_new.endpoints.add(ep1)
finding_new.save()
if settings.DEDUPE_ALGO_ENDPOINT_FIELDS == []:
# expect duplicate, as endpoints shouldn't affect dedupe and hash_code due to unique_id