# descriptors.yaml (forked from MTG/essentia)
timestamp: 2008-07-25 15:53:52 +0200
1:
group: "unknown"
name: "danceability"
algorithm: "Danceability"
description: "the higher the value of the danceability descriptor, the easier it should be to dance to this song"
outputdomain: "a real number from 0 to ???"
application: ""
who: "Thomas"
reference: ""
examplechain: ""
2:
group: "rhythm"
name: "onset_rate"
algorithm: "OnsetRate"
description: "The onset is the beginning of a note or a sound, in which the amplitude of the sounds rises from zero to an initial peak. The onset rate is a real number representing the number of onsets per second. It may also be considered as a measure of the number of sonic events per second, and thus a rhythmic indicator of the audio piece. A higher onset rate means that the audio piece has a higher rhythmic density."
outputdomain: "real, positive"
application: "Rhythm detection"
who: "Thomas Aussenac, Owen Meyers"
reference: "http://aubio.org/phd/thesis/brossier06thesis.pdf"
examplechain: "fft -> onsetDetection -> onsets -> onsetRate"
3:
group: "unknown"
name: "equalization_profile"
algorithm: ""
description: "This descriptor is not longer part of essentia versions higher than 0.6.0"
outputdomain: "integer value between 1 and 8"
application: "not useful. Failed the Robustness test"
who: "Thomas, Joachim"
reference: ""
examplechain: ""
4:
group: "unknown"
name: "excitement"
algorithm: ""
description: "(weka tree) The higher the value, the more exciting the song should be. This weka tree is based on the ground truth from the Yamaha database."
outputdomain: "an integer value which can be rather 1 (not exciting), 2 (exciting) or 3 (very exciting)."
application: ""
who: "Thomas"
reference: ""
examplechain: ""
5:
group: "unknown"
name: "thumbnail"
algorithm: ""
description: "Time onsets corresponding to the most relevant segment of a song."
outputdomain: "A time stamps pair (start and end of the thumbnail) in seconds."
application: ""
who: "Thomas"
reference: ""
examplechain: ""
6:
group: "unknown"
name: "intensity"
algorithm: ""
description: "Intensity is a measure of the intensity of an audio piece from a rhythmic point of view. Typically, a slow, soft and relaxing audio can be considered to have a low intensity. On the other hand, a fast energetic audio can be considered as having a high intensity. The higher the value, the more intense the audio piece is."
outputdomain: "an integer value between 0 and 1"
application: ""
who: "Thomas"
reference: ""
examplechain: ""
7:
group: "unknown"
name: "segments"
algorithm: ""
description: "Time onsets corresponding to the different segments found after segmentation."
outputdomain: "a list of time stamps pairs (start and end of each segment) in seconds."
application: ""
who: "Thomas"
reference: ""
examplechain: ""
8:
group: "unknown"
name: "speech_music"
algorithm: ""
description: "This descriptor intends to describe if a given audio file is a music file or a speech file."
outputdomain: "a string label which can be either \"speech\" or \"music'."
application: ""
who: "Shadi"
reference: ""
examplechain: ""
9:
group: "unknown"
name: "voice_instrumental"
algorithm: ""
description: "This descriptor intends to describe if a given audio file is an instrumental file or a song with voice and accompaniment."
outputdomain: "A string label which can be either \"voice\" or \"instrumental'."
application: ""
who: "Thomas"
reference: ""
examplechain: ""
10:
group: "unknown"
name: "dynamic_complexity"
algorithm: ""
description: "The Dynamic Complexity is the average absolute deviation from the global loudness level estimate on the dB scale. It is related to the dynamic range and to the amount of fluctuation in loudness present in a recording."
outputdomain: "A real number from 0 to 1."
application: ""
who: "Thomas"
reference: ""
examplechain: ""
11:
group: "unknown"
name: "larm"
algorithm: ""
description: "This algorithm extracts the Equivalent sound level (Leq) of an audio signal. The Larm measure with Revised Low-frequency B-weighting (RLB) has shown to be reliable, objective loudness estimate of music and speech."
outputdomain: "a real number in dB from -100dB to ???"
application: ""
who: "Thomas"
reference: ""
examplechain: ""
12:
group: "metadata"
name: "replay_gain"
algorithm: "ReplayGain"
description: "The Replay Gain, a measure of the perceived loudness of an audio piece. "
outputdomain: "real. The value is given in dB."
application: "Can be used to compute and store the recommended gain for playing a file, for example within a collection which has files with highly varying gain levels."
who: "Thomas Aussenac, Justin Salamon"
reference: "http://replaygain.hydrogenaudio.org/"
examplechain: "signal -> ReplayGain"
13:
group: "unknown"
name: "vicker"
algorithm: ""
description: "The loudness Vicker is a loudness measure used for calculating the Dynamic Complexity."
outputdomain: "a real number in dB from -90 to ???"
application: ""
who: "Thomas"
reference: ""
examplechain: ""
14:
group: "unknown"
name: "panning"
algorithm: ""
description: "This represents the parameterized panning curve of the given audio file."
outputdomain: "An array of real numbers."
application: ""
who: "Xavier Janer"
reference: ""
examplechain: ""
15:
group: "rhythm"
name: "beats_loudness"
algorithm: "SingleBeatLoudness"
description: "Beats loudness is a measure of the strength of the rhythmic beats of the audio piece. It turns to be very useful for characterizing audio piece. "
outputdomain: "real, from 0 to 1"
application: "Genre classification. For example, a folk song may have a low beats loudness while a punk-rock song or a hip-hop song may have a high beats loudness."
who: "Thomas Aussenac, Martin Haro"
reference: ""
examplechain: "input signal -> beats_loudness"
16:
group: "rhythm"
name: "beats_loudness_bass"
algorithm: "SingleBeatLoudness"
description: "Beats loudness bass is a measure of the strength of the low frequency part of rhythmic beats of an audio piece. It turns to be very useful for characterizing an audio piece. "
outputdomain: "real, from 0 to 1"
application: "Genre Classification. For example, a folk song or a punk-rock may have a low beats loudness bass, while a hip-hop song may have a high bass beats loudness bass."
who: "Thomas Aussenac, Martin Haro"
reference: ""
examplechain: "input signal -> beats_loudness_bass"
17:
group: "rhythm"
name: "onset_times"
algorithm: "Onsets"
description: "The onset is the beginning of a note or a sound, in which the amplitude of the sound rises from zero to an initial peak. The onsets are the time stamps in seconds corresponding to the onsets of the audio piece."
outputdomain: "real, positive. Returns a list of time stamps in seconds."
application: "Rhythm detection"
who: "Thomas Aussenac, Paul Brossier, Owen Meyers"
reference: "http://aubio.org/phd/thesis/brossier06thesis.pdf"
examplechain: "onsetDetection -> onsetTimes"
18:
group: "unknown"
name: "der_av_after_max"
algorithm: ""
description: "The derivative average, weighted by the amplitude, after the max amplitude of the signal envelope. This coefficient helps discriminating Impulsive sounds, which have a steepest release, so a smaller value, from Decrescendo sounds."
outputdomain: "a real number from ???"
application: "Strange."
who: "Thomas"
reference: ""
examplechain: ""
19:
group: "unknown"
name: "effective_duration"
algorithm: ""
description: "the effective duration is a measure of The time the signal is perceptually meaningful. It is approximated by the time the envelope is above a given threshold, e. g. 40% of the envelope maximum."
outputdomain: "a real number in seconds from 0 to duration of the audio piece."
application: "Up to you."
who: "Thomas"
reference: ""
examplechain: ""
20:
group: "unknown"
name: "flatness"
algorithm: ""
description: "The flatness coefficient is the ratio of the value above which lie 5% of the values to the value above which lie 80% of the values. This coefficient is close to 1 for flat envelope and large for sounds having a large dynamic."
outputdomain: "a real number from 0 to 1."
application: "Good. Noises, percussive sounds and pitched sounds cluster."
who: "Thomas"
reference: ""
examplechain: ""
21:
group: "sfx"
name: "inharmonicity"
algorithm: "Inharmonicity"
description: "The divergence of the signal spectral components from a purely harmonic signal. It is computed as the energy weighted divergence of the spectral components from the multiple of the fundamental frequency. The inharmonicity ranges from 0 (purely harmonic signal) to 1 (inharmonic signal)."
outputdomain: "real, from 0 to 1."
application: "E.g. charaterisation of piano sounds"
who: "Thomas Aussenac, Justin Salamon"
reference: "http://en.wikipedia.org/wiki/Inharmonicity"
examplechain: "spectrum -> SpectralPeaks (frequencies,magnitudes) -> HarmonicPeaks (frequencies, magnitudes)\n\nspectrum -> PitchDetection\n\nHarmonicPeaks (frequencies,magnitudes) & PitchDetection -> Inharmonicity"
22:
group: "unknown"
name: "logattacktime"
algorithm: ""
description: "The duration from when the sound becomes perceptually audible to when it reaches its maximum intensity. It is computed by applying thresholds on the signal. The start of the sound is estimated as the point where the signal reaches 20% of the maximum value. This is to account for possible noise presence. The end of the logAttack is estimated as the point where the signal reaches 90% of the maximum value. This is to account for the max value possibility occurring after the logAttack, as in a trumpet sound."
outputdomain: "a real number from -5 to ???"
application: "Good."
who: "Thomas"
reference: ""
examplechain: ""
23:
group: "unknown"
name: "max_der_before_max"
algorithm: ""
description: "The maximum derivative before the max amplitude. This coefficient helps discriminating Crescendo and Delta sounds that have a smooth attack so a smaller value than sounds with different dynamic profile."
outputdomain: "a real number from ???"
application: "Good."
who: "Thomas"
reference: ""
examplechain: ""
24:
group: "unknown"
name: "max_to_total"
algorithm: ""
description: "The maximum amplitude time to total length ratio of a signal envelope. This coefficient shows how much the maximum amplitude is off-center. Its value is close to 0 if the maximum is close to the beginning (e.g. Decrescendo or Impulsive sounds), close to 0.5 if it is close to the middle (e. g. Delta sounds) and close to 1 if it is close to the end of the sound (e.g. Crescendo sounds)."
outputdomain: "a real number from 0 to 1"
application: "Good on short sounds."
who: "Thomas"
reference: ""
examplechain: ""
25:
group: "sfx"
name: "oddtoevenharmonicenergyratio"
algorithm: "OddToEvenHarmonicEnergyRatio"
description: "The Odd to Even Harmonic Energy Ratio of a signal given its harmonic peaks. The Odd to Even Harmonic Energy Ratio is a measure allowing distinguishing odd harmonic energy predominant sounds (such as clarinet sounds) from equally important harmonic energy sounds (such as the trumpet)."
outputdomain: "a real number from 0 to 1"
application: "Discrimination of sounds with predominance of odd or even harmonics"
who: "Thomas Aussenac, Gerard Roma"
reference: "Peeters, G.: A large set of audio features for sound description in the CIUDADO project. Technical Report, IRCAM, 2004\n"
examplechain: "spectrum->spectral_peaks;\nspectrum->pitch_detection;\n(spectral_peaks,pitch_detection)->harmonic_peaks->oddtoevenharmonicenergyratio"
26:
group: "unknown"
name: "onsets_number"
algorithm: ""
description: "The number of onsets. If the number of onsets is greater than 1 (the first onset correspond to the attack of the sound), the sound is classified as iterative."
outputdomain: "a real number from 0 to ???"
application: "Up to you."
who: "Thomas"
reference: ""
examplechain: ""
27:
group: "sfx"
name: "pitch_after_max_to_before_max_energy_ratio"
algorithm: "AfterMaxToBeforeMaxEnergyRatio"
description: "The ratio of energy after the maximum to energy before the maximum of pitch values. Sounds having an ascending pitch value a small while sounds having a descending pitch have a high value."
outputdomain: "a real number from 0 to 1"
application: "Discriminating sounds with different relation between pitch and energy envelope"
who: "Thomas Aussenac, Gerard Roma"
reference: ""
examplechain: "pitch-> pitch_after_max_to_before_max_energy_ratio"
28:
group: "sfx"
name: "pitch_centroid"
algorithm: "Centroid"
description: "The center of gravity of the array of pitch values per frame. A value close to 0.5 may indicate a stable pitch"
outputdomain: "a real number normalized by the range parameter."
application: "Classifying sound effects with a potentially varying pitch."
who: "Thomas Aussenac, Gerard Roma"
reference: ""
examplechain: "pitch->centroid"
29:
group: "sfx"
name: "pitch_max_to_total"
algorithm: "MaxToTotal"
description: "A measure of the relative position in time of the maximum pitch value. A value of zero (maximum at the beginning) indicates descending pitch, while a value of one indicates an ascending pitch."
outputdomain: "a real number from 0 to 1."
application: "Discriminating sound effects with different pitch envelopes."
who: "Thomas Aussenac, Gerard Roma"
reference: ""
examplechain: "pitch->max_to_total"
30:
group: "sfx"
name: "pitch_min_to_total"
algorithm: "MinToTotal"
description: "A measure of the relative position in time of the minimum pitch value. A value of zero (minimum at the beginning) indicates ascending pitch, while a value of one indicates an descending pitch."
outputdomain: "a real number from 0 to 1"
application: "Discriminating sound effects with different pitch envelopes"
who: "Thomas Aussenac, Gerard Roma"
reference: ""
examplechain: "pitch->minToTotal"
31:
group: "unknown"
name: "strongdecay"
algorithm: ""
description: "The strong decay is built from the non-linear combination of the signal energy and the signal temporal centroid. A signal containing a temporal centroid near its left boundary and a strong energy is said to have a strong decay."
outputdomain: "a real number from ???"
application: "Bad!"
who: "Thomas"
reference: ""
examplechain: ""
32:
group: "unknown"
name: "tc_to_total"
algorithm: ""
description: "The temporal centroid to total length ratio of a signal envelope. This coefficient shows how the sound is \"balanced'. Its value is close to 0 if most of the energy lies at the beginning (e.g. Decrescendo or Impulsive sounds), close to 0.5 if the sound is symmetric (e.g. Unvarying or Delta sounds) and close to 1 if most of the energy lies at the end of the sound (e.g. Crescendo sounds)."
outputdomain: "a real number from 0 to 1."
application: "Bad!"
who: "Thomas"
reference: ""
examplechain: ""
33:
group: "unknown"
name: "temporal_centroid"
algorithm: ""
description: "The center of gravity of the audio envelope."
outputdomain: "a real number from ???"
application: "Somehow good."
who: "Thomas"
reference: ""
examplechain: ""
34:
group: "unknown"
name: "temporal_decrease"
algorithm: ""
description: "The center of gravity of the audio envelope."
outputdomain: "a real number from ???"
application: "Good."
who: "Thomas"
reference: ""
examplechain: ""
35:
group: "lowlevel"
name: "barkbands"
algorithm: "BarkBands"
description: "This algorithm extracts the 28 Bark band values of a Spectrum. For each bark band the power-spectrum (mag-squared) is summed. The first two bands [0..100] and [100..200] are divided in two for better resolution.\n\nBarkbands only takes one parameter (sampleRate), one input (the spectrum) and one output (the bands)\n\n The Frequency edges (in Hz) are: 0, 50, 100, 150, 200, 300, 400, 510, 630, 770, 920, 1080, 1270, 1480, 1720, 2000, 2320, 2700, 3150, 3700, 4400, 5300, 6400, 7700, 9500, 12000, 15500, 20500 and 27000 \n\n"
outputdomain: "real, non-negative. 28 values (or less depending on the sampleRate)"
application: "Perceptual description of sounds, since the scale ranges from 1 to 24 and corresponds to the first 24 critical bands of hearing (see Zwicker, E. (1961), \"Subdivision of the audible frequency range into critical bands,\" The Journal of the Acoustical Society of America, 33, Feb., 1961.). "
who: "Thomas Aussenac, Martin Haro"
reference: "http://ccrma.stanford.edu/~jos/bbt/Bark_Frequency_Scale.html and \n\nhttp://en.wikipedia.org/wiki/Bark_scale\n\n"
examplechain: "spectrum -> barkbands"
36:
group: "lowlevel"
name: "barkbands_kurtosis"
algorithm: "DistributionShape"
description: "The kurtosis gives a measure of the flatness of a distribution around its mean value. A negative kurtosis indicates flatter bark bands. A positive kurtosis indicates peakier bark bands. A kurtosis = 0 indicates bark bands with normal distribution."
outputdomain: "real"
application: "Timbral characterization. "
who: "Thomas Aussenac, Martin Haro"
reference: "http://en.wikipedia.org/wiki/Kurtosis\n\nSee also barkbands' documentation"
examplechain: "spectrum -> barkbands -> centralmoments -> distributionshape"
37:
group: "lowlevel"
name: "barkbands_skewness"
algorithm: "DistributionShape"
description: "The skewness is a measure of the asymmetry of a distribution around its mean value. A negative skewness indicates bark bands with more energy in the high frequencies. A positive skewness indicates bark bands with more energy in the low frequencies. A skewness = 0 indicates symmetric bark bands. For silence or constants signal, skewness is 0."
outputdomain: "real "
application: "Timbral characterization."
who: "Thomas Aussenac, Martin Haro"
reference: "http://en.wikipedia.org/wiki/Skewness\n\nSee also barkbands' documentation"
examplechain: "spectrum -> barkbands -> centralmoments -> distributionshape\n"
38:
group: "lowlevel"
name: "spectral_centroid"
algorithm: "Centroid"
description: "The spectral centroid is a measure used in digital signal processing to characterize an audio spectrum. It indicates where the \"center of mass\" of the spectrum is. "
outputdomain: "real, non-negative"
application: "Perceptually, it has a robust connection with the impression of \"brightness\" of a sound. High values of it correspond to brighter textures."
who: "Thomas Aussenac, Elena Martinez"
reference: "G. Tzanetakis and P. Cook. Musical genre classification of audio signals. IEEE Transaction on speech and audio processing, 10(5), July 2002."
examplechain: "spectrum -> centroid"
39:
group: "lowlevel"
name: "spectral_crest"
algorithm: "Crest"
description: "The crest is the ratio between the max value and the arithmetic mean of the spectrum. It is a measure of the noisiness of the spectrum."
outputdomain: "real, greater than 1."
application: "Discrimination of noisy signals"
who: "Thomas Aussenac, Gerard Roma"
reference: "Peeters, G.: A large set of audio features for sound description in the CUIDADO project. Technical Report, IRCAM, 2004"
examplechain: "spectrum->barkbands->crest"
40:
group: "lowlevel"
name: "barkbands_spread"
algorithm: "DistributionShape"
description: "The spread is defined as the variance of a distribution around its mean value. It is equal to the 2nd order central moment.\n"
outputdomain: "real"
application: "Timbral characterization."
who: "Thomas Aussenac, Martin Haro"
reference: "http://en.wikipedia.org/wiki/Central_moment"
examplechain: "spectrum -> barkbands -> centralmoments -> distributionshape\n"
41:
group: "lowlevel"
name: "spectral_decrease"
algorithm: "Decrease"
description: "A measure of the amount of decrease of the spectral amplitude, which is supposed to be more related to perception than the spectrum slope (linear regression)."
outputdomain: "a real number normalized by the range parameter"
application: "Classification of musical instruments, pitch detection for some specific instruments like the piano"
who: "Thomas Aussenac, Gerard Roma"
reference: "Peeters, G.: A large set of audio features for sound description in the CIUDADO project. Technical Report, IRCAM, 2004"
examplechain: "spectrum**2 -> decrease"
42:
group: "lowlevel"
name: "spectral_energy"
algorithm: "Energy"
description: "The spectrum energy at a given frame."
outputdomain: "real, non-negative"
application: "This is a very general descriptor and can be used for a wide variety of applications"
who: "Thomas Aussenac, Elena Martinez"
reference: "http://en.wikipedia.org/wiki/Energy_%28signal_processing%29"
examplechain: "spectrum -> energy"
43:
group: "lowlevel"
name: "spectral_energyband_low"
algorithm: "EnergyBandRatio"
description: "The Energy Band Ratio of a spectrum is the ratio of the spectrum energy from startCutoffFrequency to stopCutoffFrequency to the total spectrum energy. For the Energy Band Ration Low, startCutoffFrequency = 20Hz and stopCutoffFrequency = 150Hz"
outputdomain: "real, from 0 to 1"
application: "This is a very general descriptor and can be used for a wide variety of applications"
who: "Thomas Aussenac, Elena Martinez"
reference: "http://en.wikipedia.org/wiki/Energy_%28signal_processing%29"
examplechain: "spectrum -> energy band ratio"
44:
group: "lowlevel"
name: "spectral_energyband_middle_low"
algorithm: "EnergyBandRatio"
description: "The Energy Band Ratio of a spectrum is the ratio of the spectrum energy from startCutoffFrequency to stopCutoffFrequency to the total spectrum energy. For the Energy Band Ration Middle Low, startCutoffFrequency = 150Hz and stopCutoffFrequency = 800Hz"
outputdomain: "real, from 0 to 1"
application: "This is a very general descriptor and can be used for a wide variety of applications"
who: "Thomas Aussenac, Elena Martinez"
reference: "http://en.wikipedia.org/wiki/Energy_%28signal_processing%29"
examplechain: "spectrum -> energy band ratio"
45:
group: "lowlevel"
name: "spectral_energyband_middle_high"
algorithm: "EnergyBandRatio"
description: "the Energy Band Ratio of a spectrum is the ratio of the spectrum energy from startCutoffFrequency to stopCutoffFrequency to the total spectrum energy. For the Energy Band Ration Middle High, startCutoffFrequency = 800Hz and stopCutoffFrequency = 4000Hz"
outputdomain: "real, from 0 to 1"
application: "This is a very general descriptor and can be used for a wide variety of applications"
who: "Thomas Aussenac, Elena Martinez"
reference: "http://en.wikipedia.org/wiki/Energy_%28signal_processing%29"
examplechain: "spectrum -> energy band ratio"
46:
group: "lowlevel"
name: "spectral_energyband_high"
algorithm: "EnergyBandRatio"
description: "The Energy Band Ratio of a spectrum is the ratio of the spectrum energy from startCutoffFrequency to stopCutoffFrequency to the total spectrum energy. For the Energy Band Ration High, startCutoffFrequency = 4000Hz and stopCutoffFrequency = 20000Hz"
outputdomain: "real, from 0 to 1"
application: "This is a very general descriptor and can be used for a wide variety of applications"
who: "Thomas Aussenac, Elena Martinez"
reference: "http://en.wikipedia.org/wiki/Energy_%28signal_processing%29"
examplechain: "spectrum -> energy band ratio"
47:
group: "lowlevel"
name: "spectral_flatness_db"
algorithm: "FlatnessDB"
description: "This is a kind of dB value of the Bark bands. It characterizes the shape of the spectral envelope. For tonal signals, flatness dB is close to 1, for noisy signals it is close to 0."
outputdomain: "real, from 0 to 1."
application: "segmentation"
who: "Thomas Aussenac, Ferdinand Fuhrmann"
reference: "Peeters, G.: A large set of audio features for sound description in the CIUDADO project. Technical Report, IRCAM, 2004"
examplechain: "spectrum -> Hz2Bark -> flatness -> flatnessDB -> flatness dB"
48:
group: "lowlevel"
name: "spectral_flux"
algorithm: "Flux"
description: "Spectral Flux is a measure of how quickly the power spectrum of a signal is changing, calculated by comparing the power spectrum for one frame against the power spectrum from the previous frame. The spectral flux can be used to determine the timbre of an audio signal, or in onset detection, among other things."
outputdomain: "a positive real number"
application: "Segmentation"
who: "Thomas Aussenac, Gerard Roma"
reference: "Tzanetakis, G. Cook, P. Multifeature audio segmentation for browsing and annotation.\nIEEE workshop on Applications of Signal Processing to Audio and Acoustics, 1999"
examplechain: "spectrum->flux"
49:
group: "lowlevel"
name: "hfc"
algorithm: "HFC"
description: "The High Frequency Content measure is a simple measure, taken across a signal spectrum (usually a STFT spectrum), which can be used to characterize the amount of high-frequency content in the signal. In contrast to perceptual measures, this is not based on any evidence about its relevance to human hearing. Despite that, it can be useful for some applications, such as onset detection."
outputdomain: "real, non-negative"
application: "Onset detection"
who: "Thomas Aussenac, Justin Salamon"
reference: "http://en.wikipedia.org/wiki/High_Frequency_Content_measure\n\n\n\nP. Brossier, J. P. Bello and M. D. Plumbley. Real-time temporal segmentation of note objects in music signals, in Proceedings of the International Computer Music Conference (ICMC 2004), Miami, Florida, USA, November 1-6, 2004."
examplechain: "spectrum -> hfc"
50:
group: "lowlevel"
name: "spectral_kurtosis"
algorithm: "DistributionShape"
description: "The kurtosis gives a measure of the flatness of a distribution around its mean value. A negative kurtosis indicates a flatter signal spectrum. A positive kurtosis indicates a peakier signal spectrum. A kurtosis = 0 indicates a spectrum with normal distribution."
outputdomain: "one real value"
application: "Timbral characterization. "
who: "Thomas Aussenac, Martin Haro"
reference: "http://en.wikipedia.org/wiki/Kurtosis"
examplechain: "spectrum -> centralmoments -> distributionshape"
51:
group: "lowlevel"
name: "spectral_spread"
algorithm: "DistributionShape"
description: "The spread is defined as the variance of a distribution around its mean value. It is equal to the 2nd order central moment."
outputdomain: "real"
application: "Timbral characterization."
who: "Thomas Aussenac, Martin Haro"
reference: "http://en.wikipedia.org/wiki/Central_moment"
examplechain: "spectrum -> centralmoments -> distributionshape"
52:
group: "lowlevel"
name: "spectral_skewness"
algorithm: "DistributionShape"
description: "The skewness is a measure of the asymmetry of a distribution around its mean value. A negative skewness indicates a signal spectrum with more energy in the high frequencies. A positive skewness indicates a signal spectrum with more energy in the low frequencies. A skewness = 0 indicates a symmetric spectrum. For silence or constants signal, skewness is 0."
outputdomain: "real"
application: "Timbral characterization."
who: "Thomas Aussenac, Martin Haro"
reference: "http://en.wikipedia.org/wiki/Skewness"
examplechain: "spectrum -> centralmoments -> distributionshape "
53:
group: "lowlevel"
name: "mfcc"
algorithm: "MFCC"
description: "This algorithm computes the mel-frequency cepstrum coefficients. "
outputdomain: "real, matrix of dimensions: number mfcc coefficients per number of frames"
application: "They have been widely used in speech recognition and also to model music since they provide a compact representation of the spectral envelope. The first coefficients concentrate\nmost of the signal energy. Its use for music information retrieval\nhas become standard since the seminar paper (2) from 1997"
who: "Thomas Aussenac, Elena Martinez"
reference: "(1) Rabiner, L. and Juang, B., 1993, Fundamentals of Speech Recognition, Prentice-Hall.\n\n(2) J. Foote. Content-based retrieval of music and audio. In In Multimedia Storage and Archiving Systems II, Proc. of SPIE, 1997.\n\n(3) Scheirer, E. and Slaney, M., 1997, Construction and evaluation of a robust multifeature speech/music\ndiscriminator, Proceedings IEEE International Conference on Acoustics, Speech, and Signal Processing.\n\n(4) B. Logan. Mel frequency cepstral coefficients for music modeling. In\nProc. of the 1st Annual International Symposium on Music Information\nRetrieval (ISMIR), 2000."
examplechain: "spectrum -> mfcc"
54:
group: "lowlevel"
name: "pitch"
algorithm: "PitchDetection"
description: "Pitch detection for monophonic sounds. Pitch is represented as the fundamental frequency of the analysed sound. The algorithm uses the YinFFT method developed by Paul Brossier, based on the time-domain YIN algorithm with an efficient implementation making use of the spectral domain."
outputdomain: "real, non-negative. Represents the frequency in Hertz."
application: "Monophonic voice and instrument transcription"
who: "Justin Salamon"
reference: "Paul Brossier, ''Automatic annotation of musical audio for interactive systems'', PhD thesis, Centre for Digital music, Queen Mary University of London, London, UK, 2006."
examplechain: "spectrum -> PitchDetection -> pitch"
55:
group: "unknown"
name: "pitch_histogram"
algorithm: ""
description: ""
outputdomain: ""
application: ""
who: ""
reference: ""
examplechain: ""
56:
group: "unknown"
name: "pitch_histogram_spread"
algorithm: ""
description: ""
outputdomain: ""
application: ""
who: ""
reference: ""
examplechain: ""
57:
group: "unknown"
name: "pitch_histogram_values"
algorithm: ""
description: ""
outputdomain: ""
application: ""
who: ""
reference: ""
examplechain: ""
58:
group: "lowlevel"
name: "pitch_instantaneous_confidence"
algorithm: "PitchDetection"
description: "a measure of pitch confidence derived from the yinFFT algorithm, which is a monophonic pitch detector. gives evidence about how much a certain pitch, detected in a frame, is affecting the total spectrum. If the output is near 1, there exist just one pitch in the mixture, an output near 0 indicates multiple, not distinguishable pitches."
outputdomain: "real, from 0 to 1."
application: "segmentation"
who: "Ferdinand Fuhrmann"
reference: "Paul Brossier, ''Automatic annotation of musical audio for interactive systems'', PhD thesis, Centre for Digital music, Queen Mary University of London, London, UK, 2006."
examplechain: "spectrum -> pitchDetection -> pitch_instantaneous confidence"
59:
group: "lowlevel"
name: "pitch_salience"
algorithm: "PitchSalience"
description: "The pitch salience is given by the ratio of the highest peak to the 0-lag peak in the autocorrelation function. Non-pitched sounds have a mean pitch salience value close to 0 while harmonic sounds have a value close to 1. Sounds having Unvarying pitch have a small pitch salience variance while sounds having Varying pitch have a high pitch salience variance."
outputdomain: "real, from 0 to 1."
application: "Characterising percussive sounds for example. We can expect low values for percussive sounds and high values for white noises."
who: "Thomas Aussenac, Justin Salamon"
reference: "Ricard J., Towards computational \nmorphological description of sound, \nDEA pre-thesis research work, Universitat Pompeu \nFabra, Barcelona, September 2004."
examplechain: "spectrum -> PitchSalience"
60:
group: "lowlevel"
name: "spectral_rms"
algorithm: "RMS"
description: "The root mean square spectrum energy."
outputdomain: "real, non-negative"
application: "It is a measure of loudness of the sound frame"
who: "Thomas Aussenac, Elena Martinez"
reference: "G. Tzanetakis and P. Cook. Musical genre classification of audio signals. IEEE Transaction on speech and audio processing, 10(5), July 2002"
examplechain: "array -> rms"
61:
group: "lowlevel"
name: "spectral_rolloff"
algorithm: "RollOff"
description: "The Roll Off is the frequency for which 85% of the spectrum energy is contained below. Like the centroid, the rolloff is also a measure of spectral shape, they are in fact strongly correlated."
outputdomain: "real, from 0 to 22500"
application: "It can be used to distinguish between harmonic and noisy sounds."
who: "Thomas Aussenac, Elena Martinez"
reference: "G. Tzanetakis and P. Cook. Musical genre classification of audio signals. IEEE Transaction on speech and audio processing, 10(5), July 2002."
examplechain: "spectrum -> rolloff"
62:
group: "lowlevel"
name: "silence_rate_20dB"
algorithm: "SilenceRate"
description: "This is the rate of frames where the level is above a given threshold, here -20dB. Returns 1 whenever the instant power of the input frame is below the given threshold, 0 otherwise"
outputdomain: "binary, 0 or 1"
application: "It intends to measure the level of compression of a signal."
who: "Thomas Aussenac, Owen Meyers"
reference: ""
examplechain: "instantPower -> silenceRate"
63:
group: "lowlevel"
name: "silence_rate_30dB"
algorithm: "SilenceRate"
description: "This is the rate of frames where the level is above a given threshold, here -30dB. Returns 1 whenever the instant power of the input frame is below the given threshold, 0 otherwise"
outputdomain: "binary, 0 or 1"
application: "It intends to measure the level of compression of a signal."
who: "Thomas Aussenac, Owen Meyers"
reference: ""
examplechain: "instantPower -> silenceRate"
64:
group: "lowlevel"
name: "silence_rate_60dB"
algorithm: "SilenceRate"
description: "This is the rate of frames where the level is above a given threshold, here -60dB. Returns 1 whenever the instant power of the input frame is below the given threshold, 0 otherwise"
outputdomain: "binary, 0 or 1"
application: "It intends to measure the level of compression of a signal."
who: "Thomas Aussenac, Owen Meyers"
reference: ""
examplechain: "instantPower -> silenceRate"
65:
group: "lowlevel"
name: "spectral_strongpeak"
algorithm: "StrongPeak"
description: "The Strong Peak is defined as the ratio between the spectrum maximum magnitude and the bandwidth of the maximum peak in the spectrum above a threshold (half its amplitude). It reveals whether the spectrum presents a very pronounced maximum peak. The thinner and the higher the maximum of the spectrum is, the higher the value this parameter takes."
outputdomain: "a positive real number "
application: "Recognition of percussive instruments"
who: "Thomas Aussenac, Gerard Roma"
reference: "Gouyon, F., Herrera, P. Exploration of techniques for automatic labeling of audio drum tracks instruments. Proceedings of MOSART (2001)"
examplechain: "spectrum->strongpeak"
66:
group: "metadata"
name: "duration"
algorithm: "Duration"
description: "The duration is a measure of the length of the signal."
outputdomain: "real, non-negative. The duration of the audio signal in seconds."
application: "Any application which requires the time duration of a signal"
who: "Thomas Aussenac, Justin Salamon"
reference: ""
examplechain: "signal -> Duration"
67:
group: "unknown"
name: "lpc"
algorithm: ""
description: "This is a measure of the Linear Predictive Coefficients vector of a signal."
outputdomain: "a list of 11 real values from ???"
application: ""
who: "Thomas"
reference: ""
examplechain: ""
68:
group: "lowlevel"
name: "zerocrossingrate"
algorithm: "ZeroCrossingRate"
description: "The Zero Crossing Rate is the number of sign changes between consecutive signal values divided by the total number of values. "
outputdomain: "real, from 0 to 1"
application: "It can be a measure of the noisiness of the signal: noisy signals tend to have a high value of it."
who: "Thomas Aussenac, Elena Martinez"
reference: "G. Tzanetakis and P. Cook. Musical genre classification of audio signals.\nIEEE Transaction on speech and audio processing, 10(5), July 2002."
examplechain: "input signal -> zero crossing rate"
69:
group: "lowlevel"
name: "spectral_complexity"
algorithm: "SpectralComplexity"
description: "Timbral Complexity is a measure of the complexity of the instrumentation of the audio piece. Typically, in a piece of audio several instruments are present. This increases the complexity of the spectrum of the audio and therefore, it represents a useful audio feature for characterizing a piece of audio."
outputdomain: "integer, non-negative"
application: "segmentation"
who: "Thomas Aussenac, Ferdinand Fuhrmann"
reference: "Streich, S.: Music Complexity a multi-faceted description of audio content. PhD Thesis, Universitat Pompeu Fabra, Barcelona, 2007."
examplechain: "spectrum -> SpectralPeaks -> SpectralComplexity -> spectral complexity"
70:
group: "unknown"
name: "chords_changes"
algorithm: ""
description: "The chord changes is the suite of time stamps when a played chord is different from the previous played chord."
outputdomain: "a list of time stamps in seconds."
application: ""
who: "Thomas"
reference: ""
examplechain: ""
71:
group: "tonal"
name: "chords_changes_rate"
algorithm: "ChordsDescriptors"
description: "The Chord Changes Rate is the ratio from the number of \"tonal_chords_changes\" to the total number of detected chords in \"tonal_chord_progression\"."
outputdomain: "real, from 0 to 1."
application: "Similarity, classification"
who: "Thomas Aussenac, Emilia Gomez, Justin Salamon"
reference: "E. Gómez, P. Herrera, P. Cano, J. Janer, J. Serrà, J. Bonada, S. El-Hajj, T. Aussenac & G. Holmberg. Music similarity systems and methods using descriptors. United States patent application number 12/128917, filed May 29, 2008."
examplechain: "hpcp->key(profileType=5, tonic triad)->tonal_chords_progression, tonal_chords_changes"
72:
group: "unknown"
name: "chords_dissonance"
algorithm: ""
description: "The Chords Dissonance of an audio piece corresponds to the dissonance between 2 successive chords of the \"tonal_chord_progression\"."
outputdomain: "a real number from 0 to ???"
application: ""
who: "Joachim"
reference: ""
examplechain: ""
73:
group: "tonal"
name: "chords_histogram"
algorithm: "ChordsDescriptors"
description: "The Chords Histogram represents, for each possible chord, the percentage of times this chord is played in the audio piece, following the \"tonal_chords_progression\". The histogram \"normalized\" to the \"tonal_key_key\" following the circle of fifth."
outputdomain: "real, from 0 to 100. Returns a list of 24 values (from 0 to 100) representing the chords in the following order (circle of fifths):\n C, Em, G, Bm, D, F#m, A, C#m, E, G#m, B, D#m, F#, A#m, C#, Fm, G#, Cm, D#, Gm, A#, Dm, F, Am"
application: "Harmonic description and similarity."
who: "Thomas Aussenac, Emilia Gomez, Justin Salamon"
reference: "E. Gómez, P. Herrera, P. Cano, J. Janer, J. Serrà, J. Bonada, S. El-Hajj, T. Aussenac & G. Holmberg. Music similarity systems and methods using descriptors. United States patent application number 12/128917, filed May 29, 2008."
examplechain: "hpcp->key(profileType=5, tonic triad)->tonal_chords_progression, and hpcp->key(defaultparams)"
74:
group: "tonal"
name: "chords_key"
algorithm: "Key"
description: "A chord is made of three (triad) or more notes that sound simultaneously. Each chord is specified by its root/bass note (what we call \"key\" A, A#, B, C, C#, D, D#, E, F, F#, G, G#), its type or \"mode\" (major, minor, major7,...) and its strength (how close the note distribution is from the estimated chord). \n\n\nA succession of chords is called a chord progression.\n\n\nThe chord is computed using the key estimation algorithm within short segments of 1 or 2 seconds."
outputdomain: "string. The string represents the chord of the analyzed segment, A, A#, B, C, C#, D, D#, E, F, F#, G, G#"
application: "Chord estimation, harmonic description. "
who: "Thomas Aussenac, Emilia Gomez"
reference: "Takuya Fujishima. Real-time chord recognition of musical sound: A system using common lisp music. In ICMC, pages 464–467, Beijing, China, 1999."
examplechain: "hpcp->key(profileType=5, tonic triad)"
75:
group: "tonal"
name: "chords_mode"
algorithm: "Key"
description: "A chord is made of three (triad) or more notes that sound simultaneously. Each chord is specified by its root/bass note (what we call \"key\" A, A#, B, C, C#, D, D#, E, F, F#, G, G#), its type or \"mode\" (major, minor, major7,...) and its strength (how close the note distribution is from the estimated chord). \n\n\nA succession of chords is called a chord progression.\n\n\nThe chord is computed using the key estimation algorithm within short segments of 1 or 2 seconds."
outputdomain: "string. A string representing the mode of the chord of the song. Only triad chords are considered (major, minor or none)"
application: "Chord estimation, harmonic description. "
who: "Thomas Aussenac, Emilia Gomez"
reference: "Takuya Fujishima. Real-time chord recognition ofmusical sound: A system using common lisp music. In ICMC, pages 464–467, Beijing, China, 1999."
examplechain: "hpcp->key(profileType=5, tonic triad)"
76:
group: "tonal"
name: "chords_number_rate"
algorithm: "ChordsDescriptors"
description: "The Chord Number Rate is the ratio from the number of different chords played more than 1% of the time to the total number of detected chords in \"tonal_chord_progression\"."
outputdomain: "real, from 0 to 1."
application: "Harmonic description and similarity."
who: "Thomas Aussenac, Emilia Gómez, Justin Salamon"
reference: "E. Gómez, P. Herrera, P. Cano, J. Janer, J. Serrà, J. Bonada, S. El-Hajj, T. Aussenac & G. Holmberg. Music similarity systems and methods using descriptors. United States patent application number 12/128917, filed May 29, 2008."
examplechain: "ChordsDetection(chords_progression) + Key(key,mode) -> ChordsDescriptors"
77:
group: "tonal"
name: "chords_progression"
algorithm: "ChordsDetection"
description: "A chord is made of three (triad) or more notes that sound simultaneously. Each chord is specified by its root/bass note (what we call \"key\" A, A#, B, C, C#, D, D#, E, F, F#, G, G#), its type or \"mode\" (major, minor, major7,...) and its strength (how close the note distribution is from the estimated chord). \n\n\nThe chord is computed using the key estimation algorithm within short segments of 1 or 2 seconds, so that we obtain a succession of chord values.\nThis succession of chords is called a chord progression.\n\n\nThe Chord Progression is the suite of chords symbols - e.g. C, Am, F#, Bb, Em, G#m, etc - played in the audio piece."
outputdomain: "string. The string represents the chord sequence of the song, where each chord is one of: A, A#, B, C, C#, D, D#, E, F, F#, G, G#, with an optional m (for minor)."
application: "Chord estimation, harmonic description, similarity."
who: "Thomas Aussenac, Emilia Gómez, Justin Salamon"
reference: "Takuya Fujishima. Real-time chord recognition ofmusical sound: A system using common lisp music. In ICMC, pages 464–467, Beijing, China, 1999.\n"
examplechain: "signal->ChordsDetection\n\nOR: hpcp->average over several frames and normalize->key(profileType=5, tonic triad)"
78:
group: "tonal"
name: "chords_strength"
algorithm: "Key"
description: "A chord is made of three (triad) or more notes that sound simultaneously. Each chord is specified by its root/bass note (what we call \"key\" A, A#, B, C, C#, D, D#, E, F, F#, G, G#), its type or \"mode\" (major, minor, major7,...) and its strength . \n\n\nThe chord is computed using the key estimation algorithm within short segments of 1 or 2 seconds.\n\n\nThe Chord Strength descriptor represents the power/correlation of the chord detection: high value means that the chord detected location is very tonal and low value means that it's not very tonal for the used key profile or template.\n\n\nA succession of chords is called a chord progression.\n\n\n\nThe chord is computed using the key estimation algorithm within short segments of 1 or 2 seconds."
outputdomain: "real, from 0 to 1."
application: "Chord estimation, harmonic description, classification."
who: "Thomas Aussenac, Emilia Gomez"
reference: "Takuya Fujishima. Real-time chord recognition ofmusical sound: A system using common lisp music. In ICMC, pages 464–467, Beijing, China, 1999."
examplechain: "hpcp->key(profileType=5, tonic triad)"
79:
group: "lowlevel"
name: "dissonance"
algorithm: "Dissonance"
description: "a perceptual descriptor used to measure the roughness of the sound. based on the fact that two sinusoidal spectral components share a dissonance curve, which values are dependent on their frequency and amplitude relations. the total dissonance is derived by summing up the values for all the components (i.e. the spectral peaks) of a given frame. the dissonance curves are obtained from perceptual experiments conducted in the paper listed below."
outputdomain: "real, from 0 to 1"
application: "segmentation"
who: "Ferdinand Fuhrmann"
reference: "Plomp, R., Levelt, W., Tonal consonance and critical bandwidth, The Journal of the Acoustical Society of America, Vol. 38, No. 4, pp. 548-560, 1965."
examplechain: "spectrum -> peakDetection -> Dissonance -> Dissonance"
80:
group: "tonal"
name: "hpcp"
algorithm: "HPCP"
description: "The HPCP is the Harmonic Pitch Class Profile, i.e. a 12, 24, 36,... size (size being a multiple of 12) dimensional vector which represents the intensities of each of the frequency bins of an equal-tempered scale."
outputdomain: "real, from 0 to 1. List of values from 0 to 1."
application: "Key estimation, tonal similarity, classification"
who: "Thomas Aussenac, Emilia Gomez"
reference: "Gómez, E. Tonal Description of music audio signals, PhD Thesis, 2006. \nhttp://mtg.upf.edu/~egomez/thesis/"
examplechain: "Spectrum->PeakDetection->TuningFrequency\nSpectrum->PeakDetection->SpectralWhitening->HPCP"
81:
group: "tonal"
name: "key_key"
algorithm: "Key"
description: "In music theory, the key identifies the tonic triad, the chord, major or minor, which represents the final point of rest for a piece, or the focal point of a section. Although the key of a piece may be named in the title (e.g. Symphony in C), or inferred from the key signature, the establishment of key is brought about via functional harmony, a sequence of chords leading to one or more cadences. A key may be major or minor."
outputdomain: "string. A string representing the key of the song, A, A#, B, C, C#, D, D#, E, F, F#, G, G#"
application: "Key estimation, tonal similarity, classification"
who: "Emilia Gomez, Justin Salamon"
reference: "Gómez, E. (2006). Tonal description of polyphonic audio for music content processing. INFORMS Journal on Computing, Special Cluster on Computation in Music."
examplechain: "hpcp->Key"
82:
group: "tonal"
name: "key_mode"
algorithm: "Key"
description: "In music theory, the key identifies the tonic triad, the chord, major or minor, which represents the final point of rest for a piece, or the focal point of a section. Although the key of a piece may be named in the title (e.g. Symphony in C), or inferred from the key signature, the establishment of key is brought about via functional harmony, a sequence of chords leading to one or more cadences. A key may be major or minor."
outputdomain: "string. A string representing the mode of the key of the song, either 'major', 'minor' or 'none'"
application: "Key estimation, tonal similarity, classification"
who: "Emilia Gomez, Justin Salamon"
reference: "Gómez, E. (2006). Tonal description of polyphonic audio for music content processing. INFORMS Journal on Computing, Special Cluster on Computation in Music. "
examplechain: "hpcp->Key"
83:
group: "tonal"
name: "key_strength"
algorithm: "Key"
description: "The Key Strength descriptor represents the power/correlation of the key: high value means that the piece is very tonal and low value means that it's not very tonal for the used key profile or template."
outputdomain: "real, from 0 to 1."
application: "Tonal similarity, music description, classification between tonal and non-tonal music"
who: "Emilia Gomez, Justin Salamon"
reference: "Gómez, E. (2006). Tonal description of polyphonic audio for music content processing. INFORMS Journal on Computing, Special Cluster on Computation in Music. "
examplechain: "hpcp->Key"
84:
group: "tonal"
name: "tuning_diatonic_strength"
algorithm: "Key"
description: "The Diatonic Strength is the \"tonal_key_strength\" calculated using a diatonic tonal profile on the 120-bins HPCP average."
outputdomain: "real, from 0 to 1."
application: "western vs non-western music classification, key estimation"
who: "Emilia Gomez"
reference: "Gómez, E. and Herrera, P. (accepted, under revision). ‘Comparative Analysis of Music Recordings from Western and Non-Western traditions by Automatic Tonal Feature Extraction’, Empirical Musicology Review. \nISSN: 1559-5749"
examplechain: "hpcp->Key (parameters for the Key algorithm: profileType=1)"
85:
group: "tonal"
name: "tuning_equal_tempered_deviation"
algorithm: "HighResolutionFeatures"
description: "This is a measure of the deviation of the 120-length HPCP (Harmonic Pitch Class Profiles) local maxima with respect to equal-tempered bins. This measure if how the audio piece scale may be considered as an equal-tempered one or not. An Indian music audio piece may have a high equal tempered deviation while a pop song may have a low one."
outputdomain: "real, non-negative."
application: "western vs non-western music classification"
who: "Emilia Gomez, Justin Salamon"
reference: "Gómez, E. and Herrera, P. (accepted, under revision). ‘Comparative Analysis of Music Recordings from Western and Non-Western traditions by Automatic Tonal Feature Extraction’, Empirical Musicology Review. \nISSN: 1559-5749"
examplechain: "hpcp(size=120)->PeakDetection->HighResolutionFeatures"
86:
group: "tonal"
name: "tuning_frequency"
algorithm: "TuningFrequency"
description: "Frequency used to tune a piece, by default 440 Hz "
outputdomain: "real, non-negative."
application: "Western vs non-western music classification, key estimation, HPCP computation, tonal similarity"
who: "Emilia Gomez, Justin Salamon"
reference: "Gómez, E. Tonal Description of Music Audio Signals, PhD Thesis, Chapter 2, 2006.\n\nGómez, E. and Herrera, P. (accepted, under revision). ‘Comparative Analysis of Music Recordings from Western and Non-Western traditions by Automatic Tonal Feature Extraction’, Empirical Musicology Review. \nISSN: 1559-5749"
examplechain: "Spectrum->PeakDetection->TuningFrequency"
87:
group: "unknown"
name: "tuning_nontempered2tempered_energy_ratio"
algorithm: ""
description: "This is the ratio between the energy on non-tempered bins and the total energy, computed from the 120-bins HPCP average. This measure if how the audio piece scale may be considered as an equal-tempered one or not. An Indian music audio piece may have a low ratio while a pop song may have a high one."
outputdomain: "a real number from 0 to 1."
application: ""
who: "Thomas"
reference: ""
examplechain: ""
88:
group: "tonal"
name: "tuning_nontempered_energy_ratio"
algorithm: "HighResolutionFeatures"
description: "This is the ratio between the energy on non-tempered peaks and the total energy, computed from the 120-bins HPCP average. This measure if how the audio piece scale may be considered as an equal-tempered one or not. An Indian music audio piece may have a low ratio while a pop song may have a high one."
outputdomain: "real, from 0 to 1."
application: "Western vs non-western music classification"
who: "Emilia Gomez, Justin Salamon"
reference: "Gómez, E. and Herrera, P. (accepted, under revision). ‘Comparative Analysis of Music Recordings from Western and Non-Western traditions by Automatic Tonal Feature Extraction’, Empirical Musicology Review. \nISSN: 1559-5749"
examplechain: "hpcp(size=120)->PeakDetection->HighResolutionFeatures"
89:
group: "unknown"
name: "relative_ioi"
algorithm: ""
description: ""
outputdomain: ""
application: ""
who: ""
reference: ""
examplechain: ""
90:
group: "unknown"
name: "relative_ioi_peaks"
algorithm: ""
description: ""
outputdomain: ""
application: ""
who: "Paul"
reference: ""
examplechain: ""
91:
group: "rhythm"
name: "bpm_estimates"
algorithm: "ExtractorTempoTap"
description: "BPM estimates is a list of estimated BPM (Beat per minute, see the description of the BPM descriptor). TODO: frequency of this descriptor\n"
outputdomain: "a vector of real (bpm)"
application: "Tempo tracking"
who: "Cyril Laurier"
reference: ""
examplechain: "input signal->bpm estimates"
92:
group: "rhythm"
name: "first_peak_bpm"
algorithm: "BPMHistogramDescriptors"
description: "This algorithm computes the value of the highest peak of the 'beats per minute' (bpm) probability histogram."
outputdomain: "real, non-negative"
application: "Genre classification; beat estimation. "
who: "Martin Haro"
reference: "http://en.wikipedia.org/wiki/Beats_per_minute"
examplechain: ""
93:
group: "rhythm"
name: "first_peak_spread"
algorithm: "BPMHistogramDescriptors"
description: "This algorithm computes the spread of the highest peak of the 'beats per minute' (bpm) probability histogram.\nThe spread is defined as the variance of a distribution around its mean value. It is equal to the 2nd order central moment. \n"
outputdomain: "real, non-negative"
application: "Genre classification; beat estimation. "
who: "Martin Haro"
reference: "http://en.wikipedia.org/wiki/Beats_per_minute and \nhttp://en.wikipedia.org/wiki/Central_moment"
examplechain: ""
94:
group: "rhythm"
name: "first_peak_weight"
algorithm: "BPMHistogramDescriptors"
description: "This algorithm computes the weight of the highest peak of the 'beats per minute' (bpm) probability histogram. "
outputdomain: "real, non-negative"
application: "Genre classification; beat estimation. "
who: "Martin Haro"
reference: "http://en.wikipedia.org/wiki/Beats_per_minute"
examplechain: ""
95:
group: "rhythm"
name: "bpm_intervals"
algorithm: "ExtractorTempoTap"
description: "BPM Intervals is a list of beats interval in seconds. It estimates the time in seconds between two beats. At each frame, an estimation is added to the list.\nTODO: frequency of this descriptor\n"
outputdomain: "a real vector of real (interval between beats in seconds)"
application: "Tempo tracking"
who: "Cyril Laurier"
reference: ""
examplechain: "input signal->bpm intervals"
96:
group: "rhythm"
name: "rubato_start"
algorithm: "BpmRubato"
description: "This descriptor provides a list of values indicating the start times, in seconds, of large tempo changes in the signal."
outputdomain: "real, positive. Array of real values."
application: "Measure fluctuation in tempo or rhythm"
who: "Owen Meyers"
reference: ""
examplechain: "beats -> rubatoStart"
97:
group: "rhythm"
name: "rubato_stop"
algorithm: "BpmRubato"
description: "This descriptor provides a list of values indicating the stop times, in seconds, of large tempo changes in the signal."
outputdomain: "real, positive. Array of real values."
application: "Measure fluctuation in tempo or rhythm"
who: "Owen Meyers"
reference: ""
examplechain: "beats -> rubatoStop"
98:
group: "rhythm"
name: "second_peak_bpm"
algorithm: "BPMHistogramDescriptors"
description: "This algorithm computes the value of the second highest peak of the 'beats per minute' (bpm) probability histogram."
outputdomain: "real, non-negative"
application: "Genre classification; beat estimation. "
who: "Martin Haro"
reference: "http://en.wikipedia.org/wiki/Beats_per_minute"
examplechain: ""
99:
group: "rhythm"
name: "second_peak_spread"
algorithm: "BPMHistogramDescriptors"
description: "This algorithm computes the spread of the second highest peak of the 'beats per minute' (bpm) probability histogram. The spread is defined as the variance of a distribution around its mean value. It is equal to the 2nd order central moment. "
outputdomain: "real, non-negative"
application: "Genre classification; beat estimation. "
who: "Martin Haro"
reference: "http://en.wikipedia.org/wiki/Beats_per_minute and http://en.wikipedia.org/wiki/Central_moment\n"
examplechain: ""
100:
group: "rhythm"
name: "second_peak_weight"
algorithm: "BPMHistogramDescriptors"
description: "This algorithm computes the weight of the second highest peak of the 'beats per minute' (bpm) probability histogram. "
outputdomain: "real, non-negative"
application: "Genre classification; beat estimation. "
who: "Martin Haro"