# descriptors.yaml (forked from MTG/essentia)
timestamp: 2008-07-25 15:53:52 +0200
1:
group: "unknown"
name: "danceability"
algorithm: "Danceability"
description: "the higher the value of the danceability descriptor, the easier it should be to dance to this song"
outputdomain: "a real number from 0 to ???"
application: ""
who: "Thomas"
reference: ""
examplechain: ""
2:
group: "rhythm"
name: "onset_rate"
algorithm: "OnsetRate"
description: "The onset is the beginning of a note or a sound, in which the amplitude of the sounds rises from zero to an initial peak. The onset rate is a real number representing the number of onsets per second. It may also be considered as a measure of the number of sonic events per second, and thus a rhythmic indicator of the audio piece. A higher onset rate means that the audio piece has a higher rhythmic density."
outputdomain: "real, positive"
application: "Rhythm detection"
who: "Thomas Aussenac, Owen Meyers"
reference: "http://aubio.org/phd/thesis/brossier06thesis.pdf"
examplechain: "fft -> onsetDetection -> onsets -> onsetRate"
3:
group: "unknown"
name: "equalization_profile"
algorithm: ""
description: "This descriptor is not longer part of essentia versions higher than 0.6.0"
outputdomain: "integer value between 1 and 8"
application: "not useful. Failed the Robustness test"
who: "Thomas, Joachim"
reference: ""
examplechain: ""
4:
group: "unknown"
name: "excitement"
algorithm: ""
description: "(weka tree) The higher the value, the more exciting the song should be. This weka tree is based on the ground truth from the Yamaha database."
outputdomain: "an integer value which can be rather 1 (not exciting), 2 (exciting) or 3 (very exciting)."
application: ""
who: "Thomas"
reference: ""
examplechain: ""
5:
group: "unknown"
name: "thumbnail"
algorithm: ""
description: "Time onsets corresponding to the most relevant segment of a song."
outputdomain: "A time stamps pair (start and end of the thumbnail) in seconds."
application: ""
who: "Thomas"
reference: ""
examplechain: ""
6:
group: "unknown"
name: "intensity"
algorithm: ""
description: "Intensity is a measure of the intensity of an audio piece from a rhythmic point of view. Typically, a slow, soft and relaxing audio can be considered to have a low intensity. On the other hand, a fast energetic audio can be considered as having a high intensity. The higher the value, the more intense the audio piece is."
outputdomain: "an integer value between 0 and 1"
application: ""
who: "Thomas"
reference: ""
examplechain: ""
7:
group: "unknown"
name: "segments"
algorithm: ""
description: "Time onsets corresponding to the different segments found after segmentation."
outputdomain: "a list of time stamps pairs (start and end of each segment) in seconds."
application: ""
who: "Thomas"
reference: ""
examplechain: ""
8:
group: "unknown"
name: "speech_music"
algorithm: ""
description: "This descriptor intends to describe if a given audio file is a music file or a speech file."
outputdomain: "a string label which can be either \"speech\" or \"music'."
application: ""
who: "Shadi"
reference: ""
examplechain: ""
9:
group: "unknown"
name: "voice_instrumental"
algorithm: ""
description: "This descriptor intends to describe if a given audio file is an instrumental file or a song with voice and accompaniment."
outputdomain: "A string label which can be either \"voice\" or \"instrumental'."
application: ""
who: "Thomas"
reference: ""
examplechain: ""
10:
group: "unknown"
name: "dynamic_complexity"
algorithm: ""
description: "The Dynamic Complexity is the average absolute deviation from the global loudness level estimate on the dB scale. It is related to the dynamic range and to the amount of fluctuation in loudness present in a recording."
outputdomain: "A real number from 0 to 1."
application: ""
who: "Thomas"
reference: ""
examplechain: ""
11:
group: "unknown"
name: "larm"
algorithm: ""
description: "This algorithm extracts the Equivalent sound level (Leq) of an audio signal. The Larm measure with Revised Low-frequency B-weighting (RLB) has shown to be reliable, objective loudness estimate of music and speech."
outputdomain: "a real number in dB from -100dB to ???"
application: ""
who: "Thomas"
reference: ""
examplechain: ""
12:
group: "metadata"
name: "replay_gain"
algorithm: "ReplayGain"
description: "The Replay Gain, a measure of the perceived loudness of an audio piece. "
outputdomain: "real. The value is given in dB."
application: "Can be used to compute and store the recommended gain for playing a file, for example within a collection which has files with highly varying gain levels."
who: "Thomas Aussenac, Justin Salamon"
reference: "http://replaygain.hydrogenaudio.org/"
examplechain: "signal -> ReplayGain"
13:
group: "unknown"
name: "vicker"
algorithm: ""
description: "The loudness Vicker is a loudness measure used for calculating the Dynamic Complexity."
outputdomain: "a real number in dB from -90 to ???"
application: ""
who: "Thomas"
reference: ""
examplechain: ""
14:
group: "unknown"
name: "panning"
algorithm: ""
description: "This represents the parameterized panning curve of the given audio file."
outputdomain: "An array of real numbers."
application: ""
who: "Xavier Janer"
reference: ""
examplechain: ""
15:
group: "rhythm"
name: "beats_loudness"
algorithm: "SingleBeatLoudness"
description: "Beats loudness is a measure of the strength of the rhythmic beats of the audio piece. It turns to be very useful for characterizing audio piece. "
outputdomain: "real, from 0 to 1"
application: "Genre classification. For example, a folk song may have a low beats loudness while a punk-rock song or a hip-hop song may have a high beats loudness."
who: "Thomas Aussenac, Martin Haro"
reference: ""
examplechain: "input signal -> beats_loudness"
16:
group: "rhythm"
name: "beats_loudness_bass"
algorithm: "SingleBeatLoudness"
description: "Beats loudness bass is a measure of the strength of the low frequency part of rhythmic beats of an audio piece. It turns to be very useful for characterizing an audio piece. "
outputdomain: "real, from 0 to 1"
application: "Genre Classification. For example, a folk song or a punk-rock may have a low beats loudness bass, while a hip-hop song may have a high bass beats loudness bass."
who: "Thomas Aussenac, Martin Haro"
reference: ""
examplechain: "input signal -> beats_loudness_bass"
17:
group: "rhythm"
name: "onset_times"
algorithm: "Onsets"
description: "The onset is the beginning of a note or a sound, in which the amplitude of the sound rises from zero to an initial peak. The onsets are the time stamps in seconds corresponding to the onsets of the audio piece."
outputdomain: "real, positive. Returns a list of time stamps in seconds."
application: "Rhythm detection"
who: "Thomas Aussenac, Paul Brossier, Owen Meyers"
reference: "http://aubio.org/phd/thesis/brossier06thesis.pdf"
examplechain: "onsetDetection -> onsetTimes"
18:
group: "unknown"
name: "der_av_after_max"
algorithm: ""
description: "The derivative average, weighted by the amplitude, after the max amplitude of the signal envelope. This coefficient helps discriminating Impulsive sounds, which have a steepest release, so a smaller value, from Decrescendo sounds."
outputdomain: "a real number from ???"
application: "Strange."
who: "Thomas"
reference: ""
examplechain: ""
19:
group: "unknown"
name: "effective_duration"
algorithm: ""
description: "the effective duration is a measure of The time the signal is perceptually meaningful. It is approximated by the time the envelope is above a given threshold, e. g. 40% of the envelope maximum."
outputdomain: "a real number in seconds from 0 to duration of the audio piece."
application: "Up to you."
who: "Thomas"
reference: ""
examplechain: ""
20:
group: "unknown"
name: "flatness"
algorithm: ""
description: "The flatness coefficient is the ratio of the value above which lie 5% of the values to the value above which lie 80% of the values. This coefficient is close to 1 for flat envelope and large for sounds having a large dynamic."
outputdomain: "a real number from 0 to 1."
application: "Good. Noises, percussive sounds and pitched sounds cluster."
who: "Thomas"
reference: ""
examplechain: ""
21:
group: "sfx"
name: "inharmonicity"
algorithm: "Inharmonicity"
description: "The divergence of the signal spectral components from a purely harmonic signal. It is computed as the energy weighted divergence of the spectral components from the multiple of the fundamental frequency. The inharmonicity ranges from 0 (purely harmonic signal) to 1 (inharmonic signal)."
outputdomain: "real, from 0 to 1."
application: "E.g. charaterisation of piano sounds"
who: "Thomas Aussenac, Justin Salamon"
reference: "http://en.wikipedia.org/wiki/Inharmonicity"
examplechain: "spectrum -> SpectralPeaks (frequencies,magnitudes) -> HarmonicPeaks (frequencies, magnitudes)\n\nspectrum -> PitchDetection\n\nHarmonicPeaks (frequencies,magnitudes) & PitchDetection -> Inharmonicity"
22:
group: "unknown"
name: "logattacktime"
algorithm: ""
description: "The duration from when the sound becomes perceptually audible to when it reaches its maximum intensity. It is computed by applying thresholds on the signal. The start of the sound is estimated as the point where the signal reaches 20% of the maximum value. This is to account for possible noise presence. The end of the logAttack is estimated as the point where the signal reaches 90% of the maximum value. This is to account for the max value possibility occurring after the logAttack, as in a trumpet sound."
outputdomain: "a real number from -5 to ???"
application: "Good."
who: "Thomas"
reference: ""
examplechain: ""
23:
group: "unknown"
name: "max_der_before_max"
algorithm: ""
description: "The maximum derivative before the max amplitude. This coefficient helps discriminating Crescendo and Delta sounds that have a smooth attack so a smaller value than sounds with different dynamic profile."
outputdomain: "a real number from ???"
application: "Good."
who: "Thomas"
reference: ""
examplechain: ""
24:
group: "unknown"
name: "max_to_total"
algorithm: ""
description: "The maximum amplitude time to total length ratio of a signal envelope. This coefficient shows how much the maximum amplitude is off-center. Its value is close to 0 if the maximum is close to the beginning (e.g. Decrescendo or Impulsive sounds), close to 0.5 if it is close to the middle (e. g. Delta sounds) and close to 1 if it is close to the end of the sound (e.g. Crescendo sounds)."
outputdomain: "a real number from 0 to 1"
application: "Good on short sounds."
who: "Thomas"
reference: ""
examplechain: ""
25:
group: "sfx"
name: "oddtoevenharmonicenergyratio"
algorithm: "OddToEvenHarmonicEnergyRatio"
description: "The Odd to Even Harmonic Energy Ratio of a signal given its harmonic peaks. The Odd to Even Harmonic Energy Ratio is a measure allowing distinguishing odd harmonic energy predominant sounds (such as clarinet sounds) from equally important harmonic energy sounds (such as the trumpet)."
outputdomain: "a real number from 0 to 1"
application: "Discrimination of sounds with predominance of odd or even harmonics"
who: "Thomas Aussenac, Gerard Roma"
reference: "Peeters, G.: A large set of audio features for sound description in the CIUDADO project. Technical Report, IRCAM, 2004\n"
examplechain: "spectrum->spectral_peaks;\nspectrum->pitch_detection;\n(spectral_peaks,pitch_detection)->harmonic_peaks->oddtoevenharmonicenergyratio"
26:
group: "unknown"
name: "onsets_number"
algorithm: ""
description: "The number of onsets. If the number of onsets is greater than 1 (the first onset correspond to the attack of the sound), the sound is classified as iterative."
outputdomain: "a real number from 0 to ???"
application: "Up to you."
who: "Thomas"
reference: ""
examplechain: ""
27:
group: "sfx"
name: "pitch_after_max_to_before_max_energy_ratio"
algorithm: "AfterMaxToBeforeMaxEnergyRatio"
description: "The ratio of energy after the maximum to energy before the maximum of pitch values. Sounds having an ascending pitch value a small while sounds having a descending pitch have a high value."
outputdomain: "a real number from 0 to 1"
application: "Discriminating sounds with different relation between pitch and energy envelope"
who: "Thomas Aussenac, Gerard Roma"
reference: ""
examplechain: "pitch-> pitch_after_max_to_before_max_energy_ratio"
28:
group: "sfx"
name: "pitch_centroid"
algorithm: "Centroid"
description: "The center of gravity of the array of pitch values per frame. A value close to 0.5 may indicate a stable pitch"
outputdomain: "a real number normalized by the range parameter."
application: "Classifying sound effects with a potentially varying pitch."
who: "Thomas Aussenac, Gerard Roma"
reference: ""
examplechain: "pitch->centroid"
29:
group: "sfx"
name: "pitch_max_to_total"
algorithm: "MaxToTotal"
description: "A measure of the relative position in time of the maximum pitch value. A value of zero (maximum at the beginning) indicates descending pitch, while a value of one indicates an ascending pitch."
outputdomain: "a real number from 0 to 1."
application: "Discriminating sound effects with different pitch envelopes."
who: "Thomas Aussenac, Gerard Roma"
reference: ""
examplechain: "pitch->max_to_total"
30:
group: "sfx"
name: "pitch_min_to_total"
algorithm: "MinToTotal"
description: "A measure of the relative position in time of the minimum pitch value. A value of zero (minimum at the beginning) indicates ascending pitch, while a value of one indicates an descending pitch."
outputdomain: "a real number from 0 to 1"
application: "Discriminating sound effects with different pitch envelopes"
who: "Thomas Aussenac, Gerard Roma"
reference: ""
examplechain: "pitch->minToTotal"
31:
group: "unknown"
name: "strongdecay"
algorithm: ""
description: "The strong decay is built from the non-linear combination of the signal energy and the signal temporal centroid. A signal containing a temporal centroid near its left boundary and a strong energy is said to have a strong decay."
outputdomain: "a real number from ???"
application: "Bad!"
who: "Thomas"
reference: ""
examplechain: ""
32:
group: "unknown"
name: "tc_to_total"
algorithm: ""
description: "The temporal centroid to total length ratio of a signal envelope. This coefficient shows how the sound is \"balanced'. Its value is close to 0 if most of the energy lies at the beginning (e.g. Decrescendo or Impulsive sounds), close to 0.5 if the sound is symmetric (e.g. Unvarying or Delta sounds) and close to 1 if most of the energy lies at the end of the sound (e.g. Crescendo sounds)."
outputdomain: "a real number from 0 to 1."
application: "Bad!"
who: "Thomas"
reference: ""
examplechain: ""
33:
group: "unknown"
name: "temporal_centroid"
algorithm: ""
description: "The center of gravity of the audio envelope."
outputdomain: "a real number from ???"
application: "Somehow good."
who: "Thomas"
reference: ""
examplechain: ""
34:
group: "unknown"
name: "temporal_decrease"
algorithm: ""
description: "The center of gravity of the audio envelope."
outputdomain: "a real number from ???"
application: "Good."
who: "Thomas"
reference: ""
examplechain: ""
35:
group: "lowlevel"
name: "barkbands"
algorithm: "BarkBands"
description: "This algorithm extracts the 28 Bark band values of a Spectrum. For each bark band the power-spectrum (mag-squared) is summed. The first two bands [0..100] and [100..200] are divided in two for better resolution.\n\nBarkbands only takes one parameter (sampleRate), one input (the spectrum) and one output (the bands)\n\n The Frequency edges (in Hz) are: 0, 50, 100, 150, 200, 300, 400, 510, 630, 770, 920, 1080, 1270, 1480, 1720, 2000, 2320, 2700, 3150, 3700, 4400, 5300, 6400, 7700, 9500, 12000, 15500, 20500 and 27000 \n\n"
outputdomain: "real, non-negative. 28 values (or less depending on the sampleRate)"
application: "Perceptual description of sounds, since the scale ranges from 1 to 24 and corresponds to the first 24 critical bands of hearing (see Zwicker, E. (1961), \"Subdivision of the audible frequency range into critical bands,\" The Journal of the Acoustical Society of America, 33, Feb., 1961.). "
who: "Thomas Aussenac, Martin Haro"
reference: "http://ccrma.stanford.edu/~jos/bbt/Bark_Frequency_Scale.html and \n\nhttp://en.wikipedia.org/wiki/Bark_scale\n\n"
examplechain: "spectrum -> barkbands"
36:
group: "lowlevel"
name: "barkbands_kurtosis"
algorithm: "DistributionShape"
description: "The kurtosis gives a measure of the flatness of a distribution around its mean value. A negative kurtosis indicates flatter bark bands. A positive kurtosis indicates peakier bark bands. A kurtosis = 0 indicates bark bands with normal distribution."
outputdomain: "real"
application: "Timbral characterization. "
who: "Thomas Aussenac, Martin Haro"
reference: "http://en.wikipedia.org/wiki/Kurtosis\n\nSee also barkbands' documentation"
examplechain: "spectrum -> barkbands -> centralmoments -> distributionshape"
37:
group: "lowlevel"
name: "barkbands_skewness"
algorithm: "DistributionShape"
description: "The skewness is a measure of the asymmetry of a distribution around its mean value. A negative skewness indicates bark bands with more energy in the high frequencies. A positive skewness indicates bark bands with more energy in the low frequencies. A skewness = 0 indicates symmetric bark bands. For silence or constants signal, skewness is 0."
outputdomain: "real "
application: "Timbral characterization."
who: "Thomas Aussenac, Martin Haro"
reference: "http://en.wikipedia.org/wiki/Skewness\n\nSee also barkbands' documentation"
examplechain: "spectrum -> barkbands -> centralmoments -> distributionshape\n"
38:
group: "lowlevel"
name: "spectral_centroid"
algorithm: "Centroid"
description: "The spectral centroid is a measure used in digital signal processing to characterize an audio spectrum. It indicates where the \"center of mass\" of the spectrum is. "
outputdomain: "real, non-negative"
application: "Perceptually, it has a robust connection with the impression of \"brightness\" of a sound. High values of it correspond to brighter textures."
who: "Thomas Aussenac, Elena Martinez"
reference: "G. Tzanetakis and P. Cook. Musical genre classification of audio signals. IEEE Transaction on speech and audio processing, 10(5), July 2002."
examplechain: "spectrum -> centroid"
39:
group: "lowlevel"
name: "spectral_crest"
algorithm: "Crest"
description: "The crest is the ratio between the max value and the arithmetic mean of the spectrum. It is a measure of the noisiness of the spectrum."
outputdomain: "real, greater than 1."
application: "Discrimination of noisy signals"
who: "Thomas Aussenac, Gerard Roma"
reference: "Peeters, G.: A large set of audio features for sound description in the CUIDADO project. Technical Report, IRCAM, 2004"
examplechain: "spectrum->barkbands->crest"
40:
group: "lowlevel"
name: "barkbands_spread"
algorithm: "DistributionShape"
description: "The spread is defined as the variance of a distribution around its mean value. It is equal to the 2nd order central moment.\n"
outputdomain: "real"
application: "Timbral characterization."
who: "Thomas Aussenac, Martin Haro"
reference: "http://en.wikipedia.org/wiki/Central_moment"
examplechain: "spectrum -> barkbands -> centralmoments -> distributionshape\n"
41:
group: "lowlevel"
name: "spectral_decrease"
algorithm: "Decrease"
description: "A measure of the amount of decrease of the spectral amplitude, which is supposed to be more related to perception than the spectrum slope (linear regression)."
outputdomain: "a real number normalized by the range parameter"
application: "Classification of musical instruments, pitch detection for some specific instruments like the piano"
who: "Thomas Aussenac, Gerard Roma"
reference: "Peeters, G.: A large set of audio features for sound description in the CIUDADO project. Technical Report, IRCAM, 2004"
examplechain: "spectrum**2 -> decrease"
42:
group: "lowlevel"
name: "spectral_energy"
algorithm: "Energy"
description: "The spectrum energy at a given frame."
outputdomain: "real, non-negative"
application: "This is a very general descriptor and can be used for a wide variety of applications"
who: "Thomas Aussenac, Elena Martinez"
reference: "http://en.wikipedia.org/wiki/Energy_%28signal_processing%29"
examplechain: "spectrum -> energy"
43:
group: "lowlevel"
name: "spectral_energyband_low"
algorithm: "EnergyBandRatio"
description: "The Energy Band Ratio of a spectrum is the ratio of the spectrum energy from startCutoffFrequency to stopCutoffFrequency to the total spectrum energy. For the Energy Band Ration Low, startCutoffFrequency = 20Hz and stopCutoffFrequency = 150Hz"
outputdomain: "real, from 0 to 1"
application: "This is a very general descriptor and can be used for a wide variety of applications"
who: "Thomas Aussenac, Elena Martinez"
reference: "http://en.wikipedia.org/wiki/Energy_%28signal_processing%29"
examplechain: "spectrum -> energy band ratio"
44:
group: "lowlevel"
name: "spectral_energyband_middle_low"
algorithm: "EnergyBandRatio"
description: "The Energy Band Ratio of a spectrum is the ratio of the spectrum energy from startCutoffFrequency to stopCutoffFrequency to the total spectrum energy. For the Energy Band Ration Middle Low, startCutoffFrequency = 150Hz and stopCutoffFrequency = 800Hz"
outputdomain: "real, from 0 to 1"
application: "This is a very general descriptor and can be used for a wide variety of applications"
who: "Thomas Aussenac, Elena Martinez"
reference: "http://en.wikipedia.org/wiki/Energy_%28signal_processing%29"
examplechain: "spectrum -> energy band ratio"
45:
group: "lowlevel"
name: "spectral_energyband_middle_high"
algorithm: "EnergyBandRatio"
description: "the Energy Band Ratio of a spectrum is the ratio of the spectrum energy from startCutoffFrequency to stopCutoffFrequency to the total spectrum energy. For the Energy Band Ration Middle High, startCutoffFrequency = 800Hz and stopCutoffFrequency = 4000Hz"
outputdomain: "real, from 0 to 1"
application: "This is a very general descriptor and can be used for a wide variety of applications"
who: "Thomas Aussenac, Elena Martinez"
reference: "http://en.wikipedia.org/wiki/Energy_%28signal_processing%29"
examplechain: "spectrum -> energy band ratio"
46:
group: "lowlevel"
name: "spectral_energyband_high"
algorithm: "EnergyBandRatio"
description: "The Energy Band Ratio of a spectrum is the ratio of the spectrum energy from startCutoffFrequency to stopCutoffFrequency to the total spectrum energy. For the Energy Band Ration High, startCutoffFrequency = 4000Hz and stopCutoffFrequency = 20000Hz"
outputdomain: "real, from 0 to 1"
application: "This is a very general descriptor and can be used for a wide variety of applications"
who: "Thomas Aussenac, Elena Martinez"
reference: "http://en.wikipedia.org/wiki/Energy_%28signal_processing%29"
examplechain: "spectrum -> energy band ratio"
47:
group: "lowlevel"
name: "spectral_flatness_db"
algorithm: "FlatnessDB"
description: "This is a kind of dB value of the Bark bands. It characterizes the shape of the spectral envelope. For tonal signals, flatness dB is close to 1, for noisy signals it is close to 0."
outputdomain: "real, from 0 to 1."
application: "segmentation"
who: "Thomas Aussenac, Ferdinand Fuhrmann"
reference: "Peeters, G.: A large set of audio features for sound description in the CIUDADO project. Technical Report, IRCAM, 2004"
examplechain: "spectrum -> Hz2Bark -> flatness -> flatnessDB -> flatness dB"
48:
group: "lowlevel"
name: "spectral_flux"
algorithm: "Flux"
description: "Spectral Flux is a measure of how quickly the power spectrum of a signal is changing, calculated by comparing the power spectrum for one frame against the power spectrum from the previous frame. The spectral flux can be used to determine the timbre of an audio signal, or in onset detection, among other things."
outputdomain: "a positive real number"
application: "Segmentation"
who: "Thomas Aussenac, Gerard Roma"
reference: "Tzanetakis, G. Cook, P. Multifeature audio segmentation for browsing and annotation.\nIEEE workshop on Applications of Signal Processing to Audio and Acoustics, 1999"
examplechain: "spectrum->flux"
49:
group: "lowlevel"
name: "hfc"
algorithm: "HFC"
description: "The High Frequency Content measure is a simple measure, taken across a signal spectrum (usually a STFT spectrum), which can be used to characterize the amount of high-frequency content in the signal. In contrast to perceptual measures, this is not based on any evidence about its relevance to human hearing. Despite that, it can be useful for some applications, such as onset detection."
outputdomain: "real, non-negative"
application: "Onset detection"
who: "Thomas Aussenac, Justin Salamon"
reference: "http://en.wikipedia.org/wiki/High_Frequency_Content_measure\n\n\n\nP. Brossier, J. P. Bello and M. D. Plumbley. Real-time temporal segmentation of note objects in music signals, in Proceedings of the International Computer Music Conference (ICMC 2004), Miami, Florida, USA, November 1-6, 2004."
examplechain: "spectrum -> hfc"
50:
group: "lowlevel"
name: "spectral_kurtosis"
algorithm: "DistributionShape"
description: "The kurtosis gives a measure of the flatness of a distribution around its mean value. A negative kurtosis indicates a flatter signal spectrum. A positive kurtosis indicates a peakier signal spectrum. A kurtosis = 0 indicates a spectrum with normal distribution."
outputdomain: "one real value"
application: "Timbral characterization. "
who: "Thomas Aussenac, Martin Haro"
reference: "http://en.wikipedia.org/wiki/Kurtosis"
examplechain: "spectrum -> centralmoments -> distributionshape"
51:
group: "lowlevel"
name: "spectral_spread"
algorithm: "DistributionShape"
description: "The spread is defined as the variance of a distribution around its mean value. It is equal to the 2nd order central moment."
outputdomain: "real"
application: "Timbral characterization."
who: "Thomas Aussenac, Martin Haro"
reference: "http://en.wikipedia.org/wiki/Central_moment"
examplechain: "spectrum -> centralmoments -> distributionshape"
52:
group: "lowlevel"
name: "spectral_skewness"
algorithm: "DistributionShape"
description: "The skewness is a measure of the asymmetry of a distribution around its mean value. A negative skewness indicates a signal spectrum with more energy in the high frequencies. A positive skewness indicates a signal spectrum with more energy in the low frequencies. A skewness = 0 indicates a symmetric spectrum. For silence or constants signal, skewness is 0."
outputdomain: "real"
application: "Timbral characterization."
who: "Thomas Aussenac, Martin Haro"
reference: "http://en.wikipedia.org/wiki/Skewness"
examplechain: "spectrum -> centralmoments -> distributionshape "
53:
group: "lowlevel"
name: "mfcc"
algorithm: "MFCC"
description: "This algorithm computes the mel-frequency cepstrum coefficients. "
outputdomain: "real, matrix of dimensions: number mfcc coefficients per number of frames"
application: "They have been widely used in speech recognition and also to model music since they provide a compact representation of the spectral envelope. The first coefficients concentrate\nmost of the signal energy. Its use for music information retrieval\nhas become standard since the seminar paper (2) from 1997"
who: "Thomas Aussenac, Elena Martinez"
reference: "(1) Rabiner, L. and Juang, B., 1993, Fundamentals of Speech Recognition, Prentice-Hall.\n\n(2) J. Foote. Content-based retrieval of music and audio. In In Multimedia Storage and Archiving Systems II, Proc. of SPIE, 1997.\n\n(3) Scheirer, E. and Slaney, M., 1997, Construction and evaluation of a robust multifeature speech/music\ndiscriminator, Proceedings IEEE International Conference on Acoustics, Speech, and Signal Processing.\n\n(4) B. Logan. Mel frequency cepstral coefficients for music modeling. In\nProc. of the 1st Annual International Symposium on Music Information\nRetrieval (ISMIR), 2000."
examplechain: "spectrum -> mfcc"
54:
group: "lowlevel"
name: "pitch"
algorithm: "PitchDetection"
description: "Pitch detection for monophonic sounds. Pitch is represented as the fundamental frequency of the analysed sound. The algorithm uses the YinFFT method developed by Paul Brossier, based on the time-domain YIN algorithm with an efficient implementation making use of the spectral domain."
outputdomain: "real, non-negative. Represents the frequency in Hertz."
application: "Monophonic voice and instrument transcription"
who: "Justin Salamon"
reference: "Paul Brossier, ''Automatic annotation of musical audio for interactive systems'', PhD thesis, Centre for Digital music, Queen Mary University of London, London, UK, 2006."
examplechain: "spectrum -> PitchDetection -> pitch"
55:
group: "unknown"
name: "pitch_histogram"
algorithm: ""
description: ""
outputdomain: ""
application: ""
who: ""
reference: ""
examplechain: ""
56:
group: "unknown"
name: "pitch_histogram_spread"
algorithm: ""
description: ""
outputdomain: ""
application: ""
who: ""
reference: ""
examplechain: ""
57:
group: "unknown"
name: "pitch_histogram_values"
algorithm: ""
description: ""
outputdomain: ""
application: ""
who: ""
reference: ""
examplechain: ""
58:
group: "lowlevel"
name: "pitch_instantaneous_confidence"
algorithm: "PitchDetection"
description: "a measure of pitch confidence derived from the yinFFT algorithm, which is a monophonic pitch detector. gives evidence about how much a certain pitch, detected in a frame, is affecting the total spectrum. If the output is near 1, there exist just one pitch in the mixture, an output near 0 indicates multiple, not distinguishable pitches."
outputdomain: "real, from 0 to 1."
application: "segmentation"
who: "Ferdinand Fuhrmann"
reference: "Paul Brossier, ''Automatic annotation of musical audio for interactive systems'', PhD thesis, Centre for Digital music, Queen Mary University of London, London, UK, 2006."
examplechain: "spectrum -> pitchDetection -> pitch_instantaneous confidence"
59:
group: "lowlevel"
name: "pitch_salience"
algorithm: "PitchSalience"
description: "The pitch salience is given by the ratio of the highest peak to the 0-lag peak in the autocorrelation function. Non-pitched sounds have a mean pitch salience value close to 0 while harmonic sounds have a value close to 1. Sounds having Unvarying pitch have a small pitch salience variance while sounds having Varying pitch have a high pitch salience variance."
outputdomain: "real, from 0 to 1."
application: "Characterising percussive sounds for example. We can expect low values for percussive sounds and high values for white noises."
who: "Thomas Aussenac, Justin Salamon"
reference: "Ricard J., Towards computational \nmorphological description of sound, \nDEA pre-thesis research work, Universitat Pompeu \nFabra, Barcelona, September 2004."
examplechain: "spectrum -> PitchSalience"
60:
group: "lowlevel"
name: "spectral_rms"
algorithm: "RMS"
description: "The root mean square spectrum energy."
outputdomain: "real, non-negative"
application: "It is a measure of loudness of the sound frame"
who: "Thomas Aussenac, Elena Martinez"
reference: "G. Tzanetakis and P. Cook. Musical genre classification of audio signals. IEEE Transaction on speech and audio processing, 10(5), July 2002"
examplechain: "array -> rms"
61:
group: "lowlevel"
name: "spectral_rolloff"
algorithm: "RollOff"
description: "The Roll Off is the frequency for which 85% of the spectrum energy is contained below. Like the centroid, the rolloff is also a measure of spectral shape, they are in fact strongly correlated."
outputdomain: "real, from 0 to 22500"
application: "It can be used to distinguish between harmonic and noisy sounds."
who: "Thomas Aussenac, Elena Martinez"
reference: "G. Tzanetakis and P. Cook. Musical genre classification of audio signals. IEEE Transaction on speech and audio processing, 10(5), July 2002."
examplechain: "spectrum -> rolloff"
62:
group: "lowlevel"
name: "silence_rate_20dB"
algorithm: "SilenceRate"
description: "This is the rate of frames where the level is above a given threshold, here -20dB. Returns 1 whenever the instant power of the input frame is below the given threshold, 0 otherwise"
outputdomain: "binary, 0 or 1"
application: "It intends to measure the level of compression of a signal."
who: "Thomas Aussenac, Owen Meyers"
reference: ""
examplechain: "instantPower -> silenceRate"
63:
group: "lowlevel"
name: "silence_rate_30dB"
algorithm: "SilenceRate"
description: "This is the rate of frames where the level is above a given threshold, here -30dB. Returns 1 whenever the instant power of the input frame is below the given threshold, 0 otherwise"
outputdomain: "binary, 0 or 1"
application: "It intends to measure the level of compression of a signal."
who: "Thomas Aussenac, Owen Meyers"
reference: ""
examplechain: "instantPower -> silenceRate"
64:
group: "lowlevel"
name: "silence_rate_60dB"
algorithm: "SilenceRate"
description: "This is the rate of frames where the level is above a given threshold, here -60dB. Returns 1 whenever the instant power of the input frame is below the given threshold, 0 otherwise"
outputdomain: "binary, 0 or 1"
application: "It intends to measure the level of compression of a signal."
who: "Thomas Aussenac, Owen Meyers"
reference: ""
examplechain: "instantPower -> silenceRate"
65:
group: "lowlevel"
name: "spectral_strongpeak"
algorithm: "StrongPeak"
description: "The Strong Peak is defined as the ratio between the spectrum maximum magnitude and the bandwidth of the maximum peak in the spectrum above a threshold (half its amplitude). It reveals whether the spectrum presents a very pronounced maximum peak. The thinner and the higher the maximum of the spectrum is, the higher the value this parameter takes."
outputdomain: "a positive real number "
application: "Recognition of percussive instruments"
who: "Thomas Aussenac, Gerard Roma"
reference: "Gouyon, F., Herrera, P. Exploration of techniques for automatic labeling of audio drum tracks instruments. Proceedings of MOSART (2001)"
examplechain: "spectrum->strongpeak"
66:
group: "metadata"
name: "duration"
algorithm: "Duration"
description: "The duration is a measure of the length of the signal."
outputdomain: "real, non-negative. The duration of the audio signal in seconds."
application: "Any application which requires the time duration of a signal"
who: "Thomas Aussenac, Justin Salamon"
reference: ""
examplechain: "signal -> Duration"
67:
group: "unknown"
name: "lpc"
algorithm: ""
description: "This is a measure of the Linear Predictive Coefficients vector of a signal."
outputdomain: "a list of 11 real values from ???"
application: ""
who: "Thomas"
reference: ""
examplechain: ""
68:
group: "lowlevel"
name: "zerocrossingrate"
algorithm: "ZeroCrossingRate"
description: "The Zero Crossing Rate is the number of sign changes between consecutive signal values divided by the total number of values. "
outputdomain: "real, from 0 to 1"
application: "It can be a measure of the noisiness of the signal: noisy signals tend to have a high value of it."
who: "Thomas Aussenac, Elena Martinez"
reference: "G. Tzanetakis and P. Cook. Musical genre classification of audio signals.\nIEEE Transaction on speech and audio processing, 10(5), July 2002."
examplechain: "input signal -> zero crossing rate"
69:
group: "lowlevel"
name: "spectral_complexity"
algorithm: "SpectralComplexity"
description: "Timbral Complexity is a measure of the complexity of the instrumentation of the audio piece. Typically, in a piece of audio several instruments are present. This increases the complexity of the spectrum of the audio and therefore, it represents a useful audio feature for characterizing a piece of audio."
outputdomain: "integer, non-negative"
application: "segmentation"
who: "Thomas Aussenac, Ferdinand Fuhrmann"
reference: "Streich, S.: Music Complexity a multi-faceted description of audio content. PhD Thesis, Universitat Pompeu Fabra, Barcelona, 2007."
examplechain: "spectrum -> SpectralPeaks -> SpectralComplexity -> spectral complexity"
70:
group: "unknown"
name: "chords_changes"
algorithm: ""
description: "The chord changes is the suite of time stamps when a played chord is different from the previous played chord."
outputdomain: "a list of time stamps in seconds."
application: ""
who: "Thomas"
reference: ""
examplechain: ""
71:
group: "tonal"
name: "chords_changes_rate"
algorithm: "ChordsDescriptors"
description: "The Chord Changes Rate is the ratio from the number of \"tonal_chords_changes\" to the total number of detected chords in \"tonal_chord_progression\"."
outputdomain: "real, from 0 to 1."
application: "Similarity, classification"
who: "Thomas Aussenac, Emilia Gomez, Justin Salamon"
reference: "E. Gómez, P. Herrera, P. Cano, J. Janer, J. Serrà, J. Bonada, S. El-Hajj, T. Aussenac & G. Holmberg. Music similarity systems and methods using descriptors. United States patent application number 12/128917, filed May 29, 2008."
examplechain: "hpcp->key(profileType=5, tonic triad)->tonal_chords_progression, tonal_chords_changes"
72:
group: "unknown"
name: "chords_dissonance"
algorithm: ""
description: "The Chords Dissonance of an audio piece corresponds to the dissonance between 2 successive chords of the \"tonal_chord_progression\"."
outputdomain: "a real number from 0 to ???"
application: ""
who: "Joachim"
reference: ""
examplechain: ""
73:
group: "tonal"
name: "chords_histogram"
algorithm: "ChordsDescriptors"
description: "The Chords Histogram represents, for each possible chord, the percentage of times this chord is played in the audio piece, following the \"tonal_chords_progression\". The histogram \"normalized\" to the \"tonal_key_key\" following the circle of fifth."
outputdomain: "real, from 0 to 100. Returns a list of 24 values (from 0 to 100) representing the chords in the following order (circle of fifths):\n C, Em, G, Bm, D, F#m, A, C#m, E, G#m, B, D#m, F#, A#m, C#, Fm, G#, Cm, D#, Gm, A#, Dm, F, Am"
application: "Harmonic description and similarity."
who: "Thomas Aussenac, Emilia Gomez, Justin Salamon"
reference: "E. Gómez, P. Herrera, P. Cano, J. Janer, J. Serrà, J. Bonada, S. El-Hajj, T. Aussenac & G. Holmberg. Music similarity systems and methods using descriptors. United States patent application number 12/128917, filed May 29, 2008."
examplechain: "hpcp->key(profileType=5, tonic triad)->tonal_chords_progression, and hpcp->key(defaultparams)"
74:
group: "tonal"
name: "chords_key"
algorithm: "Key"
description: "A chord is made of three (triad) or more notes that sound simultaneously. Each chord is specified by its root/bass note (what we call \"key\" A, A#, B, C, C#, D, D#, E, F, F#, G, G#), its type or \"mode\" (major, minor, major7,...) and its strength (how close the note distribution is from the estimated chord). \n\n\nA succession of chords is called a chord progression.\n\n\nThe chord is computed using the key estimation algorithm within short segments of 1 or 2 seconds."
outputdomain: "string. The string represents the chord of the analyzed segment, A, A#, B, C, C#, D, D#, E, F, F#, G, G#"
application: "Chord estimation, harmonic description. "
who: "Thomas Aussenac, Emilia Gomez"
reference: "Takuya Fujishima. Real-time chord recognition of musical sound: A system using common lisp music. In ICMC, pages 464–467, Beijing, China, 1999."
examplechain: "hpcp->key(profileType=5, tonic triad)"
75:
group: "tonal"
name: "chords_mode"
algorithm: "Key"
description: "A chord is made of three (triad) or more notes that sound simultaneously. Each chord is specified by its root/bass note (what we call \"key\" A, A#, B, C, C#, D, D#, E, F, F#, G, G#), its type or \"mode\" (major, minor, major7,...) and its strength (how close the note distribution is from the estimated chord). \n\n\nA succession of chords is called a chord progression.\n\n\nThe chord is computed using the key estimation algorithm within short segments of 1 or 2 seconds."
outputdomain: "string. A string representing the mode of the chord of the song. Only triad chords are considered (major, minor or none)"
application: "Chord estimation, harmonic description. "
who: "Thomas Aussenac, Emilia Gomez"
reference: "Takuya Fujishima. Real-time chord recognition ofmusical sound: A system using common lisp music. In ICMC, pages 464–467, Beijing, China, 1999."
examplechain: "hpcp->key(profileType=5, tonic triad)"
76:
group: "tonal"
name: "chords_number_rate"
algorithm: "ChordsDescriptors"
description: "The Chord Number Rate is the ratio from the number of different chords played more than 1% of the time to the total number of detected chords in \"tonal_chord_progression\"."
outputdomain: "real, from 0 to 1."
application: "Harmonic description and similarity."
who: "Thomas Aussenac, Emilia Gómez, Justin Salamon"
reference: "E. Gómez, P. Herrera, P. Cano, J. Janer, J. Serrà, J. Bonada, S. El-Hajj, T. Aussenac & G. Holmberg. Music similarity systems and methods using descriptors. United States patent application number 12/128917, filed May 29, 2008."
examplechain: "ChordsDetection(chords_progression) + Key(key,mode) -> ChordsDescriptors"
77:
group: "tonal"
name: "chords_progression"
algorithm: "ChordsDetection"
description: "A chord is made of three (triad) or more notes that sound simultaneously. Each chord is specified by its root/bass note (what we call \"key\" A, A#, B, C, C#, D, D#, E, F, F#, G, G#), its type or \"mode\" (major, minor, major7,...) and its strength (how close the note distribution is from the estimated chord). \n\n\nThe chord is computed using the key estimation algorithm within short segments of 1 or 2 seconds, so that we obtain a succession of chord values.\nThis succession of chords is called a chord progression.\n\n\nThe Chord Progression is the suite of chords symbols - e.g. C, Am, F#, Bb, Em, G#m, etc - played in the audio piece."
outputdomain: "string. The string represents the chord sequence of the song, where each chord is one of: A, A#, B, C, C#, D, D#, E, F, F#, G, G#, with an optional m (for minor)."
application: "Chord estimation, harmonic description, similarity."
who: "Thomas Aussenac, Emilia Gómez, Justin Salamon"
reference: "Takuya Fujishima. Real-time chord recognition ofmusical sound: A system using common lisp music. In ICMC, pages 464–467, Beijing, China, 1999.\n"
examplechain: "signal->ChordsDetection\n\nOR: hpcp->average over several frames and normalize->key(profileType=5, tonic triad)"
78:
group: "tonal"
name: "chords_strength"
algorithm: "Key"
description: "A chord is made of three (triad) or more notes that sound simultaneously. Each chord is specified by its root/bass note (what we call \"key\" A, A#, B, C, C#, D, D#, E, F, F#, G, G#), its type or \"mode\" (major, minor, major7,...) and its strength . \n\n\nThe chord is computed using the key estimation algorithm within short segments of 1 or 2 seconds.\n\n\nThe Chord Strength descriptor represents the power/correlation of the chord detection: high value means that the chord detected location is very tonal and low value means that it's not very tonal for the used key profile or template.\n\n\nA succession of chords is called a chord progression.\n\n\n\nThe chord is computed using the key estimation algorithm within short segments of 1 or 2 seconds."
outputdomain: "real, from 0 to 1."
application: "Chord estimation, harmonic description, classification."
who: "Thomas Aussenac, Emilia Gomez"
reference: "Takuya Fujishima. Real-time chord recognition ofmusical sound: A system using common lisp music. In ICMC, pages 464–467, Beijing, China, 1999."
examplechain: "hpcp->key(profileType=5, tonic triad)"
79:
group: "lowlevel"
name: "dissonance"
algorithm: "Dissonance"
description: "a perceptual descriptor used to measure the roughness of the sound. based on the fact that two sinusoidal spectral components share a dissonance curve, which values are dependent on their frequency and amplitude relations. the total dissonance is derived by summing up the values for all the components (i.e. the spectral peaks) of a given frame. the dissonance curves are obtained from perceptual experiments conducted in the paper listed below."
outputdomain: "real, from 0 to 1"
application: "segmentation"
who: "Ferdinand Fuhrmann"
reference: "Plomp, R., Levelt, W., Tonal consonance and critical bandwidth, The Journal of the Acoustical Society of America, Vol. 38, No. 4, pp. 548-560, 1965."
examplechain: "spectrum -> peakDetection -> Dissonance -> Dissonance"
80:
group: "tonal"
name: "hpcp"
algorithm: "HPCP"
description: "The HPCP is the Harmonic Pitch Class Profile, i.e. a 12, 24, 36,... size (size being a multiple of 12) dimensional vector which represents the intensities of each of the frequency bins of an equal-tempered scale."
outputdomain: "real, from 0 to 1. List of values from 0 to 1."
application: "Key estimation, tonal similarity, classification"
who: "Thomas Aussenac, Emilia Gomez"
reference: "Gómez, E. Tonal Description of music audio signals, PhD Thesis, 2006. \nhttp://mtg.upf.edu/~egomez/thesis/"
examplechain: "Spectrum->PeakDetection->TuningFrequency\nSpectrum->PeakDetection->SpectralWhitening->HPCP"
81:
group: "tonal"
name: "key_key"
algorithm: "Key"
description: "In music theory, the key identifies the tonic triad, the chord, major or minor, which represents the final point of rest for a piece, or the focal point of a section. Although the key of a piece may be named in the title (e.g. Symphony in C), or inferred from the key signature, the establishment of key is brought about via functional harmony, a sequence of chords leading to one or more cadences. A key may be major or minor."
outputdomain: "string. A string representing the key of the song, A, A#, B, C, C#, D, D#, E, F, F#, G, G#"
application: "Key estimation, tonal similarity, classification"
who: "Emilia Gomez, Justin Salamon"
reference: "Gómez, E. (2006). Tonal description of polyphonic audio for music content processing. INFORMS Journal on Computing, Special Cluster on Computation in Music."
examplechain: "hpcp->Key"
82:
group: "tonal"
name: "key_mode"
algorithm: "Key"
description: "In music theory, the key identifies the tonic triad, the chord, major or minor, which represents the final point of rest for a piece, or the focal point of a section. Although the key of a piece may be named in the title (e.g. Symphony in C), or inferred from the key signature, the establishment of key is brought about via functional harmony, a sequence of chords leading to one or more cadences. A key may be major or minor."
outputdomain: "string. A string representing the mode of the key of the song, either 'major', 'minor' or 'none'"
application: "Key estimation, tonal similarity, classification"
who: "Emilia Gomez, Justin Salamon"
reference: "Gómez, E. (2006). Tonal description of polyphonic audio for music content processing. INFORMS Journal on Computing, Special Cluster on Computation in Music. "
examplechain: "hpcp->Key"
83:
group: "tonal"
name: "key_strength"
algorithm: "Key"
description: "The Key Strength descriptor represents the power/correlation of the key: high value means that the piece is very tonal and low value means that it's not very tonal for the used key profile or template."
outputdomain: "real, from 0 to 1."
application: "Tonal similarity, music description, classification between tonal and non-tonal music"
who: "Emilia Gomez, Justin Salamon"
reference: "Gómez, E. (2006). Tonal description of polyphonic audio for music content processing. INFORMS Journal on Computing, Special Cluster on Computation in Music. "
examplechain: "hpcp->Key"
84:
group: "tonal"
name: "tuning_diatonic_strength"
algorithm: "Key"
description: "The Diatonic Strength is the \"tonal_key_strength\" calculated using a diatonic tonal profile on the 120-bins HPCP average."
outputdomain: "real, from 0 to 1."
application: "western vs non-western music classification, key estimation"
who: "Emilia Gomez"
reference: "Gómez, E. and Herrera, P. (accepted, under revision). ‘Comparative Analysis of Music Recordings from Western and Non-Western traditions by Automatic Tonal Feature Extraction’, Empirical Musicology Review. \nISSN: 1559-5749"
examplechain: "hpcp->Key (parameters for the Key algorithm: profileType=1)"
85:
group: "tonal"
name: "tuning_equal_tempered_deviation"
algorithm: "HighResolutionFeatures"
description: "This is a measure of the deviation of the 120-length HPCP (Harmonic Pitch Class Profiles) local maxima with respect to equal-tempered bins. This measure if how the audio piece scale may be considered as an equal-tempered one or not. An Indian music audio piece may have a high equal tempered deviation while a pop song may have a low one."
outputdomain: "real, non-negative."
application: "western vs non-western music classification"
who: "Emilia Gomez, Justin Salamon"
reference: "Gómez, E. and Herrera, P. (accepted, under revision). ‘Comparative Analysis of Music Recordings from Western and Non-Western traditions by Automatic Tonal Feature Extraction’, Empirical Musicology Review. \nISSN: 1559-5749"
examplechain: "hpcp(size=120)->PeakDetection->HighResolutionFeatures"
86:
group: "tonal"
name: "tuning_frequency"
algorithm: "TuningFrequency"
description: "Frequency used to tune a piece, by default 440 Hz "
outputdomain: "real, non-negative."
application: "Western vs non-western music classification, key estimation, HPCP computation, tonal similarity"
who: "Emilia Gomez, Justin Salamon"
reference: "Gómez, E. Tonal Description of Music Audio Signals, PhD Thesis, Chapter 2, 2006.\n\nGómez, E. and Herrera, P. (accepted, under revision). ‘Comparative Analysis of Music Recordings from Western and Non-Western traditions by Automatic Tonal Feature Extraction’, Empirical Musicology Review. \nISSN: 1559-5749"
examplechain: "Spectrum->PeakDetection->TuningFrequency"
87:
group: "unknown"
name: "tuning_nontempered2tempered_energy_ratio"
algorithm: ""
description: "This is the ratio between the energy on non-tempered bins and the total energy, computed from the 120-bins HPCP average. This measure if how the audio piece scale may be considered as an equal-tempered one or not. An Indian music audio piece may have a low ratio while a pop song may have a high one."
outputdomain: "a real number from 0 to 1."
application: ""
who: "Thomas"
reference: ""
examplechain: ""
88:
group: "tonal"
name: "tuning_nontempered_energy_ratio"
algorithm: "HighResolutionFeatures"
description: "This is the ratio between the energy on non-tempered peaks and the total energy, computed from the 120-bins HPCP average. This measure if how the audio piece scale may be considered as an equal-tempered one or not. An Indian music audio piece may have a low ratio while a pop song may have a high one."
outputdomain: "real, from 0 to 1."
application: "Western vs non-western music classification"
who: "Emilia Gomez, Justin Salamon"
reference: "Gómez, E. and Herrera, P. (accepted, under revision). ‘Comparative Analysis of Music Recordings from Western and Non-Western traditions by Automatic Tonal Feature Extraction’, Empirical Musicology Review. \nISSN: 1559-5749"
examplechain: "hpcp(size=120)->PeakDetection->HighResolutionFeatures"
89:
group: "unknown"
name: "relative_ioi"
algorithm: ""
description: ""
outputdomain: ""
application: ""
who: ""
reference: ""
examplechain: ""
90:
group: "unknown"
name: "relative_ioi_peaks"
algorithm: ""
description: ""
outputdomain: ""
application: ""
who: "Paul"
reference: ""
examplechain: ""
91:
group: "rhythm"
name: "bpm_estimates"
algorithm: "ExtractorTempoTap"
description: "BPM estimates is a list of estimated BPM (Beat per minute, see the description of the BPM descriptor). TODO: frequency of this descriptor\n"
outputdomain: "a vector of real (bpm)"
application: "Tempo tracking"
who: "Cyril Laurier"
reference: ""
examplechain: "input signal->bpm estimates"
92:
group: "rhythm"
name: "first_peak_bpm"
algorithm: "BPMHistogramDescriptors"
description: "This algorithm computes the value of the highest peak of the 'beats per minute' (bpm) probability histogram."
outputdomain: "real, non-negative"
application: "Genre classification; beat estimation. "
who: "Martin Haro"
reference: "http://en.wikipedia.org/wiki/Beats_per_minute"
examplechain: ""
93:
group: "rhythm"
name: "first_peak_spread"
algorithm: "BPMHistogramDescriptors"
description: "This algorithm computes the spread of the highest peak of the 'beats per minute' (bpm) probability histogram.\nThe spread is defined as the variance of a distribution around its mean value. It is equal to the 2nd order central moment. \n"
outputdomain: "real, non-negative"
application: "Genre classification; beat estimation. "
who: "Martin Haro"
reference: "http://en.wikipedia.org/wiki/Beats_per_minute and \nhttp://en.wikipedia.org/wiki/Central_moment"
examplechain: ""
94:
group: "rhythm"
name: "first_peak_weight"
algorithm: "BPMHistogramDescriptors"
description: "This algorithm computes the weight of the highest peak of the 'beats per minute' (bpm) probability histogram. "
outputdomain: "real, non-negative"
application: "Genre classification; beat estimation. "
who: "Martin Haro"
reference: "http://en.wikipedia.org/wiki/Beats_per_minute"
examplechain: ""
95:
group: "rhythm"
name: "bpm_intervals"
algorithm: "ExtractorTempoTap"
description: "BPM Intervals is a list of beats interval in seconds. It estimates the time in seconds between two beats. At each frame, an estimation is added to the list.\nTODO: frequency of this descriptor\n"
outputdomain: "a real vector of real (interval between beats in seconds)"
application: "Tempo tracking"
who: "Cyril Laurier"
reference: ""
examplechain: "input signal->bpm intervals"
96:
group: "rhythm"
name: "rubato_start"
algorithm: "BpmRubato"
description: "This descriptor provides a list of values indicating the start times, in seconds, of large tempo changes in the signal."
outputdomain: "real, positive. Array of real values."
application: "Measure fluctuation in tempo or rhythm"
who: "Owen Meyers"
reference: ""
examplechain: "beats -> rubatoStart"
97:
group: "rhythm"
name: "rubato_stop"
algorithm: "BpmRubato"
description: "This descriptor provides a list of values indicating the stop times, in seconds, of large tempo changes in the signal."
outputdomain: "real, positive. Array of real values."
application: "Measure fluctuation in tempo or rhythm"
who: "Owen Meyers"
reference: ""
examplechain: "beats -> rubatoStop"
98:
group: "rhythm"
name: "second_peak_bpm"
algorithm: "BPMHistogramDescriptors"
description: "This algorithm computes the value of the second highest peak of the 'beats per minute' (bpm) probability histogram."
outputdomain: "real, non-negative"
application: "Genre classification; beat estimation. "
who: "Martin Haro"
reference: "http://en.wikipedia.org/wiki/Beats_per_minute"
examplechain: ""
99:
group: "rhythm"
name: "second_peak_spread"
algorithm: "BPMHistogramDescriptors"
description: "This algorithm computes the spread of the second highest peak of the 'beats per minute' (bpm) probability histogram. The spread is defined as the variance of a distribution around its mean value. It is equal to the 2nd order central moment. "
outputdomain: "real, non-negative"
application: "Genre classification; beat estimation. "
who: "Martin Haro"
reference: "http://en.wikipedia.org/wiki/Beats_per_minute and http://en.wikipedia.org/wiki/Central_moment\n"
examplechain: ""
100:
group: "rhythm"
name: "second_peak_weight"
algorithm: "BPMHistogramDescriptors"
description: "This algorithm computes the weight of the second highest peak of the 'beats per minute' (bpm) probability histogram. "
outputdomain: "real, non-negative"
application: "Genre classification; beat estimation. "
who: "Martin Haro"