forked from mozilla/DeepSpeech
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdeepspeech_utils.h
42 lines (36 loc) · 1.51 KB
/
deepspeech_utils.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#ifndef __DEEPSPEECH_UTILS_H__
#define __DEEPSPEECH_UTILS_H__
#include <cstddef>
namespace DeepSpeech
{
/**
* @brief Given audio, return a vector suitable for input to a DeepSpeech
* model trained with the given parameters.
*
* Extracts MFCC features from the given audio signal and adds the
* appropriate amount of context, so that the result can be used to run
* inference on a DeepSpeech model trained with the same parameters.
* Only the declaration is provided here; the definition is expected in a
* separate translation unit.
*
* @param aBuffer A 16-bit, mono raw audio signal at the appropriate sample
* rate.
* @param aBufferSize The length of @p aBuffer, in samples.
* @param aSampleRate The sample rate of the audio signal.
* @param aNCep The number of cepstrum (presumably the number of cepstral
* coefficients per frame).
* @param aNContext The size of the context window.
* @param[out] aMfcc Receives a pointer to an array containing the features,
* of shape (@p aNFrames, ncep * ncontext), where ncep and
* ncontext refer to @p aNCep and @p aNContext. The user is
* responsible for freeing the array.
* NOTE(review): the exact per-frame length is computed by
* the implementation, which is not visible from this
* header; confirm the formula, and read @p aFrameLen
* rather than recomputing it.
* NOTE(review): the matching deallocation call (free() vs
* delete[]) is not stated here; confirm against the
* implementation.
* @param[out] aNFrames (optional) The number of frames in @p aMfcc.
* Defaults to NULL.
* @param[out] aFrameLen (optional) The length of each frame
* (ncep * ncontext) in @p aMfcc. Defaults to NULL.
*/
void audioToInputVector(const short* aBuffer,
unsigned int aBufferSize,
int aSampleRate,
int aNCep,
int aNContext,
float** aMfcc,
int* aNFrames = NULL,
int* aFrameLen = NULL);
} // namespace DeepSpeech
#endif /* __DEEPSPEECH_UTILS_H__ */