diff --git a/.gitignore b/.gitignore index 94ca4506..a050624f 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,7 @@ Snowboy.pm /examples/C++/pa_stable_v190600_20161030.tgz /examples/C++/portaudio /examples/C++/demo +/examples/C++/demo2 /examples/Java/Demo.class /examples/Perl/data/ /examples/iOS/Obj-C/Pods/Pods.xcodeproj/xcuserdata/ diff --git a/README.md b/README.md index a888e5be..460152bd 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ by [KITT.AI](http://kitt.ai). [Commercial application FAQ](README_commercial.md) -Version: 1.2.0 (3/25/2017) +Version: 1.3.0 (2/19/2018) ## Alexa support @@ -18,10 +18,12 @@ Snowboy now brings hands-free experience to the [Alexa AVS sample app](https://g **Performance** -The performance of hotword detection usually depends on the actually environment, e.g., is it used with a quality microphone, is it used on the street, in a kitchen, or is there any background noise, etc. So we feel it is best for the users to evaluate it in their real environment. For the evaluation purpose, we have prepared an Android app which can be installed and run out of box: [SnowboyAlexaDemo.apk](https://github.com/Kitt-AI/snowboy/raw/master/resources/alexa/SnowboyAlexaDemo.apk) (please uninstall any previous one first if you installed this app before). +The performance of hotword detection usually depends on the actual environment, e.g., is it used with a quality microphone, is it used on the street, in a kitchen, or is there any background noise, etc. So we feel it is best for the users to evaluate it in their real environment. For the evaluation purpose, we have prepared an Android app which can be installed and run out of box: [SnowboyAlexaDemo.apk](https://github.com/Kitt-AI/snowboy/raw/master/resources/alexa/SnowboyAlexaDemo.apk) (please uninstall any previous versions first if you have installed this app before). 
**Personal model** + * Create your personal hotword model through our [website](https://snowboy.kitt.ai) or [hotword API](https://snowboy.kitt.ai/api/v1/train/) + * Replace the hotword model in [Alexa AVS sample app](https://github.com/alexa/alexa-avs-sample-app) (after installation) with your personal model ``` @@ -47,6 +49,7 @@ make * Run the wake word agent with engine set to `kitt_ai`! **Universal model** + * Replace the hotword model in [Alexa AVS sample app](https://github.com/alexa/alexa-avs-sample-app) (after installation) with your universal model ``` @@ -114,14 +117,9 @@ Currently Snowboy supports (look into the [lib](lib) folder): * all versions of Raspberry Pi (with Raspbian based on Debian Jessie 8.0) * 64bit Mac OS X -* 64bit Ubuntu (12.04 and 14.04) +* 64bit Ubuntu 14.04 * iOS * Android -* Pine64 (Debian Jessie 8.5, 3.10.102 BSP2) -* Nvidia Jetson TX1 (use above Pine64 package) -* Nvidia Jetson TX2 (use above Pine64 package) -* Intel Edison (Ubilinux based on Debian Wheezy 7.8) -* Samsung Artik (built with Fedora 25 for ARMv7) * ARM64 (aarch64, Ubuntu 16.04) It ships in the form of a **C++ library** with language-dependent wrappers @@ -130,11 +128,12 @@ pull request! Currently we have built wrappers for: +* C/C++ * Java/Android * Go (thanks to @brentnd and @deadprogram) * Node (thanks to @evancohen and @nekuz0r) * Perl (thanks to @iboguslavsky) -* Python +* Python2/Python3 * iOS/Swift3 (thanks to @grimlockrocks) * iOS/Object-C (thanks to @patrickjquinn) @@ -164,18 +163,10 @@ environment. Here is the list of the models, and the parameters that you have to use for them: -* **resources/snowboy.umdl**: Universal model for the hotword "Snowboy". Set -SetSensitivity to 0.5 for better performance. -* **resources/alexa.umdl**: Universal model for the hotword "Alexa". Set -SetSensitivity to 0.5, and preferably set ApplyFrontend (only works on Raspberry -Pi) to true. This model is depressed. 
-* **resources/alexa/alexa_02092017.umdl**: Universal model for the hotword -"Alexa". This is still work in progress. Set SetSensitivity to 0.15. -* **resources/alexa/alexa-avs-sample-app/alexa.umdl**: Universal model for the -hotword "Alexa" optimized for [Alexa AVS sample app](https://github.com/alexa/alexa-avs-sample-app). -Set SetSensitivity to 0.6, and set ApplyFrontend (only works on Raspberry Pi) -to true. This is so far the best "Alexa" model we released publicly, when -ApplyFrontend is set to true. +* **resources/alexa/alexa-avs-sample-app/alexa.umdl**: Universal model for the hotword "Alexa" optimized for [Alexa AVS sample app](https://github.com/alexa/alexa-avs-sample-app). Set SetSensitivity to 0.6, and set ApplyFrontend to true. This is so far the best "Alexa" model we released publicly, when ApplyFrontend is set to true. +* **resources/models/snowboy.umdl**: Universal model for the hotword "Snowboy". Set SetSensitivity to 0.5 and ApplyFrontend to false. +* **resources/models/jarvis.umdl**: Universal model for the hotword "Jarvis" (https://snowboy.kitt.ai/hotword/29). It has two different models for the hotword Jarvis, so you have to use two sensitivities. Set SetSensitivity to "0.8,0.8" and ApplyFrontend to true. +* **resources/models/smart_mirror.umdl**: Universal model for the hotword "Smart Mirror" (https://snowboy.kitt.ai/hotword/47). Set SetSensitivity to 0.5, and ApplyFrontend to false. ## Precompiled node module @@ -190,13 +181,10 @@ dependencies like `fs`, `wav` or `node-record-lpcm16` depending on which script you use. 
## Precompiled Binaries with Python Demo -* 64 bit Ubuntu [12.04](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/ubuntu1204-x86_64-1.2.0.tar.bz2) - / [14.04](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/ubuntu1404-x86_64-1.2.0.tar.bz2) -* [MacOS X](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/osx-x86_64-1.2.0.tar.bz2) +* 64 bit Ubuntu [14.04](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/ubuntu1404-x86_64-1.3.0.tar.bz2) +* [MacOS X](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/osx-x86_64-1.3.0.tar.bz2) * Raspberry Pi with Raspbian 8.0, all versions - ([1/2/3/Zero](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/rpi-arm-raspbian-8.0-1.2.0.tar.bz2)) -* Pine64 (Debian Jessie 8.5 (3.10.102)), Nvidia Jetson TX1 and Nvidia Jetson TX2 ([download](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/pine64-debian-jessie-1.2.0.tar.bz2)) -* Intel Edison (Ubilinux based on Debian Wheezy 7.8) ([download](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/edison-ubilinux-1.2.0.tar.bz2)) + ([1/2/3/Zero](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/rpi-arm-raspbian-8.0-1.3.0.tar.bz2)) If you want to compile a version against your own environment/language, read on. @@ -386,9 +374,14 @@ See [Full Documentation](http://docs.kitt.ai/snowboy). ## Change Log -**1/4/2018** +**v1.3.0, 2/19/2018** -* Added `resources/smart_mirror.umdl`. This is trained with voices from https://snowboy.kitt.ai/hotword/47. We suspect that it'll work well with male voices as most of our developers are male. 
+* Added Frontend processing for all platforms +* Added `resources/models/smart_mirror.umdl` for https://snowboy.kitt.ai/hotword/47 +* Added `resources/models/jarvis.umdl` for https://snowboy.kitt.ai/hotword/29 +* Added README for Chinese +* Cleaned up the supported platforms +* Re-structured the model path **v1.2.0, 3/25/2017** diff --git a/README_ZH_CN.md b/README_ZH_CN.md index b813d03d..889f174f 100644 --- a/README_ZH_CN.md +++ b/README_ZH_CN.md @@ -10,7 +10,7 @@ (因为我们每天都会收到很多消息,从2016年9月开始建立了讨论组。请在这里发送一般性的讨论。关于错误,请使用Github问题标签。) -版本:1.2.0(3/25/2017) +版本:1.3.0(2/19/2018) ## Alexa支持 @@ -113,25 +113,21 @@ Snowboy是一款可定制的唤醒词检测引擎,可为您创建像 "OK Googl * 所有版本的Raspberry Pi(Raspbian基于Debian Jessie 8.0) * 64位Mac OS X -* 64位Ubuntu(12.04和14.04) +* 64位Ubuntu 14.04 * iOS * Android -* Pine64(Debian Jessie 8.5,3.10.102 BSP2) -* Nvidia Jetson TX1(使用上面的Pine64包) -* Nvidia Jetson TX2(使用上面的Pine64包) -* 英特尔Edison(Ubilinux基于Debian Wheezy 7.8) -* 三星Artik(搭载Fedora 25为ARMv7) * ARM64(aarch64,Ubuntu 16.04) Snowboy底层库由C++写成,通过swig被封装成能在多种操作系统和语言上使用的软件库。我们欢迎新语言的封装,请随时发送你们的Pull Request! 
目前我们已经现实封装的有: +* C/C++ * Java / Android * Go(thanks to @brentnd and @deadprogram) * Node(thanks to @evancohen和@ nekuz0r) * Perl(thanks to @iboguslavsky) -* Python +* Python2/Python3 * iOS / Swift3(thanks to @grimlockrocks) * iOS / Object-C(thanks to @patrickjquinn) @@ -158,13 +154,10 @@ Snowboy底层库由C++写成,通过swig被封装成能在多种操作系统和 以下是模型列表和您必须使用的参数: -* resources/ snowboy.umdl:唤醒词为“snowboy”的通用模型。将`SetSensitivity`设置为`0.5`以获得更好的性能。 -* resources/ alexa.umdl:唤醒词为“Alexa”的通用模型。将`SetSensitivity`设置为`0.5`,最好将`ApplyFrontend` -(仅适用于Raspberry Pi)设置为`true`。这个模型已经不建议使用了。 -* resources/ alexa / alexa_02092017.umdl:唤醒词为“Alexa”的通用模型。这个仍然在优化中。 -将`SetSensitivity`设置为`0.15`。 -* resources/ alexa / alexa-avs-sample-app/alexa.umdl:这个是为[Alexa AVS sample app](https://github.com/alexa/alexa-avs-sample-app) -优化过的唤醒词为“Alexa”的通用模型,将`SetSensitivity`设置为`0.6`,并将`ApplyFrontend`(仅适用于Raspberry Pi)设置为true。当`ApplyFrontend`设置为`true`时,这是迄今为止我们公开发布的最好的“Alexa”的模型。 +* **resources/alexa/alexa-avs-sample-app/alexa.umdl**:这个是为[Alexa AVS sample app](https://github.com/alexa/alexa-avs-sample-app)优化过的唤醒词为“Alexa”的通用模型,将`SetSensitivity`设置为`0.6`,并将`ApplyFrontend`设置为true。当`ApplyFrontend`设置为`true`时,这是迄今为止我们公开发布的最好的“Alexa”的模型。 +* **resources/models/snowboy.umdl**:唤醒词为“snowboy”的通用模型。将`SetSensitivity`设置为`0.5`,`ApplyFrontend`设置为`false`。 +* **resources/models/jarvis.umdl**: 唤醒词为“Jarvis” (https://snowboy.kitt.ai/hotword/29)的通用模型,其中包含了对应于“Jarvis”的两个唤醒词模型,所以需要设置两个`sensitivity`。将`SetSensitivity`设置为`0.8,0.8`,`ApplyFrontend`设置为`true`。 +* **resources/models/smart_mirror.umdl**: 唤醒词为“Smart Mirror” (https://snowboy.kitt.ai/hotword/47)的通用模型。将`SetSensitivity`设置为`0.5`,`ApplyFrontend`设置为`false`。 ## 预编译node模块 @@ -176,10 +169,10 @@ Snowboy为一下平台编译了node模块:64位Ubuntu,MacOS X和Raspberry Pi ## 预编译Python Demo的二进制文件 * 64 bit Ubuntu [12.04](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/ubuntu1204-x86_64-1.2.0.tar.bz2) - / [14.04](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/ubuntu1404-x86_64-1.2.0.tar.bz2) -* [MacOS 
X](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/osx-x86_64-1.2.0.tar.bz2) + / [14.04](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/ubuntu1404-x86_64-1.3.0.tar.bz2) +* [MacOS X](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/osx-x86_64-1.3.0.tar.bz2) * Raspberry Pi with Raspbian 8.0, all versions - ([1/2/3/Zero](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/rpi-arm-raspbian-8.0-1.2.0.tar.bz2)) + ([1/2/3/Zero](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/rpi-arm-raspbian-8.0-1.3.0.tar.bz2)) * Pine64 (Debian Jessie 8.5 (3.10.102)), Nvidia Jetson TX1 and Nvidia Jetson TX2 ([download](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/pine64-debian-jessie-1.2.0.tar.bz2)) * Intel Edison (Ubilinux based on Debian Wheezy 7.8) ([download](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/edison-ubilinux-1.2.0.tar.bz2)) @@ -367,6 +360,15 @@ Perl示例包括使用KITT.AI RESTful API训练个人唤醒词,在检测到唤 ## 更改日志 +**v1.3.0, 2/19/2018** + +* 添加前端处理到所有平台 +* 添加`resources/models/smart_mirror.umdl` 给 https://snowboy.kitt.ai/hotword/47 +* 添加`resources/models/jarvis.umdl` 给 https://snowboy.kitt.ai/hotword/29 +* 添加中文文档 +* 清理支持的平台 +* 重新定义了模型路径 + **v1.2.0, 3/25/2017** * 为[Alexa AVS sample app](https://github.com/alexa/alexa-avs-sample-app)添加更好的Alexa模型 diff --git a/examples/C++/demo.cc b/examples/C++/demo.cc index eaa2d424..0f5cd1ef 100644 --- a/examples/C++/demo.cc +++ b/examples/C++/demo.cc @@ -200,10 +200,11 @@ int main(int argc, char* argv[]) { // Parameter section. 
// If you have multiple hotword models (e.g., 2), you should set // <model_filename> and <sensitivity_str> as follows: - // model_filename = "resources/snowboy.umdl,resources/alexa.pmdl"; - // sensitivity_str = "0.4,0.4"; + // model_filename = + // "resources/models/snowboy.umdl,resources/models/smart_mirror.umdl"; + // sensitivity_str = "0.5,0.5"; std::string resource_filename = "resources/common.res"; - std::string model_filename = "resources/snowboy.umdl"; + std::string model_filename = "resources/models/snowboy.umdl"; std::string sensitivity_str = "0.5"; float audio_gain = 1; diff --git a/examples/C++/demo2.cc b/examples/C++/demo2.cc index 400bd96c..ed74f728 100644 --- a/examples/C++/demo2.cc +++ b/examples/C++/demo2.cc @@ -3,7 +3,7 @@ #include "portaudio.h" #define resource_filename "resources/common.res" -#define model_filename "resources/snowboy.umdl" +#define model_filename "resources/models/snowboy.umdl" #define sensitivity_str "0.5" struct wavHeader { //44 byte HEADER only diff --git a/examples/C/demo.c b/examples/C/demo.c index b95706a3..971d7407 100644 --- a/examples/C/demo.c +++ b/examples/C/demo.c @@ -183,10 +183,11 @@ int main(int argc, char* argv[]) { // Parameter section. 
// If you have multiple hotword models (e.g., 2), you should set // <model_filename> and <sensitivity_str> as follows: - // model_filename = "resources/snowboy.umdl,resources/alexa.pmdl"; - // sensitivity_str = "0.4,0.4"; + // model_filename = + // "resources/models/snowboy.umdl,resources/models/smart_mirror.umdl"; + // sensitivity_str = "0.5,0.5"; const char resource_filename[] = "resources/common.res"; - const char model_filename[] = "resources/snowboy.umdl"; + const char model_filename[] = "resources/models/snowboy.umdl"; const char sensitivity_str[] = "0.5"; float audio_gain = 1; diff --git a/examples/Go/detect/readme.md b/examples/Go/detect/readme.md index d10aa17d..1b8ea908 100644 --- a/examples/Go/detect/readme.md +++ b/examples/Go/detect/readme.md @@ -22,7 +22,7 @@ go build -o snowboy main.go ### Examples Cmd: -`./snowboy ../../../resources/snowboy.umdl ../../../resources/snowboy.wav` +`./snowboy ../../../resources/models/snowboy.umdl ../../../resources/snowboy.wav` Output: ``` @@ -37,4 +37,4 @@ Output: ``` Snowboy detecting keyword in ../../resources/snowboy.wav Snowboy detected nothing -``` \ No newline at end of file +``` diff --git a/examples/Go/listen/README.md b/examples/Go/listen/README.md index 46f03790..cf8aab00 100644 --- a/examples/Go/listen/README.md +++ b/examples/Go/listen/README.md @@ -23,7 +23,7 @@ go build -o listen main.go ### Examples Cmd: -`./listen ../../../resources/common.res ../../../resources/snowboy.umdl` +`./listen ../../../resources/common.res ../../../resources/models/snowboy.umdl` Output: ``` diff --git a/examples/Java/Demo.java b/examples/Java/Demo.java index 6360b8f5..840a610d 100644 --- a/examples/Java/Demo.java +++ b/examples/Java/Demo.java @@ -21,7 +21,7 @@ public static void main(String[] args) { // Sets up Snowboy. 
SnowboyDetect detector = new SnowboyDetect("resources/common.res", - "resources/snowboy.umdl"); + "resources/models/snowboy.umdl"); detector.SetSensitivity("0.5"); detector.SetAudioGain(1); diff --git a/examples/Node/file.js b/examples/Node/file.js index b36b85c1..055f1237 100644 --- a/examples/Node/file.js +++ b/examples/Node/file.js @@ -6,7 +6,7 @@ const Models = require('../../').Models; const models = new Models(); models.add({ - file: 'resources/snowboy.umdl', + file: 'resources/models/snowboy.umdl', sensitivity: '0.5', hotwords : 'snowboy' }); diff --git a/examples/Node/microphone.js b/examples/Node/microphone.js index ae190528..4e573182 100644 --- a/examples/Node/microphone.js +++ b/examples/Node/microphone.js @@ -5,7 +5,7 @@ const Models = require('../../').Models; const models = new Models(); models.add({ - file: 'resources/snowboy.umdl', + file: 'resources/models/snowboy.umdl', sensitivity: '0.5', hotwords : 'snowboy' }); diff --git a/examples/Perl/snowboy_googlevoice.pl b/examples/Perl/snowboy_googlevoice.pl index 8c43af2d..9c4075ca 100755 --- a/examples/Perl/snowboy_googlevoice.pl +++ b/examples/Perl/snowboy_googlevoice.pl @@ -3,9 +3,9 @@ # This script first uses Snowboy to wake up, then collects audio and sends to # Google Speech API for further recognition. It works with both personal and # universal models. By default, it uses the Snowboy universal model at -# resources/snowboy.umdl, you can change it to other universal models, or your -# own personal models. You also have to provide your Google API key in order to -# use it. +# resources/models/snowboy.umdl, you can change it to other universal models, or +# your own personal models. You also have to provide your Google API key in +# order to use it. use Snowboy; @@ -23,16 +23,16 @@ This script first uses Snowboy to wake up, then collects audio and sends to Google Speech API for further recognition. It works with both personal and universal models. 
By default, it uses the Snowboy universal model at -resources/snowboy.umdl, you can change it to other universal models, or your own -personal models. You also have to provide your Google API key in order to use -it. +resources/models/snowboy.umdl, you can change it to other universal models, or +your own personal models. You also have to provide your Google API key in order +to use it. Note: Google is now moving to Google Cloud Speech API, so we will have to update the API query later. Usage: ./snowboy_googlevoice.pl [Hotword_Model] e.g.: ./snowboy_googlevoice.pl \ - abcdefghijklmnopqrstuvwxyzABC0123456789 resources/snowboy.umdl + abcdefghijklmnopqrstuvwxyzABC0123456789 resources/models/snowboy.umdl Allowed options: --language : Language for speech recognizer. (string, default="en") @@ -48,9 +48,9 @@ # Gets parameters. my $api_key = shift @ARGV; -my $model = shift @ARGV || 'resources/snowboy.umdl'; +my $model = shift @ARGV || 'resources/models/snowboy.umdl'; -if ($model eq 'resources/snowboy.umdl') { +if ($model eq 'resources/models/snowboy.umdl') { $hotword = "Snowboy"; } else { $hotword = "your hotword"; diff --git a/examples/Perl/snowboy_unit_test.pl b/examples/Perl/snowboy_unit_test.pl index 45cae4e4..6a9a856d 100755 --- a/examples/Perl/snowboy_unit_test.pl +++ b/examples/Perl/snowboy_unit_test.pl @@ -12,7 +12,7 @@ close WAV; $sb = new Snowboy::SnowboyDetect('resources/common.res', - 'resources/snowboy.umdl'); + 'resources/models/snowboy.umdl'); $sb->SetSensitivity ("0.5"); $sb->SetAudioGain (1); diff --git a/examples/Python/demo3.py b/examples/Python/demo3.py index 67fd7307..29ecec63 100644 --- a/examples/Python/demo3.py +++ b/examples/Python/demo3.py @@ -4,11 +4,11 @@ # Demo code for detecting hotword in a .wav file # Example Usage: -# $ python demo3.py resources/snowboy.wav resources/snowboy.umdl +# $ python demo3.py resources/snowboy.wav resources/models/snowboy.umdl # Should print: # Hotword Detected! 
# -# $ python demo3.py resources/ding.wav resources/snowboy.umdl +# $ python demo3.py resources/ding.wav resources/models/snowboy.umdl # Should print: # Hotword Not Detected! diff --git a/examples/Python3/demo3.py b/examples/Python3/demo3.py index 67fd7307..29ecec63 100644 --- a/examples/Python3/demo3.py +++ b/examples/Python3/demo3.py @@ -4,11 +4,11 @@ # Demo code for detecting hotword in a .wav file # Example Usage: -# $ python demo3.py resources/snowboy.wav resources/snowboy.umdl +# $ python demo3.py resources/snowboy.wav resources/models/snowboy.umdl # Should print: # Hotword Detected! # -# $ python demo3.py resources/ding.wav resources/snowboy.umdl +# $ python demo3.py resources/ding.wav resources/models/snowboy.umdl # Should print: # Hotword Not Detected! diff --git a/include/snowboy-detect.h b/include/snowboy-detect.h index 42c3e0ad..76e4036a 100644 --- a/include/snowboy-detect.h +++ b/include/snowboy-detect.h @@ -13,6 +13,7 @@ namespace snowboy { // Forward declaration. struct WaveHeader; class PipelineDetect; +class PipelineVad; //////////////////////////////////////////////////////////////////////////////// // @@ -78,7 +79,7 @@ class SnowboyDetect { // // @param [in] data Small chunk of data to be detected. See // above for the supported data format. - // @param [in] array_length Length of the data array in samples. + // @param [in] array_length Length of the data array. // @param [in] is_end Set it to true if it is the end of a // utterance or file. int RunDetection(const float* const data, @@ -96,6 +97,7 @@ class SnowboyDetect { // Make sure you properly align the sensitivity value to the corresponding // hotword. void SetSensitivity(const std::string& sensitivity_str); + void SetHighSensitivity(const std::string& high_sensitivity_str); // Returns the sensitivity string for the current hotwords. 
std::string GetSensitivity() const; @@ -133,6 +135,80 @@ class SnowboyDetect { std::unique_ptr detect_pipeline_; }; +//////////////////////////////////////////////////////////////////////////////// +// +// SnowboyVad class interface. +// +//////////////////////////////////////////////////////////////////////////////// +class SnowboyVad { + public: + // Constructor that takes a resource file. It shares the same resource file + // with SnowboyDetect. + SnowboyVad(const std::string& resource_filename); + + // Resets the VAD. + bool Reset(); + + // Runs the VAD algorithm. Supported audio format is WAVE (with linear PCM, + // 8-bits unsigned integer, 16-bits signed integer or 32-bits signed integer). + // See SampleRate(), NumChannels() and BitsPerSample() for the required + // sampling rate, number of channels and bits per sample values. You are + // supposed to provide a small chunk of data (e.g., 0.1 second) each time you + // call RunVad(). Larger chunk usually leads to longer delay, but less + // CPU usage. + // + // Definition of return values: + // -2: Silence. + // -1: Error. + // 0: Non-silence. + // + // @param [in] data Small chunk of data to be detected. See + // above for the supported data format. + // @param [in] is_end Set it to true if it is the end of a + // utterance or file. + int RunVad(const std::string& data, bool is_end = false); + + // Various versions of RunVad() that take different format of audio. If + // NumChannels() > 1, e.g., NumChannels() == 2, then the array is as follows: + // + // d1c1, d1c2, d2c1, d2c2, d3c1, d3c2, ..., dNc1, dNc2 + // + // where d1c1 means data point 1 of channel 1. + // + // @param [in] data Small chunk of data to be detected. See + // above for the supported data format. + // @param [in] array_length Length of the data array. + // @param [in] is_end Set it to true if it is the end of a + // utterance or file. 
+ int RunVad(const float* const data, + const int array_length, bool is_end = false); + int RunVad(const int16_t* const data, + const int array_length, bool is_end = false); + int RunVad(const int32_t* const data, + const int array_length, bool is_end = false); + + // Applies a fixed gain to the input audio. In case you have a very weak + // microphone, you can use this function to boost input audio level. + void SetAudioGain(const float audio_gain); + + // If <apply_frontend> is true, then apply frontend audio processing; + // otherwise turns the audio processing off. + void ApplyFrontend(const bool apply_frontend); + + // Returns the required sampling rate, number of channels and bits per sample + // values for the audio data. You should use this information to set up your + // audio capturing interface. + int SampleRate() const; + int NumChannels() const; + int BitsPerSample() const; + + ~SnowboyVad(); + + private: + std::unique_ptr wave_header_; + std::unique_ptr vad_pipeline_; +}; + } // namespace snowboy #endif // SNOWBOY_INCLUDE_SNOWBOY_DETECT_H_ diff --git a/lib/aarch64-ubuntu1604/libsnowboy-detect.a b/lib/aarch64-ubuntu1604/libsnowboy-detect.a index 5bf55c7f..03286c2e 100644 Binary files a/lib/aarch64-ubuntu1604/libsnowboy-detect.a and b/lib/aarch64-ubuntu1604/libsnowboy-detect.a differ diff --git a/lib/android/armv7a/libsnowboy-detect.a b/lib/android/armv7a/libsnowboy-detect.a index 8784e6b9..112bc12f 100644 Binary files a/lib/android/armv7a/libsnowboy-detect.a and b/lib/android/armv7a/libsnowboy-detect.a differ diff --git a/lib/android/armv8-aarch64/libsnowboy-detect.a b/lib/android/armv8-aarch64/libsnowboy-detect.a index 2a0cf7ae..e8c4f197 100644 Binary files a/lib/android/armv8-aarch64/libsnowboy-detect.a and b/lib/android/armv8-aarch64/libsnowboy-detect.a differ diff --git a/lib/edison/libsnowboy-detect.a b/lib/edison/libsnowboy-detect.a deleted file mode 100644 index 3f78ab14..00000000 Binary files a/lib/edison/libsnowboy-detect.a and /dev/null differ diff --git 
a/lib/fedora25-armv7/libsnowboy-detect.a b/lib/fedora25-armv7/libsnowboy-detect.a deleted file mode 100644 index 24f8ae4b..00000000 Binary files a/lib/fedora25-armv7/libsnowboy-detect.a and /dev/null differ diff --git a/lib/ios/libsnowboy-detect.a b/lib/ios/libsnowboy-detect.a index 6be483de..345832cd 100644 Binary files a/lib/ios/libsnowboy-detect.a and b/lib/ios/libsnowboy-detect.a differ diff --git a/lib/node/SnowboyDetectNative.d.ts b/lib/node/SnowboyDetectNative.d.ts index e6bc98a9..d0714416 100644 --- a/lib/node/SnowboyDetectNative.d.ts +++ b/lib/node/SnowboyDetectNative.d.ts @@ -3,6 +3,7 @@ interface SnowboyDetectNativeInterface { Reset(): boolean; RunDetection(audioData: Buffer): number; SetSensitivity(sensitivity: string): void; + SetHighSensitivity(highSensitivity: string): void; GetSensitivity(): string; SetAudioGain(audioGain: number): void; UpdateModel(): void; diff --git a/lib/node/index.ts b/lib/node/index.ts index cc587f90..409e4690 100644 --- a/lib/node/index.ts +++ b/lib/node/index.ts @@ -34,12 +34,14 @@ export interface DetectorOptions { models: HotwordModels; audioGain?: number; applyFrontend?: boolean; + highSensitivity?: string; } export interface SnowboyDetectInterface { reset(): boolean; runDetection(buffer: Buffer): number; setSensitivity(sensitivity: string): void; + setHighSensitivity(highSensitivity: string): void; getSensitivity(): string; setAudioGain(gain: number): void; updateModel(): void; @@ -121,6 +123,10 @@ export class SnowboyDetect extends stream.Writable implements SnowboyDetectInter if (options.applyFrontend) { this.nativeInstance.ApplyFrontend(options.applyFrontend); } + + if (options.highSensitivity) { + this.nativeInstance.SetHighSensitivity(options.highSensitivity); + } } reset(): boolean { @@ -137,6 +143,10 @@ export class SnowboyDetect extends stream.Writable implements SnowboyDetectInter this.nativeInstance.SetSensitivity(sensitivity); } + setHighSensitivity(highSensitivity: string): void { + 
this.nativeInstance.SetHighSensitivity(highSensitivity); + } + getSensitivity(): string { return this.nativeInstance.GetSensitivity(); } diff --git a/lib/osx/libsnowboy-detect.a b/lib/osx/libsnowboy-detect.a index eca070ec..b0736ada 100644 Binary files a/lib/osx/libsnowboy-detect.a and b/lib/osx/libsnowboy-detect.a differ diff --git a/lib/pine64/libsnowboy-detect.a b/lib/pine64/libsnowboy-detect.a deleted file mode 100755 index ef94636f..00000000 Binary files a/lib/pine64/libsnowboy-detect.a and /dev/null differ diff --git a/lib/rpi/libsnowboy-detect.a b/lib/rpi/libsnowboy-detect.a index dc2f659d..df04d29d 100644 Binary files a/lib/rpi/libsnowboy-detect.a and b/lib/rpi/libsnowboy-detect.a differ diff --git a/lib/ubuntu64/libsnowboy-detect.a b/lib/ubuntu64/libsnowboy-detect.a index bc5b9991..6aaad1a3 100644 Binary files a/lib/ubuntu64/libsnowboy-detect.a and b/lib/ubuntu64/libsnowboy-detect.a differ diff --git a/package.json b/package.json index 67717107..1f671df1 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "snowboy", - "version": "1.2.0", + "version": "1.3.0", "description": "Snowboy is a customizable hotword detection engine", "main": "lib/node/index.js", "typings": "lib/node/index.d.ts", diff --git a/resources/alexa.umdl b/resources/alexa.umdl deleted file mode 100644 index 0d9db6f2..00000000 Binary files a/resources/alexa.umdl and /dev/null differ diff --git a/resources/models/jarvis.umdl b/resources/models/jarvis.umdl new file mode 100644 index 00000000..aa6ad19d Binary files /dev/null and b/resources/models/jarvis.umdl differ diff --git a/resources/smart_mirror.umdl b/resources/models/smart_mirror.umdl similarity index 100% rename from resources/smart_mirror.umdl rename to resources/models/smart_mirror.umdl diff --git a/resources/snowboy.umdl b/resources/models/snowboy.umdl similarity index 100% rename from resources/snowboy.umdl rename to resources/models/snowboy.umdl