diff --git a/binding.gyp b/binding.gyp index ab833be4..b2917aef 100644 --- a/binding.gyp +++ b/binding.gyp @@ -31,6 +31,16 @@ '<(module_root_dir)/lib/rpi/libsnowboy-detect.a', ] } + }], + ['OS=="linux" and target_arch=="arm64"', { + 'link_settings': { + 'ldflags': [ + '-Wl,--no-as-needed', + ], + 'libraries': [ + '<(module_root_dir)/lib/aarch64-ubuntu1604/libsnowboy-detect.a', + ] + } }] ], 'cflags': [ diff --git a/examples/Android/SnowboyAlexaDemo/src/ai/kitt/snowboy/SnowboyVad.java b/examples/Android/SnowboyAlexaDemo/src/ai/kitt/snowboy/SnowboyVad.java new file mode 120000 index 00000000..12df56be --- /dev/null +++ b/examples/Android/SnowboyAlexaDemo/src/ai/kitt/snowboy/SnowboyVad.java @@ -0,0 +1 @@ +../../../../../../../swig/Android/java/ai/kitt/snowboy/SnowboyVad.java \ No newline at end of file diff --git a/examples/Android/SnowboyAlexaDemo/src/ai/kitt/snowboy/audio/AudioDataSaver.java b/examples/Android/SnowboyAlexaDemo/src/ai/kitt/snowboy/audio/AudioDataSaver.java index e847847f..5f464507 100644 --- a/examples/Android/SnowboyAlexaDemo/src/ai/kitt/snowboy/audio/AudioDataSaver.java +++ b/examples/Android/SnowboyAlexaDemo/src/ai/kitt/snowboy/audio/AudioDataSaver.java @@ -15,6 +15,19 @@ public class AudioDataSaver implements AudioDataReceivedListener { private static final String TAG = AudioDataSaver.class.getSimpleName(); + // file size of when to delete and create a new recording file + private final float MAX_RECORDING_FILE_SIZE_IN_MB = 50f; + + // initial file size of recording file + private final float INITIAL_FILE_SIZE_IN_MB = 1.3f; + + // converted max file size + private final float MAX_RECORDING_FILE_SIZE_IN_BYTES + = (MAX_RECORDING_FILE_SIZE_IN_MB - INITIAL_FILE_SIZE_IN_MB) * 1024 * 1024; + + // keeps track of recording file size + private int recordingFileSizeCounterInBytes = 0; + private File saveFile = null; private DataOutputStream dataOutputStreamInstance = null; @@ -25,7 +38,7 @@ public AudioDataSaver() { @Override public void start() { - 
if(null != saveFile) { + if (null != saveFile) { if (saveFile.exists()) { saveFile.delete(); } @@ -36,7 +49,7 @@ public void start() { } try { - BufferedOutputStream bufferedStreamInstance = new BufferedOutputStream( + BufferedOutputStream bufferedStreamInstance = new BufferedOutputStream( new FileOutputStream(this.saveFile)); dataOutputStreamInstance = new DataOutputStream(bufferedStreamInstance); } catch (FileNotFoundException e) { @@ -48,8 +61,14 @@ public void start() { @Override public void onAudioDataReceived(byte[] data, int length) { try { - if(null != dataOutputStreamInstance) { + if (null != dataOutputStreamInstance) { + if (recordingFileSizeCounterInBytes >= MAX_RECORDING_FILE_SIZE_IN_BYTES) { + stop(); + start(); + recordingFileSizeCounterInBytes = 0; + } dataOutputStreamInstance.write(data, 0, length); + recordingFileSizeCounterInBytes += length; } } catch (IOException e) { Log.e(TAG, "IO Exception on saving audio file " + saveFile.toString(), e); @@ -58,7 +77,7 @@ public void onAudioDataReceived(byte[] data, int length) { @Override public void stop() { - if(null != dataOutputStreamInstance) { + if (null != dataOutputStreamInstance) { try { dataOutputStreamInstance.close(); } catch (IOException e) { diff --git a/examples/Android/SnowboyAlexaDemo/src/ai/kitt/snowboy/audio/RecordingThread.java b/examples/Android/SnowboyAlexaDemo/src/ai/kitt/snowboy/audio/RecordingThread.java index 793c4f36..850e955a 100644 --- a/examples/Android/SnowboyAlexaDemo/src/ai/kitt/snowboy/audio/RecordingThread.java +++ b/examples/Android/SnowboyAlexaDemo/src/ai/kitt/snowboy/audio/RecordingThread.java @@ -41,7 +41,7 @@ public RecordingThread(Handler handler, AudioDataReceivedListener listener) { this.listener = listener; detector.SetSensitivity("0.6"); - //-detector.SetAudioGain(1); + detector.SetAudioGain(1); detector.ApplyFrontend(true); try { player.setDataSource(strEnvWorkSpace+"ding.wav"); diff --git a/examples/C++/demo.cc b/examples/C++/demo.cc index 0f5cd1ef..330e6ffa 
100644 --- a/examples/C++/demo.cc +++ b/examples/C++/demo.cc @@ -207,11 +207,13 @@ int main(int argc, char* argv[]) { std::string model_filename = "resources/models/snowboy.umdl"; std::string sensitivity_str = "0.5"; float audio_gain = 1; + bool apply_frontend = false; // Initializes Snowboy detector. snowboy::SnowboyDetect detector(resource_filename, model_filename); detector.SetSensitivity(sensitivity_str); detector.SetAudioGain(audio_gain); + detector.ApplyFrontend(apply_frontend); // Initializes PortAudio. You may use other tools to capture the audio. PortAudioWrapper pa_wrapper(detector.SampleRate(), diff --git a/examples/C++/demo2.cc b/examples/C++/demo2.cc index ed74f728..64a17e6e 100644 --- a/examples/C++/demo2.cc +++ b/examples/C++/demo2.cc @@ -5,6 +5,8 @@ #define resource_filename "resources/common.res" #define model_filename "resources/models/snowboy.umdl" #define sensitivity_str "0.5" +#define audio_gain 1.0 +#define apply_frontend false struct wavHeader { //44 byte HEADER only char RIFF[4]; @@ -136,6 +138,8 @@ int main(int argc, char * argv[]) { // Initializes Snowboy detector. snowboy::SnowboyDetect detector(resource_filename, model_filename); detector.SetSensitivity(sensitivity_str); + detector.SetAudioGain(audio_gain); + detector.ApplyFrontend(apply_frontend); int result = detector.RunDetection(&data_buffer[0], fsize/sizeof(short)); std::cout << ">>>>> Result: " << result << " <<<<<" << std::endl; diff --git a/examples/C/demo.c b/examples/C/demo.c index 971d7407..f72f36ba 100644 --- a/examples/C/demo.c +++ b/examples/C/demo.c @@ -190,12 +190,14 @@ int main(int argc, char* argv[]) { const char model_filename[] = "resources/models/snowboy.umdl"; const char sensitivity_str[] = "0.5"; float audio_gain = 1; + bool apply_frontend = false; // Initializes Snowboy detector. 
SnowboyDetect* detector = SnowboyDetectConstructor(resource_filename, model_filename); SnowboyDetectSetSensitivity(detector, sensitivity_str); SnowboyDetectSetAudioGain(detector, audio_gain); + SnowboyDetectApplyFrontend(detector, apply_frontend); // Initializes PortAudio. You may use other tools to capture the audio. StartAudioCapturing(SnowboyDetectSampleRate(detector), diff --git a/examples/Go/detect/main.go b/examples/Go/detect/main.go index 2300be5b..00ec04d6 100644 --- a/examples/Go/detect/main.go +++ b/examples/Go/detect/main.go @@ -18,6 +18,7 @@ func main() { detector := snowboydetect.NewSnowboyDetect("../../../resources/common.res", os.Args[1]) detector.SetSensitivity("0.5") detector.SetAudioGain(1) + detector.ApplyFrontend(false) defer snowboydetect.DeleteSnowboyDetect(detector) dat, err := ioutil.ReadFile(os.Args[2]) @@ -36,4 +37,4 @@ func main() { } else { fmt.Println("Snowboy detected keyword ", res) } -} \ No newline at end of file +} diff --git a/examples/Java/Demo.java b/examples/Java/Demo.java index 840a610d..d064007d 100644 --- a/examples/Java/Demo.java +++ b/examples/Java/Demo.java @@ -24,6 +24,7 @@ public static void main(String[] args) { "resources/models/snowboy.umdl"); detector.SetSensitivity("0.5"); detector.SetAudioGain(1); + detector.ApplyFrontend(false); try { TargetDataLine targetLine = diff --git a/examples/Node/file.js b/examples/Node/file.js index 055f1237..c97bf540 100644 --- a/examples/Node/file.js +++ b/examples/Node/file.js @@ -14,7 +14,8 @@ models.add({ const detector = new Detector({ resource: "resources/common.res", models: models, - audioGain: 1.0 + audioGain: 1.0, + applyFrontend: false }); detector.on('silence', function () { diff --git a/examples/Node/microphone.js b/examples/Node/microphone.js index 4e573182..a0dfe2ee 100644 --- a/examples/Node/microphone.js +++ b/examples/Node/microphone.js @@ -13,7 +13,8 @@ models.add({ const detector = new Detector({ resource: "resources/common.res", models: models, - audioGain: 2.0 + 
audioGain: 2.0, + applyFrontend: true }); detector.on('silence', function () { diff --git a/examples/Perl/snowboy_googlevoice.pl b/examples/Perl/snowboy_googlevoice.pl index 9c4075ca..97b5d055 100755 --- a/examples/Perl/snowboy_googlevoice.pl +++ b/examples/Perl/snowboy_googlevoice.pl @@ -133,6 +133,7 @@ $sb = new Snowboy::SnowboyDetect('resources/common.res', $model); $sb->SetSensitivity('0.5'); $sb->SetAudioGain(1.0); +$sb->ApplyFrontend(0); # Running the detection forever. print "\n"; diff --git a/examples/Perl/snowboy_unit_test.pl b/examples/Perl/snowboy_unit_test.pl index 6a9a856d..b54cc306 100755 --- a/examples/Perl/snowboy_unit_test.pl +++ b/examples/Perl/snowboy_unit_test.pl @@ -16,6 +16,7 @@ $sb->SetSensitivity ("0.5"); $sb->SetAudioGain (1); +$sb->ApplyFrontend (0); print "==== SnowBoy object properties ====\n"; print "Sample Rate : ", $sb->SampleRate(), "\n"; diff --git a/examples/iOS/Obj-C/SnowboyTest/ViewController.mm b/examples/iOS/Obj-C/SnowboyTest/ViewController.mm index 4497ec50..d1d6c91b 100644 --- a/examples/iOS/Obj-C/SnowboyTest/ViewController.mm +++ b/examples/iOS/Obj-C/SnowboyTest/ViewController.mm @@ -31,6 +31,7 @@ - (void)initSnowboy { std::string([[[NSBundle mainBundle]pathForResource:@"alexa" ofType:@"umdl"] UTF8String])); _snowboyDetect->SetSensitivity("0.5"); _snowboyDetect->SetAudioGain(1.0); + _snowboyDetect->ApplyFrontend(false); } - (void) initMic { diff --git a/include/snowboy-detect.h b/include/snowboy-detect.h index 76e4036a..95e06c59 100644 --- a/include/snowboy-detect.h +++ b/include/snowboy-detect.h @@ -97,6 +97,12 @@ class SnowboyDetect { // Make sure you properly align the sensitivity value to the corresponding // hotword. void SetSensitivity(const std::string& sensitivity_str); + + // Similar to the sensitivity setting above. When set higher than the above + // sensitivity, the algorithm automatically chooses between the normal + // sensitivity set above and the higher sensitivity set here, to maximize the + // performance. 
By default, it is not set, which means the algorithm will + // stick with the sensitivity set above. void SetHighSensitivity(const std::string& high_sensitivity_str); // Returns the sensitivity string for the current hotwords. @@ -118,7 +124,13 @@ class SnowboyDetect { int NumHotwords() const; // If apply_frontend is true, then apply frontend audio processing; - // otherwise turns the audio processing off. + // otherwise turns the audio processing off. Frontend audio processing + // includes algorithms such as automatic gain control (AGC), noise suppression + // (NS) and so on. Generally adding frontend audio processing helps the + // performance, but if the model is not trained with frontend audio + // processing, it may decrease the performance. The general rule of thumb is: + // 1. For personal models, set it to false. + // 2. For universal models, follow the instruction of each published model void ApplyFrontend(const bool apply_frontend); // Returns the required sampling rate, number of channels and bits per sample diff --git a/lib/aarch64-ubuntu1604/libsnowboy-detect.a b/lib/aarch64-ubuntu1604/libsnowboy-detect.a index 03286c2e..6b8ec973 100644 Binary files a/lib/aarch64-ubuntu1604/libsnowboy-detect.a and b/lib/aarch64-ubuntu1604/libsnowboy-detect.a differ diff --git a/package.json b/package.json index 1f671df1..42489798 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "snowboy", - "version": "1.3.0", + "version": "1.3.1", "description": "Snowboy is a customizable hotword detection engine", "main": "lib/node/index.js", "typings": "lib/node/index.d.ts", @@ -12,7 +12,6 @@ "host": "https://snowboy-release-node.s3-us-west-2.amazonaws.com" }, "scripts": { - "preinstall": "npm install node-pre-gyp", "install": "node-pre-gyp install --fallback-to-build", "test": "node index.js", "prepublish": "tsc --listFiles" diff --git a/resources/models/jarvis.umdl b/resources/models/jarvis.umdl index aa6ad19d..793d2539 100644 Binary files a/resources/models/jarvis.umdl 
and b/resources/models/jarvis.umdl differ diff --git a/scripts/install_swig.sh b/scripts/install_swig.sh new file mode 100755 index 00000000..46208db1 --- /dev/null +++ b/scripts/install_swig.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +# SWIG is a tool to compile c++ code into Python. + +echo "Installing SWIG" + +if [ ! -e swig-3.0.10.tar.gz ]; then + cp exteral_tools/swig-3.0.10.tar.gz ./ || \ + wget -T 10 -t 3 \ + http://prdownloads.sourceforge.net/swig/swig-3.0.10.tar.gz || exit 1; +fi + +tar -xovzf swig-3.0.10.tar.gz || exit 1 +ln -s swig-3.0.10 swig + +cd swig + +# We first have to install PCRE. +if [ ! -e pcre-8.37.tar.gz ]; then + cp ../exteral_tools/pcre-8.37.tar.gz ./ || \ + wget -T 10 -t 3 \ + https://sourceforge.net/projects/pcre/files/pcre/8.37/pcre-8.37.tar.gz || exit 1; +fi +Tools/pcre-build.sh + +./configure --prefix=`pwd` --with-pic +make +make install + +cd .. diff --git a/swig/Python/snowboydecoder.py b/swig/Python/snowboydecoder.py index a7a76b22..7718999c 100644 --- a/swig/Python/snowboydecoder.py +++ b/swig/Python/snowboydecoder.py @@ -7,6 +7,8 @@ import wave import os import logging +from ctypes import * +from contextlib import contextmanager logging.basicConfig() logger = logging.getLogger("snowboy") @@ -17,6 +19,23 @@ DETECT_DING = os.path.join(TOP_DIR, "resources/ding.wav") DETECT_DONG = os.path.join(TOP_DIR, "resources/dong.wav") +def py_error_handler(filename, line, function, err, fmt): + pass + +ERROR_HANDLER_FUNC = CFUNCTYPE(None, c_char_p, c_int, c_char_p, c_int, c_char_p) + +c_error_handler = ERROR_HANDLER_FUNC(py_error_handler) + +@contextmanager +def no_alsa_error(): + try: + asound = cdll.LoadLibrary('libasound.so') + asound.snd_lib_error_set_handler(c_error_handler) + except Exception: # libasound unusable: run the body without touching ALSA + yield + return + try: yield # body runs with ALSA error output silenced; its exceptions propagate + finally: asound.snd_lib_error_set_handler(None) class RingBuffer(object): """Ring buffer to hold audio from PortAudio""" @@ -43,7 +62,8 @@ def play_audio_file(fname=DETECT_DING): """ ding_wav = wave.open(fname, 'rb') ding_data = 
ding_wav.readframes(ding_wav.getnframes()) - audio = pyaudio.PyAudio() + with no_alsa_error(): + audio = pyaudio.PyAudio() stream_out = audio.open( format=audio.get_format_from_width(ding_wav.getsampwidth()), channels=ding_wav.getnchannels(), @@ -68,11 +88,13 @@ class HotwordDetector(object): decoder. If an empty list is provided, then the default sensitivity in the model will be used. :param audio_gain: multiply input volume by this factor. + :param apply_frontend: applies the frontend processing algorithm if True. """ def __init__(self, decoder_model, resource=RESOURCE_FILE, sensitivity=[], - audio_gain=1): + audio_gain=1, + apply_frontend=False): def audio_callback(in_data, frame_count, time_info, status): self.ring_buffer.extend(in_data) @@ -90,6 +112,7 @@ def audio_callback(in_data, frame_count, time_info, status): self.detector = snowboydetect.SnowboyDetect( resource_filename=resource.encode(), model_str=model_str.encode()) self.detector.SetAudioGain(audio_gain) + self.detector.ApplyFrontend(apply_frontend) self.num_hotwords = self.detector.NumHotwords() if len(decoder_model) > 1 and len(sensitivity) == 1: @@ -104,7 +127,8 @@ def audio_callback(in_data, frame_count, time_info, status): self.ring_buffer = RingBuffer( self.detector.NumChannels() * self.detector.SampleRate() * 5) - self.audio = pyaudio.PyAudio() + with no_alsa_error(): + self.audio = pyaudio.PyAudio() self.stream_in = self.audio.open( input=True, output=False, format=self.audio.get_format_from_width( diff --git a/swig/Python3/snowboydecoder.py b/swig/Python3/snowboydecoder.py index 75607a60..34ee26fb 100644 --- a/swig/Python3/snowboydecoder.py +++ b/swig/Python3/snowboydecoder.py @@ -7,6 +7,8 @@ import wave import os import logging +from ctypes import * +from contextlib import contextmanager logging.basicConfig() logger = logging.getLogger("snowboy") @@ -17,6 +19,23 @@ DETECT_DING = os.path.join(TOP_DIR, "resources/ding.wav") DETECT_DONG = os.path.join(TOP_DIR, "resources/dong.wav") +def 
py_error_handler(filename, line, function, err, fmt): + pass + +ERROR_HANDLER_FUNC = CFUNCTYPE(None, c_char_p, c_int, c_char_p, c_int, c_char_p) + +c_error_handler = ERROR_HANDLER_FUNC(py_error_handler) + +@contextmanager +def no_alsa_error(): + try: + asound = cdll.LoadLibrary('libasound.so') + asound.snd_lib_error_set_handler(c_error_handler) + except Exception: # libasound unusable: run the body without touching ALSA + yield + return + try: yield # body runs with ALSA error output silenced; its exceptions propagate + finally: asound.snd_lib_error_set_handler(None) class RingBuffer(object): """Ring buffer to hold audio from PortAudio""" @@ -44,7 +63,8 @@ def play_audio_file(fname=DETECT_DING): """ ding_wav = wave.open(fname, 'rb') ding_data = ding_wav.readframes(ding_wav.getnframes()) - audio = pyaudio.PyAudio() + with no_alsa_error(): + audio = pyaudio.PyAudio() stream_out = audio.open( format=audio.get_format_from_width(ding_wav.getsampwidth()), channels=ding_wav.getnchannels(), @@ -69,12 +89,14 @@ class HotwordDetector(object): decoder. If an empty list is provided, then the default sensitivity in the model will be used. :param audio_gain: multiply input volume by this factor. + :param apply_frontend: applies the frontend processing algorithm if True. 
""" def __init__(self, decoder_model, resource=RESOURCE_FILE, sensitivity=[], - audio_gain=1): + audio_gain=1, + apply_frontend=False): tm = type(decoder_model) ts = type(sensitivity) @@ -87,6 +109,7 @@ def __init__(self, decoder_model, self.detector = snowboydetect.SnowboyDetect( resource_filename=resource.encode(), model_str=model_str.encode()) self.detector.SetAudioGain(audio_gain) + self.detector.ApplyFrontend(apply_frontend) self.num_hotwords = self.detector.NumHotwords() if len(decoder_model) > 1 and len(sensitivity) == 1: @@ -141,7 +164,8 @@ def audio_callback(in_data, frame_count, time_info, status): play_data = chr(0) * len(in_data) return play_data, pyaudio.paContinue - self.audio = pyaudio.PyAudio() + with no_alsa_error(): + self.audio = pyaudio.PyAudio() self.stream_in = self.audio.open( input=True, output=False, format=self.audio.get_format_from_width(