Merge remote-tracking branch 'upstream/master' into python3_fix

Kitt-AI · May 6, 2018 · a5a54ef · a5a54ef
2 parents e6e4de3 + 797a17e
commit a5a54ef
Show file tree

Hide file tree

Showing 21 changed files with 151 additions and 17 deletions.
diff --git a/binding.gyp b/binding.gyp
@@ -31,6 +31,16 @@
                         '<(module_root_dir)/lib/rpi/libsnowboy-detect.a',
                     ]
                 }
+            }],
+            ['OS=="linux" and target_arch=="arm64"', {
+                'link_settings': {
+                    'ldflags': [
+                        '-Wl,--no-as-needed',
+                    ],
+                    'libraries': [
+                        '<(module_root_dir)/lib/aarch64-ubuntu1604/libsnowboy-detect.a',
+                    ]
+                }
             }]
         ],
         'cflags': [

diff --git a/examples/Android/SnowboyAlexaDemo/src/ai/kitt/snowboy/SnowboyVad.java b/examples/Android/SnowboyAlexaDemo/src/ai/kitt/snowboy/SnowboyVad.java
@@ -0,0 +1 @@
+../../../../../../../swig/Android/java/ai/kitt/snowboy/SnowboyVad.java
diff --git a/examples/Android/SnowboyAlexaDemo/src/ai/kitt/snowboy/audio/AudioDataSaver.java b/examples/Android/SnowboyAlexaDemo/src/ai/kitt/snowboy/audio/AudioDataSaver.java
@@ -15,6 +15,19 @@ public class AudioDataSaver implements AudioDataReceivedListener {
 
     private static final String TAG = AudioDataSaver.class.getSimpleName();
 
+    // Size (in MB) at which the current recording file is dropped and a new one is started.
+    private final float MAX_RECORDING_FILE_SIZE_IN_MB = 50f;
+
+    // Initial size (in MB) attributed to a recording file; subtracted from the maximum below.
+    private final float INITIAL_FILE_SIZE_IN_MB = 1.3f;
+
+    // Byte threshold the counter is compared against: (max - initial) MB converted to bytes.
+    private final float MAX_RECORDING_FILE_SIZE_IN_BYTES
+            = (MAX_RECORDING_FILE_SIZE_IN_MB - INITIAL_FILE_SIZE_IN_MB) * 1024 * 1024;
+
+    // Bytes written to the current recording file so far; reset to 0 when the file is restarted.
+    private int recordingFileSizeCounterInBytes = 0;
+
     private File saveFile = null;
     private DataOutputStream dataOutputStreamInstance = null;
 
@@ -25,7 +38,7 @@ public AudioDataSaver() {
 
     @Override
     public void start() {
-        if(null != saveFile) {
+        if (null != saveFile) {
             if (saveFile.exists()) {
                 saveFile.delete();
             }
@@ -36,7 +49,7 @@ public void start() {
             }
 
             try {
-                BufferedOutputStream bufferedStreamInstance  = new BufferedOutputStream(
+                BufferedOutputStream bufferedStreamInstance = new BufferedOutputStream(
                         new FileOutputStream(this.saveFile));
                 dataOutputStreamInstance = new DataOutputStream(bufferedStreamInstance);
             } catch (FileNotFoundException e) {
@@ -48,8 +61,14 @@ public void start() {
     @Override
     public void onAudioDataReceived(byte[] data, int length) {
         try {
-            if(null != dataOutputStreamInstance) {
+            if (null != dataOutputStreamInstance) {
+                if (recordingFileSizeCounterInBytes >= MAX_RECORDING_FILE_SIZE_IN_BYTES) {
+                    stop();
+                    start();
+                    recordingFileSizeCounterInBytes = 0;
+                }
                 dataOutputStreamInstance.write(data, 0, length);
+                recordingFileSizeCounterInBytes += length;
             }
         } catch (IOException e) {
             Log.e(TAG, "IO Exception on saving audio file " + saveFile.toString(), e);
@@ -58,7 +77,7 @@ public void onAudioDataReceived(byte[] data, int length) {
 
     @Override
     public void stop() {
-        if(null != dataOutputStreamInstance) {
+        if (null != dataOutputStreamInstance) {
             try {
                 dataOutputStreamInstance.close();
             } catch (IOException e) {

diff --git a/examples/Android/SnowboyAlexaDemo/src/ai/kitt/snowboy/audio/RecordingThread.java b/examples/Android/SnowboyAlexaDemo/src/ai/kitt/snowboy/audio/RecordingThread.java
@@ -41,7 +41,7 @@ public RecordingThread(Handler handler, AudioDataReceivedListener listener) {
         this.listener = listener;
 
         detector.SetSensitivity("0.6");
-        //-detector.SetAudioGain(1);
+        detector.SetAudioGain(1);
         detector.ApplyFrontend(true);
         try {
             player.setDataSource(strEnvWorkSpace+"ding.wav");

diff --git a/examples/C++/demo.cc b/examples/C++/demo.cc
@@ -207,11 +207,13 @@ int main(int argc, char* argv[]) {
   std::string model_filename = "resources/models/snowboy.umdl";
   std::string sensitivity_str = "0.5";
   float audio_gain = 1;
+  bool apply_frontend = false;
 
   // Initializes Snowboy detector.
   snowboy::SnowboyDetect detector(resource_filename, model_filename);
   detector.SetSensitivity(sensitivity_str);
   detector.SetAudioGain(audio_gain);
+  detector.ApplyFrontend(apply_frontend);
 
   // Initializes PortAudio. You may use other tools to capture the audio.
   PortAudioWrapper pa_wrapper(detector.SampleRate(),

diff --git a/examples/C++/demo2.cc b/examples/C++/demo2.cc
@@ -5,6 +5,8 @@
 #define resource_filename "resources/common.res"
 #define model_filename "resources/models/snowboy.umdl"
 #define sensitivity_str "0.5"
+#define audio_gain 1.0
+#define apply_frontend false
 
 struct wavHeader { //44 byte HEADER only
   char  RIFF[4];
@@ -136,6 +138,8 @@ int main(int argc, char * argv[]) {
   // Initializes Snowboy detector.
   snowboy::SnowboyDetect detector(resource_filename, model_filename);
   detector.SetSensitivity(sensitivity_str);
+  detector.SetAudioGain(audio_gain);
+  detector.ApplyFrontend(apply_frontend);
 
   int result = detector.RunDetection(&data_buffer[0], fsize/sizeof(short));
   std::cout << ">>>>> Result: " << result << " <<<<<" << std::endl;

diff --git a/examples/C/demo.c b/examples/C/demo.c
@@ -190,12 +190,14 @@ int main(int argc, char* argv[]) {
   const char model_filename[] = "resources/models/snowboy.umdl";
   const char sensitivity_str[] = "0.5";
   float audio_gain = 1;
+  bool apply_frontend = false;
 
   // Initializes Snowboy detector.
   SnowboyDetect* detector = SnowboyDetectConstructor(resource_filename,
                                                      model_filename);
   SnowboyDetectSetSensitivity(detector, sensitivity_str);
   SnowboyDetectSetAudioGain(detector, audio_gain);
+  SnowboyDetectApplyFrontend(detector, apply_frontend);
 
   // Initializes PortAudio. You may use other tools to capture the audio.
   StartAudioCapturing(SnowboyDetectSampleRate(detector),

diff --git a/examples/Go/detect/main.go b/examples/Go/detect/main.go
@@ -18,6 +18,7 @@ func main() {
 	detector := snowboydetect.NewSnowboyDetect("../../../resources/common.res", os.Args[1])
 	detector.SetSensitivity("0.5")
 	detector.SetAudioGain(1)
+	detector.ApplyFrontend(false)
 	defer snowboydetect.DeleteSnowboyDetect(detector)
 
 	dat, err := ioutil.ReadFile(os.Args[2])
@@ -36,4 +37,4 @@ func main() {
 	} else {
 		fmt.Println("Snowboy detected keyword ", res)
 	}
-}
+}
diff --git a/examples/Java/Demo.java b/examples/Java/Demo.java
@@ -24,6 +24,7 @@ public static void main(String[] args) {
                                                "resources/models/snowboy.umdl");
     detector.SetSensitivity("0.5");
     detector.SetAudioGain(1);
+    detector.ApplyFrontend(false);
 
     try {
       TargetDataLine targetLine =

diff --git a/examples/Node/file.js b/examples/Node/file.js
@@ -14,7 +14,8 @@ models.add({
 const detector = new Detector({
   resource: "resources/common.res",
   models: models,
-  audioGain: 1.0
+  audioGain: 1.0,
+  applyFrontend: false
 });
 
 detector.on('silence', function () {

diff --git a/examples/Node/microphone.js b/examples/Node/microphone.js
@@ -13,7 +13,8 @@ models.add({
 const detector = new Detector({
   resource: "resources/common.res",
   models: models,
-  audioGain: 2.0
+  audioGain: 2.0,
+  applyFrontend: true
 });
 
 detector.on('silence', function () {

diff --git a/examples/Perl/snowboy_googlevoice.pl b/examples/Perl/snowboy_googlevoice.pl
@@ -133,6 +133,7 @@
 $sb = new Snowboy::SnowboyDetect('resources/common.res', $model);
 $sb->SetSensitivity('0.5');
 $sb->SetAudioGain(1.0);
+$sb->ApplyFrontend(0);
 
 # Running the detection forever.
 print "\n";

diff --git a/examples/Perl/snowboy_unit_test.pl b/examples/Perl/snowboy_unit_test.pl
@@ -16,6 +16,7 @@
 
 $sb->SetSensitivity ("0.5");
 $sb->SetAudioGain (1);
+$sb->ApplyFrontend (0);
 
 print "==== SnowBoy object properties ====\n";
 print "Sample Rate         : ", $sb->SampleRate(), "\n";

diff --git a/examples/iOS/Obj-C/SnowboyTest/ViewController.mm b/examples/iOS/Obj-C/SnowboyTest/ViewController.mm
@@ -31,6 +31,7 @@ - (void)initSnowboy {
                                                 std::string([[[NSBundle mainBundle]pathForResource:@"alexa" ofType:@"umdl"] UTF8String]));
     _snowboyDetect->SetSensitivity("0.5");
     _snowboyDetect->SetAudioGain(1.0);
+    _snowboyDetect->ApplyFrontend(false);
 }
 
 - (void) initMic {

diff --git a/include/snowboy-detect.h b/include/snowboy-detect.h
@@ -97,6 +97,12 @@ class SnowboyDetect {
   // Make sure you properly align the sensitivity value to the corresponding
   // hotword.
   void SetSensitivity(const std::string& sensitivity_str);
+
+  // Similar to the sensitivity setting above. When set higher than the above
+  // sensitivity, the algorithm automatically chooses between the normal
+  // sensitivity set above and the higher sensitivity set here, to maximize the
+  // performance. By default, it is not set, which means the algorithm will
+  // stick with the sensitivity set above.
   void SetHighSensitivity(const std::string& high_sensitivity_str);
 
   // Returns the sensitivity string for the current hotwords.
@@ -118,7 +124,13 @@ class SnowboyDetect {
   int NumHotwords() const;
 
   // If <apply_frontend> is true, then apply frontend audio processing;
-  // otherwise turns the audio processing off.
+  // otherwise turns the audio processing off. Frontend audio processing
+  // includes algorithms such as automatic gain control (AGC), noise suppression
+  // (NS) and so on. Generally adding frontend audio processing helps the
+  // performance, but if the model is not trained with frontend audio
+  // processing, it may decrease the performance. The general rule of thumb is:
+  //   1. For personal models, set it to false.
+  //   2. For universal models, follow the instructions of each published model.
   void ApplyFrontend(const bool apply_frontend);
 
   // Returns the required sampling rate, number of channels and bits per sample

diff --git a/lib/aarch64-ubuntu1604/libsnowboy-detect.a b/lib/aarch64-ubuntu1604/libsnowboy-detect.a
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "snowboy",
-  "version": "1.3.0",
+  "version": "1.3.1",
   "description": "Snowboy is a customizable hotword detection engine",
   "main": "lib/node/index.js",
   "typings": "lib/node/index.d.ts",
@@ -12,7 +12,6 @@
     "host": "https://snowboy-release-node.s3-us-west-2.amazonaws.com"
   },
   "scripts": {
-    "preinstall": "npm install node-pre-gyp",
     "install": "node-pre-gyp install --fallback-to-build",
     "test": "node index.js",
     "prepublish": "tsc --listFiles"

diff --git a/resources/models/jarvis.umdl b/resources/models/jarvis.umdl
diff --git a/scripts/install_swig.sh b/scripts/install_swig.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+# SWIG is a tool to compile c++ code into Python.
+
+echo "Installing SWIG"
+
+if [ ! -e swig-3.0.10.tar.gz ]; then
+  cp exteral_tools/swig-3.0.10.tar.gz ./ || \
+  wget -T 10 -t 3 \
+    http://prdownloads.sourceforge.net/swig/swig-3.0.10.tar.gz || exit 1;
+fi
+
+tar -xovzf swig-3.0.10.tar.gz || exit 1
+ln -s swig-3.0.10 swig
+
+cd swig
+
+# We first have to install PCRE.
+if [ ! -e pcre-8.37.tar.gz ]; then
+  cp ../exteral_tools/pcre-8.37.tar.gz ./ || \
+  wget -T 10 -t 3 \
+    https://sourceforge.net/projects/pcre/files/pcre/8.37/pcre-8.37.tar.gz || exit 1;
+fi
+Tools/pcre-build.sh
+
+./configure --prefix=`pwd` --with-pic
+make
+make install
+
+cd ..
diff --git a/swig/Python/snowboydecoder.py b/swig/Python/snowboydecoder.py
@@ -7,6 +7,8 @@
 import wave
 import os
 import logging
+from ctypes import *
+from contextlib import contextmanager
 
 logging.basicConfig()
 logger = logging.getLogger("snowboy")
@@ -17,6 +19,23 @@
 DETECT_DING = os.path.join(TOP_DIR, "resources/ding.wav")
 DETECT_DONG = os.path.join(TOP_DIR, "resources/dong.wav")
 
+def py_error_handler(filename, line, function, err, fmt):
+    """No-op error callback: accepts the five arguments and ignores them."""
+    pass
+
+# C callback type: returns nothing, takes (char*, int, char*, int, char*).
+ERROR_HANDLER_FUNC = CFUNCTYPE(None, c_char_p, c_int, c_char_p, c_int, c_char_p)
+
+# Module-level ctypes wrapper around py_error_handler; this is the object
+# handed to snd_lib_error_set_handler in no_alsa_error().
+c_error_handler = ERROR_HANDLER_FUNC(py_error_handler)
+
+@contextmanager
+def no_alsa_error():
+    try:
+        asound = cdll.LoadLibrary('libasound.so')
+        asound.snd_lib_error_set_handler(c_error_handler)
+        yield
+        asound.snd_lib_error_set_handler(None)
+    except:
+        yield
+        pass
 
 class RingBuffer(object):
     """Ring buffer to hold audio from PortAudio"""
@@ -43,7 +62,8 @@ def play_audio_file(fname=DETECT_DING):
     """
     ding_wav = wave.open(fname, 'rb')
     ding_data = ding_wav.readframes(ding_wav.getnframes())
-    audio = pyaudio.PyAudio()
+    with no_alsa_error():
+        audio = pyaudio.PyAudio()
     stream_out = audio.open(
         format=audio.get_format_from_width(ding_wav.getsampwidth()),
         channels=ding_wav.getnchannels(),
@@ -68,11 +88,13 @@ class HotwordDetector(object):
                               decoder. If an empty list is provided, then the
                               default sensitivity in the model will be used.
     :param audio_gain: multiply input volume by this factor.
+    :param apply_frontend: applies the frontend processing algorithm if True.
     """
     def __init__(self, decoder_model,
                  resource=RESOURCE_FILE,
                  sensitivity=[],
-                 audio_gain=1):
+                 audio_gain=1,
+                 apply_frontend=False):
 
         def audio_callback(in_data, frame_count, time_info, status):
             self.ring_buffer.extend(in_data)
@@ -90,6 +112,7 @@ def audio_callback(in_data, frame_count, time_info, status):
         self.detector = snowboydetect.SnowboyDetect(
             resource_filename=resource.encode(), model_str=model_str.encode())
         self.detector.SetAudioGain(audio_gain)
+        self.detector.ApplyFrontend(apply_frontend)
         self.num_hotwords = self.detector.NumHotwords()
 
         if len(decoder_model) > 1 and len(sensitivity) == 1:
@@ -104,7 +127,8 @@ def audio_callback(in_data, frame_count, time_info, status):
 
         self.ring_buffer = RingBuffer(
             self.detector.NumChannels() * self.detector.SampleRate() * 5)
-        self.audio = pyaudio.PyAudio()
+        with no_alsa_error():
+            self.audio = pyaudio.PyAudio()
         self.stream_in = self.audio.open(
             input=True, output=False,
             format=self.audio.get_format_from_width(
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		../../../../../../../swig/Android/java/ai/kitt/snowboy/SnowboyVad.java