Merge pull request #11 from Kitt-AI/devel

Devel
Kitt-AI · May 24, 2016 · 5f0daeb · 5f0daeb
2 parents 9f529c9 + f761bb1
commit 5f0daeb
Show file tree

Hide file tree

Showing 22 changed files with 416 additions and 10 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,7 +1,12 @@
 /lib/libsnowboy-detect.a
 snowboy-detect-swig.cc
 snowboydetect.py
+.DS_Store
 
 *.pyc
 *.o
 *.so
+
+/examples/C++/pa_stable_v19_20140130.tgz
+/examples/C++/portaudio
+/examples/C++/demo
diff --git a/README.md b/README.md
@@ -7,7 +7,7 @@ by [KITT.AI](http://kitt.ai).
 [Full Documentation](https://snowboy.kitt.ai/docs)
 
 
-Version: 1.0.1 (5/16/2016)
+Version: 1.0.2 (5/24/2016)
 
 Snowboy is a customizable hotword detection engine for you to create your own
 hotword like "OK Google" or "Alexa". It is powered by deep neural networks and has the following properties:
@@ -32,11 +32,11 @@ It ships in the form of a **C library** with **Python** wrappers generated by SW
 If you want support on other hardware/OS, please send your request to [snowboy@kitt.ai](mailto:snowboy@kitt.ai)
 
 ## Precompiled Binaries
-* 64 bit Ubuntu [12.04](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/ubuntu1204-x86_64-1.0.1.tar.bz2)
-  / [14.04](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/ubuntu1404-x86_64-1.0.1.tar.bz2)
-* [MacOS X](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/osx-x86_64-1.0.1.tar.bz2)
+* 64 bit Ubuntu [12.04](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/ubuntu1204-x86_64-1.0.2.tar.bz2)
+  / [14.04](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/ubuntu1404-x86_64-1.0.2.tar.bz2)
+* [MacOS X](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/osx-x86_64-1.0.2.tar.bz2)
 * Raspberry Pi with Raspbian 8.0, all versions
-  ([1/2/3/Zero](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/rpi-arm-raspbian-8.0-1.0.1.tar.bz2))
+  ([1/2/3/Zero](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/rpi-arm-raspbian-8.0-1.0.2.tar.bz2))
 
 If you want to compile a version against your own environment/language, read on.
 
@@ -76,17 +76,17 @@ If you need extra setup on your audio (especially on a Raspberry Pi), please see
 
 ## Compile a Python Wrapper
 
-    cd swig/python
+    cd swig/Python
     make
 
 SWIG will generate a `_snowboydetect.so` file and a simple (but hard-to-read) python wrapper `snowboydetect.py`. We have provided a higher level python wrapper `snowboydecoder.py` on top of that.
 
-Feel free to adapt the `Makefile` in `swig/python` to your own system's setting if you cannot `make` it.
+Feel free to adapt the `Makefile` in `swig/Python` to your own system's setting if you cannot `make` it.
 
 
 ## Quick Start
 
-Go to the `swig/python` folder and open your python console:
+Go to the `examples/Python` folder and open your python console:
 
     In [1]: import snowboydecoder
 
@@ -120,11 +120,16 @@ See [Full Documentation](https://snowboy.kitt.ai/docs).
 
 ## Change Log
 
+**v1.0.2, 5/24/2016**
+
+* Updated universal `snowboy.umdl` model
+* Added C++ examples; docs will come in the next release.
+
 **v1.0.1, 5/16/2016**
 
 * VAD now returns -2 on silence, -1 on error, 0 on voice and >0 on triggered models
 * added static library for Raspberry Pi in case people want to compile themselves instead of using the binary version
 
 **v1.0.0, 5/10/2016**
 
-* initial release
+* initial release
diff --git a/examples/C++/Makefile b/examples/C++/Makefile
@@ -0,0 +1,22 @@
+include demo.mk
+
+BINFILES = demo
+
+all: $(BINFILES)  # Default goal: build the demo binary.
+
+%.a:  # Builds any archive by running make in that archive's directory.
+	$(MAKE) -C ${@D} ${@F}
+
+$(BINFILES): $(PORTAUDIOLIBS) $(SNOWBOYDETECTLIBFILE)
+
+$(PORTAUDIOLIBS):  # Produced by the install script; its failures are ignored.
+	@-./install_portaudio.sh
+
+clean:
+	-rm -f *.o *.a $(BINFILES)
+
+depend:  # Writes the compiler-generated (-M) dependency rules to .depend.mk.
+	-$(CXX) -M $(CXXFLAGS) *.cc > .depend.mk
+
+# The leading "-" lets make continue silently when .depend.mk does not exist.
+-include .depend.mk
diff --git a/examples/C++/demo.cc b/examples/C++/demo.cc
@@ -0,0 +1,235 @@
+// examples/C++/demo.cc
+
+// Copyright 2016  KITT.AI (author: Guoguo Chen)
+
+#include <cassert>
+#include <csignal>
+#include <iostream>
+#include <pa_ringbuffer.h>
+#include <pa_util.h>
+#include <portaudio.h>
+#include <string>
+#include <vector>
+
+#include "include/snowboy-detect.h"
+
+int PortAudioCallback(const void* input,
+                      void* output,
+                      unsigned long frame_count,
+                      const PaStreamCallbackTimeInfo* time_info,
+                      PaStreamCallbackFlags status_flags,
+                      void* user_data);  // Forward declaration; defined below.
+
+class PortAudioWrapper {  // Buffers PortAudio input in a ring buffer for Read().
+ public:
+  // Constructor. Clears the lost-sample count and sets up capture via Init().
+  PortAudioWrapper(int sample_rate, int num_channels, int bits_per_sample) {
+    num_lost_samples_ = 0;
+    min_read_samples_ = sample_rate * 0.1;  // Read() waits for this many.
+    Init(sample_rate, num_channels, bits_per_sample);  // NOTE(review): result is discarded.
+  }
+
+  // Waits for min_read_samples_ buffered samples, then drains them into <data>.
+  template<typename T>
+  void Read(std::vector<T>* data) {
+    assert(data != NULL);
+
+    // Reports, then clears, the samples that Callback() could not write.
+    if (num_lost_samples_ > 0) {
+      std::cerr << "Lost " << num_lost_samples_ << " samples due to ring"
+          << " buffer overflow." << std::endl;
+      num_lost_samples_ = 0;
+    }
+
+    ring_buffer_size_t num_available_samples = 0;
+    while (true) {
+      num_available_samples =
+          PaUtil_GetRingBufferReadAvailable(&pa_ringbuffer_);
+      if (num_available_samples >= min_read_samples_) {
+        break;
+      }
+      Pa_Sleep(5);  // Polls again after 5 ms.
+    }
+
+    // Queries the count again so samples that arrived meanwhile are read too.
+    num_available_samples = PaUtil_GetRingBufferReadAvailable(&pa_ringbuffer_);
+    data->resize(num_available_samples);  // NOTE(review): assumes sizeof(T) is the element size.
+    ring_buffer_size_t num_read_samples = PaUtil_ReadRingBuffer(
+        &pa_ringbuffer_, data->data(), num_available_samples);
+    if (num_read_samples != num_available_samples) {
+      std::cerr << num_available_samples << " samples were available,  but "
+          << "only " << num_read_samples << " samples were read." << std::endl;
+    }
+  }
+
+  int Callback(const void* input, void* output,
+               unsigned long frame_count,
+               const PaStreamCallbackTimeInfo* time_info,
+               PaStreamCallbackFlags status_flags) {  // Only input/frame_count are used.
+    // Stores the input frames; frames that do not fit are counted as lost.
+    ring_buffer_size_t num_written_samples =
+        PaUtil_WriteRingBuffer(&pa_ringbuffer_, input, frame_count);
+    num_lost_samples_ += frame_count - num_written_samples;
+    return paContinue;
+  }
+
+  ~PortAudioWrapper() {  // Tears down the stream, PortAudio and the buffer memory.
+    Pa_StopStream(pa_stream_);  // NOTE(review): pa_stream_ is unset if Init() failed early.
+    Pa_CloseStream(pa_stream_);
+    Pa_Terminate();
+    PaUtil_FreeMemory(ringbuffer_);
+  }
+
+ private:
+  // Sets up the ring buffer and starts the input stream; false on any failure.
+  bool Init(int sample_rate, int num_channels, int bits_per_sample) {
+    // Allocates ring buffer memory.
+    int ringbuffer_size = 16384;  // Capacity in elements; must be a power of 2.
+    ringbuffer_ = static_cast<char*>(
+        PaUtil_AllocateMemory(bits_per_sample / 8 * ringbuffer_size));
+    if (ringbuffer_ == NULL) {
+      std::cerr << "Fail to allocate memory for ring buffer." << std::endl;
+      return false;
+    }
+
+    // Initializes PortAudio ring buffer.
+    ring_buffer_size_t rb_init_ans =
+        PaUtil_InitializeRingBuffer(&pa_ringbuffer_, bits_per_sample / 8,
+                                    ringbuffer_size, ringbuffer_);
+    if (rb_init_ans == -1) {
+      std::cerr << "Ring buffer size is not power of 2." << std::endl;
+      return false;
+    }
+
+    // Initializes PortAudio.
+    PaError pa_init_ans = Pa_Initialize();
+    if (pa_init_ans != paNoError) {
+      std::cerr << "Fail to initialize PortAudio, error message is \""
+          << Pa_GetErrorText(pa_init_ans) << "\"" << std::endl;
+      return false;
+    }
+
+    PaError pa_open_ans;  // Input-only stream; sample format follows bits_per_sample.
+    if (bits_per_sample == 8) {
+      pa_open_ans = Pa_OpenDefaultStream(
+          &pa_stream_, num_channels, 0, paUInt8, sample_rate,
+          paFramesPerBufferUnspecified, PortAudioCallback, this);
+    } else if (bits_per_sample == 16) {
+      pa_open_ans = Pa_OpenDefaultStream(
+          &pa_stream_, num_channels, 0, paInt16, sample_rate,
+          paFramesPerBufferUnspecified, PortAudioCallback, this);
+    } else if (bits_per_sample == 32) {
+      pa_open_ans = Pa_OpenDefaultStream(
+          &pa_stream_, num_channels, 0, paInt32, sample_rate,
+          paFramesPerBufferUnspecified, PortAudioCallback, this);
+    } else {
+      std::cerr << "Unsupported BitsPerSample: " << bits_per_sample
+          << std::endl;
+      return false;
+    }
+    if (pa_open_ans != paNoError) {
+      std::cerr << "Fail to open PortAudio stream, error message is \""
+          << Pa_GetErrorText(pa_open_ans) << "\"" << std::endl;
+      return false;
+    }
+
+    PaError pa_stream_start_ans = Pa_StartStream(pa_stream_);
+    if (pa_stream_start_ans != paNoError) {
+      std::cerr << "Fail to start PortAudio stream, error message is \""
+          << Pa_GetErrorText(pa_stream_start_ans) << "\"" << std::endl;
+      return false;
+    }
+    return true;
+  }
+
+ private:
+  // Memory backing the ring buffer; allocated in Init(), freed in the destructor.
+  char* ringbuffer_;
+
+  // Ring buffer wrapper used in PortAudio.
+  PaUtilRingBuffer pa_ringbuffer_;
+
+  // Pointer to PortAudio stream.
+  PaStream* pa_stream_;
+
+  // Samples Callback() could not write since the last Read() reported them.
+  int num_lost_samples_;
+
+  // Wait for this number of samples in each Read() call.
+  int min_read_samples_;
+};
+
+int PortAudioCallback(const void* input,
+                      void* output,
+                      unsigned long frame_count,
+                      const PaStreamCallbackTimeInfo* time_info,
+                      PaStreamCallbackFlags status_flags,
+                      void* user_data) {  // user_data is the wrapper's "this".
+  PortAudioWrapper* pa_wrapper = reinterpret_cast<PortAudioWrapper*>(user_data);
+  pa_wrapper->Callback(input, output, frame_count, time_info, status_flags);
+  return paContinue;  // Callback()'s own result (always paContinue) is ignored.
+}
+
+void SignalHandler(int signal){  // Installed for SIGINT in main().
+  std::cerr << "Caught signal " << signal << ", terminating..." << std::endl;
+  exit(0);  // NOTE(review): iostream/exit() in a handler may not be async-signal-safe.
+}
+
+int main(int argc, char* argv[]) {  // Runs hotword detection on captured audio.
+  std::string usage =
+      "Example that shows how to use Snowboy in C++. Parameters are\n"
+      "hard-coded in the parameter section. Please check the source code for\n"
+      "more details. Audio is captured by PortAudio.\n"
+      "\n"
+      "To run the example:\n"
+      "  ./demo\n";
+
+  // The demo takes no arguments; any argument prints the usage and exits.
+  if (argc > 1) {
+    std::cerr << usage;
+    exit(1);
+  }
+
+  // Installs SignalHandler for SIGINT so that Ctrl+C terminates the demo.
+   struct sigaction sig_int_handler;
+   sig_int_handler.sa_handler = SignalHandler;
+   sigemptyset(&sig_int_handler.sa_mask);
+   sig_int_handler.sa_flags = 0;
+   sigaction(SIGINT, &sig_int_handler, NULL);
+
+  // Parameter section.
+  // If you have multiple hotword models (e.g., 2), you should set
+  // <model_filename> and <sensitivity_str> as follows:
+  //   model_filename = "resources/snowboy.umdl,resources/alexa.pmdl";
+  //   sensitivity_str = "0.4,0.4";
+  std::string resource_filename = "resources/common.res";
+  std::string model_filename = "resources/snowboy.umdl";
+  std::string sensitivity_str = "0.4";
+  float audio_gain = 1;
+
+  // Initializes Snowboy detector.
+  snowboy::SnowboyDetect detector(resource_filename, model_filename);
+  detector.SetSensitivity(sensitivity_str);
+  detector.SetAudioGain(audio_gain);
+
+  // Initializes PortAudio. You may use other tools to capture the audio.
+  PortAudioWrapper pa_wrapper(detector.SampleRate(),
+                              detector.NumChannels(), detector.BitsPerSample());
+
+  // Runs the detection.
+  // Note: <int16_t> is hard-coded as the sample type because
+  //       detector.BitsPerSample() returns 16.
+  std::cout << "Listening... Press Ctrl+C to exit" << std::endl;
+  std::vector<int16_t> data;
+  while (true) {
+    pa_wrapper.Read(&data);  // Waits until enough audio has been buffered.
+    if (data.size() != 0) {
+      int result = detector.RunDetection(data.data(), data.size());
+      if (result > 0) {  // Positive results mean a hotword model was triggered.
+        std::cout << "Hotword " << result << " detected!" << std::endl;
+      }
+    }
+  }
+
+  return 0;  // Unreachable: the loop above never breaks.
+}
diff --git a/examples/C++/demo.mk b/examples/C++/demo.mk
@@ -0,0 +1,53 @@
+TOPDIR := ../../
+DYNAMIC := True
+CC = $(CXX)
+CXX :=
+LDFLAGS :=
+LDLIBS :=
+PORTAUDIOINC := portaudio/install/include
+PORTAUDIOLIBS := portaudio/install/lib/libportaudio.a
+
+ifeq ($(DYNAMIC), True)
+  CXXFLAGS += -fPIC
+endif
+
+ifeq ($(shell uname -m | cut -c 1-3), x86)
+  CXXFLAGS += -msse  -msse2
+endif
+
+ifeq ($(shell uname), Darwin)
+  # On a Mac g++ is clang++ by default, but users may have changed that, so
+  # clang++ is selected explicitly.
+  CXX := clang++
+  CXXFLAGS += -I$(TOPDIR) -Wall -Wno-sign-compare -Winit-self \
+      -DHAVE_POSIX_MEMALIGN -DHAVE_CLAPACK -I$(PORTAUDIOINC)
+  LDLIBS += -ldl -lm -framework Accelerate -framework CoreAudio \
+      -framework AudioToolbox -framework AudioUnit -framework CoreServices \
+      $(PORTAUDIOLIBS)
+  SNOWBOYDETECTLIBFILE := $(TOPDIR)/lib/osx/libsnowboy-detect.a
+else ifeq ($(shell uname), Linux)
+  CXX := g++
+  CXXFLAGS += -I$(TOPDIR) -std=c++0x -Wall -Wno-sign-compare \
+      -Wno-unused-local-typedefs -Winit-self -rdynamic \
+      -DHAVE_POSIX_MEMALIGN -I$(PORTAUDIOINC)
+  LDLIBS += -ldl -lm -Wl,-Bstatic -Wl,-Bdynamic -lrt -lpthread $(PORTAUDIOLIBS)
+  ifneq ($(wildcard $(PORTAUDIOINC)/pa_linux_alsa.h),)
+    LDLIBS += -lasound
+  endif
+  ifneq ($(wildcard $(PORTAUDIOINC)/pa_jack.h),)
+    LDLIBS += -ljack
+  endif
+  SNOWBOYDETECTLIBFILE := $(TOPDIR)/lib/ubuntu64/libsnowboy-detect.a
+  ifneq (,$(findstring arm,$(shell uname -m)))
+    SNOWBOYDETECTLIBFILE := $(TOPDIR)/lib/rpi/libsnowboy-detect.a
+  endif
+endif
+
+# Silence clang-only warnings when the output of "$(CXX) -v" mentions clang.
+COMPILER = $(shell $(CXX) -v 2>&1 )
+ifeq ($(findstring clang,$(COMPILER)), clang)
+  CXXFLAGS += -Wno-mismatched-tags -Wno-c++11-extensions
+endif
+
+# Optimization level, applied on every platform.
+CXXFLAGS += -O3