diff --git a/.gitignore b/.gitignore index cc6ea75c..99f872b9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,12 @@ /lib/libsnowboy-detect.a snowboy-detect-swig.cc snowboydetect.py +.DS_Store *.pyc *.o *.so + +/examples/C++/pa_stable_v19_20140130.tgz +/examples/C++/portaudio +/examples/C++/demo diff --git a/README.md b/README.md index d2eb31d3..d278c136 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ by [KITT.AI](http://kitt.ai). [Full Documentation](https://snowboy.kitt.ai/docs) -Version: 1.0.1 (5/16/2016) +Version: 1.0.2 (5/24/2016) Snowboy is a customizable hotword detection engine for you to create your own hotword like "OK Google" or "Alexa". It is powered by deep neural networks and has the following properties: @@ -32,11 +32,11 @@ It ships in the form of a **C library** with **Python** wrappers generated by SW If you want support on other hardware/OS, please send your request to [snowboy@kitt.ai](mailto:snowboy.kitt.ai) ## Precompiled Binaries -* 64 bit Ubuntu [12.04](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/ubuntu1204-x86_64-1.0.1.tar.bz2) - / [14.04](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/ubuntu1404-x86_64-1.0.1.tar.bz2) -* [MacOS X](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/osx-x86_64-1.0.1.tar.bz2) +* 64 bit Ubuntu [12.04](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/ubuntu1204-x86_64-1.0.2.tar.bz2) + / [14.04](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/ubuntu1404-x86_64-1.0.2.tar.bz2) +* [MacOS X](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/osx-x86_64-1.0.2.tar.bz2) * Raspberry Pi with Raspbian 8.0, all versions - ([1/2/3/Zero](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/rpi-arm-raspbian-8.0-1.0.1.tar.bz2)) + ([1/2/3/Zero](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/rpi-arm-raspbian-8.0-1.0.2.tar.bz2)) If you want to compile a version against your own environment/language, read on. 
@@ -76,17 +76,17 @@ If you need extra setup on your audio (especially on a Raspberry Pi), please see ## Compile a Python Wrapper - cd swig/python + cd swig/Python make SWIG will generate a `_snowboydetect.so` file and a simple (but hard-to-read) python wrapper `snowboydetect.py`. We have provided a higher level python wrapper `snowboydecoder.py` on top of that. -Feel free to adapt the `Makefile` in `swig/python` to your own system's setting if you cannot `make` it. +Feel free to adapt the `Makefile` in `swig/Python` to your own system's setting if you cannot `make` it. ## Quick Start -Go to the `swig/python` folder and open your python console: +Go to the `examples/Python` folder and open your python console: In [1]: import snowboydecoder @@ -120,6 +120,11 @@ See [Full Documentation](https://snowboy.kitt.ai/docs). ## Change Log +**v1.0.2, 5/24/2016** + +* Updated universal `snowboy.umdl` model +* Added C++ examples; docs will come in the next release. + **v1.0.1, 5/16/2016** * VAD now returns -2 on silence, -1 on error, 0 on voice and >0 on triggered models @@ -127,4 +132,4 @@ See [Full Documentation](https://snowboy.kitt.ai/docs). **v1.0.0, 5/10/2016** -* initial release \ No newline at end of file +* initial release diff --git a/examples/C++/Makefile b/examples/C++/Makefile new file mode 100644 index 00000000..1026a878 --- /dev/null +++ b/examples/C++/Makefile @@ -0,0 +1,22 @@ +include demo.mk + +BINFILES = demo + +all: $(BINFILES) + +%.a: + $(MAKE) -C ${@D} ${@F} + +$(BINFILES): $(PORTAUDIOLIBS) $(SNOWBOYDETECTLIBFILE) + +$(PORTAUDIOLIBS): + @-./install_portaudio.sh + +clean: + -rm -f *.o *.a $(BINFILES) + +depend: + -$(CXX) -M $(CXXFLAGS) *.cc > .depend.mk + +# Putting "-" so no error messages. 
+-include .depend.mk diff --git a/examples/C++/demo.cc b/examples/C++/demo.cc new file mode 100644 index 00000000..9d7df710 --- /dev/null +++ b/examples/C++/demo.cc @@ -0,0 +1,235 @@ +// example/C++/demo.cc + +// Copyright 2016 KITT.AI (author: Guoguo Chen) + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "include/snowboy-detect.h" + +int PortAudioCallback(const void* input, + void* output, + unsigned long frame_count, + const PaStreamCallbackTimeInfo* time_info, + PaStreamCallbackFlags status_flags, + void* user_data); + +class PortAudioWrapper { + public: + // Constructor. + PortAudioWrapper(int sample_rate, int num_channels, int bits_per_sample) { + num_lost_samples_ = 0; + min_read_samples_ = sample_rate * 0.1; + Init(sample_rate, num_channels, bits_per_sample); + } + + // Reads data from ring buffer. + template + void Read(std::vector* data) { + assert(data != NULL); + + // Checks ring buffer overflow. + if (num_lost_samples_ > 0) { + std::cerr << "Lost " << num_lost_samples_ << " samples due to ring" + << " buffer overflow." << std::endl; + num_lost_samples_ = 0; + } + + ring_buffer_size_t num_available_samples = 0; + while (true) { + num_available_samples = + PaUtil_GetRingBufferReadAvailable(&pa_ringbuffer_); + if (num_available_samples >= min_read_samples_) { + break; + } + Pa_Sleep(5); + } + + // Reads data. + num_available_samples = PaUtil_GetRingBufferReadAvailable(&pa_ringbuffer_); + data->resize(num_available_samples); + ring_buffer_size_t num_read_samples = PaUtil_ReadRingBuffer( + &pa_ringbuffer_, data->data(), num_available_samples); + if (num_read_samples != num_available_samples) { + std::cerr << num_available_samples << " samples were available, but " + << "only " << num_read_samples << " samples were read." 
<< std::endl; + } + } + + int Callback(const void* input, void* output, + unsigned long frame_count, + const PaStreamCallbackTimeInfo* time_info, + PaStreamCallbackFlags status_flags) { + // Input audio. + ring_buffer_size_t num_written_samples = + PaUtil_WriteRingBuffer(&pa_ringbuffer_, input, frame_count); + num_lost_samples_ += frame_count - num_written_samples; + return paContinue; + } + + ~PortAudioWrapper() { + Pa_StopStream(pa_stream_); + Pa_CloseStream(pa_stream_); + Pa_Terminate(); + PaUtil_FreeMemory(ringbuffer_); + } + + private: + // Initialization. + bool Init(int sample_rate, int num_channels, int bits_per_sample) { + // Allocates ring buffer memory. + int ringbuffer_size = 16384; + ringbuffer_ = static_cast( + PaUtil_AllocateMemory(bits_per_sample / 8 * ringbuffer_size)); + if (ringbuffer_ == NULL) { + std::cerr << "Fail to allocate memory for ring buffer." << std::endl; + return false; + } + + // Initializes PortAudio ring buffer. + ring_buffer_size_t rb_init_ans = + PaUtil_InitializeRingBuffer(&pa_ringbuffer_, bits_per_sample / 8, + ringbuffer_size, ringbuffer_); + if (rb_init_ans == -1) { + std::cerr << "Ring buffer size is not power of 2." << std::endl; + return false; + } + + // Initializes PortAudio. 
+ PaError pa_init_ans = Pa_Initialize(); + if (pa_init_ans != paNoError) { + std::cerr << "Fail to initialize PortAudio, error message is \"" + << Pa_GetErrorText(pa_init_ans) << "\"" << std::endl; + return false; + } + + PaError pa_open_ans; + if (bits_per_sample == 8) { + pa_open_ans = Pa_OpenDefaultStream( + &pa_stream_, num_channels, 0, paUInt8, sample_rate, + paFramesPerBufferUnspecified, PortAudioCallback, this); + } else if (bits_per_sample == 16) { + pa_open_ans = Pa_OpenDefaultStream( + &pa_stream_, num_channels, 0, paInt16, sample_rate, + paFramesPerBufferUnspecified, PortAudioCallback, this); + } else if (bits_per_sample == 32) { + pa_open_ans = Pa_OpenDefaultStream( + &pa_stream_, num_channels, 0, paInt32, sample_rate, + paFramesPerBufferUnspecified, PortAudioCallback, this); + } else { + std::cerr << "Unsupported BitsPerSample: " << bits_per_sample + << std::endl; + return false; + } + if (pa_open_ans != paNoError) { + std::cerr << "Fail to open PortAudio stream, error message is \"" + << Pa_GetErrorText(pa_open_ans) << "\"" << std::endl; + return false; + } + + PaError pa_stream_start_ans = Pa_StartStream(pa_stream_); + if (pa_stream_start_ans != paNoError) { + std::cerr << "Fail to start PortAudio stream, error message is \"" + << Pa_GetErrorText(pa_stream_start_ans) << "\"" << std::endl; + return false; + } + return true; + } + + private: + // Pointer to the ring buffer memory. + char* ringbuffer_; + + // Ring buffer wrapper used in PortAudio. + PaUtilRingBuffer pa_ringbuffer_; + + // Pointer to PortAudio stream. + PaStream* pa_stream_; + + // Number of lost samples at each Read() due to ring buffer overflow. + int num_lost_samples_; + + // Wait for this number of samples in each Read() call. 
+ int min_read_samples_; +}; + +int PortAudioCallback(const void* input, + void* output, + unsigned long frame_count, + const PaStreamCallbackTimeInfo* time_info, + PaStreamCallbackFlags status_flags, + void* user_data) { + PortAudioWrapper* pa_wrapper = reinterpret_cast(user_data); + pa_wrapper->Callback(input, output, frame_count, time_info, status_flags); + return paContinue; +} + +void SignalHandler(int signal){ + std::cerr << "Caught signal " << signal << ", terminating..." << std::endl; + exit(0); +} + +int main(int argc, char* argv[]) { + std::string usage = + "Example that shows how to use Snowboy in C++. Parameters are\n" + "hard-coded in the parameter section. Please check the source code for\n" + "more details. Audio is captured by PortAudio.\n" + "\n" + "To run the example:\n" + " ./demo\n"; + + // Checks the command. + if (argc > 1) { + std::cerr << usage; + exit(1); + } + + // Configures signal handling. + struct sigaction sig_int_handler; + sig_int_handler.sa_handler = SignalHandler; + sigemptyset(&sig_int_handler.sa_mask); + sig_int_handler.sa_flags = 0; + sigaction(SIGINT, &sig_int_handler, NULL); + + // Parameter section. + // If you have multiple hotword models (e.g., 2), you should set + // <model_filename> and <sensitivity_str> as follows: + // model_filename = "resources/snowboy.umdl,resources/alexa.pmdl"; + // sensitivity_str = "0.4,0.4"; + std::string resource_filename = "resources/common.res"; + std::string model_filename = "resources/snowboy.umdl"; + std::string sensitivity_str = "0.4"; + float audio_gain = 1; + + // Initializes Snowboy detector. + snowboy::SnowboyDetect detector(resource_filename, model_filename); + detector.SetSensitivity(sensitivity_str); + detector.SetAudioGain(audio_gain); + + // Initializes PortAudio. You may use other tools to capture the audio. + PortAudioWrapper pa_wrapper(detector.SampleRate(), + detector.NumChannels(), detector.BitsPerSample()); + + // Runs the detection. 
+ // Note: I hard-coded <int16_t> as data type because detector.BitsPerSample() + // returns 16. + std::cout << "Listening... Press Ctrl+C to exit" << std::endl; + std::vector data; + while (true) { + pa_wrapper.Read(&data); + if (data.size() != 0) { + int result = detector.RunDetection(data.data(), data.size()); + if (result > 0) { + std::cout << "Hotword " << result << " detected!" << std::endl; + } + } + } + + return 0; +} diff --git a/examples/C++/demo.mk b/examples/C++/demo.mk new file mode 100644 index 00000000..22b83700 --- /dev/null +++ b/examples/C++/demo.mk @@ -0,0 +1,53 @@ +TOPDIR := ../../ +DYNAMIC := True +CC = $(CXX) +CXX := +LDFLAGS := +LDLIBS := +PORTAUDIOINC := portaudio/install/include +PORTAUDIOLIBS := portaudio/install/lib/libportaudio.a + +ifeq ($(DYNAMIC), True) + CXXFLAGS += -fPIC +endif + +ifeq ($(shell uname -m | cut -c 1-3), x86) + CXXFLAGS += -msse -msse2 +endif + +ifeq ($(shell uname), Darwin) + # By default Mac uses clang++ as g++, but people may have changed their + # default configuration. 
+ CXX := clang++ + CXXFLAGS += -I$(TOPDIR) -Wall -Wno-sign-compare -Winit-self \ + -DHAVE_POSIX_MEMALIGN -DHAVE_CLAPACK -I$(PORTAUDIOINC) + LDLIBS += -ldl -lm -framework Accelerate -framework CoreAudio \ + -framework AudioToolbox -framework AudioUnit -framework CoreServices \ + $(PORTAUDIOLIBS) + SNOWBOYDETECTLIBFILE := $(TOPDIR)/lib/osx/libsnowboy-detect.a +else ifeq ($(shell uname), Linux) + CXX := g++ + CXXFLAGS += -I$(TOPDIR) -std=c++0x -Wall -Wno-sign-compare \ + -Wno-unused-local-typedefs -Winit-self -rdynamic \ + -DHAVE_POSIX_MEMALIGN -I$(PORTAUDIOINC) + LDLIBS += -ldl -lm -Wl,-Bstatic -Wl,-Bdynamic -lrt -lpthread $(PORTAUDIOLIBS) + ifneq ($(wildcard $(PORTAUDIOINC)/pa_linux_alsa.h),) + LDLIBS += -lasound + endif + ifneq ($(wildcard $(PORTAUDIOINC)/pa_jack.h),) + LDLIBS += -ljack + endif + SNOWBOYDETECTLIBFILE := $(TOPDIR)/lib/ubuntu64/libsnowboy-detect.a + ifneq (,$(findstring arm,$(shell uname -m))) + SNOWBOYDETECTLIBFILE := $(TOPDIR)/lib/rpi/libsnowboy-detect.a + endif +endif + +# Suppress clang warnings... +COMPILER = $(shell $(CXX) -v 2>&1 ) +ifeq ($(findstring clang,$(COMPILER)), clang) + CXXFLAGS += -Wno-mismatched-tags -Wno-c++11-extensions +endif + +# Set optimization level. +CXXFLAGS += -O3 diff --git a/examples/C++/install_portaudio.sh b/examples/C++/install_portaudio.sh new file mode 100755 index 00000000..beaccd4f --- /dev/null +++ b/examples/C++/install_portaudio.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# This script attempts to install PortAudio, which can grab a live audio stream +# from the soundcard. + +echo "Installing portaudio" + +if [ ! 
-e pa_stable_v19_20140130.tgz ]; then + wget -T 10 -t 3 \ + http://www.portaudio.com/archives/pa_stable_v19_20140130.tgz || exit 1; +fi + +tar -xovzf pa_stable_v19_20140130.tgz || exit 1 + +cd portaudio +patch < ../patches/portaudio.patch + +MACOS=`uname 2>/dev/null | grep Darwin` +if [ -z "$MACOS" ]; then + ./configure --prefix=`pwd`/install --with-pic + sed -i '40s:src/common/pa_ringbuffer.o::g' Makefile + sed -i '40s:$: src/common/pa_ringbuffer.o:' Makefile +else + # People may have changed OSX's default configuration -- we use clang++. + CC=clang CXX=clang++ ./configure --prefix=`pwd`/install --with-pic +fi + +make +make install + +cd .. diff --git a/examples/C++/patches/portaudio.patch b/examples/C++/patches/portaudio.patch new file mode 100644 index 00000000..3def7c67 --- /dev/null +++ b/examples/C++/patches/portaudio.patch @@ -0,0 +1,38 @@ +--- Makefile.in 2016-01-09 14:05:04.096356637 -0500 ++++ Makefile_new.in 2016-01-09 14:04:56.667925681 -0500 +@@ -193,6 +193,8 @@ + for include in $(INCLUDES); do \ + $(INSTALL_DATA) -m 644 $(top_srcdir)/include/$$include $(DESTDIR)$(includedir)/$$include; \ + done ++ $(INSTALL_DATA) -m 644 $(top_srcdir)/src/common/pa_ringbuffer.h $(DESTDIR)$(includedir)/$$include ++ $(INSTALL_DATA) -m 644 $(top_srcdir)/src/common/pa_util.h $(DESTDIR)$(includedir)/$$include + $(INSTALL) -d $(DESTDIR)$(libdir)/pkgconfig + $(INSTALL) -m 644 portaudio-2.0.pc $(DESTDIR)$(libdir)/pkgconfig/portaudio-2.0.pc + @echo "" +--- configure 2016-03-08 18:00:08.000000000 -0800 ++++ configure_new 2016-03-08 17:59:21.000000000 -0800 +@@ -15787,7 +15787,7 @@ + $as_echo "#define PA_USE_COREAUDIO 1" >>confdefs.h + + +- CFLAGS="$CFLAGS -I\$(top_srcdir)/src/os/unix -Werror" ++ CFLAGS="$CFLAGS -I\$(top_srcdir)/src/os/unix -Wall" + LIBS="-framework CoreAudio -framework AudioToolbox -framework AudioUnit -framework Carbon" + + if test "x$enable_mac_universal" = "xyes" ; then +@@ -15819,8 +15819,14 @@ + elif xcodebuild -version -sdk macosx10.9 Path >/dev/null 2>&1 
; then + mac_version_min="-mmacosx-version-min=10.4" + mac_sysroot="-isysroot `xcodebuild -version -sdk macosx10.9 Path`" ++ elif xcodebuild -version -sdk macosx10.10 Path >/dev/null 2>&1 ; then ++ mac_version_min="-mmacosx-version-min=10.4" ++ mac_sysroot="-isysroot `xcodebuild -version -sdk macosx10.10 Path`" ++ elif xcodebuild -version -sdk macosx10.11 Path >/dev/null 2>&1 ; then ++ mac_version_min="-mmacosx-version-min=10.4" ++ mac_sysroot="-isysroot `xcodebuild -version -sdk macosx10.11 Path`" + else +- as_fn_error $? "Couldn't find 10.5, 10.6, 10.7, 10.8 or 10.9 SDK" "$LINENO" 5 ++ as_fn_error $? "Couldn't find 10.5, 10.6, 10.7, 10.8, 10.9, 10.10 or 10.11 SDK" "$LINENO" 5 + fi + esac + diff --git a/swig/python/resources b/examples/C++/resources similarity index 100% rename from swig/python/resources rename to examples/C++/resources diff --git a/examples/Python/_snowboydetect.so b/examples/Python/_snowboydetect.so new file mode 120000 index 00000000..3f67a404 --- /dev/null +++ b/examples/Python/_snowboydetect.so @@ -0,0 +1 @@ +../../swig/Python/_snowboydetect.so \ No newline at end of file diff --git a/swig/python/demo.py b/examples/Python/demo.py similarity index 100% rename from swig/python/demo.py rename to examples/Python/demo.py diff --git a/swig/python/demo2.py b/examples/Python/demo2.py similarity index 100% rename from swig/python/demo2.py rename to examples/Python/demo2.py diff --git a/swig/python/requirements.txt b/examples/Python/requirements.txt similarity index 100% rename from swig/python/requirements.txt rename to examples/Python/requirements.txt diff --git a/examples/Python/resources b/examples/Python/resources new file mode 120000 index 00000000..81bd1c59 --- /dev/null +++ b/examples/Python/resources @@ -0,0 +1 @@ +../../resources/ \ No newline at end of file diff --git a/swig/python/snowboydecoder.py b/examples/Python/snowboydecoder.py similarity index 100% rename from swig/python/snowboydecoder.py rename to examples/Python/snowboydecoder.py 
diff --git a/examples/Python/snowboydetect.py b/examples/Python/snowboydetect.py new file mode 120000 index 00000000..a1393ff2 --- /dev/null +++ b/examples/Python/snowboydetect.py @@ -0,0 +1 @@ +../../swig/Python/snowboydetect.py \ No newline at end of file diff --git a/include/snowboy-detect.h b/include/snowboy-detect.h index 3982ee49..ea992d79 100644 --- a/include/snowboy-detect.h +++ b/include/snowboy-detect.h @@ -67,6 +67,20 @@ class SnowboyDetect { // above for the supported data format. int RunDetection(const std::string& data); + // Various versions of RunDetection() that take different format of audio. If + // NumChannels() > 1, e.g., NumChannels() == 2, then the array is as follows: + // + // d1c1, d1c2, d2c1, d2c2, d3c1, d3c2, ..., dNc1, dNc2 + // + // where d1c1 means data point 1 of channel 1. + // + // @param [in] data Small chunk of data to be detected. See + // above for the supported data format. + // @param [in] array_length Length of the data array. + int RunDetection(const float* const data, const int array_length); + int RunDetection(const int16_t* const data, const int array_length); + int RunDetection(const int32_t* const data, const int array_length); + // Sets the sensitivity string for the loaded hotwords. A is // a list of floating numbers between 0 and 1, and separated by comma. 
For // example, if there are 3 loaded hotwords, your string should looks something diff --git a/lib/ios/libsnowboy-detect.a b/lib/ios/libsnowboy-detect.a index 3c8eac4c..d67849e5 100644 Binary files a/lib/ios/libsnowboy-detect.a and b/lib/ios/libsnowboy-detect.a differ diff --git a/lib/osx/libsnowboy-detect.a b/lib/osx/libsnowboy-detect.a index 002aa919..0e63c25d 100644 Binary files a/lib/osx/libsnowboy-detect.a and b/lib/osx/libsnowboy-detect.a differ diff --git a/lib/rpi/libsnowboy-detect.a b/lib/rpi/libsnowboy-detect.a index cf849929..d2f007d0 100644 Binary files a/lib/rpi/libsnowboy-detect.a and b/lib/rpi/libsnowboy-detect.a differ diff --git a/lib/ubuntu64/libsnowboy-detect.a b/lib/ubuntu64/libsnowboy-detect.a index 1c2d09cf..6f1a6040 100644 Binary files a/lib/ubuntu64/libsnowboy-detect.a and b/lib/ubuntu64/libsnowboy-detect.a differ diff --git a/swig/python/Makefile b/swig/Python/Makefile similarity index 100% rename from swig/python/Makefile rename to swig/Python/Makefile diff --git a/swig/python/snowboy-detect-swig.i b/swig/Python/snowboy-detect-swig.i similarity index 87% rename from swig/python/snowboy-detect-swig.i rename to swig/Python/snowboy-detect-swig.i index 8fe450eb..76633993 100644 --- a/swig/python/snowboy-detect-swig.i +++ b/swig/Python/snowboy-detect-swig.i @@ -1,4 +1,4 @@ -// swig/snowboy-detect-swig.i +// swig/Python/snowboy-detect-swig.i // Copyright 2016 KITT.AI (author: Guoguo Chen)