-
Notifications
You must be signed in to change notification settings - Fork 1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #11 from Kitt-AI/devel
Devel
- Loading branch information
Showing
22 changed files
with
416 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,12 @@ | ||
/lib/libsnowboy-detect.a | ||
snowboy-detect-swig.cc | ||
snowboydetect.py | ||
.DS_Store | ||
|
||
*.pyc | ||
*.o | ||
*.so | ||
|
||
/examples/C++/pa_stable_v19_20140130.tgz | ||
/examples/C++/portaudio | ||
/examples/C++/demo |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,7 +7,7 @@ by [KITT.AI](http://kitt.ai). | |
[Full Documentation](https://snowboy.kitt.ai/docs) | ||
|
||
|
||
Version: 1.0.1 (5/16/2016) | ||
Version: 1.0.2 (5/24/2016) | ||
|
||
Snowboy is a customizable hotword detection engine for you to create your own | ||
hotword like "OK Google" or "Alexa". It is powered by deep neural networks and has the following properties: | ||
|
@@ -32,11 +32,11 @@ It ships in the form of a **C library** with **Python** wrappers generated by SW | |
If you want support on other hardware/OS, please send your request to [snowboy@kitt.ai](mailto:snowboy@kitt.ai) | ||
|
||
## Precompiled Binaries | ||
* 64 bit Ubuntu [12.04](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/ubuntu1204-x86_64-1.0.1.tar.bz2) | ||
/ [14.04](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/ubuntu1404-x86_64-1.0.1.tar.bz2) | ||
* [MacOS X](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/osx-x86_64-1.0.1.tar.bz2) | ||
* 64 bit Ubuntu [12.04](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/ubuntu1204-x86_64-1.0.2.tar.bz2) | ||
/ [14.04](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/ubuntu1404-x86_64-1.0.2.tar.bz2) | ||
* [MacOS X](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/osx-x86_64-1.0.2.tar.bz2) | ||
* Raspberry Pi with Raspbian 8.0, all versions | ||
([1/2/3/Zero](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/rpi-arm-raspbian-8.0-1.0.1.tar.bz2)) | ||
([1/2/3/Zero](https://s3-us-west-2.amazonaws.com/snowboy/snowboy-releases/rpi-arm-raspbian-8.0-1.0.2.tar.bz2)) | ||
|
||
If you want to compile a version against your own environment/language, read on. | ||
|
||
|
@@ -76,17 +76,17 @@ If you need extra setup on your audio (especially on a Raspberry Pi), please see | |
|
||
## Compile a Python Wrapper | ||
|
||
cd swig/python | ||
cd swig/Python | ||
make | ||
|
||
SWIG will generate a `_snowboydetect.so` file and a simple (but hard-to-read) python wrapper `snowboydetect.py`. We have provided a higher level python wrapper `snowboydecoder.py` on top of that. | ||
|
||
Feel free to adapt the `Makefile` in `swig/python` to your own system's setting if you cannot `make` it. | ||
Feel free to adapt the `Makefile` in `swig/Python` to your own system's setting if you cannot `make` it. | ||
|
||
|
||
## Quick Start | ||
|
||
Go to the `swig/python` folder and open your python console: | ||
Go to the `examples/Python` folder and open your python console: | ||
|
||
In [1]: import snowboydecoder | ||
|
||
|
@@ -120,11 +120,16 @@ See [Full Documentation](https://snowboy.kitt.ai/docs). | |
|
||
## Change Log | ||
|
||
**v1.0.2, 5/24/2016** | ||
|
||
* Updated universal `snowboy.umdl` model | ||
* Added C++ examples; documentation will come in the next release. | ||
|
||
**v1.0.1, 5/16/2016** | ||
|
||
* VAD now returns -2 on silence, -1 on error, 0 on voice and >0 on triggered models | ||
* added static library for Raspberry Pi in case people want to compile themselves instead of using the binary version | ||
|
||
**v1.0.0, 5/10/2016** | ||
|
||
* initial release | ||
* initial release |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# Build rules for the Snowboy C++ demo. Compiler, flags and library paths
# (CXX, CXXFLAGS, LDLIBS, PORTAUDIOLIBS, SNOWBOYDETECTLIBFILE) come from
# demo.mk.
include demo.mk

BINFILES = demo

all: $(BINFILES)

# These targets are commands, not files; declaring them phony keeps a stray
# file named "all", "clean" or "depend" from silently disabling them.
.PHONY: all clean depend

# A static library without an explicit rule is built by running make in the
# directory that contains it, asking for the library by its file name.
%.a:
	$(MAKE) -C ${@D} ${@F}

# No recipe is given here, so make's implicit rules compile and link the demo
# (demo.mk sets CC to $(CXX)); the libraries only need to exist first.
$(BINFILES): $(PORTAUDIOLIBS) $(SNOWBOYDETECTLIBFILE)

# Errors from the install script are deliberately ignored ("-") and the
# command is not echoed ("@").
$(PORTAUDIOLIBS):
	@-./install_portaudio.sh

clean:
	$(RM) *.o *.a $(BINFILES)

# Regenerates the header dependency list; compiler errors are ignored.
depend:
	-$(CXX) -M $(CXXFLAGS) *.cc > .depend.mk

# Putting "-" so no error messages.
-include .depend.mk
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,235 @@ | ||
// examples/C++/demo.cc | ||
|
||
// Copyright 2016 KITT.AI (author: Guoguo Chen) | ||
|
||
#include <cassert> | ||
#include <csignal> | ||
#include <iostream> | ||
#include <pa_ringbuffer.h> | ||
#include <pa_util.h> | ||
#include <portaudio.h> | ||
#include <string> | ||
#include <vector> | ||
|
||
#include "include/snowboy-detect.h" | ||
|
||
int PortAudioCallback(const void* input, | ||
void* output, | ||
unsigned long frame_count, | ||
const PaStreamCallbackTimeInfo* time_info, | ||
PaStreamCallbackFlags status_flags, | ||
void* user_data); | ||
|
||
class PortAudioWrapper { | ||
public: | ||
// Constructor. | ||
PortAudioWrapper(int sample_rate, int num_channels, int bits_per_sample) { | ||
num_lost_samples_ = 0; | ||
min_read_samples_ = sample_rate * 0.1; | ||
Init(sample_rate, num_channels, bits_per_sample); | ||
} | ||
|
||
// Reads data from ring buffer. | ||
template<typename T> | ||
void Read(std::vector<T>* data) { | ||
assert(data != NULL); | ||
|
||
// Checks ring buffer overflow. | ||
if (num_lost_samples_ > 0) { | ||
std::cerr << "Lost " << num_lost_samples_ << " samples due to ring" | ||
<< " buffer overflow." << std::endl; | ||
num_lost_samples_ = 0; | ||
} | ||
|
||
ring_buffer_size_t num_available_samples = 0; | ||
while (true) { | ||
num_available_samples = | ||
PaUtil_GetRingBufferReadAvailable(&pa_ringbuffer_); | ||
if (num_available_samples >= min_read_samples_) { | ||
break; | ||
} | ||
Pa_Sleep(5); | ||
} | ||
|
||
// Reads data. | ||
num_available_samples = PaUtil_GetRingBufferReadAvailable(&pa_ringbuffer_); | ||
data->resize(num_available_samples); | ||
ring_buffer_size_t num_read_samples = PaUtil_ReadRingBuffer( | ||
&pa_ringbuffer_, data->data(), num_available_samples); | ||
if (num_read_samples != num_available_samples) { | ||
std::cerr << num_available_samples << " samples were available, but " | ||
<< "only " << num_read_samples << " samples were read." << std::endl; | ||
} | ||
} | ||
|
||
int Callback(const void* input, void* output, | ||
unsigned long frame_count, | ||
const PaStreamCallbackTimeInfo* time_info, | ||
PaStreamCallbackFlags status_flags) { | ||
// Input audio. | ||
ring_buffer_size_t num_written_samples = | ||
PaUtil_WriteRingBuffer(&pa_ringbuffer_, input, frame_count); | ||
num_lost_samples_ += frame_count - num_written_samples; | ||
return paContinue; | ||
} | ||
|
||
~PortAudioWrapper() { | ||
Pa_StopStream(pa_stream_); | ||
Pa_CloseStream(pa_stream_); | ||
Pa_Terminate(); | ||
PaUtil_FreeMemory(ringbuffer_); | ||
} | ||
|
||
private: | ||
// Initialization. | ||
bool Init(int sample_rate, int num_channels, int bits_per_sample) { | ||
// Allocates ring buffer memory. | ||
int ringbuffer_size = 16384; | ||
ringbuffer_ = static_cast<char*>( | ||
PaUtil_AllocateMemory(bits_per_sample / 8 * ringbuffer_size)); | ||
if (ringbuffer_ == NULL) { | ||
std::cerr << "Fail to allocate memory for ring buffer." << std::endl; | ||
return false; | ||
} | ||
|
||
// Initializes PortAudio ring buffer. | ||
ring_buffer_size_t rb_init_ans = | ||
PaUtil_InitializeRingBuffer(&pa_ringbuffer_, bits_per_sample / 8, | ||
ringbuffer_size, ringbuffer_); | ||
if (rb_init_ans == -1) { | ||
std::cerr << "Ring buffer size is not power of 2." << std::endl; | ||
return false; | ||
} | ||
|
||
// Initializes PortAudio. | ||
PaError pa_init_ans = Pa_Initialize(); | ||
if (pa_init_ans != paNoError) { | ||
std::cerr << "Fail to initialize PortAudio, error message is \"" | ||
<< Pa_GetErrorText(pa_init_ans) << "\"" << std::endl; | ||
return false; | ||
} | ||
|
||
PaError pa_open_ans; | ||
if (bits_per_sample == 8) { | ||
pa_open_ans = Pa_OpenDefaultStream( | ||
&pa_stream_, num_channels, 0, paUInt8, sample_rate, | ||
paFramesPerBufferUnspecified, PortAudioCallback, this); | ||
} else if (bits_per_sample == 16) { | ||
pa_open_ans = Pa_OpenDefaultStream( | ||
&pa_stream_, num_channels, 0, paInt16, sample_rate, | ||
paFramesPerBufferUnspecified, PortAudioCallback, this); | ||
} else if (bits_per_sample == 32) { | ||
pa_open_ans = Pa_OpenDefaultStream( | ||
&pa_stream_, num_channels, 0, paInt32, sample_rate, | ||
paFramesPerBufferUnspecified, PortAudioCallback, this); | ||
} else { | ||
std::cerr << "Unsupported BitsPerSample: " << bits_per_sample | ||
<< std::endl; | ||
return false; | ||
} | ||
if (pa_open_ans != paNoError) { | ||
std::cerr << "Fail to open PortAudio stream, error message is \"" | ||
<< Pa_GetErrorText(pa_open_ans) << "\"" << std::endl; | ||
return false; | ||
} | ||
|
||
PaError pa_stream_start_ans = Pa_StartStream(pa_stream_); | ||
if (pa_stream_start_ans != paNoError) { | ||
std::cerr << "Fail to start PortAudio stream, error message is \"" | ||
<< Pa_GetErrorText(pa_stream_start_ans) << "\"" << std::endl; | ||
return false; | ||
} | ||
return true; | ||
} | ||
|
||
private: | ||
// Pointer to the ring buffer memory. | ||
char* ringbuffer_; | ||
|
||
// Ring buffer wrapper used in PortAudio. | ||
PaUtilRingBuffer pa_ringbuffer_; | ||
|
||
// Pointer to PortAudio stream. | ||
PaStream* pa_stream_; | ||
|
||
// Number of lost samples at each Read() due to ring buffer overflow. | ||
int num_lost_samples_; | ||
|
||
// Wait for this number of samples in each Read() call. | ||
int min_read_samples_; | ||
}; | ||
|
||
int PortAudioCallback(const void* input, | ||
void* output, | ||
unsigned long frame_count, | ||
const PaStreamCallbackTimeInfo* time_info, | ||
PaStreamCallbackFlags status_flags, | ||
void* user_data) { | ||
PortAudioWrapper* pa_wrapper = reinterpret_cast<PortAudioWrapper*>(user_data); | ||
pa_wrapper->Callback(input, output, frame_count, time_info, status_flags); | ||
return paContinue; | ||
} | ||
|
||
void SignalHandler(int signal){ | ||
std::cerr << "Caught signal " << signal << ", terminating..." << std::endl; | ||
exit(0); | ||
} | ||
|
||
int main(int argc, char* argv[]) { | ||
std::string usage = | ||
"Example that shows how to use Snowboy in C++. Parameters are\n" | ||
"hard-coded in the parameter section. Please check the source code for\n" | ||
"more details. Audio is captured by PortAudio.\n" | ||
"\n" | ||
"To run the example:\n" | ||
" ./demo\n"; | ||
|
||
// Checks the command. | ||
if (argc > 1) { | ||
std::cerr << usage; | ||
exit(1); | ||
} | ||
|
||
// Configures signal handling. | ||
struct sigaction sig_int_handler; | ||
sig_int_handler.sa_handler = SignalHandler; | ||
sigemptyset(&sig_int_handler.sa_mask); | ||
sig_int_handler.sa_flags = 0; | ||
sigaction(SIGINT, &sig_int_handler, NULL); | ||
|
||
// Parameter section. | ||
// If you have multiple hotword models (e.g., 2), you should set | ||
// <model_filename> and <sensitivity_str> as follows: | ||
// model_filename = "resources/snowboy.umdl,resources/alexa.pmdl"; | ||
// sensitivity_str = "0.4,0.4"; | ||
std::string resource_filename = "resources/common.res"; | ||
std::string model_filename = "resources/snowboy.umdl"; | ||
std::string sensitivity_str = "0.4"; | ||
float audio_gain = 1; | ||
|
||
// Initializes Snowboy detector. | ||
snowboy::SnowboyDetect detector(resource_filename, model_filename); | ||
detector.SetSensitivity(sensitivity_str); | ||
detector.SetAudioGain(audio_gain); | ||
|
||
// Initializes PortAudio. You may use other tools to capture the audio. | ||
PortAudioWrapper pa_wrapper(detector.SampleRate(), | ||
detector.NumChannels(), detector.BitsPerSample()); | ||
|
||
// Runs the detection. | ||
// Note: I hard-coded <int16_t> as data type because detector.BitsPerSample() | ||
// returns 16. | ||
std::cout << "Listening... Press Ctrl+C to exit" << std::endl; | ||
std::vector<int16_t> data; | ||
while (true) { | ||
pa_wrapper.Read(&data); | ||
if (data.size() != 0) { | ||
int result = detector.RunDetection(data.data(), data.size()); | ||
if (result > 0) { | ||
std::cout << "Hotword " << result << " detected!" << std::endl; | ||
} | ||
} | ||
} | ||
|
||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
# Platform-specific compiler and linker settings for the C++ demo. Included by
# the Makefile in this directory, which uses CXX, CXXFLAGS, LDLIBS,
# PORTAUDIOLIBS and SNOWBOYDETECTLIBFILE.

TOPDIR := ../../
DYNAMIC := True
# Recursive on purpose: CXX is chosen further down, and make's implicit link
# rule uses CC, so the demo gets linked with the C++ compiler.
CC = $(CXX)
# CXX is intentionally not blanked here: on a platform that is neither Darwin
# nor Linux make's default C++ compiler stays in effect, so the compiler probe
# at the bottom of this file does not run an empty command.
LDFLAGS :=
LDLIBS :=
PORTAUDIOINC := portaudio/install/include
PORTAUDIOLIBS := portaudio/install/lib/libportaudio.a

ifeq ($(DYNAMIC), True)
  CXXFLAGS += -fPIC
endif

# "uname -m" output starting with "x86" enables the SSE flags.
ifeq ($(shell uname -m | cut -c 1-3), x86)
  CXXFLAGS += -msse -msse2
endif

ifeq ($(shell uname), Darwin)
  # By default Mac uses clang++ as g++, but people may have changed their
  # default configuration.
  CXX := clang++
  CXXFLAGS += -I$(TOPDIR) -Wall -Wno-sign-compare -Winit-self \
      -DHAVE_POSIX_MEMALIGN -DHAVE_CLAPACK -I$(PORTAUDIOINC)
  LDLIBS += -ldl -lm -framework Accelerate -framework CoreAudio \
      -framework AudioToolbox -framework AudioUnit -framework CoreServices \
      $(PORTAUDIOLIBS)
  SNOWBOYDETECTLIBFILE := $(TOPDIR)/lib/osx/libsnowboy-detect.a
else ifeq ($(shell uname), Linux)
  CXX := g++
  CXXFLAGS += -I$(TOPDIR) -std=c++0x -Wall -Wno-sign-compare \
      -Wno-unused-local-typedefs -Winit-self -rdynamic \
      -DHAVE_POSIX_MEMALIGN -I$(PORTAUDIOINC)
  LDLIBS += -ldl -lm -Wl,-Bstatic -Wl,-Bdynamic -lrt -lpthread $(PORTAUDIOLIBS)
  # Link ALSA / JACK only when the installed PortAudio headers include the
  # corresponding host API header.
  ifneq ($(wildcard $(PORTAUDIOINC)/pa_linux_alsa.h),)
    LDLIBS += -lasound
  endif
  ifneq ($(wildcard $(PORTAUDIOINC)/pa_jack.h),)
    LDLIBS += -ljack
  endif
  SNOWBOYDETECTLIBFILE := $(TOPDIR)/lib/ubuntu64/libsnowboy-detect.a
  # Machines whose "uname -m" contains "arm" use the Raspberry Pi library.
  ifneq (,$(findstring arm,$(shell uname -m)))
    SNOWBOYDETECTLIBFILE := $(TOPDIR)/lib/rpi/libsnowboy-detect.a
  endif
endif

# Suppress clang warnings...
# Expanded once with ":=" so the compiler is probed a single time.
COMPILER := $(shell $(CXX) -v 2>&1 )
ifeq ($(findstring clang,$(COMPILER)), clang)
  CXXFLAGS += -Wno-mismatched-tags -Wno-c++11-extensions
endif

# Set optimization level.
CXXFLAGS += -O3
Oops, something went wrong.