diff --git a/.eslintrc b/.eslintrc index 549455b..c036401 100644 --- a/.eslintrc +++ b/.eslintrc @@ -7,4 +7,4 @@ "rules": { "no-console": 0 } -} \ No newline at end of file +} diff --git a/.gitignore b/.gitignore index afef197..cd9b1de 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,9 @@ +# Distribution directory +dist/ + +# Typescript typings files +typings/ + # Logs logs *.log diff --git a/README.md b/README.md index 93e0823..b61f1f9 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@
- +
diff --git a/examples/annyang-example.js b/examples/annyang-example.js index bccaba0..06221b4 100644 --- a/examples/annyang-example.js +++ b/examples/annyang-example.js @@ -1,7 +1,7 @@ 'use strict' const ROOT_DIR = __dirname + '/../' -const Sonus = require(ROOT_DIR + 'index.js') +const Sonus = require(ROOT_DIR + 'dist/src/sonus.js') const speech = require('@google-cloud/speech')({ projectId: 'streaming-speech-sample', keyFilename: ROOT_DIR + 'keyfile.json' @@ -23,9 +23,9 @@ const commands = { } } -Sonus.annyang.addCommands(commands) +sonus.annyang.addCommands(commands) -Sonus.start(sonus) +sonus.start(); console.log('Say "' + hotwords[0].hotword + '"...') sonus.on('hotword', (index, keyword) => console.log("!" + keyword)) sonus.on('partial-result', result => console.log("Partial", result)) @@ -33,6 +33,6 @@ sonus.on('partial-result', result => console.log("Partial", result)) sonus.on('final-result', result => { console.log("Final", result) if (result.includes("stop")) { - Sonus.stop() + sonus.stop() } }) \ No newline at end of file diff --git a/examples/example.js b/examples/example.js index f1847a6..f394657 100644 --- a/examples/example.js +++ b/examples/example.js @@ -1,7 +1,7 @@ 'use strict' const ROOT_DIR = __dirname + '/../' -const Sonus = require(ROOT_DIR + 'index.js') +const Sonus = require(ROOT_DIR + 'dist/src/sonus.js') const speech = require('@google-cloud/speech')({ projectId: 'streaming-speech-sample', keyFilename: ROOT_DIR + 'keyfile.json' @@ -13,7 +13,7 @@ const language = "en-US" //recordProgram can also be 'arecord' which works much better on the Pi and low power devices const sonus = Sonus.init({ hotwords, language, recordProgram: "rec" }, speech) -Sonus.start(sonus) +sonus.start(); console.log('Say "' + hotwords[0].hotword + '"...') sonus.on('hotword', (index, keyword) => console.log("!" + keyword)) @@ -23,6 +23,6 @@ sonus.on('partial-result', result => console.log("Partial", result)) sonus.on('final-result', result => { console.log("Final", result) if (result.includes("stop")) { - Sonus.stop() + sonus.stop() } }) diff --git a/examples/trigger-example.js b/examples/trigger-example.js index 077f78c..914e47f 100644 --- a/examples/trigger-example.js +++ b/examples/trigger-example.js @@ -1,7 +1,7 @@ 'use strict' const ROOT_DIR = __dirname + '/../' -const Sonus = require(ROOT_DIR + 'index.js') +const Sonus = require(ROOT_DIR + 'dist/src/sonus.js') const speech = require('@google-cloud/speech')({ projectId: 'streaming-speech-sample', keyFilename: ROOT_DIR + 'keyfile.json' @@ -12,12 +12,12 @@ const language = "en-US" const sonus = Sonus.init({ hotwords, language }, speech) try{ - Sonus.trigger(sonus, 1) + sonus.trigger(1) } catch (e) { console.log('Triggering Sonus before starting it will throw the following exception:', e) } -Sonus.start(sonus) +sonus.start() sonus.on('hotword', (index, keyword) => console.log("!" + keyword)) @@ -28,15 +28,15 @@ sonus.on('error', (error) => console.log(error)) sonus.on('final-result', result => { console.log("Final", result) if (result.includes("stop")) { - Sonus.stop() + sonus.stop() } }) try{ - Sonus.trigger(sonus, 2) + sonus.trigger(2) } catch (e) { console.log('Triggering Sonus with an invalid index will throw the following error:', e) } //Will use index 0 with a hotword of "triggered" and start streaming immedietly -Sonus.trigger(sonus, 0, "some hotword") \ No newline at end of file +sonus.trigger(0, "some hotword") diff --git a/index.js b/index.js deleted file mode 100644 index 269e7df..0000000 --- a/index.js +++ /dev/null @@ -1,152 +0,0 @@ -'use strict' - -const record = require('node-record-lpcm16') -const stream = require('stream') -const {Detector, Models} = require('snowboy') - -const ERROR = { - NOT_STARTED: "NOT_STARTED", - INVALID_INDEX: "INVALID_INDEX" -} - -const CloudSpeechRecognizer = {} -CloudSpeechRecognizer.init = recognizer => { - const csr = new stream.Writable() - csr.listening = false - csr.recognizer = recognizer - return csr -} - -CloudSpeechRecognizer.startStreaming = (options, audioStream, cloudSpeechRecognizer) => { - if (cloudSpeechRecognizer.listening) { - return - } - - cloudSpeechRecognizer.listening = true - - const recognizer = cloudSpeechRecognizer.recognizer - const recognitionStream = recognizer.createRecognizeStream({ - config: { - encoding: 'LINEAR16', - sampleRate: 16000, - languageCode: options.language, - speechContext: options.speechContext || null - }, - singleUtterance: true, - interimResults: true, - verbose: true - }) - - recognitionStream.on('error', err => cloudSpeechRecognizer.emit('error', err)) - - - recognitionStream.on('data', data => { - if (data) { - cloudSpeechRecognizer.emit('data', data) - if (data.endpointerType === 'END_OF_UTTERANCE') { - cloudSpeechRecognizer.listening = false - audioStream.unpipe(recognitionStream) - } - } - }) - - audioStream.pipe(recognitionStream) -} - -const Sonus = {} -Sonus.annyang = require('./lib/annyang-core.js') - -Sonus.init = (options, recognizer) => { - // don't mutate options - const opts = Object.assign({}, options), - models = new Models(), - sonus = new stream.Writable(), - csr = CloudSpeechRecognizer.init(recognizer) - sonus.mic = {} - sonus.recordProgram = opts.recordProgram - sonus.device = opts.device - sonus.started = false - - // If we don't have any hotwords passed in, add the default global model - opts.hotwords = opts.hotwords || [1] - opts.hotwords.forEach(model => { - models.add({ - file: model.file || 'node_modules/snowboy/resources/snowboy.umdl', - sensitivity: model.sensitivity || '0.5', - hotwords: model.hotword || 'default' - }) - }) - - // defaults - opts.models = models - opts.resource = opts.resource || 'node_modules/snowboy/resources/common.res' - opts.audioGain = opts.audioGain || 2.0 - opts.language = opts.language || 'en-US' //https://cloud.google.com/speech/docs/languages - - const detector = sonus.detector = new Detector(opts) - - detector.on('silence', () => sonus.emit('silence')) - detector.on('sound', () => sonus.emit('sound')) - - // When a hotword is detected pipe the audio stream to speech detection - detector.on('hotword', (index, hotword) => { - sonus.trigger(index, hotword) - }) - - csr.on('error', error => sonus.emit('error', { streamingError: error })) - - let transcriptEmpty = true - csr.on('data', data => { - const result = data.results[0] - if (result) { - transcriptEmpty = false - if (result.isFinal) { - sonus.emit('final-result', result.transcript) - Sonus.annyang.trigger(result.transcript) - transcriptEmpty = true //reset transcript - } else { - sonus.emit('partial-result', result.transcript) - } - } else if (data.endpointerType === 'END_OF_UTTERANCE' && transcriptEmpty) { - sonus.emit('final-result', "") - } - }) - - sonus.trigger = (index, hotword) => { - if (sonus.started) { - try { - let triggerHotword = (index == 0) ? hotword : models.lookup(index) - sonus.emit('hotword', index, triggerHotword) - CloudSpeechRecognizer.startStreaming(opts, sonus.mic, csr) - } catch (e) { - throw ERROR.INVALID_INDEX - } - } else { - throw ERROR.NOT_STARTED - } - } - - return sonus -} - -Sonus.start = sonus => { - sonus.mic = record.start({ - threshold: 0, - device: sonus.device || null, - recordProgram: sonus.recordProgram || "rec", - verbose: false - }) - - sonus.mic.pipe(sonus.detector) - sonus.started = true -} - -Sonus.trigger = (sonus, index, hotword) => sonus.trigger(index, hotword) - -Sonus.pause = sonus => sonus.mic.pause() - -Sonus.resume = sonus => sonus.mic.resume() - -Sonus.stop = () => record.stop() - -module.exports = Sonus diff --git a/lib/annyang-core.js b/lib/annyang-core.js index 82c8b23..870185d 100644 --- a/lib/annyang-core.js +++ b/lib/annyang-core.js @@ -6,7 +6,6 @@ //! https://www.TalAter.com/annyang/ "use strict"; -let annyang; let commandsList = []; const callbacks = { start: [], error: [], end: [], result: [], resultMatch: [], resultNoMatch: [], errorNetwork: [], errorPermissionBlocked: [], errorPermissionDenied: [] }; let recognition; @@ -53,7 +52,7 @@ const logMessage = (text, extraParameters) => { const initIfNeeded = () => { if (!isInitialized()) { - annyang.init({}, false); + module.exports.annyang.init({}, false); } }; @@ -97,7 +96,7 @@ const parseResults = function (results) { invokeCallbacks(callbacks.resultNoMatch, results); }; -annyang = { +module.exports.annyang = { init: (commands, resetCommands) => { if (resetCommands === undefined) { @@ -202,5 +201,3 @@ annyang = { parseResults(sentences); } }; - -module.exports = annyang \ No newline at end of file diff --git a/package.json b/package.json index 25c9d95..6364faf 100644 --- a/package.json +++ b/package.json @@ -2,9 +2,12 @@ "name": "sonus", "version": "0.1.7", "description": "Open source cross platform decentralized always-on speech recognition framework", - "main": "index.js", + "main": "dist/src/sonus.js", "scripts": { - "test": "eslint ." + "test": "eslint . && tslint -c tslint.json src/**/*.ts", + "build": "tsc", + "example": "npm run build && node examples/example.js", + "prepublish": "npm run build" }, "repository": { "type": "git", @@ -32,6 +35,9 @@ "stream": "0.0.2" }, "devDependencies": { - "eslint": "^3.7.0" + "eslint": "^3.7.0", + "ts-node": "^2.1.0", + "tslint": "^4.5.1", + "typescript": "^2.2.1" } } diff --git a/sonus-small.png b/resources/sonus-small.png similarity index 100% rename from sonus-small.png rename to resources/sonus-small.png diff --git a/sonus.png b/resources/sonus.png similarity index 100% rename from sonus.png rename to resources/sonus.png diff --git a/src/cloud-speech-recognizer.ts b/src/cloud-speech-recognizer.ts new file mode 100644 index 0000000..0ec03ae --- /dev/null +++ b/src/cloud-speech-recognizer.ts @@ -0,0 +1,50 @@ +import { Writable } from 'stream'; + +export class CloudSpeechRecognizer { + private _listening: boolean; + private _recognizer: any; + private _stream: Writable; + + constructor(recognizer) { + this._recognizer = recognizer; + this._stream = new Writable(); + this._listening = false; + } + + public startStreaming(options, audioStream) { + if (this._listening) { + return; + } + + this._listening = true; + + const recognitionStream = this._recognizer.createRecognizeStream({ + config: { + encoding: 'LINEAR16', + sampleRate: 16000, + languageCode: options.language, + speechContext: options.speechContext || null + }, + singleUtterance: true, + interimResults: true, + verbose: true + }); + + recognitionStream.on('error', err => this._stream.emit('error', err)); + recognitionStream.on('data', data => { + if (data) { + this._stream.emit('data', data); + if (data.endpointerType === 'END_OF_UTTERANCE') { + this._listening = false; + audioStream.unpipe(recognitionStream); + } + } + }); + + audioStream.pipe(recognitionStream); + } + + public on(event, handler) { + this._stream.on(event, handler); + } +} diff --git a/src/sonus.ts b/src/sonus.ts new file mode 100644 index 0000000..69f2494 --- /dev/null +++ b/src/sonus.ts @@ -0,0 +1,132 @@ +import { Detector, Models } from 'snowboy'; +import { start as startRecording, stop as stopRecording } from 'node-record-lpcm16'; +import { Writable } from 'stream'; +import { annyang } from '../lib/annyang-core.js'; +import { CloudSpeechRecognizer } from './cloud-speech-recognizer'; + +const ERROR = { + NOT_STARTED: "NOT_STARTED", + INVALID_INDEX: "INVALID_INDEX" +}; + +export function init(options, recognizer) { + return new Sonus(options, recognizer); +} + +class Sonus { + private _opts: any; + private _stream: Writable; + private _csr: CloudSpeechRecognizer; + private _mic: any; + private _recordProgram: string; + private _device: any; + private _started: boolean; + private _models: Models; + private _detector: Detector; + private _transcriptEmpty: boolean; + + public annyang = annyang; + + constructor(options, recognizer) { + this._opts = { ...options }; + this._stream = new Writable(); + this._csr = new CloudSpeechRecognizer(recognizer); + + this._mic = {}; + this._recordProgram = this._opts.recordProgram; + this._device = this._opts.device; + this._started = false; + + // Create a hotword detector, and listen for hotwords. + this._models = new Models(); + const hotwords = this._opts.hotwords || [1]; + hotwords.forEach((model) => { + this._models.add({ + file: model.file || 'resources/snowboy.umdl', + sensitivity: model.sensitivity || '0.5', + hotwords: model.hotword || 'default' + }); + }); + + const detectorOptions = { + models: this._models, + resource: options.resource || 'resources/common.res', + audioGain: options.audioGain || 2.0, + language: options.language || 'en-US' // https://cloud.google.com/speech/docs/languages + }; + + this._detector = new Detector(detectorOptions); + this._detector.on('silence', () => this._stream.emit('silence')); + this._detector.on('sound', () => this._stream.emit('sound')); + + // When a hotword is detected, pipe the audio stream to speech detection. + this._detector.on('hotword', (index, hotword) => this._onHotword(index, hotword)); + + // Listen for speech recognition results. + this._transcriptEmpty = true; + this._csr.on('data', data => this._onSpeechData(data)); + this._csr.on('error', error => this._stream.emit('error', { streamingError: error })); + } + + public start() { + this._mic = startRecording({ + threshold: 0, + device: this._device || null, + recordProgram: this._recordProgram || "rec", + verbose: false + }); + + this._mic.pipe(this._detector); + this._started = true; + } + + public stop() { + stopRecording(); + } + + public pause() { + this._mic.pause(); + } + + public resume() { + this._mic.resume(); + } + + public on(event: string, handler: Function) { + this._stream.on(event, handler); + } + + public trigger(index: number, hotword: string) { + this._onHotword(index, hotword); + } + + private _onHotword(index: number, hotword: string) { + if (!this._started) { + throw ERROR.NOT_STARTED; + } + + try { + let triggerHotword = (index === 0) ? hotword : this._models.lookup(index); + this._stream.emit('hotword', index, triggerHotword); + this._csr.startStreaming(this._opts, this._mic); + } catch (e) { + throw ERROR.INVALID_INDEX; + } + } + + private _onSpeechData(data) { + const result = data.results[0]; + if (result) { + this._transcriptEmpty = false; + if (result.isFinal) { + this._stream.emit('final-result', result.transcript); + annyang.trigger(result.transcript); + this._transcriptEmpty = true; // reset transcript + } else { + this._stream.emit('partial-result', result.transcript); + } + } else if (data.endpointerType === 'END_OF_UTTERANCE' && this._transcriptEmpty) { + this._stream.emit('final-result', ""); + } + } +} diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..8baf46d --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,22 @@ +{ + "compilerOptions": { + "allowJs": true, + "target": "es2017", + "module": "commonjs", + "moduleResolution": "node", + "noEmitOnError": true, + "outDir": "dist/", + "strictNullChecks": true, + "noImplicitThis": true, + "inlineSourceMap": true, + "noUnusedLocals": true, + "noUnusedParameters": true, + "noImplicitAny": false + }, + "exclude": [ + "node_modules", + "dist", + "typings", + "examples" + ] +} diff --git a/tslint.json b/tslint.json new file mode 100644 index 0000000..2281c42 --- /dev/null +++ b/tslint.json @@ -0,0 +1,23 @@ +{ + "extends": "tslint:recommended", + "rules": { + "quotemark": [ + false + ], + "variable-name": [ + "check-format", + "allow-leading-underscore" + ], + "object-literal-sort-keys": false, + "arrow-parens": false, + "trailing-comma": [ + false + ], + "ordered-imports": [ + false + ], + "member-ordering": [ + false + ] + } +} \ No newline at end of file diff --git a/typings.json b/typings.json new file mode 100644 index 0000000..1b73a1f --- /dev/null +++ b/typings.json @@ -0,0 +1,5 @@ +{ + "globalDependencies": { + "node": "registry:dt/node#7.0.0+20170204020307" + } +}