Skip to content

Commit d3fffa1

Browse files
authored
Fix overlapFactor bug and cleanup a bit (#97)
Clean up the speech commands model:
- Fix an issue with `overlapFactor`, where if the user passed an overlap close to 1, the callback `f` in `startStream(f)` would still be called only once every 1 s. Now that callback can be called as frequently as the frame duration (tested in the work-in-progress codelab). This makes the demo run in real time, which is really nice.
- Remove `columnBufferLength` and `columnHopLength` as extractor config params, since they were only used to communicate `overlapFactor` indirectly between the recognizer and the extractor. Add `overlapFactor` as a config param instead.
- Change `DEFAULT_SUPPRESSION_TIME_MILLIS` from `1000ms` to `0ms`, so the user can get instant predictions.
- Remove the unused params `filterSize` from `RecognizerParams` and `minSamples` from `StreamingRecognitionConfig`.
- Move the top-level `utils/` folder to `training/utils`, since only training uses it.
- Remove `utils/util.ts` (all the util functions there were unused), other than `powerOfTwo`, which got moved to `audio_util.ts`.
- Make tests (`*_test.ts`) visible to the TypeScript compiler and linter.
- Improve `tslint.json`.
- s/SpeechCommands/speechCommands/ in Rollup to align with the other models.
- Minimize use of `any`.
1 parent 331a065 commit d3fffa1

25 files changed

+293
-434
lines changed

speech-commands/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ As the example above shows, you can specify optional parameters when calling
103103
`startStreaming()`. The supported parameters are:
104104

105105
* `overlapFactor`: Controls how often the recognizer performs prediction on
106-
spectrograms. Must be a number between 0 and 1 (default: 0.5). For example,
106+
spectrograms. Must be >=0 and <1 (default: 0.5). For example,
107107
if each spectrogram is 1000 ms long and `overlapFactor` is set to 0.25,
108108
the prediction will happen every 750 ms (i.e., 1000 ms * (1 - 0.25)).
109109
* `includeSpectrogram`: Let the callback function be invoked with the

speech-commands/package.json

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@tensorflow-models/speech-commands",
3-
"version": "0.1.3",
3+
"version": "0.1.4",
44
"description": "Speech-command recognizer in TensorFlow.js",
55
"main": "dist/index.js",
66
"unpkg": "dist/speech-commands.min.js",
@@ -19,28 +19,32 @@
1919
"@tensorflow/tfjs": "^0.13.1",
2020
"@tensorflow/tfjs-node": "^0.1.18",
2121
"@types/jasmine": "~2.8.8",
22+
"@types/rimraf": "^2.0.2",
23+
"@types/tempfile": "^2.0.0",
2224
"babel-core": "~6.26.0",
2325
"babel-plugin-transform-runtime": "~6.23.0",
26+
"clang-format": "^1.2.4",
2427
"dct": "^0.0.3",
2528
"jasmine": "^3.2.0",
2629
"jasmine-core": "^3.2.1",
2730
"kissfft-js": "^0.1.8",
2831
"rimraf": "2.6.2",
2932
"rollup": "~0.60.7",
3033
"rollup-plugin-node-resolve": "~3.3.0",
31-
"rollup-plugin-typescript2": "~0.15.0",
34+
"rollup-plugin-typescript2": "~0.13.0",
3235
"rollup-plugin-uglify": "~3.0.0",
3336
"tempfile": "2.0.0",
3437
"ts-node": "~5.0.0",
3538
"tslint": "~5.10.0",
39+
"tslint-no-circular-imports": "^0.6.1",
3640
"typescript": "2.9.2",
3741
"yalc": "~1.0.0-pre.21"
3842
},
3943
"scripts": {
40-
"build": "rimraf dist && tsc && rollup -c",
44+
"build": "tsc",
4145
"lint": "tslint -p . -t verbose",
42-
"publish-local": "yarn build && yalc push",
43-
"test": "yarn build && ts-node run_tests.ts"
46+
"publish-local": "rimraf dist && yarn build && rollup -c && yalc push",
47+
"test": "ts-node run_tests.ts"
4448
},
4549
"license": "Apache-2.0"
4650
}

speech-commands/rollup.config.js

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,22 @@ import node from 'rollup-plugin-node-resolve';
1919
import typescript from 'rollup-plugin-typescript2';
2020
import uglify from 'rollup-plugin-uglify';
2121

22-
const PREAMBLE =
23-
`// @tensorflow/tfjs-models Copyright ${(new Date).getFullYear()} Google`;
22+
const PREAMBLE = `/**
23+
* @license
24+
* Copyright ${(new Date).getFullYear()} Google LLC. All Rights Reserved.
25+
* Licensed under the Apache License, Version 2.0 (the "License");
26+
* you may not use this file except in compliance with the License.
27+
* You may obtain a copy of the License at
28+
*
29+
* http://www.apache.org/licenses/LICENSE-2.0
30+
*
31+
* Unless required by applicable law or agreed to in writing, software
32+
* distributed under the License is distributed on an "AS IS" BASIS,
33+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
34+
* See the License for the specific language governing permissions and
35+
* limitations under the License.
36+
* =============================================================================
37+
*/`;
2438

2539
function minify() {
2640
return uglify({
@@ -48,6 +62,7 @@ function config({
4862
],
4963
output: {
5064
banner: PREAMBLE,
65+
sourcemap: true,
5166
globals: {
5267
'@tensorflow/tfjs': 'tf'
5368
},
@@ -61,15 +76,15 @@ export default [
6176
config({
6277
output: {
6378
format: 'umd',
64-
name: 'SpeechCommands',
79+
name: 'speechCommands',
6580
file: 'dist/speech-commands.js'
6681
}
6782
}),
6883
config({
6984
plugins: [minify()],
7085
output: {
7186
format: 'umd',
72-
name: 'SpeechCommands',
87+
name: 'speechCommands',
7388
file: 'dist/speech-commands.min.js'
7489
}
7590
}),

speech-commands/src/browser_fft_extractor.ts

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@
2020
*/
2121

2222
import * as tf from '@tensorflow/tfjs';
23-
24-
// tslint:disable-next-line:max-line-length
2523
import {getAudioContextConstructor, getAudioMediaStream, normalize} from './browser_fft_utils';
2624
import {FeatureExtractor, RecognizerParams} from './types';
2725

@@ -61,6 +59,15 @@ export interface BrowserFftFeatureExtractorConfig extends RecognizerParams {
6159
* If `null` or `undefined`, will do no truncation.
6260
*/
6361
columnTruncateLength?: number;
62+
63+
/**
64+
* Overlap factor. Must be >=0 and <1.
65+
* For example, if the model takes a frame length of 1000 ms,
66+
* and if overlap factor is 0.4, there will be a 400ms
67+
* overlap between two successive frames, i.e., frames
68+
* will be taken every 600 ms.
69+
*/
70+
overlapFactor: number;
6471
}
6572

6673
/**
@@ -142,15 +149,12 @@ export class BrowserFftFeatureExtractor implements FeatureExtractor {
142149
this.fftSize = config.fftSize || 1024;
143150
this.frameDurationMillis = this.fftSize / this.sampleRateHz * 1e3;
144151
this.columnTruncateLength = config.columnTruncateLength || this.fftSize;
145-
const columnBufferLength = config.columnBufferLength || this.fftSize;
146-
const columnHopLength = config.columnHopLength || (this.fftSize / 2);
147-
this.overlapFactor = columnHopLength / columnBufferLength;
152+
this.overlapFactor = config.overlapFactor;
148153

149-
if (!(this.overlapFactor > 0)) {
150-
throw new Error(
151-
`Invalid overlapFactor: ${this.overlapFactor}. ` +
152-
`Check your columnBufferLength and columnHopLength.`);
153-
}
154+
tf.util.assert(
155+
this.overlapFactor >= 0 && this.overlapFactor < 1,
156+
`Expected overlapFactor to be >= 0 and < 1, ` +
157+
`but got ${this.overlapFactor}`);
154158

155159
if (this.columnTruncateLength > this.fftSize) {
156160
throw new Error(
@@ -190,8 +194,10 @@ export class BrowserFftFeatureExtractor implements FeatureExtractor {
190194

191195
this.frameCount = 0;
192196

197+
const period = Math.max(
198+
1, Math.round(this.numFramesPerSpectrogram * (1 - this.overlapFactor)));
193199
this.tracker = new Tracker(
194-
Math.round(this.numFramesPerSpectrogram * this.overlapFactor),
200+
period,
195201
Math.round(this.suppressionTimeMillis / this.frameDurationMillis));
196202
this.frameIntervalTask = setInterval(
197203
this.onAudioFrame.bind(this), this.fftSize / this.sampleRateHz * 1e3);
@@ -200,7 +206,6 @@ export class BrowserFftFeatureExtractor implements FeatureExtractor {
200206
private async onAudioFrame() {
201207
this.analyser.getFloatFrequencyData(this.freqData);
202208
if (this.freqData[0] === -Infinity) {
203-
console.warn(`No signal (frame #${this.frameCount})`);
204209
return;
205210
}
206211

0 commit comments

Comments
 (0)