Fixed bugs around recognizing choices:
- Emojis weren't being recognized.
- The allowPartialMatches option wasn't being passed all the way down.
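
For context, a minimal sketch of the first bug (not part of the commit): \w in a JavaScript regex only matches [A-Za-z0-9_], so the old token matcher silently dropped emoji from the utterance.

// Old approach: \w+ never matches emoji characters, so they vanish.
const simpleTokenizer = /\w+/ig;
'option 🚀'.match(simpleTokenizer);   // -> ['option']; the 🚀 is lost
// The fix below walks the string character by character and splits on an
// explicit breakingChars set instead, so '🚀' survives as its own token.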
Stevenic committed May 16, 2017
1 parent 4f8f376 commit b0bc71c
Showing 2 changed files with 61 additions and 15 deletions.
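(Node/core/lib is the compiled JavaScript output of Node/core/src, so the two diffs below are the same change twice: first in the generated .js, then in the TypeScript source.)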
30 changes: 26 additions & 4 deletions Node/core/lib/dialogs/PromptRecognizers.js
@@ -2,7 +2,7 @@
Object.defineProperty(exports, "__esModule", { value: true });
var EntityRecognizer_1 = require("./EntityRecognizer");
var consts = require("../consts");
- var simpleTokenizer = /\w+/ig;
+ var breakingChars = " \n\r~`!@#$%^&*()-+={}|[]\\:\";'<>?,./";
var PromptRecognizers = (function () {
function PromptRecognizers() {
}
@@ -164,7 +164,7 @@ var PromptRecognizers = (function () {
values.push(action.value);
}
}
- var match = PromptRecognizers.findTopEntity(PromptRecognizers.recognizeValues(utterance, values));
+ var match = PromptRecognizers.findTopEntity(PromptRecognizers.recognizeValues(utterance, values, options));
if (match) {
entities.push({
type: consts.Entities.Match,
@@ -214,12 +214,12 @@ var PromptRecognizers = (function () {
options = options || {};
var entities = [];
var text = utterance.trim().toLowerCase();
- var tokens = matchAll(simpleTokenizer, text);
+ var tokens = tokenize(text);
var maxDistance = options.hasOwnProperty('maxTokenDistance') ? options.maxTokenDistance : 2;
values.forEach(function (value, index) {
if (typeof value === 'string') {
var topScore = 0.0;
- var vTokens = matchAll(simpleTokenizer, value.trim().toLowerCase());
+ var vTokens = tokenize(value.trim().toLowerCase());
for (var i = 0; i < tokens.length; i++) {
var score = matchValue(vTokens, i);
if (score > topScore) {
@@ -278,3 +278,25 @@ function matchAll(exp, text) {
}
return matches;
}
+function tokenize(text) {
+    var tokens = [];
+    if (text && text.length > 0) {
+        var token = '';
+        for (var i = 0; i < text.length; i++) {
+            var chr = text[i];
+            if (breakingChars.indexOf(chr) >= 0) {
+                if (token.length > 0) {
+                    tokens.push(token);
+                }
+                token = '';
+            }
+            else {
+                token += chr;
+            }
+        }
+        if (token.length > 0) {
+            tokens.push(token);
+        }
+    }
+    return tokens;
+}
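
A quick usage sketch of the new tokenizer (hypothetical input, not from the diff, calling the module-private helper added above): any character outside the breakingChars set, emoji included, is accumulated into the current token, and punctuation or whitespace flushes it.

tokenize('Hi! Pick 🍕 or 🌮, please.');
// -> ['Hi', 'Pick', '🍕', 'or', '🌮', 'please']
// '!', ',', '.', and spaces are all in breakingChars, so they split
// tokens; the emoji are kept because they are not breaking characters.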
46 changes: 35 additions & 11 deletions Node/core/src/dialogs/PromptRecognizers.ts
@@ -36,7 +36,7 @@ import { EntityRecognizer, IFindMatchResult } from './EntityRecognizer';
import * as consts from '../consts';
import * as chrono from 'chrono-node';

- const simpleTokenizer = /\w+/ig;
+ const breakingChars = " \n\r~`!@#$%^&*()-+={}|[]\\:\";'<>?,./";

export type StringOrRegExp = string|RegExp;

@@ -114,8 +114,8 @@ export class PromptRecognizers {
// Ensure cached
let key = namespace + ':' + expId;
let entities: IEntity<string>[] = [];
- let locale = context.preferredLocale();
- let utterance = context.message.text ? context.message.text.trim() : '';
+ const locale = context.preferredLocale();
+ const utterance = context.message.text ? context.message.text.trim() : '';
let cache = this.expCache[key];
if (!cache) {
this.expCache[key] = cache = {};
@@ -142,8 +142,8 @@ export class PromptRecognizers {
// Ensure cached
let key = namespace + ':' + listId;
let entities: IEntity<string>[] = [];
- let locale = context.preferredLocale();
- let utterance = context.message.text ? context.message.text.trim() : '';
+ const locale = context.preferredLocale();
+ const utterance = context.message.text ? context.message.text.trim() : '';
let cache = this.choiceCache[key];
if (!cache) {
this.expCache[key] = cache = {};
@@ -272,7 +272,7 @@ export class PromptRecognizers {
options = options || {};
let refData = options.refDate ? new Date(options.refDate) : null;
let entities: IEntity<string>[] = [];
- let utterance = context.message.text ? context.message.text.trim() : '';
+ const utterance = context.message.text ? context.message.text.trim() : '';
let entity = EntityRecognizer.recognizeTime(utterance, refData);
if (entity) {
entity.score = PromptRecognizers.calculateScore(utterance, entity.entity);
@@ -302,7 +302,7 @@ export class PromptRecognizers {
}

// Recognize matched values.
- let match = PromptRecognizers.findTopEntity(PromptRecognizers.recognizeValues(utterance, values));
+ let match = PromptRecognizers.findTopEntity(PromptRecognizers.recognizeValues(utterance, values, options));
if (match) {
// Push the choice onto the list of matches.
entities.push({
@@ -352,7 +352,7 @@ export class PromptRecognizers {
let score = 0.0;
if (matched > 0 && (matched == vTokens.length || options.allowPartialMatches)) {
// Percentage of tokens matched. If matching "second last" in
// "the second from the last one" the completness would be 1.0 since
// "the second from the last one" the completeness would be 1.0 since
// all tokens were found.
let completeness = matched / vTokens.length;

@@ -376,14 +376,14 @@ export class PromptRecognizers {
options = options || {};
let entities: IEntity<number>[] = [];
let text = utterance.trim().toLowerCase();
- let tokens = matchAll(simpleTokenizer, text);
+ let tokens = tokenize(text);
let maxDistance = options.hasOwnProperty('maxTokenDistance') ? options.maxTokenDistance : 2;
values.forEach((value, index) => {
if (typeof value === 'string') {
// To match "last one" in "the last time I chose the last one" we need
// to recursively search the utterance starting from each token position.
let topScore = 0.0;
- let vTokens = matchAll(simpleTokenizer, (<string>value).trim().toLowerCase());
+ let vTokens = tokenize((<string>value).trim().toLowerCase());
for (let i = 0; i < tokens.length; i++) {
let score = matchValue(vTokens, i);
if (score > topScore) {
@@ -439,4 +439,28 @@ function matchAll(exp: RegExp, text: string): string[] {
matches.push(match[0]);
}
return matches;
- }
+ }

+/** Breaks a string of text into an array of tokens. */
+function tokenize(text: string): string[] {
+    let tokens: string[] = [];
+    if (text && text.length > 0) {
+        let token = '';
+        for (let i = 0; i < text.length; i++) {
+            const chr = text[i];
+            if (breakingChars.indexOf(chr) >= 0) {
+                if (token.length > 0) {
+                    tokens.push(token);
+                }
+                token = '';
+            } else {
+                token += chr;
+            }
+        }
+        if (token.length > 0) {
+            tokens.push(token);
+        }
+    }
+    return tokens;
+}
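
To see the second fix in action, a hedged call sketch: the recognizeChoices name comes from this file's class, but the choice shape and the exact score are illustrative only. Before this commit, the options argument was dropped before reaching recognizeValues, so allowPartialMatches had no effect; with the fix, matching only 'second' against the two-token value 'second choice' scores (completeness 0.5) instead of failing outright.

import { PromptRecognizers } from 'botbuilder';   // SDK v3 export, per this repo

const matches = PromptRecognizers.recognizeChoices(
    'the second one',                      // user utterance
    [{ value: 'second choice' }],          // illustrative choice list
    { allowPartialMatches: true }          // now forwarded down to recognizeValues()
);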
