Frames

A walk through of fast string scoring algorithm.

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
1/*!
2 * string_score.js: String Scoring Algorithm 0.1.22
3 *
4 * http://joshaven.com/string_score
5 * https://github.com/joshaven/string_score
6 *
7 * Free to use, modify, etc... see attached license for more info
8 * Copyright (C) 2009-2015 Joshaven Potter <yourtech@gmail.com>
9 * MIT License: http://opensource.org/licenses/MIT
10 * Special thanks to all of the contributors listed here https://github.com/joshaven/string_score
11 *
12 * Updated: Tue Mar 10 2015
13*/
14
15/*jslint nomen:true, white:true, browser:true,devel:true */
16
17/**
18 * Scores a string against another string.
19 * 'Hello World'.score('he'); //=> 0.5931818181818181
20 * 'Hello World'.score('Hello'); //=> 0.7318181818181818
21 */
22String.prototype.score = function (word, fuzziness) {
23 'use strict';
24
25 // If the string is equal to the word, perfect match.
26 if (this === word) { return 1; }
27
28 //if it's not a perfect match and is empty return 0
29 if (word === "") { return 0; }
30
31 var runningScore = 0,
32 charScore,
33 finalScore,
34 string = this,
35 lString = string.toLowerCase(),
36 strLength = string.length,
37 lWord = word.toLowerCase(),
38 wordLength = word.length,
39 idxOf,
40 startAt = 0,
41 fuzzies = 1,
42 fuzzyFactor,
43 i;
44
45 // Cache fuzzyFactor for speed increase
46 if (fuzziness) { fuzzyFactor = 1 - fuzziness; }
47
48 // Walk through word and add up scores.
49 // Code duplication occurs to prevent checking fuzziness inside for loop
50 if (fuzziness) {
51 for (i = 0; i < wordLength; i+=1) {
52
53 // Find next first case-insensitive match of a character.
54 idxOf = lString.indexOf(lWord[i], startAt);
55
56 if (idxOf === -1) {
57 fuzzies += fuzzyFactor;
58 } else {
59 if (startAt === idxOf) {
60 // Consecutive letter & start-of-string Bonus
61 charScore = 0.7;
62 } else {
63 charScore = 0.1;
64
65 // Acronym Bonus
66 // Weighing Logic: Typing the first character of an acronym is as if you
67 // preceded it with two perfect character matches.
68 if (string[idxOf - 1] === ' ') { charScore += 0.8; }
69 }
70
71 // Same case bonus.
72 if (string[idxOf] === word[i]) { charScore += 0.1; }
73
74 // Update scores and startAt position for next round of indexOf
75 runningScore += charScore;
76 startAt = idxOf + 1;
77 }
78 }
79 } else {
80 for (i = 0; i < wordLength; i+=1) {
81 idxOf = lString.indexOf(lWord[i], startAt);
82 if (-1 === idxOf) { return 0; }
83
84 if (startAt === idxOf) {
85 charScore = 0.7;
86 } else {
87 charScore = 0.1;
88 if (string[idxOf - 1] === ' ') { charScore += 0.8; }
89 }
90 if (string[idxOf] === word[i]) { charScore += 0.1; }
91 runningScore += charScore;
92 startAt = idxOf + 1;
93 }
94 }
95
96 // Reduce penalty for longer strings.
97 finalScore = 0.5 * (runningScore / strLength + runningScore / wordLength) / fuzzies;
98
99 if ((lWord[0] === lString[0]) && (finalScore < 0.85)) {
100 finalScore += 0.15;
101 }
102
103 return finalScore;
104};
105

This module scores a string against another string.


Its one of the fastest way to compare two strings and ported in many languages. This story will walk through the implementation of the scoring algorithm.