diff options
author | John Hawthorn <john.hawthorn@gmail.com> | 2014-07-12 17:45:07 -0700 |
---|---|---|
committer | John Hawthorn <john.hawthorn@gmail.com> | 2014-07-12 22:11:45 -0700 |
commit | fcc187154468a8b73294b950fcb139edf0a1dda3 (patch) | |
tree | 8845f0b81e74f79a917ffa5e1ff05a343518501b | |
parent | bdfd476ce41bc219c03d1061f2e9daa2f0b8d1d0 (diff) |
New DP algorithm match scoring algorithm
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | match.c | 65 | ||||
-rw-r--r-- | test.rb | 4 |
3 files changed, 66 insertions, 5 deletions
@@ -1,4 +1,4 @@ -CFLAGS+=-Wall -Wextra -g +CFLAGS+=-Wall -Wextra -g -std=c99 all: fzy testscore @@ -1,5 +1,7 @@ #include <ctype.h> #include <string.h> +#include <strings.h> +#include <stdio.h> static int is_subset(const char *needle, const char *haystack){ while(*needle){ @@ -11,14 +13,73 @@ static int is_subset(const char *needle, const char *haystack){ return 1; } +/* print one of the internal matrices */ +void mat_print(int *mat, int n, int m){ + int i, j; + for(i = 0; i < n; i++){ + for(j = 0; j < m; j++){ + fprintf(stderr, " %3zd", mat[i*m + j]); + } + fprintf(stderr, "\n"); + } + fprintf(stderr, "\n\n"); +} + +#define max(a, b) (((a) > (b)) ? (a) : (b)) +typedef int score_t; + +double calculate_score(const char *needle, const char *haystack){ + int n = strlen(needle); + int m = strlen(haystack); + + int bow[m]; + int D[n][m], M[n][m]; + bzero(D, sizeof(D)); + bzero(M, sizeof(M)); + + /* + * D[][] Stores the best score for this position ending with a match. + * M[][] Stores the best possible score at this position. + */ + + /* Which positions are beginning of words */ + int at_bow = 1; + for(int i = 0; i < m; i++){ + char ch = haystack[i]; + /* TODO: What about allcaps (ex. README) */ + bow[i] = (at_bow && isalnum(ch)) || isupper(ch); + at_bow = !isalnum(ch); + } + + for(int i = 0; i < n; i++){ + for(int j = 0; j < m; j++){ + int match = tolower(needle[i]) == tolower(haystack[j]); + if(match){ + score_t score = 0; + if(i && j) + score = M[i-1][j-1]; + if(bow[j]) + score += 2; + else if(i && j && D[i-1][j-1]) + score = max(score, 1 + D[i-1][j-1]); + M[i][j] = D[i][j] = score; + } + if(j) + M[i][j] = max(M[i][j], M[i][j-1]); + } + } + + return (float)(M[n-1][m-1]) / (float)(n * 2 + 1); +} + double match(const char *needle, const char *haystack){ if(!*needle){ return 1.0; }else if(!is_subset(needle, haystack)){ - return 0.0; + return -1.0; }else if(!strcasecmp(needle, haystack)){ return 1.0; }else{ - return 0.9; + return calculate_score(needle, haystack); } } @@ -8,11 +8,11 @@ describe "score" do end def assert_unmatched(candidate, query) - assert_equal 0, score(candidate, query) + assert_equal -1, score(candidate, query) end def assert_matched(candidate, query) - assert_operator 0, :<, score(candidate, query) + assert_operator 0, :<=, score(candidate, query) end it "scores 1 when the query is empty" do |