#include #include #include #include #include #include #include "fzy.h" int has_match(const char *needle, const char *haystack){ while(*needle){ if(!*haystack) return 0; while(*haystack && tolower(*needle) == tolower(*haystack++)) needle++; } return 1; } #define max(a, b) (((a) > (b)) ? (a) : (b)) typedef double score_t; #define SCORE_MAX INFINITY #define SCORE_MIN -INFINITY /* print one of the internal matrices */ void mat_print(score_t *mat, int n, int m){ int i, j; for(i = 0; i < n; i++){ for(j = 0; j < m; j++){ score_t val = mat[i*m + j]; if(val == SCORE_MIN){ fprintf(stderr, " -inf"); }else{ fprintf(stderr, " % .2f", val); } } fprintf(stderr, "\n"); } fprintf(stderr, "\n\n"); } double calculate_score(const char *needle, const char *haystack, size_t *positions){ if(!*haystack || !*needle) return SCORE_MIN; int n = strlen(needle); int m = strlen(haystack); if(m > 1024){ /* * Unreasonably large candidate: return no score * If it is a valid match it will still be returned, it will * just be ranked below any reasonably sized candidates */ return 0; } score_t match_bonus[m]; score_t D[n][m], M[n][m]; bzero(D, sizeof(D)); bzero(M, sizeof(M)); /* * D[][] Stores the best score for this position ending with a match. * M[][] Stores the best possible score at this position. */ #define SCORE_GAP_LEADING -0.005 #define SCORE_GAP_TRAILING -0.005 #define SCORE_GAP_INNER -0.01 #define SCORE_MATCH_CONSECUTIVE 1.0 #define SCORE_MATCH_SLASH 1.5 #define SCORE_MATCH_WORD 1.2 #define SCORE_MATCH_CAPITAL 1.1 #define SCORE_MATCH_DOT 0.8 /* Which positions are beginning of words */ char last_ch = '\0'; for(int i = 0; i < m; i++){ char ch = haystack[i]; score_t score = 0; if(isalnum(ch)){ if(!last_ch || last_ch == '/'){ score = SCORE_MATCH_SLASH; }else if(last_ch == '-' || last_ch == '_' || last_ch == ' ' || (last_ch >= '0' && last_ch <= '9')){ score = SCORE_MATCH_WORD; }else if(last_ch >= 'a' && last_ch <= 'z' && ch >= 'A' && ch <= 'Z'){ /* CamelCase */ score = SCORE_MATCH_CAPITAL; }else if(last_ch == '.'){ score = SCORE_MATCH_DOT; } } match_bonus[i] = score; last_ch = ch; } for(int i = 0; i < n; i++){ for(int j = 0; j < m; j++){ score_t score = (j || i) ? SCORE_MIN : 0; int match = tolower(needle[i]) == tolower(haystack[j]); D[i][j] = SCORE_MIN; if(match){ if(i && j){ score = max(score, M[i-1][j-1] + match_bonus[j]); /* consecutive match, doesn't stack with match_bonus */ score = max(score, D[i-1][j-1] + SCORE_MATCH_CONSECUTIVE); }else if(!i){ score = (j * SCORE_GAP_LEADING) + match_bonus[j]; } D[i][j] = score; } if(j){ if(i == n-1){ score = max(score, M[i][j-1] + SCORE_GAP_TRAILING); }else{ score = max(score, M[i][j-1] + SCORE_GAP_INNER); } } M[i][j] = score; } } #if 0 printf("\"%s\" =~ \"%s\"\n", needle, haystack); mat_print(&D[0][0], n, m); mat_print(&M[0][0], n, m); printf("\n"); #endif /* backtrace to find the positions of optimal matching */ if(positions){ for(int i = n-1, j = m-1; i >= 0; i--){ for(; j >= 0; j--){ /* * There may be multiple paths which result in * the optimal weight. * * For simplicity, we will pick the first one * we encounter, the latest in the candidate * string. */ if(tolower(needle[i]) == tolower(haystack[j]) && D[i][j] == M[i][j]){ positions[i] = j--; break; } } } } return M[n-1][m-1]; } double match_positions(const char *needle, const char *haystack, size_t *positions){ if(!*needle){ return SCORE_MAX; }else if(!has_match(needle, haystack)){ return SCORE_MIN; }else if(!strcasecmp(needle, haystack)){ if(positions){ int n = strlen(needle); for(int i = 0; i < n; i++) positions[i] = i; } return SCORE_MAX; }else{ return calculate_score(needle, haystack, positions); } } double match(const char *needle, const char *haystack){ return match_positions(needle, haystack, NULL); }