diff options
author | John Hawthorn <john@hawthorn.email> | 2019-12-27 22:26:48 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-12-27 22:26:48 -0800 |
commit | 25ebdd4f95f024bce2ec5b84f72c30bbd63ed57a (patch) | |
tree | ba3ff5313500d3b0078c78e1daadce36460c8a3d | |
parent | f770d7da17850036df887b2f41a86c38ba255135 (diff) | |
parent | 66316266df901eab81ae116c6e0728d10d0214e4 (diff) |
Merge pull request #132 from jhawthorn/no_vla
Avoid use of VLA and reduce memory usage in match to O(m)
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | src/match.c | 221 | ||||
-rw-r--r-- | src/match.h | 2 | ||||
-rw-r--r-- | src/tty_interface.c | 4 | ||||
-rw-r--r-- | test/test_choices.c | 4 |
5 files changed, 146 insertions, 87 deletions
@@ -1,7 +1,7 @@ VERSION=1.0 CPPFLAGS=-DVERSION=\"${VERSION}\" -D_GNU_SOURCE -CFLAGS+=-Wall -Wextra -g -std=c99 -O3 -pedantic -Ideps +CFLAGS+=-Wall -Wextra -g -std=c99 -O3 -pedantic -Ideps -Werror=vla PREFIX?=/usr/local MANDIR?=$(PREFIX)/share/man BINDIR?=$(PREFIX)/bin diff --git a/src/match.c b/src/match.c index 2a0b058..a0c0785 100644 --- a/src/match.c +++ b/src/match.c @@ -4,6 +4,7 @@ #include <stdio.h> #include <float.h> #include <math.h> +#include <stdlib.h> #include "match.h" #include "bonus.h" @@ -27,122 +28,177 @@ int has_match(const char *needle, const char *haystack) { return 1; } +#define SWAP(x, y, T) do { T SWAP = x; x = y; y = SWAP; } while (0) + #define max(a, b) (((a) > (b)) ? (a) : (b)) -#ifdef DEBUG_VERBOSE -/* print one of the internal matrices */ -void mat_print(score_t *mat, char name, const char *needle, const char *haystack) { - int n = strlen(needle); - int m = strlen(haystack); - int i, j; - fprintf(stderr, "%c ", name); - for (j = 0; j < m; j++) { - fprintf(stderr, " %c", haystack[j]); - } - fprintf(stderr, "\n"); - for (i = 0; i < n; i++) { - fprintf(stderr, " %c |", needle[i]); - for (j = 0; j < m; j++) { - score_t val = mat[i * m + j]; - if (val == SCORE_MIN) { - fprintf(stderr, " -\u221E"); - } else { - fprintf(stderr, " %.3f", val); - } - } - fprintf(stderr, "\n"); - } - fprintf(stderr, "\n\n"); -} -#endif +struct match_struct { + int needle_len; + int haystack_len; + + char lower_needle[MATCH_MAX_LEN]; + char lower_haystack[MATCH_MAX_LEN]; + + score_t match_bonus[MATCH_MAX_LEN]; +}; static void precompute_bonus(const char *haystack, score_t *match_bonus) { /* Which positions are beginning of words */ - int m = strlen(haystack); char last_ch = '/'; - for (int i = 0; i < m; i++) { + for (int i = 0; haystack[i]; i++) { char ch = haystack[i]; match_bonus[i] = COMPUTE_BONUS(last_ch, ch); last_ch = ch; } } -score_t match_positions(const char *needle, const char *haystack, size_t *positions) { +static void setup_match_struct(struct match_struct *match, const char *needle, const char *haystack) { + match->needle_len = strlen(needle); + match->haystack_len = strlen(haystack); + + if (match->needle_len > MATCH_MAX_LEN || match->needle_len > match->haystack_len) { + return; + } + + for (int i = 0; i < match->needle_len; i++) + match->lower_needle[i] = tolower(needle[i]); + + for (int i = 0; i < match->haystack_len; i++) + match->lower_haystack[i] = tolower(haystack[i]); + + precompute_bonus(haystack, match->match_bonus); +} + +static inline void match_row(const struct match_struct *match, int row, score_t *curr_D, score_t *curr_M, const score_t *last_D, const score_t *last_M) { + int n = match->needle_len; + int m = match->haystack_len; + int i = row; + + const char *lower_needle = match->lower_needle; + const char *lower_haystack = match->lower_haystack; + const score_t *match_bonus = match->match_bonus; + + score_t prev_score = SCORE_MIN; + score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER; + + for (int j = 0; j < m; j++) { + if (lower_needle[i] == lower_haystack[j]) { + score_t score = SCORE_MIN; + if (!i) { + score = (j * SCORE_GAP_LEADING) + match_bonus[j]; + } else if (j) { /* i > 0 && j > 0*/ + score = max( + last_M[j - 1] + match_bonus[j], + + /* consecutive match, doesn't stack with match_bonus */ + last_D[j - 1] + SCORE_MATCH_CONSECUTIVE); + } + curr_D[j] = score; + curr_M[j] = prev_score = max(score, prev_score + gap_score); + } else { + curr_D[j] = SCORE_MIN; + curr_M[j] = prev_score = prev_score + gap_score; + } + } +} + +score_t match(const char *needle, const char *haystack) { if (!*needle) return SCORE_MIN; - int n = strlen(needle); - int m = strlen(haystack); + struct match_struct match; + setup_match_struct(&match, needle, haystack); - if (n == m) { + int n = match.needle_len; + int m = match.haystack_len; + + if (m > MATCH_MAX_LEN || n > m) { + /* + * Unreasonably large candidate: return no score + * If it is a valid match it will still be returned, it will + * just be ranked below any reasonably sized candidates + */ + return SCORE_MIN; + } else if (n == m) { /* Since this method can only be called with a haystack which * matches needle. If the lengths of the strings are equal the * strings themselves must also be equal (ignoring case). */ - if (positions) - for (int i = 0; i < n; i++) - positions[i] = i; return SCORE_MAX; } - if (m > 1024) { + /* + * D[][] Stores the best score for this position ending with a match. + * M[][] Stores the best possible score at this position. + */ + score_t D[2][MATCH_MAX_LEN], M[2][MATCH_MAX_LEN]; + + score_t *last_D, *last_M; + score_t *curr_D, *curr_M; + + last_D = D[0]; + last_M = M[0]; + curr_D = D[1]; + curr_M = M[1]; + + for (int i = 0; i < n; i++) { + match_row(&match, i, curr_D, curr_M, last_D, last_M); + + SWAP(curr_D, last_D, score_t *); + SWAP(curr_M, last_M, score_t *); + } + + return last_M[m - 1]; +} + +score_t match_positions(const char *needle, const char *haystack, size_t *positions) { + if (!*needle) + return SCORE_MIN; + + struct match_struct match; + setup_match_struct(&match, needle, haystack); + + int n = match.needle_len; + int m = match.haystack_len; + + if (m > MATCH_MAX_LEN || n > m) { /* * Unreasonably large candidate: return no score * If it is a valid match it will still be returned, it will * just be ranked below any reasonably sized candidates */ return SCORE_MIN; + } else if (n == m) { + /* Since this method can only be called with a haystack which + * matches needle. If the lengths of the strings are equal the + * strings themselves must also be equal (ignoring case). + */ + if (positions) + for (int i = 0; i < n; i++) + positions[i] = i; + return SCORE_MAX; } - char lower_needle[n]; - char lower_haystack[m]; - - for (int i = 0; i < n; i++) - lower_needle[i] = tolower(needle[i]); - - for (int i = 0; i < m; i++) - lower_haystack[i] = tolower(haystack[i]); - - score_t match_bonus[m]; - score_t D[n][m], M[n][m]; - /* * D[][] Stores the best score for this position ending with a match. * M[][] Stores the best possible score at this position. */ - precompute_bonus(haystack, match_bonus); + score_t (*D)[MATCH_MAX_LEN], (*M)[MATCH_MAX_LEN]; + M = malloc(sizeof(score_t) * MATCH_MAX_LEN * n); + D = malloc(sizeof(score_t) * MATCH_MAX_LEN * n); + + score_t *last_D, *last_M; + score_t *curr_D, *curr_M; for (int i = 0; i < n; i++) { - score_t prev_score = SCORE_MIN; - score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER; - - for (int j = 0; j < m; j++) { - if (lower_needle[i] == lower_haystack[j]) { - score_t score = SCORE_MIN; - if (!i) { - score = (j * SCORE_GAP_LEADING) + match_bonus[j]; - } else if (j) { /* i > 0 && j > 0*/ - score = max( - M[i - 1][j - 1] + match_bonus[j], - - /* consecutive match, doesn't stack with match_bonus */ - D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE); - } - D[i][j] = score; - M[i][j] = prev_score = max(score, prev_score + gap_score); - } else { - D[i][j] = SCORE_MIN; - M[i][j] = prev_score = prev_score + gap_score; - } - } - } + curr_D = &D[i][0]; + curr_M = &M[i][0]; -#ifdef DEBUG_VERBOSE - fprintf(stderr, "\"%s\" =~ \"%s\"\n", needle, haystack); - mat_print(&D[0][0], 'D', needle, haystack); - mat_print(&M[0][0], 'M', needle, haystack); - fprintf(stderr, "\n"); -#endif + match_row(&match, i, curr_D, curr_M, last_D, last_M); + + last_D = curr_D; + last_M = curr_M; + } /* backtrace to find the positions of optimal matching */ if (positions) { @@ -173,9 +229,10 @@ score_t match_positions(const char *needle, const char *haystack, size_t *positi } } - return M[n - 1][m - 1]; -} + score_t result = M[n - 1][m - 1]; -score_t match(const char *needle, const char *haystack) { - return match_positions(needle, haystack, NULL); + free(M); + free(D); + + return result; } diff --git a/src/match.h b/src/match.h index 0fa7b6f..d4f292c 100644 --- a/src/match.h +++ b/src/match.h @@ -7,6 +7,8 @@ typedef double score_t; #define SCORE_MAX INFINITY #define SCORE_MIN -INFINITY +#define MATCH_MAX_LEN 1024 + int has_match(const char *needle, const char *haystack); score_t match_positions(const char *needle, const char *haystack, size_t *positions); score_t match(const char *needle, const char *haystack); diff --git a/src/tty_interface.c b/src/tty_interface.c index 225f33a..e87301d 100644 --- a/src/tty_interface.c +++ b/src/tty_interface.c @@ -36,8 +36,8 @@ static void draw_match(tty_interface_t *state, const char *choice, int selected) char *search = state->last_search; int n = strlen(search); - size_t positions[n + 1]; - for (int i = 0; i < n + 1; i++) + size_t positions[MATCH_MAX_LEN]; + for (int i = 0; i < n + 1 && i < MATCH_MAX_LEN; i++) positions[i] = -1; score_t score = match_positions(search, choice, &positions[0]); diff --git a/test/test_choices.c b/test/test_choices.c index 3ad7d6a..d86bc12 100644 --- a/test/test_choices.c +++ b/test/test_choices.c @@ -135,8 +135,8 @@ TEST test_choices_unicode() { } TEST test_choices_large_input() { - int N = 100000; - char *strings[N]; + const int N = 100000; + char *strings[100000]; for(int i = 0; i < N; i++) { asprintf(&strings[i], "%i", i); |