summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Hawthorn <john@hawthorn.email>2019-12-27 22:26:48 -0800
committerGitHub <noreply@github.com>2019-12-27 22:26:48 -0800
commit25ebdd4f95f024bce2ec5b84f72c30bbd63ed57a (patch)
treeba3ff5313500d3b0078c78e1daadce36460c8a3d
parentf770d7da17850036df887b2f41a86c38ba255135 (diff)
parent66316266df901eab81ae116c6e0728d10d0214e4 (diff)
Merge pull request #132 from jhawthorn/no_vla
Avoid use of VLA and reduce memory usage in match to O(m)
-rw-r--r--Makefile2
-rw-r--r--src/match.c221
-rw-r--r--src/match.h2
-rw-r--r--src/tty_interface.c4
-rw-r--r--test/test_choices.c4
5 files changed, 146 insertions, 87 deletions
diff --git a/Makefile b/Makefile
index c1f6234..746994f 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
VERSION=1.0
CPPFLAGS=-DVERSION=\"${VERSION}\" -D_GNU_SOURCE
-CFLAGS+=-Wall -Wextra -g -std=c99 -O3 -pedantic -Ideps
+CFLAGS+=-Wall -Wextra -g -std=c99 -O3 -pedantic -Ideps -Werror=vla
PREFIX?=/usr/local
MANDIR?=$(PREFIX)/share/man
BINDIR?=$(PREFIX)/bin
diff --git a/src/match.c b/src/match.c
index 2a0b058..a0c0785 100644
--- a/src/match.c
+++ b/src/match.c
@@ -4,6 +4,7 @@
#include <stdio.h>
#include <float.h>
#include <math.h>
+#include <stdlib.h>
#include "match.h"
#include "bonus.h"
@@ -27,122 +28,177 @@ int has_match(const char *needle, const char *haystack) {
return 1;
}
+#define SWAP(x, y, T) do { T SWAP = x; x = y; y = SWAP; } while (0)
+
#define max(a, b) (((a) > (b)) ? (a) : (b))
-#ifdef DEBUG_VERBOSE
-/* print one of the internal matrices */
-void mat_print(score_t *mat, char name, const char *needle, const char *haystack) {
- int n = strlen(needle);
- int m = strlen(haystack);
- int i, j;
- fprintf(stderr, "%c ", name);
- for (j = 0; j < m; j++) {
- fprintf(stderr, " %c", haystack[j]);
- }
- fprintf(stderr, "\n");
- for (i = 0; i < n; i++) {
- fprintf(stderr, " %c |", needle[i]);
- for (j = 0; j < m; j++) {
- score_t val = mat[i * m + j];
- if (val == SCORE_MIN) {
- fprintf(stderr, " -\u221E");
- } else {
- fprintf(stderr, " %.3f", val);
- }
- }
- fprintf(stderr, "\n");
- }
- fprintf(stderr, "\n\n");
-}
-#endif
+struct match_struct {
+ int needle_len;
+ int haystack_len;
+
+ char lower_needle[MATCH_MAX_LEN];
+ char lower_haystack[MATCH_MAX_LEN];
+
+ score_t match_bonus[MATCH_MAX_LEN];
+};
static void precompute_bonus(const char *haystack, score_t *match_bonus) {
/* Which positions are beginning of words */
- int m = strlen(haystack);
char last_ch = '/';
- for (int i = 0; i < m; i++) {
+ for (int i = 0; haystack[i]; i++) {
char ch = haystack[i];
match_bonus[i] = COMPUTE_BONUS(last_ch, ch);
last_ch = ch;
}
}
-score_t match_positions(const char *needle, const char *haystack, size_t *positions) {
+static void setup_match_struct(struct match_struct *match, const char *needle, const char *haystack) {
+ match->needle_len = strlen(needle);
+ match->haystack_len = strlen(haystack);
+
+ if (match->needle_len > MATCH_MAX_LEN || match->needle_len > match->haystack_len) {
+ return;
+ }
+
+ for (int i = 0; i < match->needle_len; i++)
+ match->lower_needle[i] = tolower(needle[i]);
+
+ for (int i = 0; i < match->haystack_len; i++)
+ match->lower_haystack[i] = tolower(haystack[i]);
+
+ precompute_bonus(haystack, match->match_bonus);
+}
+
+static inline void match_row(const struct match_struct *match, int row, score_t *curr_D, score_t *curr_M, const score_t *last_D, const score_t *last_M) {
+ int n = match->needle_len;
+ int m = match->haystack_len;
+ int i = row;
+
+ const char *lower_needle = match->lower_needle;
+ const char *lower_haystack = match->lower_haystack;
+ const score_t *match_bonus = match->match_bonus;
+
+ score_t prev_score = SCORE_MIN;
+ score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER;
+
+ for (int j = 0; j < m; j++) {
+ if (lower_needle[i] == lower_haystack[j]) {
+ score_t score = SCORE_MIN;
+ if (!i) {
+ score = (j * SCORE_GAP_LEADING) + match_bonus[j];
+ } else if (j) { /* i > 0 && j > 0*/
+ score = max(
+ last_M[j - 1] + match_bonus[j],
+
+ /* consecutive match, doesn't stack with match_bonus */
+ last_D[j - 1] + SCORE_MATCH_CONSECUTIVE);
+ }
+ curr_D[j] = score;
+ curr_M[j] = prev_score = max(score, prev_score + gap_score);
+ } else {
+ curr_D[j] = SCORE_MIN;
+ curr_M[j] = prev_score = prev_score + gap_score;
+ }
+ }
+}
+
+score_t match(const char *needle, const char *haystack) {
if (!*needle)
return SCORE_MIN;
- int n = strlen(needle);
- int m = strlen(haystack);
+ struct match_struct match;
+ setup_match_struct(&match, needle, haystack);
- if (n == m) {
+ int n = match.needle_len;
+ int m = match.haystack_len;
+
+ if (m > MATCH_MAX_LEN || n > m) {
+ /*
+ * Unreasonably large candidate: return no score
+ * If it is a valid match it will still be returned, it will
+ * just be ranked below any reasonably sized candidates
+ */
+ return SCORE_MIN;
+ } else if (n == m) {
/* Since this method can only be called with a haystack which
* matches needle. If the lengths of the strings are equal the
* strings themselves must also be equal (ignoring case).
*/
- if (positions)
- for (int i = 0; i < n; i++)
- positions[i] = i;
return SCORE_MAX;
}
- if (m > 1024) {
+ /*
+ * D[][] Stores the best score for this position ending with a match.
+ * M[][] Stores the best possible score at this position.
+ */
+ score_t D[2][MATCH_MAX_LEN], M[2][MATCH_MAX_LEN];
+
+ score_t *last_D, *last_M;
+ score_t *curr_D, *curr_M;
+
+ last_D = D[0];
+ last_M = M[0];
+ curr_D = D[1];
+ curr_M = M[1];
+
+ for (int i = 0; i < n; i++) {
+ match_row(&match, i, curr_D, curr_M, last_D, last_M);
+
+ SWAP(curr_D, last_D, score_t *);
+ SWAP(curr_M, last_M, score_t *);
+ }
+
+ return last_M[m - 1];
+}
+
+score_t match_positions(const char *needle, const char *haystack, size_t *positions) {
+ if (!*needle)
+ return SCORE_MIN;
+
+ struct match_struct match;
+ setup_match_struct(&match, needle, haystack);
+
+ int n = match.needle_len;
+ int m = match.haystack_len;
+
+ if (m > MATCH_MAX_LEN || n > m) {
/*
* Unreasonably large candidate: return no score
* If it is a valid match it will still be returned, it will
* just be ranked below any reasonably sized candidates
*/
return SCORE_MIN;
+ } else if (n == m) {
+ /* Since this method can only be called with a haystack which
+ * matches needle. If the lengths of the strings are equal the
+ * strings themselves must also be equal (ignoring case).
+ */
+ if (positions)
+ for (int i = 0; i < n; i++)
+ positions[i] = i;
+ return SCORE_MAX;
}
- char lower_needle[n];
- char lower_haystack[m];
-
- for (int i = 0; i < n; i++)
- lower_needle[i] = tolower(needle[i]);
-
- for (int i = 0; i < m; i++)
- lower_haystack[i] = tolower(haystack[i]);
-
- score_t match_bonus[m];
- score_t D[n][m], M[n][m];
-
/*
* D[][] Stores the best score for this position ending with a match.
* M[][] Stores the best possible score at this position.
*/
- precompute_bonus(haystack, match_bonus);
+ score_t (*D)[MATCH_MAX_LEN], (*M)[MATCH_MAX_LEN];
+ M = malloc(sizeof(score_t) * MATCH_MAX_LEN * n);
+ D = malloc(sizeof(score_t) * MATCH_MAX_LEN * n);
+
+ score_t *last_D, *last_M;
+ score_t *curr_D, *curr_M;
for (int i = 0; i < n; i++) {
- score_t prev_score = SCORE_MIN;
- score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER;
-
- for (int j = 0; j < m; j++) {
- if (lower_needle[i] == lower_haystack[j]) {
- score_t score = SCORE_MIN;
- if (!i) {
- score = (j * SCORE_GAP_LEADING) + match_bonus[j];
- } else if (j) { /* i > 0 && j > 0*/
- score = max(
- M[i - 1][j - 1] + match_bonus[j],
-
- /* consecutive match, doesn't stack with match_bonus */
- D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE);
- }
- D[i][j] = score;
- M[i][j] = prev_score = max(score, prev_score + gap_score);
- } else {
- D[i][j] = SCORE_MIN;
- M[i][j] = prev_score = prev_score + gap_score;
- }
- }
- }
+ curr_D = &D[i][0];
+ curr_M = &M[i][0];
-#ifdef DEBUG_VERBOSE
- fprintf(stderr, "\"%s\" =~ \"%s\"\n", needle, haystack);
- mat_print(&D[0][0], 'D', needle, haystack);
- mat_print(&M[0][0], 'M', needle, haystack);
- fprintf(stderr, "\n");
-#endif
+ match_row(&match, i, curr_D, curr_M, last_D, last_M);
+
+ last_D = curr_D;
+ last_M = curr_M;
+ }
/* backtrace to find the positions of optimal matching */
if (positions) {
@@ -173,9 +229,10 @@ score_t match_positions(const char *needle, const char *haystack, size_t *positi
}
}
- return M[n - 1][m - 1];
-}
+ score_t result = M[n - 1][m - 1];
-score_t match(const char *needle, const char *haystack) {
- return match_positions(needle, haystack, NULL);
+ free(M);
+ free(D);
+
+ return result;
}
diff --git a/src/match.h b/src/match.h
index 0fa7b6f..d4f292c 100644
--- a/src/match.h
+++ b/src/match.h
@@ -7,6 +7,8 @@ typedef double score_t;
#define SCORE_MAX INFINITY
#define SCORE_MIN -INFINITY
+#define MATCH_MAX_LEN 1024
+
int has_match(const char *needle, const char *haystack);
score_t match_positions(const char *needle, const char *haystack, size_t *positions);
score_t match(const char *needle, const char *haystack);
diff --git a/src/tty_interface.c b/src/tty_interface.c
index 225f33a..e87301d 100644
--- a/src/tty_interface.c
+++ b/src/tty_interface.c
@@ -36,8 +36,8 @@ static void draw_match(tty_interface_t *state, const char *choice, int selected)
char *search = state->last_search;
int n = strlen(search);
- size_t positions[n + 1];
- for (int i = 0; i < n + 1; i++)
+ size_t positions[MATCH_MAX_LEN];
+ for (int i = 0; i < n + 1 && i < MATCH_MAX_LEN; i++)
positions[i] = -1;
score_t score = match_positions(search, choice, &positions[0]);
diff --git a/test/test_choices.c b/test/test_choices.c
index 3ad7d6a..d86bc12 100644
--- a/test/test_choices.c
+++ b/test/test_choices.c
@@ -135,8 +135,8 @@ TEST test_choices_unicode() {
}
TEST test_choices_large_input() {
- int N = 100000;
- char *strings[N];
+ const int N = 100000;
+ char *strings[100000];
for(int i = 0; i < N; i++) {
asprintf(&strings[i], "%i", i);