From dfe82eb18d19f7cde3c0f0099718af06c94c314c Mon Sep 17 00:00:00 2001 From: John Hawthorn Date: Mon, 3 Apr 2017 02:11:20 -0700 Subject: Add theft --- deps/theft/.gitignore | 3 + deps/theft/LICENSE | 13 + deps/theft/theft.c | 496 ++++++++++++++++++++++++++++++++++++++ deps/theft/theft.h | 74 ++++++ deps/theft/theft_bloom.c | 145 +++++++++++ deps/theft/theft_bloom.h | 33 +++ deps/theft/theft_hash.c | 37 +++ deps/theft/theft_mt.c | 144 +++++++++++ deps/theft/theft_mt.h | 36 +++ deps/theft/theft_types.h | 197 +++++++++++++++ deps/theft/theft_types_internal.h | 73 ++++++ 11 files changed, 1251 insertions(+) create mode 100644 deps/theft/.gitignore create mode 100644 deps/theft/LICENSE create mode 100644 deps/theft/theft.c create mode 100644 deps/theft/theft.h create mode 100644 deps/theft/theft_bloom.c create mode 100644 deps/theft/theft_bloom.h create mode 100644 deps/theft/theft_hash.c create mode 100644 deps/theft/theft_mt.c create mode 100644 deps/theft/theft_mt.h create mode 100644 deps/theft/theft_types.h create mode 100644 deps/theft/theft_types_internal.h (limited to 'deps') diff --git a/deps/theft/.gitignore b/deps/theft/.gitignore new file mode 100644 index 0000000..68013d9 --- /dev/null +++ b/deps/theft/.gitignore @@ -0,0 +1,3 @@ +*.o +test_theft +libtheft.a diff --git a/deps/theft/LICENSE b/deps/theft/LICENSE new file mode 100644 index 0000000..0d1e969 --- /dev/null +++ b/deps/theft/LICENSE @@ -0,0 +1,13 @@ +Copyright (c) 2014 Scott Vokes + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/deps/theft/theft.c b/deps/theft/theft.c new file mode 100644 index 0000000..7c6244f --- /dev/null +++ b/deps/theft/theft.c @@ -0,0 +1,496 @@ +#include + +#include "theft.h" +#include "theft_types_internal.h" +#include "theft_bloom.h" +#include "theft_mt.h" + +/* Initialize a theft test runner. + * BLOOM_BITS sets the size of the table used for detecting + * combinations of arguments that have already been tested. + * If 0, a default size will be chosen based on trial count. + * (This will only be used if all property types have hash + * callbacks defined.) The bloom filter can also be disabled + * by setting BLOOM_BITS to THEFT_BLOOM_DISABLE. + * + * Returns a NULL if malloc fails or BLOOM_BITS is out of bounds. */ +struct theft *theft_init(uint8_t bloom_bits) { + if ((bloom_bits != 0 && (bloom_bits < THEFT_BLOOM_BITS_MIN)) + || ((bloom_bits > THEFT_BLOOM_BITS_MAX) && + bloom_bits != THEFT_BLOOM_DISABLE)) { + return NULL; + } + + theft *t = malloc(sizeof(*t)); + if (t == NULL) { return NULL; } + memset(t, 0, sizeof(*t)); + + t->mt = theft_mt_init(DEFAULT_THEFT_SEED); + if (t->mt == NULL) { + free(t); + return NULL; + } else { + t->out = stdout; + t->requested_bloom_bits = bloom_bits; + return t; + } +} + +/* (Re-)initialize the random number generator with a specific seed. */ +void theft_set_seed(struct theft *t, uint64_t seed) { + t->seed = seed; + theft_mt_reset(t->mt, seed); +} + +/* Get a random 64-bit integer from the test runner's PRNG. */ +theft_seed theft_random(struct theft *t) { + theft_seed ns = (theft_seed)theft_mt_random(t->mt); + return ns; +} + +/* Get a random double from the test runner's PRNG. */ +double theft_random_double(struct theft *t) { + return theft_mt_random_double(t->mt); +} + +/* Change T's output stream handle to OUT. (Default: stdout.) */ +void theft_set_output_stream(struct theft *t, FILE *out) { t->out = out; } + +/* Check if all argument info structs have all required callbacks. */ +static bool +check_all_args(struct theft_propfun_info *info, bool *all_hashable) { + bool ah = true; + for (int i = 0; i < info->arity; i++) { + struct theft_type_info *ti = info->type_info[i]; + if (ti->alloc == NULL) { return false; } + if (ti->hash == NULL) { ah = false; } + } + *all_hashable = ah; + return true; +} + +static theft_progress_callback_res +default_progress_cb(struct theft_trial_info *info, void *env) { + (void)info; + (void)env; + return THEFT_PROGRESS_CONTINUE; +} + +static void infer_arity(struct theft_propfun_info *info) { + for (int i = 0; i < THEFT_MAX_ARITY; i++) { + if (info->type_info[i] == NULL) { + info->arity = i; + break; + } + } +} + +/* Run a series of randomized trials of a property function. + * + * Configuration is specified in CFG; many fields are optional. + * See the type definition in `theft_types.h`. */ +theft_run_res +theft_run(struct theft *t, struct theft_cfg *cfg) { + if (t == NULL || cfg == NULL) { + return THEFT_RUN_ERROR_BAD_ARGS; + } + + struct theft_propfun_info info; + memset(&info, 0, sizeof(info)); + info.name = cfg->name; + info.fun = cfg->fun; + memcpy(info.type_info, cfg->type_info, sizeof(info.type_info)); + info.always_seed_count = cfg->always_seed_count; + info.always_seeds = cfg->always_seeds; + + if (cfg->seed) { + theft_set_seed(t, cfg->seed); + } else { + theft_set_seed(t, DEFAULT_THEFT_SEED); + } + + if (cfg->trials == 0) { cfg->trials = THEFT_DEF_TRIALS; } + + return theft_run_internal(t, &info, cfg->trials, cfg->progress_cb, + cfg->env, cfg->report); +} + +/* Actually run the trials, with all arguments made explicit. */ +static theft_run_res +theft_run_internal(struct theft *t, struct theft_propfun_info *info, + int trials, theft_progress_cb *cb, void *env, + struct theft_trial_report *r) { + + struct theft_trial_report fake_report; + if (r == NULL) { r = &fake_report; } + memset(r, 0, sizeof(*r)); + + infer_arity(info); + if (info->arity == 0) { + return THEFT_RUN_ERROR_BAD_ARGS; + } + + if (t == NULL || info == NULL || info->fun == NULL + || info->arity == 0) { + return THEFT_RUN_ERROR_BAD_ARGS; + } + + bool all_hashable = false; + if (!check_all_args(info, &all_hashable)) { + return THEFT_RUN_ERROR_MISSING_CALLBACK; + } + + if (cb == NULL) { cb = default_progress_cb; } + + /* If all arguments are hashable, then attempt to use + * a bloom filter to avoid redundant checking. */ + if (all_hashable) { + if (t->requested_bloom_bits == 0) { + t->requested_bloom_bits = theft_bloom_recommendation(trials); + } + if (t->requested_bloom_bits != THEFT_BLOOM_DISABLE) { + t->bloom = theft_bloom_init(t->requested_bloom_bits); + } + } + + theft_seed seed = t->seed; + theft_seed initial_seed = t->seed; + int always_seeds = info->always_seed_count; + if (info->always_seeds == NULL) { always_seeds = 0; } + + void *args[THEFT_MAX_ARITY]; + + theft_progress_callback_res cres = THEFT_PROGRESS_CONTINUE; + + for (int trial = 0; trial < trials; trial++) { + memset(args, 0xFF, sizeof(args)); + if (cres == THEFT_PROGRESS_HALT) { break; } + + /* If any seeds to always run were specified, use those before + * reverting to the specified starting seed. */ + if (trial < always_seeds) { + seed = info->always_seeds[trial]; + } else if ((always_seeds > 0) && (trial == always_seeds)) { + seed = initial_seed; + } + + struct theft_trial_info ti = { + .name = info->name, + .trial = trial, + .seed = seed, + .arity = info->arity, + .args = args + }; + + theft_set_seed(t, seed); + all_gen_res_t gres = gen_all_args(t, info, seed, args, env); + switch (gres) { + case ALL_GEN_SKIP: + /* skip generating these args */ + ti.status = THEFT_TRIAL_SKIP; + r->skip++; + cres = cb(&ti, env); + break; + case ALL_GEN_DUP: + /* skip these args -- probably already tried */ + ti.status = THEFT_TRIAL_DUP; + r->dup++; + cres = cb(&ti, env); + break; + default: + case ALL_GEN_ERROR: + /* Error while generating args */ + ti.status = THEFT_TRIAL_ERROR; + cres = cb(&ti, env); + return THEFT_RUN_ERROR; + case ALL_GEN_OK: + /* (Extracted function to avoid deep nesting here.) */ + if (!run_trial(t, info, args, cb, env, r, &ti, &cres)) { + return THEFT_RUN_ERROR; + } + } + + free_args(info, args, env); + + /* Restore last known seed and generate next. */ + theft_set_seed(t, seed); + seed = theft_random(t); + } + + if (r->fail > 0) { + return THEFT_RUN_FAIL; + } else { + return THEFT_RUN_PASS; + } +} + +/* Now that arguments have been generated, run the trial and update + * counters, call cb with results, etc. */ +static bool run_trial(struct theft *t, struct theft_propfun_info *info, + void **args, theft_progress_cb *cb, void *env, + struct theft_trial_report *r, struct theft_trial_info *ti, + theft_progress_callback_res *cres) { + if (t->bloom) { mark_called(t, info, args, env); } + theft_trial_res tres = call_fun(info, args); + ti->status = tres; + switch (tres) { + case THEFT_TRIAL_PASS: + r->pass++; + *cres = cb(ti, env); + break; + case THEFT_TRIAL_FAIL: + if (!attempt_to_shrink(t, info, args, env)) { + ti->status = THEFT_TRIAL_ERROR; + *cres = cb(ti, env); + return false; + } + r->fail++; + *cres = report_on_failure(t, info, ti, cb, env); + break; + case THEFT_TRIAL_SKIP: + *cres = cb(ti, env); + r->skip++; + break; + case THEFT_TRIAL_DUP: + /* user callback should not return this; fall through */ + case THEFT_TRIAL_ERROR: + *cres = cb(ti, env); + free_args(info, args, env); + return false; + } + return true; +} + +static void free_args(struct theft_propfun_info *info, + void **args, void *env) { + for (int i = 0; i < info->arity; i++) { + theft_free_cb *fcb = info->type_info[i]->free; + if (fcb && args[i] != THEFT_SKIP) { + fcb(args[i], env); + } + } +} + +void theft_free(struct theft *t) { + if (t->bloom) { + theft_bloom_dump(t->bloom); + theft_bloom_free(t->bloom); + t->bloom = NULL; + } + theft_mt_free(t->mt); + free(t); +} + +/* Actually call the property function. Its number of arguments is not + * constrained by the typedef, but will be defined at the call site + * here. (If info->arity is wrong, it will probably crash.) */ +static theft_trial_res +call_fun(struct theft_propfun_info *info, void **args) { + theft_trial_res res = THEFT_TRIAL_ERROR; + switch (info->arity) { + case 1: + res = info->fun(args[0]); + break; + case 2: + res = info->fun(args[0], args[1]); + break; + case 3: + res = info->fun(args[0], args[1], args[2]); + break; + case 4: + res = info->fun(args[0], args[1], args[2], args[3]); + break; + case 5: + res = info->fun(args[0], args[1], args[2], args[3], args[4]); + break; + case 6: + res = info->fun(args[0], args[1], args[2], args[3], args[4], + args[5]); + break; + case 7: + res = info->fun(args[0], args[1], args[2], args[3], args[4], + args[5], args[6]); + break; + case 8: + res = info->fun(args[0], args[1], args[2], args[3], args[4], + args[5], args[6], args[7]); + break; + case 9: + res = info->fun(args[0], args[1], args[2], args[3], args[4], + args[5], args[6], args[7], args[8]); + break; + case 10: + res = info->fun(args[0], args[1], args[2], args[3], args[4], + args[5], args[6], args[7], args[8], args[9]); + break; + /* ... */ + default: + return THEFT_TRIAL_ERROR; + } + return res; +} + +/* Attempt to instantiate arguments, starting with the current seed. */ +static all_gen_res_t +gen_all_args(theft *t, struct theft_propfun_info *info, + theft_seed seed, void *args[THEFT_MAX_ARITY], void *env) { + for (int i = 0; i < info->arity; i++) { + struct theft_type_info *ti = info->type_info[i]; + void *p = ti->alloc(t, seed, env); + if (p == THEFT_SKIP || p == THEFT_ERROR) { + for (int j = 0; j < i; j++) { + ti->free(args[j], env); + } + if (p == THEFT_SKIP) { + return ALL_GEN_SKIP; + } else { + return ALL_GEN_ERROR; + } + } else { + args[i] = p; + } + seed = theft_random(t); + } + + /* check bloom filter */ + if (t->bloom && check_called(t, info, args, env)) { + return ALL_GEN_DUP; + } + + return ALL_GEN_OK; +} + +/* Attempt to simplify all arguments, breadth first. Continue as long as + * progress is made, i.e., until a local minima is reached. */ +static bool +attempt_to_shrink(theft *t, struct theft_propfun_info *info, + void *args[], void *env) { + bool progress = false; + do { + progress = false; + for (int ai = 0; ai < info->arity; ai++) { + struct theft_type_info *ti = info->type_info[ai]; + if (ti->shrink) { + /* attempt to simplify this argument by one step */ + shrink_res rres = attempt_to_shrink_arg(t, info, args, env, ai); + switch (rres) { + case SHRINK_OK: + progress = true; + break; + case SHRINK_DEAD_END: + break; + default: + case SHRINK_ERROR: + return false; + } + } + } + } while (progress); + + return true; +} + +/* Simplify an argument by trying all of its simplification tactics, in + * order, and checking whether the property still fails. If it passes, + * then revert the simplification and try another tactic. + * + * If the bloom filter is being used (i.e., if all arguments have hash + * callbacks defined), then use it to skip over areas of the state + * space that have probably already been tried. */ +static shrink_res +attempt_to_shrink_arg(theft *t, struct theft_propfun_info *info, + void *args[], void *env, int ai) { + struct theft_type_info *ti = info->type_info[ai]; + + for (uint32_t tactic = 0; tactic < THEFT_MAX_TACTICS; tactic++) { + void *cur = args[ai]; + void *nv = ti->shrink(cur, tactic, env); + if (nv == THEFT_NO_MORE_TACTICS) { + return SHRINK_DEAD_END; + } else if (nv == THEFT_ERROR) { + return SHRINK_ERROR; + } else if (nv == THEFT_DEAD_END) { + continue; /* try next tactic */ + } + + args[ai] = nv; + if (t->bloom) { + if (check_called(t, info, args, env)) { + /* probably redundant */ + if (ti->free) { ti->free(nv, env); } + args[ai] = cur; + continue; + } else { + mark_called(t, info, args, env); + } + } + theft_trial_res res = call_fun(info, args); + + switch (res) { + case THEFT_TRIAL_PASS: + case THEFT_TRIAL_SKIP: + /* revert */ + args[ai] = cur; + if (ti->free) { ti->free(nv, env); } + break; + case THEFT_TRIAL_FAIL: + if (ti->free) { ti->free(cur, env); } + return SHRINK_OK; + case THEFT_TRIAL_DUP: /* user callback should not return this */ + case THEFT_TRIAL_ERROR: + return SHRINK_ERROR; + } + } + (void)t; + return SHRINK_DEAD_END; +} + +/* Populate a buffer with hashes of all the arguments. */ +static void get_arg_hash_buffer(theft_hash *buffer, + struct theft_propfun_info *info, void **args, void *env) { + for (int i = 0; i < info->arity; i++) { + buffer[i] = info->type_info[i]->hash(args[i], env); + } +} + +/* Mark the tuple of argument instances as called in the bloom filter. */ +static void mark_called(theft *t, struct theft_propfun_info *info, + void **args, void *env) { + theft_hash buffer[THEFT_MAX_ARITY]; + get_arg_hash_buffer(buffer, info, args, env); + theft_bloom_mark(t->bloom, (uint8_t *)buffer, + info->arity * sizeof(theft_hash)); +} + +/* Check if this combination of argument instances has been called. */ +static bool check_called(theft *t, struct theft_propfun_info *info, + void **args, void *env) { + theft_hash buffer[THEFT_MAX_ARITY]; + get_arg_hash_buffer(buffer, info, args, env); + return theft_bloom_check(t->bloom, (uint8_t *)buffer, + info->arity * sizeof(theft_hash)); +} + +/* Print info about a failure. */ +static theft_progress_callback_res report_on_failure(theft *t, + struct theft_propfun_info *info, + struct theft_trial_info *ti, theft_progress_cb *cb, void *env) { + static theft_progress_callback_res cres; + + int arity = info->arity; + fprintf(t->out, "\n\n -- Counter-Example: %s\n", + info->name ? info-> name : ""); + fprintf(t->out, " Trial %u, Seed 0x%016llx\n", ti->trial, + (uint64_t)ti->seed); + for (int i = 0; i < arity; i++) { + theft_print_cb *print = info->type_info[i]->print; + if (print) { + fprintf(t->out, " Argument %d:\n", i); + print(t->out, ti->args[i], env); + fprintf(t->out, "\n"); + } + } + + cres = cb(ti, env); + return cres; +} diff --git a/deps/theft/theft.h b/deps/theft/theft.h new file mode 100644 index 0000000..1f4f464 --- /dev/null +++ b/deps/theft/theft.h @@ -0,0 +1,74 @@ +#ifndef THEFT_H +#define THEFT_H + +#include +#include +#include +#include + +/* Version 0.2.0 */ +#define THEFT_VERSION_MAJOR 0 +#define THEFT_VERSION_MINOR 2 +#define THEFT_VERSION_PATCH 0 + +/* A property can have at most this many arguments. */ +#define THEFT_MAX_ARITY 10 + +#include "theft_types.h" + +/* Default number of trials to run. */ +#define THEFT_DEF_TRIALS 100 + +/* Min and max bits used to determine bloom filter size. + * (A larger value uses more memory, but reduces the odds of an + * untested argument combination being falsely skipped.) */ +#define THEFT_BLOOM_BITS_MIN 13 /* 1 KB */ +#define THEFT_BLOOM_BITS_MAX 33 /* 1 GB */ + +/* Initialize a theft test runner. + * BLOOM_BITS sets the size of the table used for detecting + * combinations of arguments that have already been tested. + * If 0, a default size will be chosen based on trial count. + * (This will only be used if all property types have hash + * callbacks defined.) The bloom filter can also be disabled + * by setting BLOOM_BITS to THEFT_BLOOM_DISABLE. + * + * Returns a NULL if malloc fails or BLOOM_BITS is out of bounds. */ +struct theft *theft_init(uint8_t bloom_bits); + +/* Free a property-test runner. */ +void theft_free(struct theft *t); + +/* (Re-)initialize the random number generator with a specific seed. */ +void theft_set_seed(struct theft *t, uint64_t seed); + +/* Get a random 64-bit integer from the test runner's PRNG. */ +theft_hash theft_random(struct theft *t); + +/* Get a random double from the test runner's PRNG. */ +double theft_random_double(struct theft *t); + +/* Change T's output stream handle to OUT. (Default: stdout.) */ +void theft_set_output_stream(struct theft *t, FILE *out); + +/* Run a series of randomized trials of a property function. + * + * Configuration is specified in CFG; many fields are optional. + * See the type definition in `theft_types.h`. */ +theft_run_res +theft_run(struct theft *t, struct theft_cfg *cfg); + +/* Hash a buffer in one pass. (Wraps the below functions.) */ +theft_hash theft_hash_onepass(uint8_t *data, size_t bytes); + +/* Init/reset a hasher for incremental hashing. + * Returns true, or false if you gave it a NULL pointer. */ +void theft_hash_init(struct theft_hasher *h); + +/* Sink more data into an incremental hash. */ +void theft_hash_sink(struct theft_hasher *h, uint8_t *data, size_t bytes); + +/* Finish hashing and get the result. */ +theft_hash theft_hash_done(struct theft_hasher *h); + +#endif diff --git a/deps/theft/theft_bloom.c b/deps/theft/theft_bloom.c new file mode 100644 index 0000000..7b4ddb8 --- /dev/null +++ b/deps/theft/theft_bloom.c @@ -0,0 +1,145 @@ +#include +#include + +#include "theft.h" +#include "theft_bloom.h" + +#define DEFAULT_BLOOM_BITS 17 +#define DEBUG_BLOOM_FILTER 0 +#define LOG2_BITS_PER_BYTE 3 +#define HASH_COUNT 4 + +static uint8_t get_bits_set_count(uint8_t counts[256], uint8_t byte); + +/* Initialize a bloom filter. */ +struct theft_bloom *theft_bloom_init(uint8_t bit_size2) { + size_t sz = 1 << (bit_size2 - LOG2_BITS_PER_BYTE); + struct theft_bloom *b = malloc(sizeof(struct theft_bloom) + sz); + if (b) { + b->size = sz; + b->bit_count = bit_size2; + memset(b->bits, 0, sz); + } + return b; +} + +/* Hash data and mark it in the bloom filter. */ +void theft_bloom_mark(struct theft_bloom *b, uint8_t *data, size_t data_size) { + uint64_t hash = theft_hash_onepass(data, data_size); + uint8_t bc = b->bit_count; + uint64_t mask = (1 << bc) - 1; + + /* Use HASH_COUNT distinct slices of MASK bits as hashes for the bloom filter. */ + int bit_inc = (64 - bc) / HASH_COUNT; + + for (int i = 0; i < (64 - bc); i += bit_inc) { + uint64_t v = (hash & (mask << i)) >> i; + size_t offset = v / 8; + uint8_t bit = 1 << (v & 0x07); + b->bits[offset] |= bit; + } +} + +/* Check whether the data's hash is in the bloom filter. */ +bool theft_bloom_check(struct theft_bloom *b, uint8_t *data, size_t data_size) { + uint64_t hash = theft_hash_onepass(data, data_size); + uint8_t bc = b->bit_count; + uint64_t mask = (1 << bc) - 1; + + int bit_inc = (64 - bc) / HASH_COUNT; + + for (int i = 0; i < (64 - bc); i += bit_inc) { + uint64_t v = (hash & (mask << i)) >> i; + size_t offset = v / 8; + uint8_t bit = 1 << (v & 0x07); + if (0 == (b->bits[offset] & bit)) { return false; } + } + + return true; +} + +/* Free the bloom filter. */ +void theft_bloom_free(struct theft_bloom *b) { free(b); } + +/* Dump the bloom filter's contents. (Debugging.) */ +void theft_bloom_dump(struct theft_bloom *b) { + uint8_t counts[256]; + memset(counts, 0xFF, sizeof(counts)); + + size_t total = 0; + uint16_t row_total = 0; + + for (size_t i = 0; i < b->size; i++) { + uint8_t count = get_bits_set_count(counts, b->bits[i]); + total += count; + row_total += count; + #if DEBUG_BLOOM_FILTER > 1 + char c = (count == 0 ? '.' : '0' + count); + printf("%c", c); + if ((i & 63) == 0 || i == b->size - 1) { + printf(" - %2.1f%%\n", + (100 * row_total) / (64.0 * 8)); + row_total = 0; + } + #endif + } + + #if DEBUG_BLOOM_FILTER + printf(" -- bloom filter: %zd of %zd bits set (%2d%%)\n", + total, 8*b->size, (int)((100.0 * total)/(8.0*b->size))); + #endif + + /* If the total number of bits set is > the number of bytes + * in the table (i.e., > 1/8 full) then warn the user. */ + if (total > b->size) { + fprintf(stderr, "\nWARNING: bloom filter is %zd%% full, " + "larger bloom_bits value recommended.\n", + (size_t)((100 * total) / (8 * b->size))); + } +} + +/* Recommend a bloom filter size for a given number of trials. */ +uint8_t theft_bloom_recommendation(int trials) { + /* With a preferred priority of false positives under 0.1%, + * the required number of bits m in the bloom filter is: + * m = -lg(0.001)/(lg(2)^2) == 14.378 ~= 14, + * so we want an array with 1 << ceil(log2(14*trials)) cells. + * + * Note: The above formula is for the *ideal* number of hash + * functions, but we're using a hardcoded count. It appears to work + * well enough in practice, though, and this can be adjusted later + * without impacting the API. */ + #define TRIAL_MULTIPILER 14 + uint8_t res = DEFAULT_BLOOM_BITS; + + const uint8_t min = THEFT_BLOOM_BITS_MIN - LOG2_BITS_PER_BYTE; + const uint8_t max = THEFT_BLOOM_BITS_MAX - LOG2_BITS_PER_BYTE; + + for (uint8_t i = min; i < max; i++) { + int32_t v = (1 << i); + if (v > (TRIAL_MULTIPILER * trials)) { + res = i + LOG2_BITS_PER_BYTE; + break; + } + } + + #if DEBUG_BLOOM_FILTER + size_t sz = 1 << (res - LOG2_BITS_PER_BYTE); + printf("Using %zd bytes for bloom filter: %d trials -> bit_size2 %u\n", + sizeof(struct theft_bloom) + sz, trials, res); + #endif + + return res; +} + +/* Check a byte->bits set table, and lazily populate it. */ +static uint8_t get_bits_set_count(uint8_t counts[256], uint8_t byte) { + uint8_t v = counts[byte]; + if (v != 0xFF) { return v; } + uint8_t t = 0; + for (uint8_t i = 0; i < 8; i++) { + if (byte & (1 << i)) { t++; } + } + counts[byte] = t; + return t; +} diff --git a/deps/theft/theft_bloom.h b/deps/theft/theft_bloom.h new file mode 100644 index 0000000..c5c6553 --- /dev/null +++ b/deps/theft/theft_bloom.h @@ -0,0 +1,33 @@ +#ifndef THEFT_BLOOM_H +#define THEFT_BLOOM_H + +#include +#include +#include + +struct theft_bloom { + uint8_t bit_count; + size_t size; + uint8_t bits[]; +}; + +/* Initialize a bloom filter. */ +struct theft_bloom *theft_bloom_init(uint8_t bit_size2); + +/* Hash data and mark it in the bloom filter. */ +void theft_bloom_mark(struct theft_bloom *b, uint8_t *data, size_t data_size); + +/* Check whether the data's hash is in the bloom filter. */ +bool theft_bloom_check(struct theft_bloom *b, uint8_t *data, size_t data_size); + +/* Free the bloom filter. */ +void theft_bloom_free(struct theft_bloom *b); + +/* Dump the bloom filter's contents. (Debugging.) */ +void theft_bloom_dump(struct theft_bloom *b); + +/* Recommend a bloom filter size for a given number of trials. */ +uint8_t theft_bloom_recommendation(int trials); + + +#endif diff --git a/deps/theft/theft_hash.c b/deps/theft/theft_hash.c new file mode 100644 index 0000000..2ffa202 --- /dev/null +++ b/deps/theft/theft_hash.c @@ -0,0 +1,37 @@ +#include "theft.h" + +/* Fowler/Noll/Vo hash, 64-bit FNV-1a. + * This hashing algorithm is in the public domain. + * For more details, see: http://www.isthe.com/chongo/tech/comp/fnv/. */ +static const uint64_t fnv64_prime = 1099511628211L; +static const uint64_t fnv64_offset_basis = 14695981039346656037UL; + +/* Init a hasher for incremental hashing. */ +void theft_hash_init(struct theft_hasher *h) { + h->accum = fnv64_offset_basis; +} + +/* Sink more data into an incremental hash. */ +void theft_hash_sink(struct theft_hasher *h, uint8_t *data, size_t bytes) { + if (h == NULL || data == NULL) { return; } + uint64_t a = h->accum; + for (size_t i = 0; i < bytes; i++) { + a = (a ^ data[i]) * fnv64_prime; + } + h->accum = a; +} + +/* Finish hashing and get the result. */ +theft_hash theft_hash_done(struct theft_hasher *h) { + theft_hash res = h->accum; + theft_hash_init(h); /* reset */ + return res; +} + +/* Hash a buffer in one pass. (Wraps the above functions.) */ +theft_hash theft_hash_onepass(uint8_t *data, size_t bytes) { + struct theft_hasher h; + theft_hash_init(&h); + theft_hash_sink(&h, data, bytes); + return theft_hash_done(&h); +} diff --git a/deps/theft/theft_mt.c b/deps/theft/theft_mt.c new file mode 100644 index 0000000..312ddaf --- /dev/null +++ b/deps/theft/theft_mt.c @@ -0,0 +1,144 @@ +/* + A C-program for MT19937-64 (2004/9/29 version). + Coded by Takuji Nishimura and Makoto Matsumoto. + + This is a 64-bit version of Mersenne Twister pseudorandom number + generator. + + Before using, initialize the state by using init_genrand64(seed) + or init_by_array64(init_key, key_length). + + Copyright (C) 2004, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + References: + T. Nishimura, ``Tables of 64-bit Mersenne Twisters'' + ACM Transactions on Modeling and + Computer Simulation 10. (2000) 348--357. + M. Matsumoto and T. Nishimura, + ``Mersenne Twister: a 623-dimensionally equidistributed + uniform pseudorandom number generator'' + ACM Transactions on Modeling and + Computer Simulation 8. (Jan. 1998) 3--30. + + Any feedback is very welcome. + http://www.math.hiroshima-u.ac.jp/~m-mat/MT/emt.html + email: m-mat @ math.sci.hiroshima-u.ac.jp (remove spaces) +*/ + +/* The code has been modified to store internal state in heap/stack + * allocated memory, rather than statically allocated memory, to allow + * multiple instances running in the same address space. */ + +#include +#include +#include "theft_mt.h" + +#define NN THEFT_MT_PARAM_N +#define MM 156 +#define MATRIX_A 0xB5026F5AA96619E9ULL +#define UM 0xFFFFFFFF80000000ULL /* Most significant 33 bits */ +#define LM 0x7FFFFFFFULL /* Least significant 31 bits */ + +static uint64_t genrand64_int64(struct theft_mt *r); + +/* Heap-allocate a mersenne twister struct. */ +struct theft_mt *theft_mt_init(uint64_t seed) { + struct theft_mt *mt = malloc(sizeof(struct theft_mt)); + if (mt == NULL) { return NULL; } + theft_mt_reset(mt, seed); + return mt; +} + +/* Free a heap-allocated mersenne twister struct. */ +void theft_mt_free(struct theft_mt *mt) { + free(mt); +} + +/* initializes mt[NN] with a seed */ +void theft_mt_reset(struct theft_mt *mt, uint64_t seed) +{ + mt->mt[0] = seed; + uint16_t mti = 0; + for (mti=1; mtimt[mti] = (6364136223846793005ULL * + (mt->mt[mti-1] ^ (mt->mt[mti-1] >> 62)) + mti); + } + mt->mti = mti; +} + +/* Get a 64-bit random number. */ +uint64_t theft_mt_random(struct theft_mt *mt) { + return genrand64_int64(mt); +} + +/* Generate a random number on [0,1]-real-interval. */ +double theft_mt_random_double(struct theft_mt *mt) +{ + return (genrand64_int64(mt) >> 11) * (1.0/9007199254740991.0); +} + +/* generates a random number on [0, 2^64-1]-interval */ +static uint64_t genrand64_int64(struct theft_mt *r) +{ + int i; + uint64_t x; + static uint64_t mag01[2]={0ULL, MATRIX_A}; + + if (r->mti >= NN) { /* generate NN words at one time */ + + /* if init has not been called, */ + /* a default initial seed is used */ + if (r->mti == NN+1) + theft_mt_reset(r, 5489ULL); + + for (i=0;imt[i]&UM)|(r->mt[i+1]&LM); + r->mt[i] = r->mt[i+MM] ^ (x>>1) ^ mag01[(int)(x&1ULL)]; + } + for (;imt[i]&UM)|(r->mt[i+1]&LM); + r->mt[i] = r->mt[i+(MM-NN)] ^ (x>>1) ^ mag01[(int)(x&1ULL)]; + } + x = (r->mt[NN-1]&UM)|(r->mt[0]&LM); + r->mt[NN-1] = r->mt[MM-1] ^ (x>>1) ^ mag01[(int)(x&1ULL)]; + + r->mti = 0; + } + + x = r->mt[r->mti++]; + + x ^= (x >> 29) & 0x5555555555555555ULL; + x ^= (x << 17) & 0x71D67FFFEDA60000ULL; + x ^= (x << 37) & 0xFFF7EEE000000000ULL; + x ^= (x >> 43); + + return x; +} diff --git a/deps/theft/theft_mt.h b/deps/theft/theft_mt.h new file mode 100644 index 0000000..65d7098 --- /dev/null +++ b/deps/theft/theft_mt.h @@ -0,0 +1,36 @@ +#ifndef THEFT_MT_H +#define THEFT_MT_H + +#include + +/* Wrapper for Mersenne Twister. + * See copyright and license in theft_mt.c, more details at: + * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html + * + * The code has been modified to store internal state in heap/stack + * allocated memory, rather than statically allocated memory, to allow + * multiple instances running in the same address space. */ + +#define THEFT_MT_PARAM_N 312 + +struct theft_mt { + uint64_t mt[THEFT_MT_PARAM_N]; /* the array for the state vector */ + int16_t mti; +}; + +/* Heap-allocate a mersenne twister struct. */ +struct theft_mt *theft_mt_init(uint64_t seed); + +/* Free a heap-allocated mersenne twister struct. */ +void theft_mt_free(struct theft_mt *mt); + +/* Reset a mersenne twister struct, possibly stack-allocated. */ +void theft_mt_reset(struct theft_mt *mt, uint64_t seed); + +/* Get a 64-bit random number. */ +uint64_t theft_mt_random(struct theft_mt *mt); + +/* Generate a random number on [0,1]-real-interval. */ +double theft_mt_random_double(struct theft_mt *mt); + +#endif diff --git a/deps/theft/theft_types.h b/deps/theft/theft_types.h new file mode 100644 index 0000000..a966b04 --- /dev/null +++ b/deps/theft/theft_types.h @@ -0,0 +1,197 @@ +#ifndef THEFT_TYPES_H +#define THEFT_TYPES_H + +/* A pseudo-random number/seed, used to generate instances. */ +typedef uint64_t theft_seed; + +/* A hash of an instance. */ +typedef uint64_t theft_hash; + +/* These are opaque, as far as the API is concerned. */ +struct theft_bloom; /* bloom filter */ +struct theft_mt; /* mersenne twister PRNG */ + +/* Struct for property-testing state. */ +struct theft { + FILE *out; + theft_seed seed; + uint8_t requested_bloom_bits; + struct theft_bloom *bloom; /* bloom filter */ + struct theft_mt *mt; /* random number generator */ +}; + +/* Special sentinel values returned instead of instance pointers. */ +#define THEFT_SKIP ((void *)-1) +#define THEFT_ERROR ((void *)-2) +#define THEFT_DEAD_END ((void *)-1) +#define THEFT_NO_MORE_TACTICS ((void *)-3) + +/* Explicitly disable using the bloom filter. + * Note that if you do this, you must be sure your simplify function + * *always* returns a simpler value, or it will loop forever. */ +#define THEFT_BLOOM_DISABLE ((uint8_t)-1) + +/* Allocate and return an instance of the type, based on a known + * pseudo-random number seed. To get additional seeds, use + * theft_random(t); this stream of numbers will be deterministic, so if + * the alloc callback is constructed appropriately, an identical + * instance can be constructed later from the same initial seed. + * + * Returns a pointer to the instance, THEFT_ERROR, or THEFT_SKIP. */ +typedef void *(theft_alloc_cb)(struct theft *t, theft_seed seed, void *env); + +/* Free an instance. */ +typedef void (theft_free_cb)(void *instance, void *env); + +/* Hash an instance. Used to skip combinations of arguments which + * have probably already been checked. */ +typedef theft_hash (theft_hash_cb)(void *instance, void *env); + +/* Attempt to shrink an instance to a simpler instance. + * + * For a given INSTANCE, there are likely to be multiple ways in which + * it can be simplified. For example, a list of unsigned ints could have + * the first element decremented, divided by 2, or dropped. This + * callback should return a pointer to a freshly allocated, simplified + * instance, or should return THEFT_DEAD_END to indicate that the + * instance cannot be simplified further by this method. + * + * These tactics will be lazily explored breadth-first, to + * try to find simpler versions of arguments that cause the + * property to no longer hold. + * + * If this callback is NULL, it is equivalent to always returning + * THEFT_NO_MORE_TACTICS. */ +typedef void *(theft_shrink_cb)(void *instance, uint32_t tactic, void *env); + +/* Print INSTANCE to output stream F. + * Used for displaying counter-examples. Can be NULL. */ +typedef void (theft_print_cb)(FILE *f, void *instance, void *env); + +/* Result from a single trial. */ +typedef enum { + THEFT_TRIAL_PASS, /* property held */ + THEFT_TRIAL_FAIL, /* property contradicted */ + THEFT_TRIAL_SKIP, /* user requested skip; N/A */ + THEFT_TRIAL_DUP, /* args probably already tried */ + THEFT_TRIAL_ERROR, /* unrecoverable error, halt */ +} theft_trial_res; + +/* A test property function. Arguments must match the types specified by + * theft_cfg.type_info, or the result will be undefined. For example, a + * propfun `prop_foo(A x, B y, C z)` must have a type_info array of + * `{ info_A, info_B, info_C }`. + * + * Should return: + * THEFT_TRIAL_PASS if the property holds, + * THEFT_TRIAL_FAIL if a counter-example is found, + * THEFT_TRIAL_SKIP if the combination of args isn't applicable, + * or THEFT_TRIAL_ERROR if the whole run should be halted. */ +typedef theft_trial_res (theft_propfun)( /* arguments unconstrained */ ); + +/* Callbacks used for testing with random instances of a type. + * For more information, see comments on their typedefs. */ +struct theft_type_info { + /* Required: */ + theft_alloc_cb *alloc; /* gen random instance from seed */ + + /* Optional, but recommended: */ + theft_free_cb *free; /* free instance */ + theft_hash_cb *hash; /* instance -> hash */ + theft_shrink_cb *shrink; /* shrink instance */ + theft_print_cb *print; /* fprintf instance */ +}; + +/* Result from an individual trial. */ +struct theft_trial_info { + const char *name; /* property name */ + int trial; /* N'th trial */ + theft_seed seed; /* Seed used */ + theft_trial_res status; /* Run status */ + uint8_t arity; /* Number of arguments */ + void **args; /* Arguments used */ +}; + +/* Whether to keep running trials after N failures/skips/etc. */ +typedef enum { + THEFT_PROGRESS_CONTINUE, /* keep running trials */ + THEFT_PROGRESS_HALT, /* no need to continue */ +} theft_progress_callback_res; + +/* Handle test results. + * Can be used to halt after too many failures, print '.' after + * every N trials, etc. */ +typedef theft_progress_callback_res +(theft_progress_cb)(struct theft_trial_info *info, void *env); + +/* Result from a trial run. */ +typedef enum { + THEFT_RUN_PASS = 0, /* no failures */ + THEFT_RUN_FAIL = 1, /* 1 or more failures */ + THEFT_RUN_ERROR = 2, /* an error occurred */ + THEFT_RUN_ERROR_BAD_ARGS = -1, /* API misuse */ + /* Missing required callback for 1 or more types */ + THEFT_RUN_ERROR_MISSING_CALLBACK = -2, +} theft_run_res; + +/* Optional report from a trial run; same meanings as theft_trial_res. */ +struct theft_trial_report { + size_t pass; + size_t fail; + size_t skip; + size_t dup; +}; + +/* Configuration struct for a theft test. + * In C99, this struct can be specified as a literal, like this: + * + * struct theft_cfg cfg = { + * .name = "example", + * .fun = prop_fun, + * .type_info = { type_arg_a, type_arg_b }, + * .seed = 0x7he5eed, + * }; + * + * and omitted fields will be set to defaults. + * */ +struct theft_cfg { + /* Property function under test, and info about its arguments. + * The function is called with as many arguments are there + * are values in TYPE_INFO, so it can crash if that is wrong. */ + theft_propfun *fun; + struct theft_type_info *type_info[THEFT_MAX_ARITY]; + + /* -- All fields after this point are optional. -- */ + + /* Property name, displayed in test runner output. */ + const char *name; + + /* Array of seeds to always run, and its length. + * Can be used for regression tests. */ + int always_seed_count; /* number of seeds */ + theft_seed *always_seeds; /* seeds to always run */ + + /* Number of trials to run. Defaults to THEFT_DEF_TRIALS. */ + int trials; + + /* Progress callback, used to display progress in + * slow-running tests, halt early under certain conditions, etc. */ + theft_progress_cb *progress_cb; + + /* Environment pointer. This is completely opaque to theft itself, + * but will be passed along to all callbacks. */ + void *env; + + /* Struct to populate with more detailed test results. */ + struct theft_trial_report *report; + + /* Seed for the random number generator. */ + theft_seed seed; +}; + +/* Internal state for incremental hashing. */ +struct theft_hasher { + theft_hash accum; +}; + +#endif diff --git a/deps/theft/theft_types_internal.h b/deps/theft/theft_types_internal.h new file mode 100644 index 0000000..2f2af79 --- /dev/null +++ b/deps/theft/theft_types_internal.h @@ -0,0 +1,73 @@ +#ifndef THEFT_TYPES_INTERNAL_H +#define THEFT_TYPES_INTERNAL_H + +typedef struct theft theft; + +#define THEFT_MAX_TACTICS ((uint32_t)-1) +#define DEFAULT_THEFT_SEED 0xa600d16b175eedL + +typedef enum { + ALL_GEN_OK, /* all arguments generated okay */ + ALL_GEN_SKIP, /* skip due to user constraints */ + ALL_GEN_DUP, /* skip probably duplicated trial */ + ALL_GEN_ERROR, /* memory error or other failure */ +} all_gen_res_t; + +typedef enum { + SHRINK_OK, /* simplified argument further */ + SHRINK_DEAD_END, /* at local minima */ + SHRINK_ERROR, /* hard error during shrinking */ +} shrink_res; + +/* Testing context for a specific property function. */ +struct theft_propfun_info { + const char *name; /* property name, can be NULL */ + theft_propfun *fun; /* property function under test */ + + /* Type info for ARITY arguments. */ + uint8_t arity; /* number of arguments */ + struct theft_type_info *type_info[THEFT_MAX_ARITY]; + + /* Optional array of seeds to always run. + * Can be used for regression tests. */ + int always_seed_count; /* number of seeds */ + theft_seed *always_seeds; /* seeds to always run */ +}; + +static theft_trial_res +call_fun(struct theft_propfun_info *info, void **args); + +static bool run_trial(struct theft *t, struct theft_propfun_info *info, + void **args, theft_progress_cb *cb, void *env, + struct theft_trial_report *r, struct theft_trial_info *ti, + theft_progress_callback_res *cres); + +static void mark_called(theft *t, struct theft_propfun_info *info, + void **args, void *env); + +static bool check_called(theft *t, struct theft_propfun_info *info, + void **args, void *env); + +static theft_progress_callback_res report_on_failure(theft *t, + struct theft_propfun_info *info, + struct theft_trial_info *ti, theft_progress_cb *cb, void *env); + +static all_gen_res_t gen_all_args(theft *t, struct theft_propfun_info *info, + theft_seed seed, void *args[THEFT_MAX_ARITY], void *env); + +static void free_args(struct theft_propfun_info *info, + void **args, void *env); + +static theft_run_res theft_run_internal(struct theft *t, + struct theft_propfun_info *info, + int trials, theft_progress_cb *cb, void *env, + struct theft_trial_report *r); + +static bool attempt_to_shrink(theft *t, struct theft_propfun_info *info, + void *args[], void *env); + +static shrink_res +attempt_to_shrink_arg(theft *t, struct theft_propfun_info *info, + void *args[], void *env, int ai); + +#endif -- cgit v1.2.3