summaryrefslogtreecommitdiff
path: root/users/drashna/keyrecords/autocorrection
diff options
context:
space:
mode:
Diffstat (limited to 'users/drashna/keyrecords/autocorrection')
-rw-r--r--users/drashna/keyrecords/autocorrection/autocorrection.c304
-rw-r--r--users/drashna/keyrecords/autocorrection/autocorrection.h17
-rw-r--r--users/drashna/keyrecords/autocorrection/autocorrection_data.h1
-rwxr-xr-xusers/drashna/keyrecords/autocorrection/make_autocorrection_data.py298
-rw-r--r--users/drashna/keyrecords/autocorrection/readme.md301
5 files changed, 0 insertions, 921 deletions
diff --git a/users/drashna/keyrecords/autocorrection/autocorrection.c b/users/drashna/keyrecords/autocorrection/autocorrection.c
deleted file mode 100644
index 90fdba8f5e..0000000000
--- a/users/drashna/keyrecords/autocorrection/autocorrection.c
+++ /dev/null
@@ -1,304 +0,0 @@
-// Copyright 2021 Google LLC
-// Copyright 2021 @filterpaper
-// SPDX-License-Identifier: Apache-2.0
-// Original source: https://getreuer.info/posts/keyboards/autocorrection
-
-#include "autocorrection.h"
-#include <string.h>
-
-#if __has_include("autocorrection_data.h")
-# pragma GCC push_options
-# pragma GCC optimize("O0")
-# include "autocorrection_data.h"
-# ifndef AUTOCORRECTION_MIN_LENGTH
-# define AUTOCORRECTION_MIN_LENGTH AUTOCORRECT_MIN_LENGTH
-# endif
-# ifndef AUTOCORRECTION_MAX_LENGTH
-# define AUTOCORRECTION_MAX_LENGTH AUTOCORRECT_MAX_LENGTH
-# endif
-# define autocorrection_data autocorrect_data
-# if AUTOCORRECTION_MIN_LENGTH < 4
-# error Minimum Length is too short and may cause overflows
-# endif
-# if DICTIONARY_SIZE > SIZE_MAX
-# error Dictionary size excees maximum size permitted
-# endif
-
-static uint8_t typo_buffer[AUTOCORRECT_MAX_LENGTH] = {KC_SPC};
-static uint8_t typo_buffer_size = 1;
-
-/**
- * @brief Reports whether autocorrect is currently enabled.
- *
- * Reads the `autocorrection` flag of `userspace_config`; nothing is modified.
- *
- * @return true if enabled
- * @return false if disabled
- */
-bool autocorrect_is_enabled(void) {
- return userspace_config.autocorrection;
-}
-
-/**
- * @brief Enables autocorrect and persists the setting.
- *
- * Sets the `autocorrection` flag of `userspace_config` and passes the raw
- * config value to eeconfig_update_user(). The typo buffer is left untouched.
- */
-void autocorrect_enable(void) {
- userspace_config.autocorrection = true;
- eeconfig_update_user(userspace_config.raw);
-}
-
-/**
- * @brief Disables autocorrect and persists the setting.
- *
- * Clears the `autocorrection` flag of `userspace_config`, empties the typo
- * buffer, and passes the raw config value to eeconfig_update_user().
- */
-void autocorrect_disable(void) {
- userspace_config.autocorrection = false;
- typo_buffer_size = 0; // Forget everything typed so far.
- eeconfig_update_user(userspace_config.raw);
-}
-
-/**
- * @brief Flips autocorrect between enabled and disabled and persists the setting.
- *
- * The typo buffer is emptied on every toggle, whichever state results, and
- * the raw config value is passed to eeconfig_update_user().
- */
-void autocorrect_toggle(void) {
- userspace_config.autocorrection = !userspace_config.autocorrection;
- typo_buffer_size = 0; // Forget everything typed so far.
- eeconfig_update_user(userspace_config.raw);
-}
-
-/**
- * @brief Decides whether a key press should be fed to autocorrect, and
- *        reduces wrapped keycodes to their basic 8-bit keycode.
- *
- * Declared weak, so another definition of this function elsewhere in the
- * build takes precedence over this one.
- *
- * @param keycode In/out: keycode registered by matrix press, per keymap. May be
- *                rewritten to its low byte for shifted, tap-hold and swap-hands keys.
- * @param record keyrecord_t structure; its tap count is read for tap-hold keys
- * @param typo_buffer_size In/out: set to 0 when a non-shift mod is active
- * @param mods In/out: active mods; MOD_LSFT / MOD_RSFT is OR-ed in for shifted keycodes
- * @return true Allow autocorrection
- * @return false Stop processing and escape from autocorrect.
- */
-__attribute__((weak)) bool process_autocorrect_user(uint16_t *keycode, keyrecord_t *record, uint8_t *typo_buffer_size, uint8_t *mods) {
- // See quantum_keycodes.h for reference on these matched ranges.
- switch (*keycode) {
- // Exclude these keycodes from processing.
- case KC_LSFT:
- case KC_RSFT:
- case KC_CAPS:
- case QK_TO ... QK_ONE_SHOT_LAYER_MAX:
- case QK_LAYER_TAP_TOGGLE ... QK_LAYER_MOD_MAX:
- case QK_ONE_SHOT_MOD ... QK_ONE_SHOT_MOD_MAX:
- return false;
-
- // Mask for base keycode from shifted keys.
- case QK_LSFT ... QK_LSFT + 255:
- case QK_RSFT ... QK_RSFT + 255:
- if (*keycode >= QK_LSFT && *keycode <= (QK_LSFT + 255)) { // Work out which of the two ranges matched.
- *mods |= MOD_LSFT;
- } else {
- *mods |= MOD_RSFT;
- }
- *keycode &= 0xFF; // Get the basic keycode.
- return true; // Returns directly, so the non-shift mod check after the switch is skipped.
-# ifndef NO_ACTION_TAPPING
- // Exclude tap-hold keys when they are held down
- // and mask for base keycode when they are tapped.
- case QK_LAYER_TAP ... QK_LAYER_TAP_MAX:
-# ifdef NO_ACTION_LAYER
- // Exclude Layer Tap, if layers are disabled
- // but action tapping is still enabled.
- return false;
-# endif
- case QK_MOD_TAP ... QK_MOD_TAP_MAX: // Layer-tap keys fall through to here when NO_ACTION_LAYER is not defined.
- // Exclude hold keycode
- if (!record->tap.count) {
- return false;
- }
- *keycode &= 0xFF; // Keep only the tapped basic keycode.
- break;
-# else
- case QK_MOD_TAP ... QK_MOD_TAP_MAX:
- case QK_LAYER_TAP ... QK_LAYER_TAP_MAX:
- // Exclude if disabled
- return false;
-# endif
- // Exclude swap hands keys when they are held down
- // and mask for base keycode when they are tapped.
- case QK_SWAP_HANDS ... QK_SWAP_HANDS_MAX:
-# ifdef SWAP_HANDS_ENABLE
- if (*keycode >= 0x56F0 || !record->tap.count) { // NOTE(review): 0x56F0 is a raw magic value, presumably the first non-tap swap-hands keycode; confirm against quantum_keycodes.h.
- return false;
- }
- *keycode &= 0xFF; // Keep only the tapped basic keycode.
- break;
-# else
- // Exclude if disabled
- return false;
-# endif
- }
-
- // Disable autocorrect while a mod other than shift is active.
- if ((*mods & ~MOD_MASK_SHIFT) != 0) {
- *typo_buffer_size = 0; // Also discard what has been typed so far.
- return false;
- }
-
- return true;
-}
-
-/**
- * @brief Hook invoked when autocorrection has been triggered.
- *
- * Declared weak, so another definition elsewhere in the build takes
- * precedence. This default does nothing and always returns true, which makes
- * the caller perform the replacement itself.
- *
- * @param backspaces number of characters to remove
- * @param str pointer to PROGMEM string to replace mistyped selection with
- * @return true apply correction
- * @return false user handled replacement
- */
-__attribute__((weak)) bool apply_autocorrect(uint8_t backspaces, const char *str) {
- return true;
-}
-
-/**
- * @brief Process handler for autocorrect feature
- *
- * Handles the AUTOCORRECT_ON / AUTOCORRECT_OFF / AUTOCORRECT_TOGGLE keycodes,
- * then, for key presses while autocorrect is enabled, appends the key to the
- * typo buffer and walks the serialized trie in `autocorrect_data` from the
- * newest buffered key backwards. When a leaf is reached, the stored number of
- * backspaces is tapped and the stored replacement string is sent, unless
- * apply_autocorrect() returns false.
- *
- * Node encoding as read by this function: bit 64 set on the first byte marks a
- * node with multiple children (3-byte entries: keycode, 16-bit little-endian
- * link); bit 128 marks a leaf whose low 6 bits are the backspace count and
- * whose following bytes are the replacement string.
- *
- * @param keycode Keycode registered by matrix press, per keymap
- * @param record keyrecord_t structure
- * @return true Continue processing keycodes, and send to host
- * @return false Stop processing keycodes, and don't send to host
- */
-bool process_autocorrection(uint16_t keycode, keyrecord_t *record) {
- uint8_t mods = get_mods();
-# ifndef NO_ACTION_ONESHOT
- mods |= get_oneshot_mods(); // Treat one-shot mods like held mods.
-# endif
-
- if ((keycode >= AUTOCORRECT_ON && keycode <= AUTOCORRECT_TOGGLE) && record->event.pressed) { // Control keycodes act on press only.
- if (keycode == AUTOCORRECT_ON) {
- autocorrect_enable();
- } else if (keycode == AUTOCORRECT_OFF) {
- autocorrect_disable();
- } else if (keycode == AUTOCORRECT_TOGGLE) {
- autocorrect_toggle();
- } else {
- return true; // In range but not one of the three known keycodes: leave it alone.
- }
-
- return false; // Control keycode consumed.
- }
-
- if (!autocorrect_is_enabled()) {
- typo_buffer_size = 0; // Keep the buffer empty while disabled.
- return true;
- }
-
- if (!record->event.pressed) { // Key releases are never buffered.
- return true;
- }
-
- // autocorrect keycode verification and extraction
- if (!process_autocorrect_user(&keycode, record, &typo_buffer_size, &mods)) {
- return true;
- }
-
- // keycode buffer check
- switch (keycode) {
- case KC_A ... KC_Z:
- // process normally
- break;
- case KC_1 ... KC_0:
- case KC_TAB ... KC_SEMICOLON:
- case KC_GRAVE ... KC_SLASH:
- // Set a word boundary if space, period, digit, etc. is pressed.
- keycode = KC_SPC;
- break;
- case KC_ENTER:
- // Behave more conservatively for the enter key. Reset, so that enter
- // can't be used on a word ending.
- typo_buffer_size = 0;
- keycode = KC_SPC;
- break;
- case KC_BSPC:
- // Remove last character from the buffer.
- if (typo_buffer_size > 0) {
- --typo_buffer_size;
- }
- return true;
- case KC_QUOTE:
- // Treat " (shifted ') as a word boundary.
- if ((mods & MOD_MASK_SHIFT) != 0) {
- keycode = KC_SPC;
- }
- break; // An unshifted ' is buffered as KC_QUOTE itself.
- default:
- // Clear state if some other non-alpha key is pressed.
- typo_buffer_size = 0;
- return true;
- }
-
- // Rotate oldest character if buffer is full.
- if (typo_buffer_size >= AUTOCORRECT_MAX_LENGTH) {
- memmove(typo_buffer, typo_buffer + 1, AUTOCORRECT_MAX_LENGTH - 1);
- typo_buffer_size = AUTOCORRECT_MAX_LENGTH - 1;
- }
-
- // Append `keycode` to buffer.
- typo_buffer[typo_buffer_size++] = keycode;
- // Return if buffer is smaller than the shortest word.
- if (typo_buffer_size < AUTOCORRECT_MIN_LENGTH) {
- return true;
- }
-
- // Check for typo in buffer using a trie stored in `autocorrect_data`.
- uint16_t state = 0; // Byte offset of the current node within autocorrect_data.
- uint8_t code = pgm_read_byte(autocorrect_data + state);
- for (int8_t i = typo_buffer_size - 1; i >= 0; --i) { // Walk the buffer from the newest key to the oldest.
- uint8_t const key_i = typo_buffer[i];
-
- if (code & 64) { // Check for match in node with multiple children.
- code &= 63;
- for (; code != key_i; code = pgm_read_byte(autocorrect_data + (state += 3))) { // Each child entry is 3 bytes: keycode + 2-byte link.
- if (!code) return true; // A 0 byte ends the child list: no match.
- }
- // Follow link to child node.
- state = (pgm_read_byte(autocorrect_data + state + 1) | pgm_read_byte(autocorrect_data + state + 2) << 8);
- // Check for match in node with single child.
- } else if (code != key_i) {
- return true;
- } else if (!(code = pgm_read_byte(autocorrect_data + (++state)))) { // Advance along the chain; a 0 byte ends it.
- ++state; // Step over the terminator to the node that follows.
- }
-
- // Stop if `state` becomes an invalid index. This should not normally
- // happen, it is a safeguard in case of a bug, data corruption, etc.
- if (state >= DICTIONARY_SIZE) {
- return true;
- }
-
- code = pgm_read_byte(autocorrect_data + state);
-
- if (code & 128) { // A typo was found! Apply autocorrect.
- const uint8_t backspaces = (code & 63) + !record->event.pressed; // Release events returned earlier, so the second term is always 0 here.
- if (apply_autocorrect(backspaces, (char const *)(autocorrect_data + state + 1))) {
- for (uint8_t i = 0; i < backspaces; ++i) {
- tap_code(KC_BSPC);
- }
- send_string_P((char const *)(autocorrect_data + state + 1)); // The replacement text starts right after the leaf byte.
- }
-
- if (keycode == KC_SPC) { // Triggered by a word-boundary key: let that key through and restart at a boundary.
- typo_buffer[0] = KC_SPC;
- typo_buffer_size = 1;
- return true;
- } else { // Triggered by a letter: the replacement was sent instead, so swallow the key.
- typo_buffer_size = 0;
- return false;
- }
- }
- }
- return true;
-}
-# pragma GCC pop_options
-#else
-# pragma message "Warning!!! Autocorrect is not correctly set up!"
-/**
- * @brief Fallback handler compiled when "autocorrection_data.h" cannot be found.
- *
- * Performs no autocorrection; every key event is passed through.
- *
- * @param keycode Keycode registered by matrix press (unused)
- * @param record keyrecord_t structure (unused)
- * @return true Always: continue processing keycodes, and send to host
- */
-bool process_autocorrection(uint16_t keycode, keyrecord_t* record) {
- return true;
-}
-#endif
diff --git a/users/drashna/keyrecords/autocorrection/autocorrection.h b/users/drashna/keyrecords/autocorrection/autocorrection.h
deleted file mode 100644
index 8946b91f1f..0000000000
--- a/users/drashna/keyrecords/autocorrection/autocorrection.h
+++ /dev/null
@@ -1,17 +0,0 @@
-// Copyright 2021 Google LLC
-// Copyright 2021 @filterpaper
-// SPDX-License-Identifier: Apache-2.0
-// Original source: https://getreuer.info/posts/keyboards/autocorrection
-
-#pragma once
-
-#include "drashna.h"
-
-bool process_autocorrection(uint16_t keycode, keyrecord_t *record); // Main key handler; false means the key was consumed.
-bool process_autocorrect_user(uint16_t *keycode, keyrecord_t *record, uint8_t *typo_buffer_size, uint8_t *mods); // Per-key filter; may rewrite *keycode and *mods.
-bool apply_autocorrect(uint8_t backspaces, const char *str); // Correction hook; false means the replacement was handled by the hook.
-
-bool autocorrect_is_enabled(void); // Query the enabled flag.
-void autocorrect_enable(void); // Turn on and persist.
-void autocorrect_disable(void); // Turn off and persist.
-void autocorrect_toggle(void); // Flip and persist.
diff --git a/users/drashna/keyrecords/autocorrection/autocorrection_data.h b/users/drashna/keyrecords/autocorrection/autocorrection_data.h
deleted file mode 100644
index 90484d3b12..0000000000
--- a/users/drashna/keyrecords/autocorrection/autocorrection_data.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "autocorrect_data.h"
diff --git a/users/drashna/keyrecords/autocorrection/make_autocorrection_data.py b/users/drashna/keyrecords/autocorrection/make_autocorrection_data.py
deleted file mode 100755
index 0dd9b78b9c..0000000000
--- a/users/drashna/keyrecords/autocorrection/make_autocorrection_data.py
+++ /dev/null
@@ -1,298 +0,0 @@
-# Copyright 2021-2022 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Python program to make autocorrection_data.h.
-
-This program reads "autocorrection_dict.txt" and generates a C source file
-"autocorrection_data.h" with a serialized trie embedded as an array. Run this
-program without arguments like
-
-$ python3 make_autocorrection_data.py
-
-Or to read from a different typo dict file, pass it as the first argument like
-
-$ python3 make_autocorrection_data.py dict.txt
-
-Each line of the dict file defines one typo and its correction with the syntax
-"typo -> correction". Blank lines or lines starting with '#' are ignored.
-Example:
-
- :thier -> their
- dosen't -> doesn't
- fitler -> filter
- lenght -> length
- ouput -> output
- widht -> width
-
-See autocorrection_dict_extra.txt for a larger example.
-
-For full documentation, see
-https://getreuer.info/posts/keyboards/autocorrection
-"""
-
-import sys
-import textwrap
-from typing import Any, Dict, Iterator, List, Tuple
-
-try:  # Optional dependency; NOTE(review): parse_file() in this file uses its own hard-coded tuple, not CORRECT_WORDS.
- from english_words import english_words_lower_alpha_set as CORRECT_WORDS
-except ImportError:
- print('Autocorrection will falsely trigger when a typo is a substring of a '
- 'correctly spelled word. To check for this, install the english_words '
- 'package and rerun this script:\n\n pip install english_words\n')
- # Use a minimal word list as a fallback.
- CORRECT_WORDS = ('apparent', 'association', 'available', 'classification',
- 'effect', 'entertainment', 'fantastic', 'information',
- 'integrate', 'international', 'language', 'loosest',
- 'manual', 'nothing', 'provides', 'reference', 'statehood',
- 'technology', 'virtually', 'wealthier', 'wonderful')
-
-KC_A = 4  # Keycode for 'a'; TYPO_CHARS maps b-z to the consecutive values after it.
-KC_SPC = 0x2c  # Keycode that TYPO_CHARS assigns to the ':' word-break marker.
-KC_QUOT = 0x34  # Keycode that TYPO_CHARS assigns to the apostrophe.
-
-TYPO_CHARS = dict(  # Maps every character allowed in a typo to the keycode serialized into the trie.
- [
- ("'", KC_QUOT),
- (':', KC_SPC), # "Word break" character.
- ] +
- # Characters a-z.
- [(chr(c), c + KC_A - ord('a')) for c in range(ord('a'), ord('z') + 1)]
-)
-
-
-def parse_file(file_name: str) -> List[Tuple[str, str]]:
- """Parses autocorrections dictionary file.
-
- Each line of the file defines one typo and its correction with the syntax
- "typo -> correction". Blank lines or lines starting with '#' are ignored. The
- function validates that typos only have characters in TYPO_CHARS and that
- typos are not substrings of other typos, and it checks that typos don't
- trigger on a hard-coded list of correctly spelled words.
-
- Duplicate typos are skipped with a warning. An invalid character or a
- substring conflict prints an error and exits the process with status 1.
- Typos shorter than 5 characters only produce a warning.
-
- Args:
- file_name: String, path of the autocorrections dictionary.
- Returns:
- List of (typo, correction) tuples.
- """
- correct_words = ('information', 'available', 'international', 'language', 'loosest', 'reference', 'wealthier', 'entertainment', 'association', 'provides', 'technology', 'statehood')  # NOTE(review): used instead of the module-level CORRECT_WORDS; confirm this is intended.
- autocorrections = []
- typos = set()  # Typos accepted so far, for duplicate and substring checks.
- for line_number, typo, correction in parse_file_lines(file_name):
- if typo in typos:
- print(f'Warning:{line_number}: Ignoring duplicate typo: "{typo}"')
- continue
-
- # Check that `typo` is valid.
- if not(all([c in TYPO_CHARS for c in typo])):
- print(f'Error:{line_number}: Typo "{typo}" has '
- 'characters other than ' + ''.join(TYPO_CHARS.keys()))
- sys.exit(1)
- for other_typo in typos:  # Compare against every typo accepted so far.
- if typo in other_typo or other_typo in typo:
- print(f'Error:{line_number}: Typos may not be substrings of one '
- f'another, otherwise the longer typo would never trigger: '
- f'"{typo}" vs. "{other_typo}".')
- sys.exit(1)
- if len(typo) < 5:
- print(f'Warning:{line_number}: It is suggested that typos are at '
- f'least 5 characters long to avoid false triggers: "{typo}"')
-
- check_typo_against_dictionary(typo, line_number, correct_words)  # Only prints warnings; never rejects the typo.
-
- autocorrections.append((typo, correction))
- typos.add(typo)
-
- return autocorrections
-
-
-def make_trie(autocorrections: List[Tuple[str, str]]) -> Dict[str, Any]:
- """Makes a trie from the typos, writing in reverse.
-
- Each typo is inserted last character first, one nested dict per character.
- The node reached after the typo's first character gets a 'LEAF' key holding
- the (typo, correction) tuple.
-
- Args:
- autocorrections: List of (typo, correction) tuples.
- Returns:
- Dict of dict, representing the trie.
- """
- trie = {}
- for typo, correction in autocorrections:
- node = trie
- for letter in typo[::-1]:  # Walk the typo from its last character to its first.
- node = node.setdefault(letter, {})  # Reuse the child node if it exists, else create it.
- node['LEAF'] = (typo, correction)
-
- return trie
-
-
-def parse_file_lines(file_name: str) -> Iterator[Tuple[int, str, str]]:
- """Parses lines read from `file_name` into typo-correction pairs.
-
- Blank lines and lines whose first non-space character is '#' are skipped.
- A line without '->', or with nothing before it, prints an error and exits
- the process with status 1.
-
- Args:
- file_name: String, path of the autocorrections dictionary.
- Yields:
- (line_number, typo, correction) tuples, where line_number is 1-based, the
- typo is lowercased with spaces replaced by ':', and the correction is
- stripped of surrounding whitespace but otherwise unchanged.
- """
-
- line_number = 0
- for line in open(file_name, 'rt'):  # NOTE(review): the file object is never explicitly closed.
- line_number += 1
- line = line.strip()
- if line and line[0] != '#':
- # Parse syntax "typo -> correction", using strip to ignore indenting.
- tokens = [token.strip() for token in line.split('->', 1)]  # Split on the first '->' only, so the correction may contain '->'.
- if len(tokens) != 2 or not tokens[0]:
- print(f'Error:{line_number}: Invalid syntax: "{line}"')
- sys.exit(1)
-
- typo, correction = tokens
- typo = typo.lower() # Force typos to lowercase.
- typo = typo.replace(' ', ':')  # A space inside a typo is written as the ':' word-break marker.
-
- yield line_number, typo, correction
-
-
-def check_typo_against_dictionary(typo: str, line_number: int, correct_words) -> None:
- """Checks `typo` against English dictionary words.
-
- Prints a warning for every word in `correct_words` on which the typo would
- trigger. The ':' word-break markers at either end of the typo select the
- kind of match: whole word, prefix, suffix, or substring. Nothing is
- returned and nothing is rejected.
-
- Args:
- typo: String, the typo, possibly with ':' at the start and/or end.
- line_number: Int, line number used in the warning messages.
- correct_words: Collection of correctly spelled words to test against.
- """
-
- if typo.startswith(':') and typo.endswith(':'):  # Bounded on both sides: whole-word match.
- if typo[1:-1] in correct_words:
- print(f'Warning:{line_number}: Typo "{typo}" is a correctly spelled dictionary word.')
- elif typo.startswith(':') and not typo.endswith(':'):  # Bounded at the start only: prefix match.
- for word in correct_words:
- if word.startswith(typo[1:]):
- print(f'Warning:{line_number}: Typo "{typo}" would falsely trigger on correctly spelled word "{word}".')
- elif not typo.startswith(':') and typo.endswith(':'):  # Bounded at the end only: suffix match.
- for word in correct_words:
- if word.endswith(typo[:-1]):
- print(f'Warning:{line_number}: Typo "{typo}" would falsely trigger on correctly spelled word "{word}".')
- elif not typo.startswith(':') and not typo.endswith(':'):  # Unbounded: substring match.
- for word in correct_words:
- if typo in word:
- print(f'Warning:{line_number}: Typo "{typo}" would falsely trigger on correctly spelled word "{word}".')
-
-
-def serialize_trie(autocorrections: List[Tuple[str, str]],
- trie: Dict[str, Any]) -> List[int]:
- """Serializes trie and correction data in a form readable by the C code.
-
- The trie is flattened depth first into a table of entries, each entry's byte
- offset is computed, and the entries are then emitted in table order. Three
- entry layouts are produced:
-
- * leaf: [backspaces + 128], the ASCII bytes of the correction suffix, 0
- * chain (one child): the keycodes of the chained characters, 0
- * branch (several children): per child a keycode followed by a 2-byte link,
- with 64 OR-ed into the first keycode only, then 0
-
- Args:
- autocorrections: List of (typo, correction) tuples. Not read by this function.
- trie: Dict of dicts.
- Returns:
- List of ints in the range 0-255.
- """
- table = []  # Entries in depth-first visiting order; also the output order.
-
- # Traverse trie in depth first order.
- def traverse(trie_node: Dict[str, Any]) -> Dict[str, Any]:
- if 'LEAF' in trie_node: # Handle a leaf trie node.
- typo, correction = trie_node['LEAF']
- word_boundary_ending = typo[-1] == ':'
- typo = typo.strip(':')  # Drop the word-break markers before comparing with the correction.
- i = 0 # Make the autocorrection data for this entry and serialize it.
- while i < min(len(typo), len(correction)) and typo[i] == correction[i]:  # i becomes the length of the common prefix.
- i += 1
- backspaces = len(typo) - i - 1 + word_boundary_ending  # NOTE(review): the -1 presumably accounts for the final typed key not having been sent yet; confirm against the C handler.
- assert 0 <= backspaces <= 63  # Must fit in the low 6 bits of the leaf byte.
- correction = correction[i:]  # Only the part after the common prefix is stored.
- data = [backspaces + 128] + list(bytes(correction, 'ascii')) + [0]
-
- entry = {'data': data, 'links': [], 'byte_offset': 0}
- table.append(entry)
- elif len(trie_node) == 1: # Handle trie node with a single child.
- c, trie_node = next(iter(trie_node.items()))
- entry = {'chars': c, 'byte_offset': 0}
-
- # It's common for a trie to have long chains of single-child nodes. We
- # find the whole chain so that we can serialize it more efficiently.
- while len(trie_node) == 1 and 'LEAF' not in trie_node:
- c, trie_node = next(iter(trie_node.items()))
- entry['chars'] += c
-
- table.append(entry)  # Appended before recursing, so a parent always precedes its children in the table.
- entry['links'] = [traverse(trie_node)]
- else: # Handle trie node with multiple children.
- entry = {'chars': ''.join(sorted(trie_node.keys())), 'byte_offset': 0}
- table.append(entry)
- entry['links'] = [traverse(trie_node[c]) for c in entry['chars']]
- return entry
-
- traverse(trie)
-
- def serialize(e: Dict[str, Any]) -> List[int]:
- if not e['links']: # Handle a leaf table entry.
- return e['data']
- elif len(e['links']) == 1: # Handle a chain table entry.
- return [TYPO_CHARS[c] for c in e['chars']] + [0]
- else: # Handle a branch table entry.
- data = []
- for c, link in zip(e['chars'], e['links']):
- data += [TYPO_CHARS[c] | (0 if data else 64)] + encode_link(link)  # 64 is OR-ed into the first child's keycode only.
- return data + [0]
-
- byte_offset = 0
- for e in table: # To encode links, first compute byte offset of each entry.
- e['byte_offset'] = byte_offset
- byte_offset += len(serialize(e))  # An entry's length does not depend on the link values, so this first pass is safe.
-
- return [b for e in table for b in serialize(e)] # Serialize final table.
-
-
-def encode_link(link: Dict[str, Any]) -> List[int]:
- """Encodes a node link as two bytes.
-
- Args:
- link: Table entry dict; only its 'byte_offset' value is read.
- Returns:
- [low byte, high byte] of the entry's byte offset. Prints an error and exits
- the process with status 1 if the offset is outside 0..0xffff.
- """
- byte_offset = link['byte_offset']
- if not (0 <= byte_offset <= 0xffff):
- print('Error: The autocorrection table is too large, a node link exceeds '
- '64KB limit. Try reducing the autocorrection dict to fewer entries.')
- sys.exit(1)
- return [byte_offset & 255, byte_offset >> 8]  # Little-endian: low byte first.
-
-
-def write_generated_code(autocorrections: List[Tuple[str, str]],
- data: List[int],
- file_name: str) -> None:
- """Writes autocorrection data as generated C code to `file_name`.
-
- The output consists of a comment listing every dictionary entry, the
- AUTOCORRECTION_MIN_LENGTH and AUTOCORRECTION_MAX_LENGTH defines (lengths of
- the shortest and longest typo), and the `autocorrection_data` PROGMEM array.
-
- Args:
- autocorrections: List of (typo, correction) tuples.
- data: List of ints in 0-255, the serialized trie.
- file_name: String, path of the output C file.
- """
- assert all(0 <= b <= 255 for b in data)
-
- def typo_len(e: Tuple[str, str]) -> int:
- return len(e[0])
-
- min_typo = min(autocorrections, key=typo_len)[0]  # NOTE(review): min()/max() raise ValueError when `autocorrections` is empty.
- max_typo = max(autocorrections, key=typo_len)[0]
- generated_code = ''.join([
- '// Generated code.\n\n',
- f'// Autocorrection dictionary ({len(autocorrections)} entries):\n',
- ''.join(sorted(f'// {typo:<{len(max_typo)}} -> {correction}\n'
- for typo, correction in autocorrections)),
- f'\n#define AUTOCORRECTION_MIN_LENGTH {len(min_typo)} // "{min_typo}"\n',
- f'#define AUTOCORRECTION_MAX_LENGTH {len(max_typo)} // "{max_typo}"\n\n',
- textwrap.fill('static const uint8_t autocorrection_data[%d] PROGMEM = {%s};' % (
- len(data), ', '.join(map(str, data))), width=80, subsequent_indent=' '),
- '\n\n'])
-
- with open(file_name, 'wt') as f:
- f.write(generated_code)
-
-
-def main(argv):  # Entry point: parse the dictionary, build and serialize the trie, write the header.
- dict_file = argv[1] if len(argv) > 1 else 'autocorrection_dict.txt'  # An optional first argument overrides the default dictionary path.
- autocorrections = parse_file(dict_file)
- trie = make_trie(autocorrections)
- data = serialize_trie(autocorrections, trie)
- print(f'Processed %d autocorrection entries to table with %d bytes.'  # NOTE(review): the f prefix is redundant; the values are filled in by the % operator.
- % (len(autocorrections), len(data)))
- write_generated_code(autocorrections, data, 'autocorrection_data.h')  # The output file name is fixed.
-
-if __name__ == '__main__':
- main(sys.argv)
diff --git a/users/drashna/keyrecords/autocorrection/readme.md b/users/drashna/keyrecords/autocorrection/readme.md
deleted file mode 100644
index d920508793..0000000000
--- a/users/drashna/keyrecords/autocorrection/readme.md
+++ /dev/null
@@ -1,301 +0,0 @@
-# Autocorrection
-
-This is taken from [Pascal Getreuer's implementation](https://getreuer.info/posts/keyboards/autocorrection/index.html), with a number of modifications.
-
-To enable autocorrection, add `AUTOCORRECTION_ENABLE = yes` to your `rules.mk` file.
-
-This is mostly a reproduction of Pascal's docs:
-
-## Overview
-Some words are more prone to typos than others. I have a habit of typo-ing *ouput* and *fitler*. This post describes a rudimentary autocorrection implementation that runs on your keyboard with QMK.
-
-The animation below shows the effect as I type *aparent*. As I press the final t, the autocorrection feature detects the typo and automatically sends keys to correct it:
-
-Example: Autocorrecting *aparent* → apparent.
-
-**Features**:
-* It runs on your keyboard, so it is always active no matter what software.
-* Low resource cost: for an autocorrection dictionary of 70 entries, firmware size cost is 1620 bytes and average CPU cost per key press is about 20 µs.
-* It is case insensitive. It corrects Fitler to Filter and FITLER to FILTER.
-* It works within words. It corrects fitlered and fitlering and, usefully for programming, typos within longer identifiers like DesignButterworthFitler.
-
-**Limitations**: Running autocorrection on the keyboard comes with some constraints. It is rudimentary like I said:
-* It is limited to alphabet characters a–z. No accented or Unicode letters; I’m sorry this probably isn’t useful for languages besides English.
-* It does not follow mouse or hotkey driven cursor movement.
-
-## Taking autocorrection for a test drive
-With the above flashed to your keyboard, try for instance typing the misspelled word ouput. The instant you type the final t, the word should be speedily autocorrected to output. As further tests, try becuase and invliad.
-
-Here is the full list of typos corrected using the provided autocorrection_data.h file. The `:` character is a special marker denoting a word break. See below for how to change the autocorrection dictionary.
-
-```
-:guage -> gauge
-:the:the: -> the
-:thier -> their
-:ture -> true
-accomodate -> accommodate
-acommodate -> accommodate
-aparent -> apparent
-aparrent -> apparent
-apparant -> apparent
-apparrent -> apparent
-aquire -> acquire
-becuase -> because
-cauhgt -> caught
-cheif -> chief
-choosen -> chosen
-cieling -> ceiling
-collegue -> colleague
-concensus -> consensus
-contians -> contains
-cosnt -> const
-dervied -> derived
-fales -> false
-fasle -> false
-fitler -> filter
-flase -> false
-foward -> forward
-frequecy -> frequency
-gaurantee -> guarantee
-guaratee -> guarantee
-heigth -> height
-heirarchy -> hierarchy
-inclued -> include
-interator -> iterator
-intput -> input
-invliad -> invalid
-lenght -> length
-liasion -> liaison
-libary -> library
-listner -> listener
-looses: -> loses
-looup -> lookup
-manefist -> manifest
-namesapce -> namespace
-namespcae -> namespace
-occassion -> occasion
-occured -> occurred
-ouptut -> output
-ouput -> output
-overide -> override
-postion -> position
-priviledge -> privilege
-psuedo -> pseudo
-recieve -> receive
-refered -> referred
-relevent -> relevant
-repitition -> repetition
-reuslt -> result
-retrun -> return
-retun -> return
-reutrn -> return
-saftey -> safety
-seperate -> separate
-singed -> signed
-stirng -> string
-strign -> string
-swithc -> switch
-swtich -> switch
-thresold -> threshold
-udpate -> update
-widht -> width
-```
-
-## Firmware size and CPU costs
-I am anxiously aware that a keyboard microcontroller has limited resources. So I was sure to measure how much memory and CPU time autocorrection consumes during development. These measurements are for the example autocorrection dictionary as used above, which has 70 entries:
-* **Firmware size**: Autocorrection increases my firmware size by a total of 1620 bytes. Breaking that down, 1104 bytes are for the autocorrection_data array and 516 bytes for the autocorrection code.
-* **CPU time**: On my Elite-C microcontrollers, the average CPU time for process_autocorrection to process an alpha key press is around 20 µs. Consider this a rough order-of-magnitude cost. Processing cost increases (more trie nodes are visited) when recent input is close to a known typo, with the max being when a long typo is matched.
-
-The costs are not free but reasonable. For reference, the firmware size cost for mouse keys is 2124 bytes and the CPU time to process a layer switch is about 70 µs, so autocorrection is cheaper than those things. Of course, the cost scales with the size of the autocorrection dictionary, so keep that in mind if you add a lot more entries.
-
-## How does it work?
-The function process_autocorrection maintains a small buffer of recent key presses. On each key press, it checks whether the buffer ends in a recognized typo, and if so, automatically sends keystrokes to correct it.
-
-The tricky part is how to efficiently check the buffer for typos. We don’t want to spend too much memory or time on storing or searching the typos. A good solution is to represent the typos with a trie data structure. A trie is a tree data structure where each node is a letter, and words are formed by following a path to one of the leaves.
-
-
-An example trie.
-Since we search whether the buffer ends in a typo, we store the trie writing in reverse. The trie is queried starting from the last letter, then second to last letter, and so on, until either a letter doesn’t match or we reach a leaf, meaning a typo was found.
-
-## Changing the autocorrection dictionary
-The file autocorrection_data.h encodes the typos to correct. While you could simply use the version of this file provided above for a practical configuration, you can make your own to personalize the autocorrection to your most troublesome typos:
-
-1. First, create an autocorrection dictionary autocorrection_dict.txt, like
-
- ```
-:thier -> their
-fitler -> filter
-lenght -> length
-ouput -> output
-widht -> width
-```
- For a practical 70-entry example, see autocorrection_dict.txt. And for a yet larger 400-entry example, see autocorrection_dict_extra.txt.
-
- The syntax is `typo -> correction`. Typos and corrections are case insensitive, and any whitespace before or after the typo and correction is ignored. The typo must be only the letters a–z, or the special character : representing a word break. The correction may have any characters.
-
-2. Use the make_autocorrection_data.py Python script to process the dictionary:
-
-```
-$ python3 make_autocorrection_data.py
-Processed 70 autocorrection entries to table with 1104 bytes.
-```
-The script arranges the entries in autocorrection_dict.txt into a trie and generates autocorrection_data.h with the serialized trie embedded as an array.
-
-3. Finally, recompile and flash your keymap.
-
-The generated C header looks like this:
-
-autocorrection_data.h
-```c
-// Generated code.
-
-#define AUTOCORRECTION_MIN_LENGTH 5 // "cheif"
-#define AUTOCORRECTION_MAX_LENGTH 10 // "accomodate"
-
-static const uint8_t autocorrection_data[1104] PROGMEM = {108, 43, 0, 6,
- 71, 0, 7, 81, 0, 8, 199, 0, 9, 240, 1, 10, 250, 1, 11, 26, 2, 17, 53, 2,
- 18, 190, 2, 19, 202, 2, 21, 212, 2, 22, 20, 3, 23, 67, 3, 28, 16, 4, 0,
- 72, 50, 0, 22, 60, 0, 0, 11, 23, 44, 8, 11, 23, 44, 0, 132, 0, 8, 22, 18,
- 18, 15, 0, 132, 115, 101, 115, 0, 11, 23, 12, 26, 22, 0, 129, 99, 104, 0,
- 68, 94, 0, 8, 106, 0, 15, 174, 0, 21, 187, 0, 0, 12, 15, 25, 17, 12, 0,
- 131, 97, 108, 105, 100, 0, 74, 119, 0, 12, 129, 0, 21, 140, 0, 24, 165,
- 0, 0, 17, 12, 22, 0, 131, 103, 110, 101, 100, 0, 25, 21, 8, 7, 0, 131,
- 105, 118, 101, 100, 0, 72, 147, 0, 24, 156, 0, 0, 9, 8, 21, 0, 129, 114,
- 101, 100, 0, 6, 6, 18, 0, 129, 114, 101, 100, 0, 15, 6, 17, 12, 0, 129,
- 100, 101, 0, 18, 22, 8, 21, 11, 23, 0, 130, 104, 111, 108, 100, 0, 4, 26,
- 18, 9, 0, 131, 114, 119, 97, 114, 100, 0, 68, 233, 0, 6, 246, 0, 7, 4, 1,
- 8, 16, 1, 10, 52, 1, 15, 81, 1, 21, 90, 1, 22, 117, 1, 23, 144, 1, 24,
- 215, 1, 25, 228, 1, 0, 6, 19, 22, 8, 16, 4, 17, 0, 130, 97, 99, 101, 0,
- 19, 4, 22, 8, 16, 4, 17, 0, 131, 112, 97, 99, 101, 0, 12, 21, 8, 25, 18,
- 0, 130, 114, 105, 100, 101, 0, 23, 0, 68, 25, 1, 17, 36, 1, 0, 21, 4, 24,
- 10, 0, 130, 110, 116, 101, 101, 0, 4, 21, 24, 4, 10, 0, 135, 117, 97,
- 114, 97, 110, 116, 101, 101, 0, 68, 59, 1, 7, 69, 1, 0, 24, 10, 44, 0,
- 131, 97, 117, 103, 101, 0, 8, 15, 12, 25, 12, 21, 19, 0, 130, 103, 101,
- 0, 22, 4, 9, 0, 130, 108, 115, 101, 0, 76, 97, 1, 24, 109, 1, 0, 24, 20,
- 4, 0, 132, 99, 113, 117, 105, 114, 101, 0, 23, 44, 0, 130, 114, 117, 101,
- 0, 4, 0, 79, 126, 1, 24, 134, 1, 0, 9, 0, 131, 97, 108, 115, 101, 0, 6,
- 8, 5, 0, 131, 97, 117, 115, 101, 0, 4, 0, 71, 156, 1, 19, 193, 1, 21,
- 203, 1, 0, 18, 16, 0, 80, 166, 1, 18, 181, 1, 0, 18, 6, 4, 0, 135, 99,
- 111, 109, 109, 111, 100, 97, 116, 101, 0, 6, 6, 4, 0, 132, 109, 111, 100,
- 97, 116, 101, 0, 7, 24, 0, 132, 112, 100, 97, 116, 101, 0, 8, 19, 8, 22,
- 0, 132, 97, 114, 97, 116, 101, 0, 10, 8, 15, 15, 18, 6, 0, 130, 97, 103,
- 117, 101, 0, 8, 12, 6, 8, 21, 0, 131, 101, 105, 118, 101, 0, 12, 8, 11,
- 6, 0, 130, 105, 101, 102, 0, 17, 0, 76, 3, 2, 21, 16, 2, 0, 15, 8, 12, 6,
- 0, 133, 101, 105, 108, 105, 110, 103, 0, 12, 23, 22, 0, 131, 114, 105,
- 110, 103, 0, 70, 33, 2, 23, 44, 2, 0, 12, 23, 26, 22, 0, 131, 105, 116,
- 99, 104, 0, 10, 12, 8, 11, 0, 129, 104, 116, 0, 72, 69, 2, 10, 80, 2, 18,
- 89, 2, 21, 156, 2, 24, 167, 2, 0, 22, 18, 18, 11, 6, 0, 131, 115, 101,
- 110, 0, 12, 21, 23, 22, 0, 129, 110, 103, 0, 12, 0, 86, 98, 2, 23, 124,
- 2, 0, 68, 105, 2, 22, 114, 2, 0, 12, 15, 0, 131, 105, 115, 111, 110, 0,
- 4, 6, 6, 18, 0, 131, 105, 111, 110, 0, 76, 131, 2, 22, 146, 2, 0, 23, 12,
- 19, 8, 21, 0, 134, 101, 116, 105, 116, 105, 111, 110, 0, 18, 19, 0, 131,
- 105, 116, 105, 111, 110, 0, 23, 24, 8, 21, 0, 131, 116, 117, 114, 110, 0,
- 85, 174, 2, 23, 183, 2, 0, 23, 8, 21, 0, 130, 117, 114, 110, 0, 8, 21, 0,
- 128, 114, 110, 0, 7, 8, 24, 22, 19, 0, 131, 101, 117, 100, 111, 0, 24,
- 18, 18, 15, 0, 129, 107, 117, 112, 0, 72, 219, 2, 18, 3, 3, 0, 76, 229,
- 2, 15, 238, 2, 17, 248, 2, 0, 11, 23, 44, 0, 130, 101, 105, 114, 0, 23,
- 12, 9, 0, 131, 108, 116, 101, 114, 0, 23, 22, 12, 15, 0, 130, 101, 110,
- 101, 114, 0, 23, 4, 21, 8, 23, 17, 12, 0, 135, 116, 101, 114, 97, 116,
- 111, 114, 0, 72, 30, 3, 17, 38, 3, 24, 51, 3, 0, 15, 4, 9, 0, 129, 115,
- 101, 0, 4, 12, 23, 17, 18, 6, 0, 131, 97, 105, 110, 115, 0, 22, 17, 8, 6,
- 17, 18, 6, 0, 133, 115, 101, 110, 115, 117, 115, 0, 74, 86, 3, 11, 96, 3,
- 15, 118, 3, 17, 129, 3, 22, 218, 3, 24, 232, 3, 0, 11, 24, 4, 6, 0, 130,
- 103, 104, 116, 0, 71, 103, 3, 10, 110, 3, 0, 12, 26, 0, 129, 116, 104, 0,
- 17, 8, 15, 0, 129, 116, 104, 0, 22, 24, 8, 21, 0, 131, 115, 117, 108,
- 116, 0, 68, 139, 3, 8, 150, 3, 22, 210, 3, 0, 21, 4, 19, 19, 4, 0, 130,
- 101, 110, 116, 0, 85, 157, 3, 25, 200, 3, 0, 68, 164, 3, 21, 175, 3, 0,
- 19, 4, 0, 132, 112, 97, 114, 101, 110, 116, 0, 4, 19, 0, 68, 185, 3, 19,
- 193, 3, 0, 133, 112, 97, 114, 101, 110, 116, 0, 4, 0, 131, 101, 110, 116,
- 0, 8, 15, 8, 21, 0, 130, 97, 110, 116, 0, 18, 6, 0, 130, 110, 115, 116,
- 0, 12, 9, 8, 17, 4, 16, 0, 132, 105, 102, 101, 115, 116, 0, 83, 239, 3,
- 23, 6, 4, 0, 87, 246, 3, 24, 254, 3, 0, 17, 12, 0, 131, 112, 117, 116, 0,
- 18, 0, 130, 116, 112, 117, 116, 0, 19, 24, 18, 0, 131, 116, 112, 117,
- 116, 0, 70, 29, 4, 8, 41, 4, 11, 51, 4, 21, 69, 4, 0, 8, 24, 20, 8, 21,
- 9, 0, 129, 110, 99, 121, 0, 23, 9, 4, 22, 0, 130, 101, 116, 121, 0, 6,
- 21, 4, 21, 12, 8, 11, 0, 135, 105, 101, 114, 97, 114, 99, 104, 121, 0, 4,
- 5, 12, 15, 0, 130, 114, 97, 114, 121, 0};
-```
-
-## Troubleshooting
-### Avoiding false triggers
-By default, typos are searched within words, to find typos within longer identifiers like maxFitlerOuput. While this is useful, a consequence is that autocorrection will falsely trigger when a typo happens to be a substring of a correctly-spelled word. For instance, if we had thier -> their as an entry, it would falsely trigger on (correct, though relatively uncommon) words like “wealthier” and “filthier.”
-
-The solution is to set a word break : before and/or after the typo to constrain matching. : matches space, period, comma, underscore, digits, and most other non-alpha characters.
-
-| Text | thier | :thier | thier: | :thier: |
-|------|-------|--------|--------|---------|
-|see thier typo|matches|matches|matches|matches|
-|it’s thiers|matches|matches|no|no|
-|wealthier words|matches|no|matches|no|
-
-:thier: is most restrictive, matching only when thier is a whole word.
-
-The make_autocorrection_data.py script makes an effort to check for entries that would falsely trigger as substrings of correct words. It searches each typo against a dictionary of 25K English words from the english_words Python package, provided it’s installed.
-
-### Overriding autocorrection
-Occasionally you might actually want to type a typo (for instance, while editing autocorrection_dict.txt) without being autocorrected. Here is a way to do that:
-
-1. Begin typing the typo.
-2. Before typing the last letter, press and release the Ctrl or Alt key.
-3. Type the remaining letters.
-
-This works because the autocorrection implementation doesn’t understand hotkeys, so it resets itself whenever a modifier other than shift is held.
-
-Alternatively, the `AUTO_CTN` keycode will toggle autocorrection on and off.
-
-## Closing thoughts
-Based on my own use, an autocorrection dictionary of a few dozen entries is enough to help in day-to-day writing. On the other hand, it is of course far from comprehensively checking that every word is spelled correctly. Keyboard microcontrollers might not have the resources to check against a full English dictionary any time soon, but a lot of editors and other software have good integrated spell check features.
-
-I suggest enabling and using spell check in combination with autocorrection:
-* Sublime: Open the View menu and enable “Spell Check.”
-* Eclipse: Open the Window menu, click Preferences, and search for “Spelling.”
-* Vim: Type :set spell, and misspellings will be highlighted. Use ]s to jump to the next misspelled word and z= to get suggested corrections for the word under the cursor. See the :help spell documentation. Vim also has an abbreviations feature that can autocorrect misspellings (see :help abbreviations).
-* Emacs: Use M-x flyspell-mode to enable Flyspell mode in the current buffer. Or for programming, use M-x flyspell-prog-mode to check comments and strings only. See the spelling documentation. There is also an abbreviations feature that can do autocorrection.
-
-Some useful resources:
-
-* Wikipedia has a [large list of common typos](https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines).
-* EmacsWiki has another [list of typos](https://www.emacswiki.org/emacs/autocorrection_abbrev_defs).
-* You can find data on English word frequencies at https://www.wordfrequency.info/samples.asp.
-
-# Appendix: Trie binary data format
-This section details how the trie is serialized to byte data in autocorrection_data. You don’t need to care about this to use this autocorrection implementation. But I document it for the record in case anyone is interested in modifying the implementation, or just curious how it works.
-
-What I did here is fairly arbitrary, but it is simple to decode and gets the job done.
-
-## Encoding
-All autocorrection data is stored in a single flat array autocorrection_data. Each trie node is associated with a byte offset into this array, where data for that node is encoded, beginning with root at offset 0. There are three kinds of nodes. The highest two bits of the first byte of the node indicate what kind:
-
-* 00 ⇒ chain node: a trie node with a single child.
-* 01 ⇒ branching node: a trie node with multiple children.
-* 10 ⇒ leaf node: a leaf, corresponding to a typo and storing its correction.
-
-An example trie.
-Branching node. Each branch is encoded with one byte for the keycode (KC_A–KC_Z) followed by a link to the child node. Links between nodes are 16-bit byte offsets relative to the beginning of the array, serialized in little endian order.
-
-All branches are serialized this way, one after another, and terminated with a zero byte. As described above, the node is identified as a branch by setting the two high bits of the first byte to 01, done by bitwise ORing the first keycode with 64. The root node for the above figure would be serialized like:
-
- +-------+-------+-------+-------+-------+-------+-------+
- | R|64 | node 2 | T | node 3 | 0 |
- +-------+-------+-------+-------+-------+-------+-------+
-
-Chain node. Tries tend to have long chains of single-child nodes, as seen in the example above with f-i-t-l in fitler. So to save space, we use a different format to encode chains than branching nodes. A chain is encoded as a string of keycodes, beginning with the node closest to the root, and terminated with a zero byte. The child of the last node in the chain is encoded immediately after. That child could be either a branching node or a leaf.
-
-In the figure above, the f-i-t-l chain is encoded as
-
- +-------+-------+-------+-------+-------+
- | L | T | I | F | 0 |
- +-------+-------+-------+-------+-------+
-If we were to encode this chain using the same format used for branching nodes, we would encode a 16-bit node link with every node, costing 8 more bytes in this example. Across the whole trie, this adds up. Conveniently, we can point to intermediate points in the chain and interpret the bytes in the same way as before. For example, starting at the i instead of the l yields a subchain that has the same format.
-
-Leaf node. A leaf node corresponds to a particular typo and stores data to correct the typo. The leaf begins with a byte for the number of backspaces to type, and is followed by a null-terminated ASCII string of the replacement text. The idea is, after tapping backspace the indicated number of times, we can simply pass this string to QMK’s send_string_P function. For fitler, we need to tap backspace 3 times (not 4, because we catch the typo as the final ‘r’ is pressed) and replace it with lter. To identify the node as a leaf, the two high bits are set to 10 by ORing the backspace count with 128:
-
- +-------+-------+-------+-------+-------+-------+
- | 3|128 | 'l' | 't' | 'e' | 'r' | 0 |
- +-------+-------+-------+-------+-------+-------+
-## Decoding
-This format is by design decodable with fairly simple logic. A 16-bit variable state represents our current position in the trie, initialized with 0 to start at the root node. Then, for each keycode, test the highest two bits in the byte at state to identify the kind of node.
-
-* 00 ⇒ chain node: If the node’s byte matches the keycode, increment state by one to go to the next byte. If the next byte is zero, increment again to go to the following node.
-* 01 ⇒ branching node: Search the branches for one that matches the keycode, and follow its node link.
-* 10 ⇒ leaf node: a typo has been found! We read its first byte for the number of backspaces to type, then pass its following bytes to send_string_P to type the correction.