mirror of
https://github.com/RobTillaart/Arduino.git
synced 2024-10-03 18:09:02 -04:00
0.1.4 Soundex
This commit is contained in:
parent
affb0a60aa
commit
406e2ca71a
@ -6,7 +6,7 @@ jobs:
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- uses: arduino/arduino-lint-action@v1
|
||||
with:
|
||||
library-manager: update
|
||||
|
@ -8,7 +8,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- uses: ruby/setup-ruby@v1
|
||||
with:
|
||||
ruby-version: 2.6
|
||||
|
@ -10,7 +10,7 @@ jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- name: json-syntax-check
|
||||
uses: limitusus/json-syntax-check@v1
|
||||
with:
|
||||
|
@ -6,6 +6,14 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
|
||||
and this project adheres to [Semantic Versioning](http://semver.org/).
|
||||
|
||||
|
||||
## [0.1.4] - 2023-02-02
|
||||
- update readme.md
|
||||
- update GitHub actions
|
||||
- update license 2023
|
||||
- allow **SOUNDEX_MAX_LENGTH** to be defined from the command line.
|
||||
- move code to .cpp
|
||||
|
||||
|
||||
## [0.1.3] - 2022-11-24
|
||||
- Add RP2040 support to build-CI.
|
||||
- Add CHANGELOG.md
|
||||
@ -13,7 +21,6 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
|
||||
- fix bug in performance example
|
||||
- add output file of 2 examples as reference
|
||||
|
||||
|
||||
## [0.1.2] - 2022-02-06
|
||||
- added soundex16(), soundex32()
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2022-2022 Rob Tillaart
|
||||
Copyright (c) 2022-2023 Rob Tillaart
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
@ -13,17 +13,17 @@ Arduino Library for calculating Soundex hash.
|
||||
|
||||
## Description
|
||||
|
||||
This library generates a (string based) hash based upon how a word sounds.
|
||||
This algorithm is called Soundex.
|
||||
The original algorithm was developed by Robert C. Russell and
|
||||
Margaret King Odell over 100 years ago.
|
||||
This library generates a (string based) hash based upon how a word sounds.
|
||||
This algorithm is called Soundex.
|
||||
The original algorithm was developed by Robert C. Russell and
|
||||
Margaret King Odell over 100 years ago.
|
||||
There are several variations of Soundex and these might be supported in the future.
|
||||
|
||||
The algorithm roughly copies the uppercase first letter of the word,
|
||||
The algorithm roughly copies the uppercase first letter of the word,
|
||||
followed by 3 digits replacing the consonants.
|
||||
|
||||
The base Soundex has 26 x 7 x 7 x 7 = 8918 possible outcomes,
|
||||
this could be easily encoded in an uint16_t.
|
||||
The base Soundex has 26 x 7 x 7 x 7 = 8918 possible outcomes,
|
||||
this could be easily encoded in an uint16_t.
|
||||
This insight triggered the experimental functions.
|
||||
|
||||
|
||||
@ -33,7 +33,7 @@ The library has two experimental functions, **soundex16()** and **soundex32()**.
|
||||
These functions pack a Soundex length 5 hash in a uint16_t and a length 10 in a uint32_t.
|
||||
These compress soundex() results.
|
||||
|
||||
Advantages (16 bit version):
|
||||
Advantages (16 bit version):
|
||||
- better hash as it adds 1 extra character
|
||||
- saves 60% of RAM, (5 bytes vs 2 bytes).
|
||||
- allows faster comparisons (comparing 2 bytes is faster than comparing 5).
|
||||
@ -47,11 +47,11 @@ Disadvantage:
|
||||
|
||||
The hash codes of these new SoundexNN() are a continuous numeric range.
|
||||
|
||||
| Checksum | bytes | chars | range/values | used | notes |
|
||||
|:------------|:-------:|:-------:|---------------:|:-------:|:-------------|
|
||||
| soundex | 5 | 4 | 8.917 | 1e-6% | default |
|
||||
| soundex16 | 2 | 5 | 62.425 | 95.3% | 0xF3D9 |
|
||||
| soundex32 | 4 | 10 | 1.049.193.781 | 24.4% | 0x3E89 6D35 |
|
||||
| Checksum | bytes | chars | range/values | used | notes |
|
||||
|:------------|:-------:|:-------:|----------------:|:-------:|:-------------|
|
||||
| soundex | 5 | 4 | 8.917 | 1e-6% | default |
|
||||
| soundex16 | 2 | 5 | 62.425 | 95.3% | 0xF3D9 |
|
||||
| soundex32 | 4 | 10 | 1.049.193.781 | 24.4% | 0x3E89 6D35 |
|
||||
|
||||
Note that soundex16() and soundex32() compress info much better than
|
||||
the standard soundex().
|
||||
@ -68,32 +68,40 @@ It would allow to compress very long soundex() results (up to 22 chars) in 8 byt
|
||||
|
||||
## Interface
|
||||
|
||||
Use **\#include "Soundex.h"**
|
||||
```cpp
|
||||
#include "Soundex.h"
|
||||
```
|
||||
|
||||
#### Core
|
||||
|
||||
- **Soundex()** Constructor.
|
||||
- **void setLength(uint8_t length = 4)** Sets the length to include more digits.
|
||||
- **void setLength(uint8_t length = 4)** Sets the length to include more digits.
|
||||
Maximum length = SOUNDEX_MAX_LENGTH - 1 == 11 (default).
|
||||
- **uint8_t getLength()** returns current length.
|
||||
- **char \* soundex(const char \* str)** determines the (Russell & Odell) Soundex code of the string.
|
||||
- **uint16_t soundex16(const char \* str)** determines the (Russell & Odell) Soundex code with
|
||||
|
||||
#### Experimental
|
||||
|
||||
- **uint16_t soundex16(const char \* str)** determines the (Russell & Odell) Soundex code with
|
||||
length = 5 of the string and packs the result in an uint16_t.
|
||||
Note: preferably printed in HEX.
|
||||
- **uint32_t soundex32(const char \* str)** determines the (Russell & Odell) Soundex code with
|
||||
- **uint32_t soundex32(const char \* str)** determines the (Russell & Odell) Soundex code with
|
||||
length == 10 of the string and packs it in an uint32_t.
|
||||
Note: preferably printed in HEX.
|
||||
|
||||
|
||||
#### Performance
|
||||
|
||||
Not tested ESP32 (and many other platforms) yet.
|
||||
First numbers of **.soundex("Trichloroethylene")** measured with
|
||||
Not tested on other platforms.
|
||||
|
||||
First numbers of **.soundex("Trichloroethylene")** measured with
|
||||
a test sketch shows the following timing per word.
|
||||
|
||||
| Checksum | digits | UNO 16 MHz | ESP32 240 MHz | notes |
|
||||
|:----------|:------:|:----------:|:-------------:|:------|
|
||||
| soundex | 4 | 28 us | 4 us |
|
||||
| soundex16 | 5 | 48 us | 6 us | not optimized
|
||||
| soundex32 | 10 | 120 us | 10 us | not optimized
|
||||
| Checksum | digits | UNO 16 MHz | ESP32 240 MHz | notes |
|
||||
|:------------|:--------:|:------------:|:---------------:|:--------|
|
||||
| soundex | 4 | 28 us | 4 us |
|
||||
| soundex16 | 5 | 48 us | 6 us | not optimized |
|
||||
| soundex32 | 10 | 120 us | 10 us | not optimized |
|
||||
|
||||
|
||||
## Operation
|
||||
@ -103,12 +111,14 @@ See examples.
|
||||
|
||||
## Future ideas
|
||||
|
||||
#### must
|
||||
- documentation
|
||||
#### Must
|
||||
|
||||
- improve documentation
|
||||
- add examples
|
||||
|
||||
|
||||
#### should
|
||||
#### Should
|
||||
|
||||
- more testing
|
||||
- other platforms
|
||||
- different key lengths
|
||||
@ -116,17 +126,20 @@ See examples.
|
||||
- performance
|
||||
|
||||
|
||||
#### could
|
||||
- Other algorithms might be added in the future.
|
||||
- reverse_soundex()
|
||||
- Daitch–Mokotoff Soundex
|
||||
- Beider-Morse Soundex
|
||||
- Metaphone
|
||||
#### Could
|
||||
|
||||
- use spare bits of soundex16/32 as parity / checksum.
|
||||
|
||||
|
||||
#### wont
|
||||
#### Wont
|
||||
|
||||
- efficient storage of the Soundex array
|
||||
- encode in nibbles. (13 bytes instead of 26) => more code, performance?
|
||||
0x01, 0x23, 0x01 etc.
|
||||
(performance test was slower, gain in RAM == PROGMEM loss).
|
||||
- Other algorithms might be added in the future.
|
||||
- reverse_soundex()
|
||||
- Daitch–Mokotoff Soundex
|
||||
- Beider-Morse Soundex
|
||||
- Metaphone
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
//
|
||||
// FILE: Soundex.cpp
|
||||
// AUTHOR: Rob Tillaart
|
||||
// VERSION: 0.1.3
|
||||
// VERSION: 0.1.4
|
||||
// DATE: 2022-02-05
|
||||
// PURPOSE: Arduino Library for calculating Soundex hash
|
||||
// URL: https://github.com/RobTillaart/Soundex
|
||||
@ -28,7 +28,13 @@ void Soundex::setLength(uint8_t length)
|
||||
{
|
||||
_length = SOUNDEX_MAX_LENGTH - 1;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
uint8_t Soundex::getLength()
|
||||
{
|
||||
return _length;
|
||||
}
|
||||
|
||||
|
||||
char * Soundex::soundex(const char * str)
|
||||
|
@ -2,7 +2,7 @@
|
||||
//
|
||||
// FILE: Soundex.h
|
||||
// AUTHOR: Rob Tillaart
|
||||
// VERSION: 0.1.3
|
||||
// VERSION: 0.1.4
|
||||
// DATE: 2022-02-05
|
||||
// PURPOSE: Arduino Library for calculating Soundex hash
|
||||
// URL: https://github.com/RobTillaart/Soundex
|
||||
@ -11,21 +11,23 @@
|
||||
#include "Arduino.h"
|
||||
|
||||
|
||||
#define SOUNDEX_LIB_VERSION (F("0.1.3"))
|
||||
#define SOUNDEX_LIB_VERSION (F("0.1.4"))
|
||||
|
||||
|
||||
#define SOUNDEX_MIN_LENGTH 4
|
||||
#define SOUNDEX_MAX_LENGTH 12
|
||||
|
||||
#ifndef SOUNDEX_MAX_LENGTH
|
||||
#define SOUNDEX_MAX_LENGTH 12
|
||||
#endif
|
||||
|
||||
|
||||
class Soundex
|
||||
{
|
||||
public:
|
||||
Soundex();
|
||||
Soundex();
|
||||
|
||||
void setLength(uint8_t length = 4);
|
||||
uint8_t getLength() { return _length; };
|
||||
uint8_t getLength();
|
||||
|
||||
char * soundex(const char * str); // Russel and Odell
|
||||
uint16_t soundex16(const char * str); // Russel and Odell length = 5
|
||||
@ -40,5 +42,5 @@ private:
|
||||
};
|
||||
|
||||
|
||||
// -- END OF FILE --
|
||||
// -- END OF FILE --
|
||||
|
||||
|
@ -51,6 +51,7 @@ void setup()
|
||||
start = micros();
|
||||
char * key = SDX.soundex(token);
|
||||
stop = micros();
|
||||
|
||||
Serial.print(stop - start);
|
||||
total += (stop - start);
|
||||
Serial.print("\t");
|
||||
@ -60,6 +61,7 @@ void setup()
|
||||
Serial.print("\n");
|
||||
token = strtok(NULL, " ,");
|
||||
words++;
|
||||
|
||||
delay(10);
|
||||
}
|
||||
|
||||
|
@ -80,4 +80,4 @@ void loop()
|
||||
}
|
||||
|
||||
|
||||
// -- END OF FILE --
|
||||
// -- END OF FILE --
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "Soundex",
|
||||
"keywords": "Soundex, hash",
|
||||
"keywords": "Soundex,hash,Soundex16,Soundex32",
|
||||
"description": "Arduino Library for soundex.",
|
||||
"authors":
|
||||
[
|
||||
@ -15,7 +15,7 @@
|
||||
"type": "git",
|
||||
"url": "https://github.com/RobTillaart/Soundex.git"
|
||||
},
|
||||
"version": "0.1.3",
|
||||
"version": "0.1.4",
|
||||
"license": "MIT",
|
||||
"frameworks": "arduino",
|
||||
"platforms": "*",
|
||||
|
@ -1,9 +1,9 @@
|
||||
name=Soundex
|
||||
version=0.1.3
|
||||
version=0.1.4
|
||||
author=Rob Tillaart <rob.tillaart@gmail.com>
|
||||
maintainer=Rob Tillaart <rob.tillaart@gmail.com>
|
||||
sentence=Arduino Library for calculating Soundex hash.
|
||||
paragraph=
|
||||
paragraph=Experimental Soundex16, Soundex32
|
||||
category=Signal Input/Output
|
||||
url=https://github.com/RobTillaart/Soundex
|
||||
architectures=*
|
||||
|
@ -123,4 +123,5 @@ unittest(test_soundex_getLength)
|
||||
unittest_main()
|
||||
|
||||
|
||||
// --------
|
||||
// -- END OF FILE --
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user