mirror of
https://github.com/RobTillaart/Arduino.git
synced 2024-10-03 18:09:02 -04:00
0.1.1 Soundex
This commit is contained in:
parent
ec8aeb0659
commit
1fa9bbb40a
14
libraries/Soundex/.arduino-ci.yml
Normal file
14
libraries/Soundex/.arduino-ci.yml
Normal file
@ -0,0 +1,14 @@
|
||||
compile:
|
||||
# Choosing to run compilation tests on the Arduino platforms enabled below
|
||||
platforms:
|
||||
- uno
|
||||
# - due
|
||||
# - zero
|
||||
# - leonardo
|
||||
- m4
|
||||
- esp32
|
||||
# - esp8266
|
||||
# - mega2560
|
||||
|
||||
libraries:
|
||||
# - "printHelpers"
|
13
libraries/Soundex/.github/workflows/arduino-lint.yml
vendored
Normal file
13
libraries/Soundex/.github/workflows/arduino-lint.yml
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
|
||||
name: Arduino-lint
|
||||
|
||||
on: [push, pull_request]
|
||||
jobs:
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: arduino/arduino-lint-action@v1
|
||||
with:
|
||||
library-manager: update
|
||||
compliance: strict
|
17
libraries/Soundex/.github/workflows/arduino_test_runner.yml
vendored
Normal file
17
libraries/Soundex/.github/workflows/arduino_test_runner.yml
vendored
Normal file
@ -0,0 +1,17 @@
|
||||
---
|
||||
name: Arduino CI
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
jobs:
|
||||
runTest:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: ruby/setup-ruby@v1
|
||||
with:
|
||||
ruby-version: 2.6
|
||||
- run: |
|
||||
gem install arduino_ci
|
||||
arduino_ci.rb
|
18
libraries/Soundex/.github/workflows/jsoncheck.yml
vendored
Normal file
18
libraries/Soundex/.github/workflows/jsoncheck.yml
vendored
Normal file
@ -0,0 +1,18 @@
|
||||
name: JSON check
|
||||
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- '**.json'
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: json-syntax-check
|
||||
uses: limitusus/json-syntax-check@v1
|
||||
with:
|
||||
pattern: "\\.json$"
|
||||
|
21
libraries/Soundex/LICENSE
Normal file
21
libraries/Soundex/LICENSE
Normal file
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2022-2022 Rob Tillaart
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
77
libraries/Soundex/README.md
Normal file
77
libraries/Soundex/README.md
Normal file
@ -0,0 +1,77 @@
|
||||
|
||||
[![Arduino CI](https://github.com/RobTillaart/Soundex/workflows/Arduino%20CI/badge.svg)](https://github.com/marketplace/actions/arduino_ci)
|
||||
[![Arduino-lint](https://github.com/RobTillaart/Soundex/actions/workflows/arduino-lint.yml/badge.svg)](https://github.com/RobTillaart/Soundex/actions/workflows/arduino-lint.yml)
|
||||
[![JSON check](https://github.com/RobTillaart/Soundex/actions/workflows/jsoncheck.yml/badge.svg)](https://github.com/RobTillaart/Soundex/actions/workflows/jsoncheck.yml)
|
||||
[![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/RobTillaart/Soundex/blob/master/LICENSE)
|
||||
[![GitHub release](https://img.shields.io/github/release/RobTillaart/Soundex.svg?maxAge=3600)](https://github.com/RobTillaart/Soundex/releases)
|
||||
|
||||
|
||||
# Soundex
|
||||
|
||||
Arduino Library for calculating Soundex hash.
|
||||
|
||||
|
||||
## Description
|
||||
|
||||
This library generates a (string based) hash based upon how a word sounds.
|
||||
This algorithm is called Soundex.
|
||||
The original algorithm was developed by Robert C. Russell and Margaret King Odell over 100 years ago.
|
||||
There are several variations of Soundex and these might be supported in the future.
|
||||
|
||||
The algorithm roughly copies the uppercase first letter of the word, followed by 3 digits replacing the consonants.
|
||||
|
||||
The base Soundex has 26 x 7 x 7 x 7 = 8918 possible outcomes, this could be encoded in an uint16_t.
|
||||
|
||||
|
||||
#### Links
|
||||
|
||||
- https://en.wikipedia.org/wiki/Soundex
|
||||
- https://en.wikipedia.org/wiki/Metaphone (not implemented)
|
||||
|
||||
|
||||
## Interface
|
||||
|
||||
Use **\#include "Soundex.h"**
|
||||
|
||||
- **Soundex()** Constructor.
|
||||
- **void setLength(uint32_t length = 4)** Sets the length of the key to include more digits. Maximum length = 11.
|
||||
- **uint8_t getLength()** returns current length.
|
||||
- **char \* soundex(const char \* str)** determines the (Russell & Odell) Soundex code of the string.
|
||||
|
||||
|
||||
|
||||
#### Performance
|
||||
|
||||
Not tested on ESP32 (and many other platforms) yet.
|
||||
First numbers of **.soundex(str)** measured with the test sketch show the following timing per word.
|
||||
|
||||
| Checksum | digits | UNO 16 MHz | ESP32 240 MHz |
|
||||
|:------------|:------:|:-----------:|:-------------:|
|
||||
| Soundex | 3 | 32 us | |
|
||||
|
||||
|
||||
|
||||
## Operation
|
||||
|
||||
See examples.
|
||||
|
||||
|
||||
## Future ideas
|
||||
|
||||
- more testing
|
||||
- other platforms
|
||||
- different key lengths
|
||||
- string lengths
|
||||
- performance
|
||||
- numeric version of Soundex
|
||||
- store in an uint16_t (bit fields 5,3,4,4)
|
||||
- uint16_t soundexN(const char \* str).
|
||||
- efficient storage of the Soundex array
|
||||
- encode in nibbles. (13 bytes instead of 26) => more code, performance?
|
||||
0x01, 0x23, 0x01 etc.
|
||||
- Other algorithms might be added in the future.
|
||||
- Daitch–Mokotoff Soundex
|
||||
- Beider-Morse Soundex
|
||||
- Metaphone
|
||||
|
||||
|
70
libraries/Soundex/Soundex.cpp
Normal file
70
libraries/Soundex/Soundex.cpp
Normal file
@ -0,0 +1,70 @@
|
||||
//
|
||||
// FILE: Soundex.cpp
|
||||
// AUTHOR: Rob Tillaart
|
||||
// VERSION: 0.1.1
|
||||
// DATE: 2022-02-05
|
||||
// PURPOSE: Arduino Library for calculating Soundex hash
|
||||
// URL: https://github.com/RobTillaart/Soundex
|
||||
|
||||
|
||||
#include "Soundex.h"
|
||||
|
||||
|
||||
// Constructor: starts with an empty key buffer and the default key length of 4.
Soundex::Soundex()
  : _length(4)
{
  _buffer[0] = '\0';
}
|
||||
|
||||
|
||||
void Soundex::setLength(uint32_t length)
|
||||
{
|
||||
_length = length;
|
||||
if (_length > (SOUNDEX_MAX_LENGTH - 1))
|
||||
{
|
||||
_length = SOUNDEX_MAX_LENGTH - 1;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
char * Soundex::soundex(const char * str)
|
||||
{
|
||||
uint8_t i = 0; // index for the buffer.
|
||||
|
||||
// fill buffer with zeros
|
||||
for (i = 0; i < _length; i++) _buffer[i] = '0';
|
||||
_buffer[_length] = '\0';
|
||||
|
||||
// find begin of word, skip spaces, digits
|
||||
char *p = (char *) &str[0];
|
||||
while((*p != 0) && (isalpha(*p) == false)) p++;
|
||||
if (*p == 0) return _buffer;
|
||||
|
||||
// handle first character
|
||||
i = 0;
|
||||
_buffer[i++] = toupper(*p);
|
||||
uint8_t last = sdx[_buffer[0] - 'A']; // remember last code
|
||||
p++;
|
||||
|
||||
// process the remainder of the string
|
||||
while ((*p != 0) && (i < _length))
|
||||
{
|
||||
if (isalpha(*p)) // skip non ASCII
|
||||
{
|
||||
uint8_t current = sdx[toupper(*p) - 'A'];
|
||||
// new code?
|
||||
if (last != current)
|
||||
{
|
||||
last = current;
|
||||
if (last != 0) _buffer[i++] = '0' + last;
|
||||
}
|
||||
}
|
||||
p++;
|
||||
}
|
||||
return _buffer;
|
||||
}
|
||||
|
||||
|
||||
// -- END OF FILE --
|
||||
|
||||
|
45
libraries/Soundex/Soundex.h
Normal file
45
libraries/Soundex/Soundex.h
Normal file
@ -0,0 +1,45 @@
|
||||
#pragma once
|
||||
//
|
||||
// FILE: Soundex.h
|
||||
// AUTHOR: Rob Tillaart
|
||||
// VERSION: 0.1.1
|
||||
// DATE: 2022-02-05
|
||||
// PURPOSE: Arduino Library for calculating Soundex hash
|
||||
// URL: https://github.com/RobTillaart/Soundex
|
||||
//
|
||||
// HISTORY
|
||||
// 0.1.0 2011-05-20 stand alone application
|
||||
// 0.1.1 2022-02-05 initial library version
|
||||
|
||||
|
||||
#include "Arduino.h"
|
||||
|
||||
|
||||
#define SOUNDEX_LIB_VERSION (F("0.1.1"))
|
||||
#define SOUNDEX_MAX_LENGTH 12
|
||||
|
||||
class Soundex
|
||||
{
|
||||
public:
|
||||
Soundex();
|
||||
|
||||
void setLength(uint32_t length = 4);
|
||||
uint8_t getLength() { return _length; };
|
||||
|
||||
char * soundex(const char * str); // Russel and Odell
|
||||
|
||||
|
||||
private:
|
||||
char _buffer[SOUNDEX_MAX_LENGTH];
|
||||
uint8_t _length;
|
||||
|
||||
// This array can be made smaller (less RAM)
|
||||
// - encode in nibbles. (13 bytes iso 26) => more code, performance?
|
||||
// 0x01, 0x23, 0x01 etc.
|
||||
uint8_t sdx[26] = {0,1,2,3,0,1,2,0,0,2,2,4,5,5,0,1,2,6,2,3,0,1,0,2,0,2 };
|
||||
};
|
||||
|
||||
|
||||
// -- END OF FILE --
|
||||
|
||||
|
@ -0,0 +1,85 @@
|
||||
//
|
||||
// FILE: soundex_performance.ino
|
||||
// AUTHOR: Rob Tillaart
|
||||
// PURPOSE: demo
|
||||
|
||||
|
||||
#include "Arduino.h"
|
||||
#include "Soundex.h"
|
||||
|
||||
Soundex SDX;
|
||||
|
||||
uint32_t start, stop, total;
|
||||
char *token;
|
||||
uint16_t words;
|
||||
|
||||
|
||||
char str[] = "Lorem ipsum dolor sit amet, \
|
||||
consectetuer adipiscing elit. Aenean commodo ligula eget dolor. \
|
||||
Aenean massa. Cum sociis natoque penatibus et magnis dis parturient \
|
||||
montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, \
|
||||
pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. \
|
||||
Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. \
|
||||
In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. \
|
||||
Nullam dictum felis eu pede mollis pretium. Integer tincidunt. \
|
||||
Cras dapibus. Vivamus elementum semper nisi. \
|
||||
Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu, \
|
||||
consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, \
|
||||
viverra quis, feugiat a, tellus. Phasellus viverra nulla ut metus \
|
||||
varius laoreet. Quisque rutrum. Aenean imperdiet. Etiam ultricies \
|
||||
nisi vel augue. Curabitur ullamcorper ultricies nisi. Nam eget dui.";
|
||||
|
||||
|
||||
|
||||
void setup()
|
||||
{
|
||||
Serial.begin(115200);
|
||||
while (!Serial);
|
||||
|
||||
Serial.println();
|
||||
Serial.print("SOUNDEX_LIB_VERSION: ");
|
||||
Serial.println(SOUNDEX_LIB_VERSION);
|
||||
delay(100);
|
||||
|
||||
// SOUNDEX PER WORD
|
||||
token = strtok(str, " ,");
|
||||
words = 0;
|
||||
total = 0;
|
||||
|
||||
while (token != NULL)
|
||||
{
|
||||
start = micros();
|
||||
char * key = SDX.soundex(token);
|
||||
stop = micros();
|
||||
Serial.print(stop - start);
|
||||
total += (stop - start);
|
||||
Serial.print("\t");
|
||||
Serial.print(key);
|
||||
Serial.print("\t");
|
||||
Serial.print(token);
|
||||
Serial.print("\n");
|
||||
token = strtok(NULL, " ,");
|
||||
words++;
|
||||
delay(10);
|
||||
}
|
||||
|
||||
// TOTAL TIME
|
||||
Serial.print("\nTOTAL: \t");
|
||||
Serial.print(total);
|
||||
Serial.print("\t");
|
||||
Serial.print(words);
|
||||
Serial.print("\t");
|
||||
Serial.print(1.0 * words / total, 4);
|
||||
Serial.print(" per word\n");
|
||||
delay(10);
|
||||
|
||||
Serial.println("\ndone...");
|
||||
}
|
||||
|
||||
|
||||
// Intentionally empty, all work of this sketch is done in setup().
void loop() {}
|
||||
|
||||
|
||||
// -- END OF FILE --
|
53
libraries/Soundex/examples/soundex_test/soundex_test.ino
Normal file
53
libraries/Soundex/examples/soundex_test/soundex_test.ino
Normal file
@ -0,0 +1,53 @@
|
||||
//
|
||||
// FILE: soundex_test.ino
|
||||
// AUTHOR: Rob Tillaart
|
||||
// PURPOSE: demo
|
||||
|
||||
|
||||
#include "Arduino.h"
|
||||
#include "Soundex.h"
|
||||
|
||||
Soundex SDX;
|
||||
|
||||
uint32_t start, stop;
|
||||
|
||||
|
||||
void setup()
|
||||
{
|
||||
Serial.begin(115200);
|
||||
while (!Serial);
|
||||
|
||||
Serial.println();
|
||||
Serial.print("SOUNDEX_LIB_VERSION: ");
|
||||
Serial.println(SOUNDEX_LIB_VERSION);
|
||||
delay(100);
|
||||
|
||||
start = micros();
|
||||
char * p = SDX.soundex("soundex");
|
||||
stop = micros();
|
||||
Serial.print(p);
|
||||
Serial.print("\t");
|
||||
Serial.println(stop - start);
|
||||
|
||||
// examples from wikipedia
|
||||
Serial.println(SDX.soundex("Robert")); // R163
|
||||
Serial.println(SDX.soundex("Rupert")); // R163
|
||||
Serial.println(SDX.soundex("Rubin")); // R150
|
||||
Serial.println(SDX.soundex("Tymczak")); // T522
|
||||
Serial.println(SDX.soundex("Pfister")); // P236
|
||||
Serial.println(SDX.soundex("Honeyman")); // H555
|
||||
|
||||
SDX.setLength(10);
|
||||
// e.g. for long chemical names
|
||||
Serial.println(SDX.soundex("Trichloroethylene")); // T624634500
|
||||
Serial.println(SDX.soundex("pentacarbon decahydrate")); // P532615323
|
||||
Serial.println(SDX.soundex("deoxyribonucleic acid")); // D261524223
|
||||
}
|
||||
|
||||
|
||||
// Intentionally empty, all work of this sketch is done in setup().
void loop() {}
|
||||
|
||||
|
||||
// -- END OF FILE --
|
16
libraries/Soundex/keywords.txt
Normal file
16
libraries/Soundex/keywords.txt
Normal file
@ -0,0 +1,16 @@
|
||||
# Syntax Colouring Map For Soundex
|
||||
|
||||
# Data types (KEYWORD1)
|
||||
Soundex KEYWORD1
|
||||
|
||||
|
||||
# Methods and Functions (KEYWORD2)
|
||||
soundex KEYWORD2
|
||||
|
||||
setLength KEYWORD2
|
||||
getLength KEYWORD2
|
||||
|
||||
|
||||
# Constants (LITERAL1)
|
||||
SOUNDEX_LIB_VERSION LITERAL1
|
||||
|
23
libraries/Soundex/library.json
Normal file
23
libraries/Soundex/library.json
Normal file
@ -0,0 +1,23 @@
|
||||
{
|
||||
"name": "Soundex",
|
||||
"keywords": "Soundex, hash",
|
||||
"description": "Arduino Library for soundex.",
|
||||
"authors":
|
||||
[
|
||||
{
|
||||
"name": "Rob Tillaart",
|
||||
"email": "Rob.Tillaart@gmail.com",
|
||||
"maintainer": true
|
||||
}
|
||||
],
|
||||
"repository":
|
||||
{
|
||||
"type": "git",
|
||||
"url": "https://github.com/RobTillaart/Soundex.git"
|
||||
},
|
||||
"version": "0.1.1",
|
||||
"license": "MIT",
|
||||
"frameworks": "arduino",
|
||||
"platforms": "*",
|
||||
"headers": "Soundex.h"
|
||||
}
|
11
libraries/Soundex/library.properties
Normal file
11
libraries/Soundex/library.properties
Normal file
@ -0,0 +1,11 @@
|
||||
name=Soundex
|
||||
version=0.1.1
|
||||
author=Rob Tillaart <rob.tillaart@gmail.com>
|
||||
maintainer=Rob Tillaart <rob.tillaart@gmail.com>
|
||||
sentence=Arduino Library for calculating Soundex hash.
|
||||
paragraph=
|
||||
category=Signal Input/Output
|
||||
url=https://github.com/RobTillaart/Soundex
|
||||
architectures=*
|
||||
includes=Soundex.h
|
||||
depends=
|
88
libraries/Soundex/test/unit_test_001.cpp
Normal file
88
libraries/Soundex/test/unit_test_001.cpp
Normal file
@ -0,0 +1,88 @@
|
||||
//
|
||||
// FILE: unit_test_001.cpp
|
||||
// AUTHOR: Rob Tillaart
|
||||
// DATE: 2022-02-05
|
||||
// PURPOSE: unit tests for the Soundex library
|
||||
// https://github.com/RobTillaart/Soundex
|
||||
// https://github.com/Arduino-CI/arduino_ci/blob/master/REFERENCE.md
|
||||
//
|
||||
|
||||
// supported assertions
|
||||
// https://github.com/Arduino-CI/arduino_ci/blob/master/cpp/unittest/Assertion.h#L33-L42
|
||||
// ----------------------------
|
||||
// assertEqual(expected, actual)
|
||||
// assertNotEqual(expected, actual)
|
||||
// assertLess(expected, actual)
|
||||
// assertMore(expected, actual)
|
||||
// assertLessOrEqual(expected, actual)
|
||||
// assertMoreOrEqual(expected, actual)
|
||||
// assertTrue(actual)
|
||||
// assertFalse(actual)
|
||||
// assertNull(actual)
|
||||
// assertNotNull(actual)
|
||||
|
||||
#include <ArduinoUnitTests.h>
|
||||
|
||||
|
||||
#include "Arduino.h"
|
||||
#include "Soundex.h"
|
||||
|
||||
|
||||
|
||||
unittest_setup()
|
||||
{
|
||||
fprintf(stderr, "SOUNDEX_LIB_VERSION: %s\n", (char *) SOUNDEX_LIB_VERSION);
|
||||
}
|
||||
|
||||
unittest_teardown()
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
// Default key length (4): first letter + 3 digits.
unittest(test_soundex_3)
{
  Soundex encoder;

  // examples from Wikipedia
  assertEqual("R163", encoder.soundex("Robert"));
  assertEqual("R163", encoder.soundex("Rupert"));
  assertEqual("R150", encoder.soundex("Rubin"));
  assertEqual("T522", encoder.soundex("Tymczak"));
  assertEqual("P236", encoder.soundex("Pfister"));
  assertEqual("H555", encoder.soundex("Honeyman"));
}
|
||||
|
||||
|
||||
// Key length 10: first letter + 9 digits, padded with '0' when the word is short.
unittest(test_soundex_chemicals)
{
  Soundex encoder;
  encoder.setLength(10);

  // e.g. for long chemical names
  assertEqual("T624634500", encoder.soundex("Trichloroethylene"));
  assertEqual("P532615323", encoder.soundex("pentacarbon decahydrate"));
  assertEqual("D261524223", encoder.soundex("deoxyribonucleic acid"));
}
|
||||
|
||||
|
||||
// getLength() reports the default, every accepted length, and the limit.
unittest(test_getLength)
{
  Soundex encoder;

  // default length
  assertEqual(4, encoder.getLength());

  // lengths 4..11 are accepted as is
  for (int len = 4; len <= 11; len++)
  {
    encoder.setLength(len);
    assertEqual(len, encoder.getLength());
  }

  // 12 does not fit in the buffer and is limited to 11
  encoder.setLength(12);
  assertEqual(11, encoder.getLength());
}
|
||||
|
||||
|
||||
unittest_main()
|
||||
|
||||
|
||||
// --------
|
Loading…
Reference in New Issue
Block a user