Module:CanonicalForms: Difference between revisions
No edit summary |
// via Wikitext Extension for VSCode |
||
| (5 intermediate revisions by the same user not shown) | |||
| Line 1: | Line 1: | ||
-- | --[[ | ||
* Name: CanonicalForms | |||
* Author: Mark W. Datysgeld | |||
* Description: Text normalization utility that removes wiki markup and maps user input to canonical values using configurable lookup tables | |||
* Notes: Example usage: local mapping = { { canonical = "gTLD", synonyms = {"generic", "g"} }, { canonical = "ccTLD", synonyms = {"country", "cc"} } }; local canonical, css, category = require('Module:CanonicalForms').normalize(inputString, mapping) | |||
]] | |||
local CanonicalForms = {} | local CanonicalForms = {} | ||
-- | -- Normalize an input string | ||
-- | -- Removes wiki markup, converts to lowercase, and maps to canonical form | ||
function CanonicalForms.normalize(input, mappingTable) | function CanonicalForms.normalize(input, mappingTable) | ||
if not input or input == "" then | if not input or input == "" then | ||
| Line 31: | Line 15: | ||
end | end | ||
-- Remove wiki | -- Remove wiki internal link markup, keeping the link target rather than any piped label (e.g., "[[Brand TLD]]" → "Brand TLD"; "[[Brand TLD|brand]]" → "Brand TLD") | ||
local cleanInput = input:gsub("%[%[([^|%]]+)|?[^%]]*%]%]", "%1"):lower() | local cleanInput = input:gsub("%[%[([^|%]]+)|?[^%]]*%]%]", "%1"):lower() | ||
for _, group in ipairs(mappingTable) do | -- Create lookup table for faster matching (first call only) | ||
if not mappingTable._lookupCache then | |||
local lookupCache = {} | |||
for _, group in ipairs(mappingTable) do | |||
-- Add the canonical form itself to the lookup (in lowercase) | |||
lookupCache[group.canonical:lower()] = group | |||
-- Add all synonyms to the lookup | |||
for _, syn in ipairs(group.synonyms or {}) do | |||
lookupCache[syn:lower()] = group | |||
end | end | ||
end | end | ||
mappingTable._lookupCache = lookupCache | |||
end | |||
-- Direct lookup via cache | |||
local match = mappingTable._lookupCache[cleanInput] | |||
if match then | |||
return match.canonical, match.css, match.category | |||
end | end | ||