Module:CanonicalForms: Difference between revisions
Appearance
// via Wikitext Extension for VSCode |
// via Wikitext Extension for VSCode |
||
| Line 32: | Line 32: | ||
local cleanInput = input:gsub("%[%[([^|%]]+)|?[^%]]*%]%]", "%1"):lower() | local cleanInput = input:gsub("%[%[([^|%]]+)|?[^%]]*%]%]", "%1"):lower() | ||
for _, group in ipairs(mappingTable) do | -- Create a lookup table for faster synonym matching | ||
-- This is done on first call with a new mappingTable | |||
if not mappingTable._lookupCache then | |||
local lookupCache = {} | |||
for _, group in ipairs(mappingTable) do | |||
for _, syn in ipairs(group.synonyms or {}) do | |||
lookupCache[syn:lower()] = group | |||
end | end | ||
end | end | ||
mappingTable._lookupCache = lookupCache | |||
end | |||
-- Direct lookup instead of nested loops | |||
local match = mappingTable._lookupCache[cleanInput] | |||
if match then | |||
return match.canonical, match.css, match.category | |||
end | end | ||
Revision as of 20:23, 7 April 2025
Documentation for this module may be created at Module:CanonicalForms/doc
-- Module:CanonicalForms
-- Normalizes strings by removing wiki markup and mapping them to canonical values.
-- Mapping tables include:
-- * canonical: Standard display value.
-- * synonyms: Strings that map to canonical (matched case-insensitively).
--   The canonical value itself only matches if it is also listed here.
-- * [optional] css: Associated CSS class.
-- * [optional] category: Auto-assignment category.
--
-- Example:
-- local mapping = {
-- { canonical = "gTLD", synonyms = {"gtld", "generic", "tld"} },
-- { canonical = "ccTLD", synonyms = {"cctld", "country", "cc"} }
-- }
-- local canonical, css, category = require('Module:CanonicalForms').normalize(inputString, mapping)
local CanonicalForms = {}

-- Per-mapping lookup tables, keyed by the mapping table itself.
-- Keys are weak, so an entry goes away once its mapping table is collected.
-- The cache lives here rather than in a field on the mapping table so that
-- the caller's table is never written to; this lets write-protected tables
-- (e.g. those returned by mw.loadData) be used as mappings.
local lookupCaches = setmetatable({}, { __mode = "k" })

-- Build a "lowercased synonym -> group" index for one mapping table.
-- If the same synonym is listed in several groups, the later group wins.
local function buildLookup(mappingTable)
    local lookup = {}
    for _, group in ipairs(mappingTable) do
        for _, syn in ipairs(group.synonyms or {}) do
            lookup[syn:lower()] = group
        end
    end
    return lookup
end

--- Normalize an input string.
-- Strips wiki-link markup, lowercases the result, and looks it up among the
-- synonyms of mappingTable. The lookup index is built on the first call for a
-- given mappingTable and reused afterwards, so changes made to that table
-- after its first use are not picked up.
-- @param input String to normalize. nil or "" yields nil, nil, nil.
-- @param mappingTable Array of mapping groups with 'canonical', 'synonyms', and optional 'css' and 'category'.
--        May be nil, in which case nothing matches. It is never modified.
-- @return canonical Matched canonical value, or the cleaned (lowercased) input if no match.
-- @return css Optional CSS class of the matched group (nil if no match).
-- @return category Optional category string of the matched group (nil if no match).
function CanonicalForms.normalize(input, mappingTable)
    if not input or input == "" then
        return nil, nil, nil
    end

    -- Replace each wiki link with its target: "[[Brand TLD]]" -> "Brand TLD",
    -- "[[Target|Label]]" -> "Target". Text outside links is kept unchanged.
    local cleanInput = input:gsub("%[%[([^|%]]+)|?[^%]]*%]%]", "%1"):lower()

    -- Without a mapping there is nothing to match against.
    if mappingTable == nil then
        return cleanInput, nil, nil
    end

    local lookup = lookupCaches[mappingTable]
    if not lookup then
        lookup = buildLookup(mappingTable)
        lookupCaches[mappingTable] = lookup
    end

    local match = lookup[cleanInput]
    if match then
        return match.canonical, match.css, match.category
    end
    return cleanInput, nil, nil
end
return CanonicalForms