Module:CanonicalForms: Difference between revisions
Appearance
No edit summary |
// via Wikitext Extension for VSCode |
||
| Line 1: | Line 1: | ||
-- Module:CanonicalForms | -- Module:CanonicalForms | ||
-- | -- Normalizes strings by removing wiki markup and mapping them to canonical values. | ||
-- | -- Mapping tables include: | ||
-- | -- * canonical: Standard display value. | ||
-- | -- * synonyms: Synonyms (case-insensitive) mapped to canonical. | ||
-- | -- * [optional] css: Associated CSS class. | ||
-- | -- * [optional] category: Auto-assignment category. | ||
-- | -- | ||
-- Example: | |||
-- Example | |||
-- local mapping = { | -- local mapping = { | ||
-- { canonical = "gTLD", synonyms = {" | -- { canonical = "gTLD", synonyms = {"gtld", "generic", "tld"} }, | ||
-- { canonical = "ccTLD", synonyms = {"cctld", "country", "cc"} } | -- { canonical = "ccTLD", synonyms = {"cctld", "country", "cc"} } | ||
-- } | -- } | ||
| Line 19: | Line 16: | ||
local CanonicalForms = {} | local CanonicalForms = {} | ||
--- | --- Normalize an input string. | ||
-- | -- Removes wiki markup and converts input to lowercase. | ||
-- @param input | -- Checks mappingTable for a matching synonym and returns the corresponding canonical value. | ||
-- @param mappingTable | -- @param input String to normalize. | ||
-- @return canonical | -- @param mappingTable Array of mapping groups with 'canonical', 'synonyms', and optional 'css' and 'category'. | ||
-- @return css | -- @return canonical Matched value, or cleaned input if no match. | ||
-- @return category | -- @return css Optional CSS class. | ||
-- @return category Optional category string. | |||
function CanonicalForms.normalize(input, mappingTable) | function CanonicalForms.normalize(input, mappingTable) | ||
if not input or input == "" then | if not input or input == "" then | ||
| Line 31: | Line 29: | ||
end | end | ||
-- Remove wiki-link markup | -- Remove wiki-link markup; eg. "[[Brand TLD]]" to "Brand TLD" | ||
local cleanInput = input:gsub("%[%[([^|%]]+)|?[^%]]*%]%]", "%1"):lower() | local cleanInput = input:gsub("%[%[([^|%]]+)|?[^%]]*%]%]", "%1"):lower() | ||
Revision as of 02:43, 3 April 2025
Documentation for this module may be created at Module:CanonicalForms/doc
-- Module:CanonicalForms
-- Normalizes strings by removing wiki markup and mapping them to canonical values.
-- Mapping tables include:
-- * canonical: Standard display value.
-- * synonyms: Synonyms (case-insensitive) mapped to canonical.
-- * [optional] css: Associated CSS class.
-- * [optional] category: Auto-assignment category.
--
-- Example:
-- local mapping = {
-- { canonical = "gTLD", synonyms = {"gtld", "generic", "tld"} },
-- { canonical = "ccTLD", synonyms = {"cctld", "country", "cc"} }
-- }
-- local canonical, css, category = require('Module:CanonicalForms').normalize(inputString, mapping)
local CanonicalForms = {}

--- Normalize an input string.
-- Strips wiki-link markup, lowercases the result, and compares it
-- (case-insensitively) against the synonyms of each mapping group.
-- The first group with a matching synonym wins.
-- @param input String to normalize. nil or "" yields nil, nil, nil.
-- @param mappingTable Array of mapping groups with 'canonical', 'synonyms',
--   and optional 'css' and 'category'. May be nil, which is treated as an
--   empty mapping (no synonyms can match).
-- @return canonical Matched canonical value, or the cleaned (lowercased)
--   input if no synonym matches.
-- @return css CSS class of the matched group, or nil.
-- @return category Category of the matched group, or nil.
function CanonicalForms.normalize(input, mappingTable)
    if not input or input == "" then
        return nil, nil, nil
    end

    -- Remove wiki-link markup, keeping only the link target;
    -- e.g. "[[Brand TLD]]" -> "Brand TLD" and "[[Brand TLD|brands]]" -> "Brand TLD".
    -- The extra parentheses discard gsub's second result (the match count).
    local cleanInput = (input:gsub("%[%[([^|%]]+)|?[^%]]*%]%]", "%1")):lower()

    -- A missing mapping table means "nothing to match", not an error, so fall
    -- back to an empty table (same treatment as a group without synonyms).
    for _, group in ipairs(mappingTable or {}) do
        for _, syn in ipairs(group.synonyms or {}) do
            if cleanInput == syn:lower() then
                return group.canonical, group.css, group.category
            end
        end
    end

    return cleanInput, nil, nil
end
-- Export the module table so callers can use require('Module:CanonicalForms').normalize(...).
return CanonicalForms