Module:CanonicalForms: Difference between revisions
Appearance
// via Wikitext Extension for VSCode |
// via Wikitext Extension for VSCode |
||
| Line 5: | Line 5: | ||
-- * synonyms: Additional case-insensitive terms mapped to canonical | -- * synonyms: Additional case-insensitive terms mapped to canonical | ||
-- * [optional] css: CSS class | -- * [optional] css: CSS class | ||
-- * [optional] category: Auto- | -- * [optional] category: Auto-assigned category | ||
-- | -- | ||
-- Example: | -- Example: | ||
-- local mapping = { | -- local mapping = { | ||
-- { canonical = "gTLD", synonyms = {"generic", " | -- { canonical = "gTLD", synonyms = {"generic", "g"} }, | ||
-- { canonical = "ccTLD", synonyms = {"country", "cc"} } | -- { canonical = "ccTLD", synonyms = {"country", "cc"} } | ||
-- } | -- } | ||
-- local canonical, css, category = require('Module:CanonicalForms').normalize(inputString, mapping) | -- local canonical, css, category = require('Module:CanonicalForms').normalize(inputString, mapping) | ||
-- | -- | ||
local CanonicalForms = {} | local CanonicalForms = {} | ||
-- Normalize an input string | |||
-- Removes wiki markup, converts to lowercase, and maps to canonical form | -- Removes wiki markup, converts to lowercase, and maps to canonical form | ||
function CanonicalForms.normalize(input, mappingTable) | function CanonicalForms.normalize(input, mappingTable) | ||
if not input or input == "" then | if not input or input == "" then | ||
| Line 30: | Line 24: | ||
end | end | ||
-- Remove wiki | -- Remove wiki internal link markup (e.g., "[[Brand TLD]]" → "Brand TLD") | ||
local cleanInput = input:gsub("%[%[([^|%]]+)|?[^%]]*%]%]", "%1"):lower() | local cleanInput = input:gsub("%[%[([^|%]]+)|?[^%]]*%]%]", "%1"):lower() | ||
Revision as of 18:24, 13 May 2025
Documentation for this module may be created at Module:CanonicalForms/doc
-- Module:CanonicalForms
-- Normalizes strings by removing wiki markup and mapping to canonical values.
-- Mapping tables:
-- * canonical: Display value (automatically included in case-insensitive matching)
-- * synonyms: Additional case-insensitive terms mapped to canonical
-- * [optional] css: CSS class
-- * [optional] category: Auto-assigned category
--
-- Example:
-- local mapping = {
-- { canonical = "gTLD", synonyms = {"generic", "g"} },
-- { canonical = "ccTLD", synonyms = {"country", "cc"} }
-- }
-- local canonical, css, category = require('Module:CanonicalForms').normalize(inputString, mapping)
--
local CanonicalForms = {}

-- Lookup tables built by normalize(), keyed by the mapping table they were
-- built from. Keeping them here instead of writing a field into the caller's
-- table means the mapping is never mutated, so mapping tables that reject
-- writes (e.g. read-only tables as returned by Scribunto's mw.loadData) work.
-- Keys are weak so a mapping that is no longer referenced elsewhere can be
-- garbage-collected together with its lookup.
local lookupCaches = setmetatable({}, { __mode = "k" })

-- Matches an internal wiki link and captures the part before any "|".
local LINK_PATTERN = "%[%[([^|%]]+)|?[^%]]*%]%]"

-- Build the lowercase-term -> group lookup for one mapping table.
-- When two groups share a term, the group listed later wins.
local function buildLookup(mappingTable)
    local lookup = {}
    for _, group in ipairs(mappingTable) do
        -- The canonical form itself is always a valid (case-insensitive) term
        lookup[group.canonical:lower()] = group
        for _, syn in ipairs(group.synonyms or {}) do
            lookup[syn:lower()] = group
        end
    end
    return lookup
end

-- Normalize an input string.
-- Removes internal wiki link markup, converts to lowercase, and maps the
-- result to its canonical form using mappingTable.
--
-- Parameters:
--   input        - string to normalize; nil or "" yields nil, nil, nil
--   mappingTable - array of groups { canonical, synonyms, css, category };
--                  if it is not a table, no mapping is attempted
--
-- Returns: canonical, css, category
--   * on a match: the group's canonical value plus its css and category
--     fields (either may be nil when the group does not define them)
--   * otherwise: the cleaned, lowercased input followed by nil, nil
function CanonicalForms.normalize(input, mappingTable)
    if not input or input == "" then
        return nil, nil, nil
    end

    -- Remove wiki internal link markup (e.g., "[[Brand TLD]]" → "Brand TLD").
    -- gsub also returns a replacement count; assigning to a single local
    -- keeps only the resulting string.
    local stripped = input:gsub(LINK_PATTERN, "%1")
    local cleanInput = stripped:lower()

    -- Without a usable mapping there is nothing to look up
    if type(mappingTable) ~= "table" then
        return cleanInput, nil, nil
    end

    -- Build the lookup on the first call for this mapping table only
    local lookup = lookupCaches[mappingTable]
    if not lookup then
        lookup = buildLookup(mappingTable)
        lookupCaches[mappingTable] = lookup
    end

    -- Exact key first, so every input that matched before still matches the
    -- same group; then retry without surrounding whitespace so padded values
    -- such as " gTLD\n" are recognized as well.
    local match = lookup[cleanInput]
    if not match then
        match = lookup[cleanInput:match("^%s*(.-)%s*$")]
    end

    if match then
        return match.canonical, match.css, match.category
    end

    return cleanInput, nil, nil
end
return CanonicalForms