Module:CanonicalForms: Difference between revisions
Appearance
Created page with "-- Module:CanonicalForms -- This module provides generic normalization functions. -- Templates can supply their own mapping tables (arrays of mapping groups) -- where each group is a table with: -- • canonical: The canonical display value. -- • synonyms: A table (array) of synonyms (case insensitive). -- • [optional] css: Any extra property (e.g. CSS class) associated with the canonical value. -- -- The functions in this module will process an input string a..." |
No edit summary |
||
| Line 1: | Line 1: | ||
-- Module:CanonicalForms | -- Module:CanonicalForms | ||
-- This module provides generic normalization functions. | -- This module provides generic normalization functions. | ||
-- Templates | -- It removes any wiki-link markup (e.g., [[Brand TLD]]) from the input and converts it to a canonical form. | ||
-- Templates supply their own mapping tables (arrays of mapping groups), where each group is a table with: | |||
-- • canonical: The canonical display value. | -- • canonical: The canonical display value. | ||
-- • synonyms: | -- • synonyms: An array of synonyms (case insensitive) that should normalize to the canonical value. | ||
-- • [optional] css: | -- • [optional] css: An extra property (e.g., a CSS class) associated with the canonical value. | ||
-- • [optional] category: A category string for auto-assignment. | |||
-- | -- | ||
-- The | -- The normalize function processes an input string by stripping wiki markup, | ||
-- | -- converting it to lowercase, and then comparing it against the provided synonyms. | ||
-- It returns the canonical value along with any associated css and category. | |||
-- | -- | ||
-- Usage example: | -- Usage example: | ||
| Line 15: | Line 17: | ||
-- { canonical = "ccTLD", synonyms = {"cctld", "country", "cc"} } | -- { canonical = "ccTLD", synonyms = {"cctld", "country", "cc"} } | ||
-- } | -- } | ||
-- local canonical, css = require('Module:CanonicalForms').normalize(inputString, mapping) | -- local canonical, css, category = require('Module:CanonicalForms').normalize(inputString, mapping) | ||
-- | -- | ||
local CanonicalForms = {} | local CanonicalForms = {} | ||
--- Normalize an input string based on a provided mapping table. | --- Normalize an input string based on a provided mapping table. | ||
-- Strips wiki link markup, converts the string to lowercase, | |||
-- and returns the canonical value, along with optional css and category properties if a match is found. | |||
-- @param input The input string to normalize. | -- @param input The input string to normalize. | ||
-- @param mappingTable An array of mapping groups. | -- @param mappingTable An array of mapping groups. | ||
-- @return canonical The canonical display value (if a match is found), | -- @return canonical The canonical display value (if a match is found), or the cleaned input if no match. | ||
-- @return css (optional) The associated extra property (e.g. CSS class) if available. | -- @return css (optional) The associated extra property (e.g. CSS class) if available. | ||
-- @return category (optional) The associated category string if available. | |||
function CanonicalForms.normalize(input, mappingTable) | function CanonicalForms.normalize(input, mappingTable) | ||
if not input or input == "" then | if not input or input == "" then | ||
return nil, nil | return nil, nil, nil | ||
end | end | ||
local lowerInput = | -- Remove wiki-link markup: e.g., [[Brand TLD]] becomes "Brand TLD" | ||
local cleanInput = input:gsub("%[%[([^|%]]+)|?[^%]]*%]%]", "%1") | |||
local lowerInput = cleanInput:lower() | |||
for _, group in ipairs(mappingTable) do | for _, group in ipairs(mappingTable) do | ||
if group.synonyms and type(group.synonyms) == "table" then | if group.synonyms and type(group.synonyms) == "table" then | ||
for _, syn in ipairs(group.synonyms) do | for _, syn in ipairs(group.synonyms) do | ||
if lowerInput == syn:lower() then | if lowerInput == syn:lower() then | ||
return group.canonical, group.css | return group.canonical, group.css, group.category | ||
end | end | ||
end | end | ||
end | end | ||
end | end | ||
return | return cleanInput, nil, nil | ||
end | end | ||
return CanonicalForms | return CanonicalForms | ||
Revision as of 18:48, 12 February 2025
Documentation for this module may be created at Module:CanonicalForms/doc
-- Module:CanonicalForms
-- This module provides generic normalization functions.
-- It removes any wiki-link markup (e.g., [[Brand TLD]]) from the input and converts it to a canonical form.
-- Templates supply their own mapping tables (arrays of mapping groups), where each group is a table with:
-- • canonical: The canonical display value.
-- • synonyms: An array of synonyms (case insensitive) that should normalize to the canonical value.
-- • [optional] css: An extra property (e.g., a CSS class) associated with the canonical value.
-- • [optional] category: A category string for auto-assignment.
--
-- The normalize function processes an input string by stripping wiki markup,
-- converting it to lowercase, and then comparing it against the provided synonyms.
-- It returns the canonical value along with any associated css and category.
--
-- Usage example:
-- local mapping = {
-- { canonical = "gTLD", synonyms = {"generic", "gtld", "tld"} },
-- { canonical = "ccTLD", synonyms = {"cctld", "country", "cc"} }
-- }
-- local canonical, css, category = require('Module:CanonicalForms').normalize(inputString, mapping)
--
local CanonicalForms = {}

--- Normalize an input string based on a provided mapping table.
-- Wiki-link markup is stripped first: [[Brand TLD]] becomes "Brand TLD", and a
-- piped link [[Page|Label]] becomes "Page" (the link target, not the label).
-- The stripped text is then trimmed of surrounding whitespace and lowercased
-- for comparison against each group's synonyms; the first matching group wins.
-- Lowercasing uses string.lower, so only the characters it folds are treated
-- case-insensitively.
-- @param input The input string to normalize.
-- @param mappingTable An array of mapping groups ({canonical=, synonyms=, css=, category=}).
--        A missing or non-table value is treated as "no mappings".
-- @return canonical The canonical display value (if a match is found), or the
--         link-stripped input (whitespace preserved) if no match. nil when the
--         input is nil or empty.
-- @return css (optional) The associated extra property (e.g. CSS class) if available.
-- @return category (optional) The associated category string if available.
function CanonicalForms.normalize(input, mappingTable)
    if not input or input == "" then
        return nil, nil, nil
    end

    -- Remove wiki-link markup. The outer parentheses drop gsub's second
    -- return value (the substitution count).
    local cleanInput = (input:gsub("%[%[([^|%]]+)|?[^%]]*%]%]", "%1"))

    -- Without a usable mapping there is nothing to compare against; iterating
    -- a nil mapping with ipairs would raise an error instead.
    if type(mappingTable) ~= "table" then
        return cleanInput, nil, nil
    end

    -- Trim only for the comparison so stray spaces/newlines around a template
    -- argument do not prevent a match; the fallback return stays untouched.
    local lookupKey = cleanInput:match("^%s*(.-)%s*$"):lower()

    for _, group in ipairs(mappingTable) do
        if type(group) == "table" and type(group.synonyms) == "table" then
            for _, syn in ipairs(group.synonyms) do
                -- Skip malformed (non-string) synonyms rather than failing
                -- the whole lookup on syn:lower().
                if type(syn) == "string" and lookupKey == syn:lower() then
                    return group.canonical, group.css, group.category
                end
            end
        end
    end

    return cleanInput, nil, nil
end
return CanonicalForms