Jump to content

Module:CanonicalForms: Difference between revisions

No edit summary
// via Wikitext Extension for VSCode
Line 1: Line 1:
-- Module:CanonicalForms
-- Module:CanonicalForms
-- Provides normalization functions by stripping wiki-link markup and mapping input to canonical values.
-- Normalizes strings by removing wiki markup and mapping them to canonical values.
-- Templates supply mapping tables, each containing:
-- Mapping tables include:
--  canonical: The standard display value.
--  * canonical: Standard display value.
--  synonyms: Case-insensitive alternatives mapping to the canonical value.
--  * synonyms: Synonyms (case-insensitive) mapped to canonical.
--  [optional] css: An associated CSS class.
--  * [optional] css: Associated CSS class.
--  [optional] category: A category string for auto-assignment.
--  * [optional] category: Auto-assignment category.
--
--
-- The normalize function removes wiki markup, converts input to lowercase,
-- Example:
-- and matches it against provided synonyms, returning the canonical value, css, and category if applicable.
--
-- Example usage:
--  local mapping = {
--  local mapping = {
--    { canonical = "gTLD", synonyms = {"generic", "gtld", "tld"} },
--    { canonical = "gTLD", synonyms = {"gtld", "generic", "tld"} },
--    { canonical = "ccTLD", synonyms = {"cctld", "country", "cc"} }
--    { canonical = "ccTLD", synonyms = {"cctld", "country", "cc"} }
--  }
--  }
Line 19: Line 16:
local CanonicalForms = {}
local CanonicalForms = {}


--- Normalizes an input string based on a mapping table.
--- Normalize an input string.
-- Strips wiki-link markup, converts to lowercase, and returns the canonical value.
-- Removes wiki markup and converts input to lowercase.
-- @param input The input string to normalize.
-- Checks mappingTable for a matching synonym and returns the corresponding canonical value.
-- @param mappingTable An array of mapping groups.
-- @param input String to normalize.
-- @return canonical The matched canonical value, or the cleaned input if no match is found.
-- @param mappingTable Array of mapping groups with 'canonical', 'synonyms', and optional 'css' and 'category'.
-- @return css (optional) An associated CSS class if available.
-- @return canonical Matched value, or cleaned input if no match.
-- @return category (optional) An associated category string if available.
-- @return css Optional CSS class.
-- @return category Optional category string.
function CanonicalForms.normalize(input, mappingTable)
function CanonicalForms.normalize(input, mappingTable)
     if not input or input == "" then
     if not input or input == "" then
Line 31: Line 29:
     end
     end


     -- Remove wiki-link markup: [[Brand TLD]] → "Brand TLD"
     -- Remove wiki-link markup; e.g. "[[Brand TLD]]" to "Brand TLD"
     local cleanInput = input:gsub("%[%[([^|%]]+)|?[^%]]*%]%]", "%1"):lower()
     local cleanInput = input:gsub("%[%[([^|%]]+)|?[^%]]*%]%]", "%1"):lower()



Revision as of 02:43, 3 April 2025

Documentation for this module may be created at Module:CanonicalForms/doc

-- Module:CanonicalForms
-- Normalizes strings by removing wiki markup and mapping them to canonical values.
-- Mapping tables include:
--   * canonical: Standard display value.
--   * synonyms: Synonyms (case-insensitive) mapped to canonical.
--   * [optional] css: Associated CSS class.
--   * [optional] category: Auto-assignment category.
--
-- Example:
--   local mapping = {
--     { canonical = "gTLD", synonyms = {"gtld", "generic", "tld"} },
--     { canonical = "ccTLD", synonyms = {"cctld", "country", "cc"} }
--   }
--   local canonical, css, category = require('Module:CanonicalForms').normalize(inputString, mapping)

local CanonicalForms = {}

--- Normalize an input string to its canonical form.
-- Strips wiki-link markup ("[[Target]]" and "[[Target|Label]]" both become
-- "Target"), trims surrounding whitespace, lowercases the result, and then
-- looks for an equal (case-insensitive) synonym in mappingTable.
-- @param input String to normalize.
-- @param mappingTable Array of mapping groups with 'canonical', 'synonyms', and
--   optional 'css' and 'category'. May be nil, in which case nothing matches.
-- @return canonical Matched canonical value, or the cleaned (unlinked, trimmed,
--   lowercased) input if no synonym matches. nil when input is nil, empty, or
--   contains nothing but whitespace after cleaning.
-- @return css CSS class of the matched group, or nil.
-- @return category Category string of the matched group, or nil.
function CanonicalForms.normalize(input, mappingTable)
    if not input or input == "" then
        return nil, nil, nil
    end

    -- Remove wiki-link markup; e.g. "[[Brand TLD]]" to "Brand TLD".
    -- The outer parentheses discard gsub's second result (the match count).
    local unlinked = (input:gsub("%[%[([^|%]]+)|?[^%]]*%]%]", "%1"))

    -- Trim before comparing: leading/trailing spaces or newlines would
    -- otherwise prevent an otherwise valid value from matching any synonym.
    local cleanInput = unlinked:match("^%s*(.-)%s*$"):lower()
    if cleanInput == "" then
        -- Whitespace-only input carries no value; treat it like empty input.
        return nil, nil, nil
    end

    -- A missing mapping table simply means there is nothing to match against.
    for _, group in ipairs(mappingTable or {}) do
        for _, syn in ipairs(group.synonyms or {}) do
            if cleanInput == syn:lower() then
                return group.canonical, group.css, group.category
            end
        end
    end

    return cleanInput, nil, nil
end

-- Export the module table so that require() hands callers the public API.
return CanonicalForms