Jump to content

Module:CanonicalForms: Difference between revisions

No edit summary
No edit summary
Line 1: Line 1:
-- Module:CanonicalForms
-- Module:CanonicalForms
-- This module provides generic normalization functions.
-- Provides normalization functions by stripping wiki-link markup and mapping input to canonical values.
-- It removes any wiki-link markup (e.g., [[Brand TLD]]) from the input and converts it to a canonical form.
-- Templates supply mapping tables, each containing:
-- Templates supply their own mapping tables (arrays of mapping groups), where each group is a table with:
--  • canonical: The standard display value.
--  • canonical: The canonical display value.
--  • synonyms:  Case-insensitive alternatives mapping to the canonical value.
--  • synonyms:  An array of synonyms (case insensitive) that should normalize to the canonical value.
--  • [optional] css: An associated CSS class.
--  • [optional] css: An extra property (e.g., a CSS class) associated with the canonical value.
--  • [optional] category: A category string for auto-assignment.
--  • [optional] category: A category string for auto-assignment.
--
--
-- The normalize function processes an input string by stripping wiki markup,
-- The normalize function removes wiki markup, converts input to lowercase,  
-- converting it to lowercase, and then comparing it against the provided synonyms.
-- and matches it against provided synonyms, returning the canonical value, css, and category if applicable.
-- It returns the canonical value along with any associated css and category.
--
--
-- Usage example:
-- Example usage:
--  local mapping = {
--  local mapping = {
--    { canonical = "gTLD", synonyms = {"generic", "gtld", "tld"} },
--    { canonical = "gTLD", synonyms = {"generic", "gtld", "tld"} },
Line 18: Line 16:
--  }
--  }
--  local canonical, css, category = require('Module:CanonicalForms').normalize(inputString, mapping)
--  local canonical, css, category = require('Module:CanonicalForms').normalize(inputString, mapping)
--
 
local CanonicalForms = {}
local CanonicalForms = {}


--- Normalize an input string based on a provided mapping table.
--- Normalizes an input string based on a mapping table.
-- Strips wiki link markup, converts the string to lowercase,
-- Strips wiki-link markup, converts to lowercase, and returns the canonical value.
-- and returns the canonical value, along with optional css and category properties if a match is found.
-- @param input The input string to normalize.
-- @param input The input string to normalize.
-- @param mappingTable An array of mapping groups.
-- @param mappingTable An array of mapping groups.
-- @return canonical The canonical display value (if a match is found), or the cleaned input if no match.
-- @return canonical The matched canonical value, or the cleaned input if no match is found.
-- @return css (optional) The associated extra property (e.g. CSS class) if available.
-- @return css (optional) An associated CSS class if available.
-- @return category (optional) The associated category string if available.
-- @return category (optional) An associated category string if available.
function CanonicalForms.normalize(input, mappingTable)
function CanonicalForms.normalize(input, mappingTable)
     if not input or input == "" then
     if not input or input == "" then
Line 34: Line 31:
     end
     end


     -- Remove wiki-link markup: e.g., [[Brand TLD]] becomes "Brand TLD"
     -- Remove wiki-link markup: [[Brand TLD]] → "Brand TLD"
     local cleanInput = input:gsub("%[%[([^|%]]+)|?[^%]]*%]%]", "%1")
     local cleanInput = input:gsub("%[%[([^|%]]+)|?[^%]]*%]%]", "%1"):lower()
    local lowerInput = cleanInput:lower()


     for _, group in ipairs(mappingTable) do
     for _, group in ipairs(mappingTable) do
         if group.synonyms and type(group.synonyms) == "table" then
         for _, syn in ipairs(group.synonyms or {}) do
            for _, syn in ipairs(group.synonyms) do
            if cleanInput == syn:lower() then
                if lowerInput == syn:lower() then
                return group.canonical, group.css, group.category
                    return group.canonical, group.css, group.category
                end
             end
             end
         end
         end
     end
     end
     return cleanInput, nil, nil
     return cleanInput, nil, nil
end
end


return CanonicalForms
return CanonicalForms

Revision as of 19:49, 12 February 2025

Documentation for this module may be created at Module:CanonicalForms/doc

-- Module:CanonicalForms
-- Provides normalization functions by stripping wiki-link markup and mapping input to canonical values.
-- Templates supply mapping tables, each containing:
--   • canonical: The standard display value.
--   • synonyms:  Case-insensitive alternatives mapping to the canonical value.
--   • [optional] css: An associated CSS class.
--   • [optional] category: A category string for auto-assignment.
--
-- The normalize function removes wiki markup, converts input to lowercase, 
-- and matches it against provided synonyms, returning the canonical value, css, and category if applicable.
--
-- Example usage:
--   local mapping = {
--     { canonical = "gTLD", synonyms = {"generic", "gtld", "tld"} },
--     { canonical = "ccTLD", synonyms = {"cctld", "country", "cc"} }
--   }
--   local canonical, css, category = require('Module:CanonicalForms').normalize(inputString, mapping)

local CanonicalForms = {}

--- Normalizes an input string based on a mapping table.
-- Strips wiki-link markup, lowercases the result, and compares it against each
-- group's synonyms (also lowercased). The first matching group wins.
-- Malformed mapping data (a non-table mapping table, non-table groups,
-- non-table synonym lists, non-string synonyms) is skipped rather than raising.
-- @param input The input string to normalize.
-- @param mappingTable An array of mapping groups; each group is a table with
--   `canonical`, `synonyms`, and optionally `css` and `category` fields.
-- @return canonical The matched group's canonical value; the cleaned,
--   lowercased input if no group matches; nil if input is nil or empty.
-- @return css (optional) The matched group's css field, or nil.
-- @return category (optional) The matched group's category field, or nil.
function CanonicalForms.normalize(input, mappingTable)
    if not input or input == "" then
        return nil, nil, nil
    end

    -- Remove wiki-link markup: [[Brand TLD]] → "Brand TLD".
    -- For piped links ([[Page|Label]]) the part before the pipe is kept.
    -- gsub also returns a replacement count; chaining :lower() keeps only the string.
    local cleanInput = input:gsub("%[%[([^|%]]+)|?[^%]]*%]%]", "%1"):lower()

    -- Without a usable mapping table nothing can match; fall back to the cleaned input.
    if type(mappingTable) ~= "table" then
        return cleanInput, nil, nil
    end

    for _, group in ipairs(mappingTable) do
        -- Only table-valued groups with a table-valued synonyms list are searched.
        local synonyms = type(group) == "table" and group.synonyms
        if type(synonyms) == "table" then
            for _, syn in ipairs(synonyms) do
                if type(syn) == "string" and cleanInput == syn:lower() then
                    return group.canonical, group.css, group.category
                end
            end
        end
    end

    return cleanInput, nil, nil
end

-- Export the module table; callers obtain it via require('Module:CanonicalForms').
return CanonicalForms