Jump to content

Module:CanonicalForms: Difference between revisions

Created page with "-- Module:CanonicalForms -- This module provides generic normalization functions. -- Templates can supply their own mapping tables (arrays of mapping groups) -- where each group is a table with: -- • canonical: The canonical display value. -- • synonyms: A table (array) of synonyms (case insensitive). -- • [optional] css: Any extra property (e.g. CSS class) associated with the canonical value. -- -- The functions in this module will process an input string a..."
 
No edit summary
Line 1: Line 1:
-- Module:CanonicalForms
-- Module:CanonicalForms
-- This module provides generic normalization functions.
-- This module provides generic normalization functions.
-- Templates can supply their own mapping tables (arrays of mapping groups)
-- It removes any wiki-link markup (e.g., [[Brand TLD]]) from the input and converts it to a canonical form.
-- where each group is a table with:
-- Templates supply their own mapping tables (arrays of mapping groups), where each group is a table with:
--  • canonical: The canonical display value.
--  • canonical: The canonical display value.
--  • synonyms:  A table (array) of synonyms (case insensitive).
--  • synonyms:  An array of synonyms (case insensitive) that should normalize to the canonical value.
--  • [optional] css: Any extra property (e.g. CSS class) associated with the canonical value.
--  • [optional] css: An extra property (e.g., a CSS class) associated with the canonical value.
--  • [optional] category: A category string for auto-assignment.
--
--
-- The functions in this module will process an input string against the provided mapping table
-- The normalize function processes an input string by stripping wiki markup,
-- and return the canonical value (and optionally any extra property) if a match is found.
-- converting it to lowercase, and then comparing it against the provided synonyms.
-- It returns the canonical value along with any associated css and category.
--
--
-- Usage example:
-- Usage example:
Line 15: Line 17:
--    { canonical = "ccTLD", synonyms = {"cctld", "country", "cc"} }
--    { canonical = "ccTLD", synonyms = {"cctld", "country", "cc"} }
--  }
--  }
--  local canonical, css = require('Module:CanonicalForms').normalize(inputString, mapping)
--  local canonical, css, category = require('Module:CanonicalForms').normalize(inputString, mapping)
--
--
local CanonicalForms = {}
local CanonicalForms = {}


--- Normalize an input string based on a provided mapping table.
--- Normalize an input string based on a provided mapping table.
-- Strips wiki link markup, converts the string to lowercase,
-- and returns the canonical value, along with optional css and category properties if a match is found.
-- @param input The input string to normalize.
-- @param input The input string to normalize.
-- @param mappingTable An array of mapping groups.
-- @param mappingTable An array of mapping groups.
-- @return canonical The canonical display value (if a match is found), otherwise the original input.
-- @return canonical The canonical display value (if a match is found), or the cleaned input if no match.
-- @return css (optional) The associated extra property (e.g. CSS class) if available.
-- @return css (optional) The associated extra property (e.g. CSS class) if available.
-- @return category (optional) The associated category string if available.
function CanonicalForms.normalize(input, mappingTable)
function CanonicalForms.normalize(input, mappingTable)
     if not input or input == "" then
     if not input or input == "" then
         return nil, nil
         return nil, nil, nil
     end
     end


     local lowerInput = input:lower()
    -- Remove wiki-link markup: e.g., [[Brand TLD]] becomes "Brand TLD"
    local cleanInput = input:gsub("%[%[([^|%]]+)|?[^%]]*%]%]", "%1")
     local lowerInput = cleanInput:lower()
 
     for _, group in ipairs(mappingTable) do
     for _, group in ipairs(mappingTable) do
         if group.synonyms and type(group.synonyms) == "table" then
         if group.synonyms and type(group.synonyms) == "table" then
             for _, syn in ipairs(group.synonyms) do
             for _, syn in ipairs(group.synonyms) do
                 if lowerInput == syn:lower() then
                 if lowerInput == syn:lower() then
                     return group.canonical, group.css
                     return group.canonical, group.css, group.category
                 end
                 end
             end
             end
         end
         end
     end
     end
     return input, nil
     return cleanInput, nil, nil
end
end


return CanonicalForms
return CanonicalForms

Revision as of 18:48, 12 February 2025

Documentation for this module may be created at Module:CanonicalForms/doc

-- Module:CanonicalForms
-- This module provides generic normalization functions.
-- It removes any wiki-link markup (e.g., [[Brand TLD]]) from the input and converts it to a canonical form.
-- Templates supply their own mapping tables (arrays of mapping groups), where each group is a table with:
--   • canonical: The canonical display value.
--   • synonyms:  An array of synonyms (case insensitive) that should normalize to the canonical value.
--   • [optional] css: An extra property (e.g., a CSS class) associated with the canonical value.
--   • [optional] category: A category string for auto-assignment.
--
-- The normalize function strips wiki-link markup from the input string and then
-- compares the result case-insensitively against the provided synonyms.
-- It returns the canonical value along with any associated css and category,
-- or the cleaned input (original letter case preserved) when no synonym matches.
--
-- Usage example:
--   local mapping = {
--     { canonical = "gTLD", synonyms = {"generic", "gtld", "tld"} },
--     { canonical = "ccTLD", synonyms = {"cctld", "country", "cc"} }
--   }
--   local canonical, css, category = require('Module:CanonicalForms').normalize(inputString, mapping)
--
-- Module table: public functions are attached to it below and it is returned at the end of the file.
local CanonicalForms = {}

--- Normalize an input string based on a provided mapping table.
-- Wiki-link markup is stripped first ([[Target]] and [[Target|label]] both
-- become "Target"); the cleaned string is then compared case-insensitively
-- against every synonym of every mapping group, in order. The first group
-- with a matching synonym wins.
-- @param input The input string to normalize.
-- @param mappingTable An array of mapping groups, each a table with the fields
--        canonical, synonyms and optionally css / category. A missing or
--        non-table value is treated as an empty mapping instead of raising an error.
-- @return canonical The canonical display value (if a match is found), or the
--         cleaned input (link markup removed, original letter case kept) if no
--         match. nil when input is nil or the empty string.
-- @return css (optional) The associated extra property (e.g. CSS class) if available.
-- @return category (optional) The associated category string if available.
function CanonicalForms.normalize(input, mappingTable)
    if not input or input == "" then
        return nil, nil, nil
    end

    -- Remove wiki-link markup: e.g., [[Brand TLD]] becomes "Brand TLD".
    -- gsub also returns the substitution count; the parentheses keep only the string.
    local cleanInput = (input:gsub("%[%[([^|%]]+)|?[^%]]*%]%]", "%1"))
    local lowerInput = cleanInput:lower()

    -- Without a usable mapping there is nothing to match against; fall back to
    -- the cleaned input rather than letting ipairs() fail on a nil/non-table value.
    if type(mappingTable) ~= "table" then
        return cleanInput, nil, nil
    end

    for _, group in ipairs(mappingTable) do
        -- Ignore malformed groups (non-table entries or a non-table synonyms field).
        local synonyms = type(group) == "table" and group.synonyms
        if type(synonyms) == "table" then
            for _, syn in ipairs(synonyms) do
                -- Only strings can be lower-cased; skip any other entry instead of erroring.
                if type(syn) == "string" and lowerInput == syn:lower() then
                    return group.canonical, group.css, group.category
                end
            end
        end
    end
    return cleanInput, nil, nil
end

-- Export the module table so that require('Module:CanonicalForms') yields it.
return CanonicalForms