Module:CanonicalForms: Difference between revisions

No edit summary
No edit summary
Line 1: Line 1:
-- Module:CanonicalForms
-- Module:CanonicalForms
-- This module provides generic normalization functions.
-- Provides normalization functions by stripping wiki-link markup and mapping input to canonical values.
-- It removes any wiki-link markup (e.g., [[Brand TLD]]) from the input and converts it to a canonical form.
-- Templates supply mapping tables, each containing:
-- Templates supply their own mapping tables (arrays of mapping groups), where each group is a table with:
--  • canonical: The standard display value.
--  • canonical: The canonical display value.
--  • synonyms:  Case-insensitive alternatives mapping to the canonical value.
--  • synonyms:  An array of synonyms (case insensitive) that should normalize to the canonical value.
--  • [optional] css: An associated CSS class.
--  • [optional] css: An extra property (e.g., a CSS class) associated with the canonical value.
--  • [optional] category: A category string for auto-assignment.
--  • [optional] category: A category string for auto-assignment.
--
--
-- The normalize function processes an input string by stripping wiki markup,
-- The normalize function removes wiki markup, converts input to lowercase,  
-- converting it to lowercase, and then comparing it against the provided synonyms.
-- and matches it against provided synonyms, returning the canonical value, css, and category if applicable.
-- It returns the canonical value along with any associated css and category.
--
--
-- Usage example:
-- Example usage:
--  local mapping = {
--  local mapping = {
--    { canonical = "gTLD", synonyms = {"generic", "gtld", "tld"} },
--    { canonical = "gTLD", synonyms = {"generic", "gtld", "tld"} },
Line 18: Line 16:
--  }
--  }
--  local canonical, css, category = require('Module:CanonicalForms').normalize(inputString, mapping)
--  local canonical, css, category = require('Module:CanonicalForms').normalize(inputString, mapping)
--
 
-- NOTE(review): this text is a rendered diff between two revisions of the
-- module. Most statements below appear twice (once per revision), and the
-- "Line N: Line M:" rows are diff hunk markers, not Lua.
-- Module table; it is returned at the end of the file.
local CanonicalForms = {}
local CanonicalForms = {}


--- Normalize an input string based on a provided mapping table.
--- Normalizes an input string based on a mapping table.
-- Strips wiki link markup, converts the string to lowercase,
-- Strips wiki-link markup, converts to lowercase, and returns the canonical value.
-- and returns the canonical value, along with optional css and category properties if a match is found.
-- @param input The input string to normalize.
-- @param input The input string to normalize.
-- @param mappingTable An array of mapping groups.
-- @param mappingTable An array of mapping groups.
-- @return canonical The canonical display value (if a match is found), or the cleaned input if no match.
-- @return canonical The matched canonical value, or the cleaned input if no match is found.
-- @return css (optional) The associated extra property (e.g. CSS class) if available.
-- @return css (optional) An associated CSS class if available.
-- @return category (optional) The associated category string if available.
-- @return category (optional) An associated category string if available.
-- NOTE(review): the revision that applies :lower() directly to cleanInput also
-- returns cleanInput as the no-match fallback, so unmatched input comes back
-- lowercased rather than in its original casing — confirm this is intended.
function CanonicalForms.normalize(input, mappingTable)
function CanonicalForms.normalize(input, mappingTable)
     -- Guard for nil or empty input. The statement(s) inside this branch are
     -- hidden behind the diff hunk marker below, so the value returned for
     -- empty input is not visible here.
     if not input or input == "" then
     if not input or input == "" then
Line 34: Line 31:
     end
     end


     -- Remove wiki-link markup: e.g., [[Brand TLD]] becomes "Brand TLD"
     -- Remove wiki-link markup: [[Brand TLD]] → "Brand TLD"
     -- The pattern captures the text after "[[" up to the first "|" or "]",
     -- so a piped link such as [[Target|Label]] is replaced by "Target".
     -- gsub also returns a replacement count; only the string result is kept.
     local cleanInput = input:gsub("%[%[([^|%]]+)|?[^%]]*%]%]", "%1")
     local cleanInput = input:gsub("%[%[([^|%]]+)|?[^%]]*%]%]", "%1"):lower()
    local lowerInput = cleanInput:lower()


     -- Walk the mapping groups in order and compare the lowercased input with
     -- each lowercased synonym; the first match returns immediately.
     for _, group in ipairs(mappingTable) do
     for _, group in ipairs(mappingTable) do
         -- Groups without a usable synonyms list are skipped: one revision
         -- checks that it is a table, the other iterates `synonyms or {}`.
         if group.synonyms and type(group.synonyms) == "table" then
         for _, syn in ipairs(group.synonyms or {}) do
            for _, syn in ipairs(group.synonyms) do
            if cleanInput == syn:lower() then
                if lowerInput == syn:lower() then
                -- css and category are nil when the group does not define them.
                return group.canonical, group.css, group.category
                    return group.canonical, group.css, group.category
                end
             end
             end
         end
         end
     end
     end
     -- No synonym matched: fall back to the cleaned input with no css/category.
     return cleanInput, nil, nil
     return cleanInput, nil, nil
end
end


return CanonicalForms
return CanonicalForms