Module:CanonicalForms: Difference between revisions

Created page with "-- Module:CanonicalForms -- This module provides generic normalization functions. -- Templates can supply their own mapping tables (arrays of mapping groups) -- where each group is a table with: -- • canonical: The canonical display value. -- • synonyms: A table (array) of synonyms (case insensitive). -- • [optional] css: Any extra property (e.g. CSS class) associated with the canonical value. -- -- The functions in this module will process an input string a..."
 
// via Wikitext Extension for VSCode
 
(7 intermediate revisions by the same user not shown)
Line 1: Line 1:
-- Module:CanonicalForms
--[[
-- This module provides generic normalization functions.
* Name: CanonicalForms
-- Templates can supply their own mapping tables (arrays of mapping groups)
* Author: Mark W. Datysgeld
-- where each group is a table with:
* Description: Text normalization utility that removes wiki markup and maps user input to canonical values using configurable lookup tables
--  • canonical: The canonical display value.
* Notes: Example usage: local mapping = { { canonical = "gTLD", synonyms = {"generic", "g"} }, { canonical = "ccTLD", synonyms = {"country", "cc"} } }; local canonical, css, category = require('Module:CanonicalForms').normalize(inputString, mapping)
--  • synonyms: A table (array) of synonyms (case insensitive).
]]
--  • [optional] css: Any extra property (e.g. CSS class) associated with the canonical value.
 
--
-- The functions in this module will process an input string against the provided mapping table
-- and return the canonical value (and optionally any extra property) if a match is found.
--
-- Usage example:
--  local mapping = {
--    { canonical = "gTLD", synonyms = {"generic", "gtld", "tld"} },
--    { canonical = "ccTLD", synonyms = {"cctld", "country", "cc"} }
--  }
--  local canonical, css = require('Module:CanonicalForms').normalize(inputString, mapping)
--
local CanonicalForms = {}
local CanonicalForms = {}


--- Normalize an input string based on a provided mapping table.
-- Normalize an input string
-- @param input The input string to normalize.
-- Strips internal wiki-link markup, lowercases the text, and looks it up in mappingTable; unmatched input is returned cleaned and lowercased
-- @param mappingTable An array of mapping groups.
-- @return canonical The canonical display value (if a match is found), otherwise the original input.
-- @return css (optional) The associated extra property (e.g. CSS class) if available.
function CanonicalForms.normalize(input, mappingTable)
function CanonicalForms.normalize(input, mappingTable)
     if not input or input == "" then
     if not input or input == "" then
         return nil, nil
         return nil, nil, nil
     end
     end


     local lowerInput = input:lower()
    -- Remove wiki internal link markup, keeping the link target (e.g., "[[Brand TLD]]" or "[[Brand TLD|brand]]" → "Brand TLD")
     for _, group in ipairs(mappingTable) do
     local cleanInput = input:gsub("%[%[([^|%]]+)|?[^%]]*%]%]", "%1"):lower()
        if group.synonyms and type(group.synonyms) == "table" then
 
             for _, syn in ipairs(group.synonyms) do
     -- Build the lowercase lookup table once and cache it on mappingTable as _lookupCache (first call only)
                 if lowerInput == syn:lower() then
    if not mappingTable._lookupCache then
                    return group.canonical, group.css
        local lookupCache = {}
                end
        for _, group in ipairs(mappingTable) do
            -- Add the canonical form itself to the lookup (in lowercase)
            lookupCache[group.canonical:lower()] = group
           
            -- Add all synonyms to the lookup
             for _, syn in ipairs(group.synonyms or {}) do
                 lookupCache[syn:lower()] = group
             end
             end
         end
         end
        mappingTable._lookupCache = lookupCache
     end
     end
     return input, nil
 
    -- Direct lookup via cache
    local match = mappingTable._lookupCache[cleanInput]
    if match then
        return match.canonical, match.css, match.category
    end
 
     return cleanInput, nil, nil
end
end


return CanonicalForms
return CanonicalForms