Jump to content

Module:CanonicalForms: Difference between revisions

// via Wikitext Extension for VSCode
// via Wikitext Extension for VSCode
Line 2: Line 2:
-- Normalizes strings by removing wiki markup and mapping to canonical values.
-- Normalizes strings by removing wiki markup and mapping to canonical values.
-- Mapping tables:
-- Mapping tables:
--  * canonical: Display value
--  * canonical: Display value (automatically included in case-insensitive matching)
--  * synonyms: Case-insensitive terms mapped to canonical
--  * synonyms: Additional case-insensitive terms mapped to canonical
--  * [optional] css: CSS class
--  * [optional] css: CSS class
--  * [optional] category: Auto-assignment category
--  * [optional] category: Auto-assignment category
Line 9: Line 9:
-- Example:
-- Example:
--  local mapping = {
--  local mapping = {
--    { canonical = "gTLD", synonyms = {"gtld", "generic", "tld"} },
--    { canonical = "gTLD", synonyms = {"generic", "tld"} },
--    { canonical = "ccTLD", synonyms = {"cctld", "country", "cc"} }
--    { canonical = "ccTLD", synonyms = {"country", "cc"} }
--  }
--  }
--  local canonical, css, category = require('Module:CanonicalForms').normalize(inputString, mapping)
--  local canonical, css, category = require('Module:CanonicalForms').normalize(inputString, mapping)
--
-- Note: The canonical value itself is automatically included in case-insensitive matching.


local CanonicalForms = {}
local CanonicalForms = {}
Line 35: Line 37:
         local lookupCache = {}
         local lookupCache = {}
         for _, group in ipairs(mappingTable) do
         for _, group in ipairs(mappingTable) do
            -- Add the canonical form itself to the lookup (in lowercase)
            lookupCache[group.canonical:lower()] = group
           
            -- Add all synonyms to the lookup
             for _, syn in ipairs(group.synonyms or {}) do
             for _, syn in ipairs(group.synonyms or {}) do
                 lookupCache[syn:lower()] = group
                 lookupCache[syn:lower()] = group

Revision as of 02:49, 12 April 2025

Documentation for this module may be created at Module:CanonicalForms/doc

-- Module:CanonicalForms
-- Normalizes strings by removing wiki markup and mapping to canonical values.
-- Mapping tables:
--   * canonical: Display value (automatically included in case-insensitive matching)
--   * synonyms: Additional case-insensitive terms mapped to canonical
--   * [optional] css: CSS class
--   * [optional] category: Auto-assignment category
--
-- Example:
--   local mapping = {
--     { canonical = "gTLD", synonyms = {"generic", "tld"} },
--     { canonical = "ccTLD", synonyms = {"country", "cc"} }
--   }
--   local canonical, css, category = require('Module:CanonicalForms').normalize(inputString, mapping)
--
-- Note: The canonical value itself is automatically included in case-insensitive matching.

local CanonicalForms = {}

-- Lookup tables built by normalize(), keyed by the mapping table they were
-- built from. Keeping them here (instead of in a `_lookupCache` field on the
-- mapping itself) means the caller's table is never written to, so read-only
-- mappings (e.g. tables obtained through Scribunto's mw.loadData) work and
-- callers iterating the mapping with pairs() never see a stray cache field.
-- Weak keys let a lookup be collected once its mapping table is dropped.
local lookupCaches = setmetatable({}, { __mode = "k" })

--- Build the term → group lookup for one mapping table.
-- Every group's canonical value and each of its synonyms is registered in
-- lowercase. Groups are processed in order, so when two groups share a term
-- the later group wins.
-- @param mappingTable Sequence of mapping groups.
-- @return Table mapping each lowercase term to its group.
local function buildLookup(mappingTable)
    local lookup = {}
    for _, group in ipairs(mappingTable) do
        -- The canonical form itself is always matchable (case-insensitively).
        lookup[group.canonical:lower()] = group

        -- `synonyms` is optional; a group may consist of its canonical only.
        for _, syn in ipairs(group.synonyms or {}) do
            lookup[syn:lower()] = group
        end
    end
    return lookup
end

--- Normalize an input string.
-- Removes wiki-link markup, converts to lowercase, and maps the result to a
-- canonical form using the given mapping table.
--
-- The lookup for a mapping table is built on the first call that uses it and
-- reused afterwards; changes made to the mapping table after that first call
-- are not picked up.
--
-- @param input String to normalize. nil or "" yields nil, nil, nil.
-- @param mappingTable Mapping groups with 'canonical', 'synonyms', and optional
--   'css'/'category'. The table is not modified. If nil, no mapping is applied.
-- @return canonical Matched canonical value, or the cleaned (lowercased,
--   link-stripped) input if nothing matched.
-- @return css Optional CSS class of the matched group (nil if no match).
-- @return category Optional category of the matched group (nil if no match).
function CanonicalForms.normalize(input, mappingTable)
    if not input or input == "" then
        return nil, nil, nil
    end

    -- Remove wiki-link markup, keeping the link target:
    -- "[[Brand TLD]]" → "Brand TLD", "[[Brand TLD|label]]" → "Brand TLD".
    -- The method call on the gsub result keeps only the string, dropping
    -- gsub's second return value (the substitution count).
    local cleanInput = input:gsub("%[%[([^|%]]+)|?[^%]]*%]%]", "%1"):lower()

    -- Without a mapping there is nothing to match against.
    if mappingTable == nil then
        return cleanInput, nil, nil
    end

    -- Build the lookup on first use of this mapping table, then reuse it.
    local lookup = lookupCaches[mappingTable]
    if not lookup then
        lookup = buildLookup(mappingTable)
        lookupCaches[mappingTable] = lookup
    end

    local match = lookup[cleanInput]
    if match then
        return match.canonical, match.css, match.category
    end

    return cleanInput, nil, nil
end

return CanonicalForms