Module:NormalizationText: Difference between revisions

// via Wikitext Extension for VSCode
 
// via Wikitext Extension for VSCode
 
(3 intermediate revisions by the same user not shown)
Line 1: Line 1:
-- Module:NormalizationText
--[[
-- Comprehensive text normalization module for standardizing text formats.
* Name: NormalizationText
-- Features:
* Author: Mark W. Datysgeld
--  * String normalization (case, whitespace, punctuation)
* Description: Centralized text normalization module for standardizing text formats with comprehensive processing capabilities
--  * Wiki link processing
* Notes: String normalization (case, whitespace, punctuation); wiki link processing; multi-value string handling; user input sanitization with pattern categories; extensive caching for performance; supports custom patterns and delimiter handling
--  * Multi-value string handling
]]
--  * User input sanitization
--  * Extensive caching for performance


local p = {}
local p = {}
-- Dependencies
local LinkParser = require('Module:LinkParser')


-- Module-level caches
-- Module-level caches
local functionCache = {}
local functionCache = {}
local wikiLinkCache = {}


-- Pattern categories for sanitizing user input
-- Pattern categories for sanitizing user input
Line 20: Line 20:
             pattern = "%[%[([^|%]]+)%]%]",  
             pattern = "%[%[([^|%]]+)%]%]",  
             replacement = function(match)  
             replacement = function(match)  
                 return p.processWikiLink("[[" .. match .. "]]", "strip")  
                 return LinkParser.processWikiLink("[[" .. match .. "]]", "strip")  
             end  
             end  
         },
         },
Line 26: Line 26:
             pattern = "%[%[([^|%]]+)|([^%]]+)%]%]",  
             pattern = "%[%[([^|%]]+)|([^%]]+)%]%]",  
             replacement = function(match1, match2)  
             replacement = function(match1, match2)  
                 return p.processWikiLink("[[" .. match1 .. "|" .. match2 .. "]]", "strip")  
                 return LinkParser.processWikiLink("[[" .. match1 .. "|" .. match2 .. "]]", "strip")  
             end  
             end  
         }
         }
Line 120: Line 120:
end
end


-- Process a wiki link according to the requested mode.
--
-- Modes:
--   "extract" (default)  return the page name of [[Page]] / [[Page|Text]]
--   "strip"              return the display text (the page name when the
--                        link has no pipe)
--   "check"              return a boolean: true when the whole value has the
--                        form [[...]], false otherwise
-- Any other mode behaves like "extract".
--
-- Results are memoized in the module-level wikiLinkCache, keyed by
-- value and mode.
--
-- @param value  text to inspect; nil and "" are returned unchanged
--               (or as false in "check" mode)
-- @param mode   optional mode name, see above
-- @return       string for "extract"/"strip", boolean for "check"
function p.processWikiLink(value, mode)
    -- Default mode is "extract"
    mode = mode or "extract"

    -- Early return for nil or empty values.
    -- This must be an explicit branch: the `cond and false or value` idiom
    -- always evaluates to `value` because its middle operand is falsy, which
    -- made "check" mode return "" (truthy in Lua) for empty input.
    if not value or value == "" then
        if mode == "check" then
            return false
        end
        return value
    end

    -- Create cache key combining value and mode
    local cacheKey = value .. ":" .. mode

    -- Compare against nil so that a cached `false` from "check" mode is
    -- still served from the cache.
    local cached = wikiLinkCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    -- A value counts as a wiki link when it starts with "[[" and ends with "]]"
    local isWikiLink = value:match("^%[%[.-%]%]$") ~= nil

    -- For check mode, just report whether it is a wiki link
    if mode == "check" then
        wikiLinkCache[cacheKey] = isWikiLink
        return isWikiLink
    end

    -- Non-links pass through untouched
    if not isWikiLink then
        wikiLinkCache[cacheKey] = value
        return value
    end

    -- Try the piped form [[PageName|DisplayText]] first
    local pageName, displayText = value:match("^%[%[([^%|%]]+)%|([^%]]+)%]%]$")

    if not pageName then
        -- Fall back to the plain form [[PageName]]; the display text is then
        -- the page name itself
        pageName = value:match("^%[%[([^%|%]]+)%]%]$")
        displayText = pageName
    end

    -- Pick the component for the mode. If neither pattern matched (for
    -- example an empty or multi-pipe link), fall back to the raw value.
    -- Unknown modes are treated as "extract".
    local result
    if mode == "strip" then
        result = displayText or value
    else
        result = pageName or value
    end

    -- Store result in cache
    wikiLinkCache[cacheKey] = result

    return result
end
-- Convenience wrapper around p.processWikiLink in "extract" mode.
-- @param value  text that may be a wiki link such as [[Page]] or [[Page|Text]]
-- @return       whatever p.processWikiLink(value, "extract") yields
function p.extractFromWikiLink(value)
    local extracted = p.processWikiLink(value, "extract")
    return extracted
end


-- Sanitizes user input by removing unwanted patterns
-- Sanitizes user input by removing unwanted patterns