|
|
| Line 80: | Line 80: |
| local CanonicalForms = require('Module:CanonicalForms') | | local CanonicalForms = require('Module:CanonicalForms') |
| local SemanticCategoryHelpers = require('Module:SemanticCategoryHelpers') | | local SemanticCategoryHelpers = require('Module:SemanticCategoryHelpers') |
| | local NormalizationText = require('Module:NormalizationText') |
|
| |
|
| -------------------------------------------------------------------------------- | | -------------------------------------------------------------------------------- |
| Line 110: | Line 111: |
|
| |
|
| -- Trims leading and trailing whitespace from a string | | -- Trims leading and trailing whitespace from a string |
| -- Now handles nil values gracefully by returning an empty string | | -- Now delegates to NormalizationText |
| function p.trim(s) | | function p.trim(s) |
| if s == nil then | | return NormalizationText.trim(s) |
| return ""
| |
| end
| |
| return (s:gsub("^%s+", ""):gsub("%s+$", ""))
| |
| end | | end |
|
| |
|
| -- Joins a table of values with the specified delimiter | | -- Joins a table of values with the specified delimiter |
| | -- Now delegates to NormalizationText |
| function p.joinValues(values, delimiter) | | function p.joinValues(values, delimiter) |
| delimiter = delimiter or "; " | | return NormalizationText.joinValues(values, delimiter) |
| if not values or #values == 0 then return "" end
| |
| return table.concat(values, delimiter)
| |
| end | | end |
|
| |
|
| Line 138: | Line 135: |
|
| |
|
| -- Process wiki links with different modes: extract, strip, or check | | -- Process wiki links with different modes: extract, strip, or check |
| -- @param value string The input string that may contain wiki links | | -- Now delegates to NormalizationText |
| -- @param mode string The processing mode: "extract" (get page name), "strip" (get display text), "check" (detect if it's a wiki link)
| |
| -- @return string|boolean The processed result based on the mode
| |
| function p.processWikiLink(value, mode) | | function p.processWikiLink(value, mode) |
| -- Default mode is "extract" | | return NormalizationText.processWikiLink(value, mode) |
| mode = mode or "extract"
| |
|
| |
| -- Early return for nil or empty values
| |
| if not value or value == "" then
| |
| return mode == "check" and false or value
| |
| end
| |
|
| |
| -- Create cache key combining value and mode
| |
| local cacheKey = value .. ":" .. mode
| |
|
| |
| -- Check cache first
| |
| if wikiLinkCache[cacheKey] ~= nil then
| |
| return wikiLinkCache[cacheKey]
| |
| end
| |
|
| |
| -- Check if the value is a wiki link
| |
| local isWikiLink = value:match("^%[%[.-%]%]$") ~= nil
| |
|
| |
| -- For check mode, just return whether it's a wiki link
| |
| if mode == "check" then
| |
| wikiLinkCache[cacheKey] = isWikiLink
| |
| return isWikiLink
| |
| end
| |
|
| |
| -- If it's not a wiki link, return the original value
| |
| if not isWikiLink then
| |
| wikiLinkCache[cacheKey] = value
| |
| return value
| |
| end
| |
|
| |
| -- Extract components from the wiki link
| |
| local pageName, displayText
| |
|
| |
| -- Try to match [[PageName|DisplayText]] format
| |
| pageName, displayText = value:match("^%[%[([^%|%]]+)%|([^%]]+)%]%]$")
| |
|
| |
| if not pageName then
| |
| -- Try to match [[PageName]] format
| |
| pageName = value:match("^%[%[([^%|%]]+)%]%]$")
| |
| displayText = pageName -- In this case, display text is the same as page name
| |
| end
| |
|
| |
| -- Determine result based on mode
| |
| local result
| |
| if mode == "extract" then
| |
| result = pageName or value
| |
| elseif mode == "strip" then
| |
| result = displayText or value
| |
| else
| |
| -- Default to extract mode for unknown modes
| |
| result = pageName or value
| |
| end
| |
|
| |
| -- Store result in cache
| |
| wikiLinkCache[cacheKey] = result
| |
|
| |
| return result
| |
| end | | end |
|
| |
|
| -- Extract page name from wiki link [[Name]] or [[Name|Text]] | | -- Extract page name from wiki link [[Name]] or [[Name|Text]] |
| -- Now uses the unified processWikiLink function | | -- Now delegates to NormalizationText |
| -- @param value string The input string that may contain wiki links
| |
| -- @return string The extracted page name or the original value if no wiki link is found
| |
| function p.extractFromWikiLink(value) | | function p.extractFromWikiLink(value) |
| return p.processWikiLink(value, "extract") | | return NormalizationText.extractFromWikiLink(value) |
| end | | end |
|
| |
|
| Line 246: | Line 182: |
|
| |
|
| -- Sanitizes user input by removing or transforming unwanted patterns | | -- Sanitizes user input by removing or transforming unwanted patterns |
| -- Now with caching for better performance | | -- Now delegates to NormalizationText |
| -- @param value The input string to sanitize
| |
| -- @param patternCategories Optional table or string of pattern categories to apply
| |
| -- @param customPatterns Optional table of additional patterns to apply
| |
| -- @param options Optional table of options:
| |
| -- - preserveWikiLinks: If true, wiki links will not be stripped
| |
| -- @return The sanitized string
| |
| function p.sanitizeUserInput(value, patternCategories, customPatterns, options) | | function p.sanitizeUserInput(value, patternCategories, customPatterns, options) |
| -- Fast path for nil/empty values | | return NormalizationText.sanitizeUserInput(value, patternCategories, customPatterns, options) |
| if not value or value == "" then return "" end
| |
|
| |
| -- Initialize options
| |
| options = options or {}
| |
|
| |
| -- Create a cache key
| |
| -- For caching, we need to handle the case where patternCategories or customPatterns are tables
| |
| local patternCategoriesKey
| |
| if type(patternCategories) == "table" then
| |
| patternCategoriesKey = "table:" .. tostring(patternCategories)
| |
| else
| |
| patternCategoriesKey = tostring(patternCategories)
| |
| end
| |
|
| |
| local customPatternsKey
| |
| if type(customPatterns) == "table" then
| |
| customPatternsKey = "table:" .. tostring(customPatterns)
| |
| else
| |
| customPatternsKey = tostring(customPatterns)
| |
| end
| |
|
| |
| local preserveWikiLinksKey = options.preserveWikiLinks and "true" or "false"
| |
|
| |
| local cacheKey = p.generateCacheKey("sanitizeUserInput", value, patternCategoriesKey, customPatternsKey, preserveWikiLinksKey)
| |
|
| |
| -- Use the caching wrapper
| |
| return p.withCache(cacheKey, function()
| |
| -- Collect patterns to apply
| |
| local patternsToApply = {}
| |
| local patternCount = 0
| |
|
| |
| -- Process requested pattern categories
| |
| if patternCategories then
| |
| -- Handle single category string
| |
| if type(patternCategories) == "string" then
| |
| -- Skip WIKI_LINKS category if preserveWikiLinks is true
| |
| if not (options.preserveWikiLinks and patternCategories == "WIKI_LINKS") then
| |
| if p.SANITIZE_PATTERNS[patternCategories] then
| |
| for _, pattern in ipairs(p.SANITIZE_PATTERNS[patternCategories]) do
| |
| patternCount = patternCount + 1
| |
| patternsToApply[patternCount] = pattern
| |
| end
| |
| end
| |
| end
| |
| -- Handle table of categories
| |
| elseif type(patternCategories) == "table" then
| |
| for _, category in ipairs(patternCategories) do
| |
| -- Skip WIKI_LINKS category if preserveWikiLinks is true
| |
| if not (options.preserveWikiLinks and category == "WIKI_LINKS") then
| |
| if p.SANITIZE_PATTERNS[category] then
| |
| for _, pattern in ipairs(p.SANITIZE_PATTERNS[category]) do
| |
| patternCount = patternCount + 1
| |
| patternsToApply[patternCount] = pattern
| |
| end
| |
| end
| |
| end
| |
| end
| |
| end
| |
| else
| |
| -- Default to WIKI_LINKS and SINGLE_BRACES if no categories specified
| |
| -- Skip WIKI_LINKS if preserveWikiLinks is true
| |
| if not options.preserveWikiLinks then
| |
| for _, pattern in ipairs(p.SANITIZE_PATTERNS.WIKI_LINKS) do
| |
| patternCount = patternCount + 1
| |
| patternsToApply[patternCount] = pattern
| |
| end
| |
| end
| |
|
| |
| -- Always include SINGLE_BRACES
| |
| for _, pattern in ipairs(p.SANITIZE_PATTERNS.SINGLE_BRACES) do
| |
| patternCount = patternCount + 1
| |
| patternsToApply[patternCount] = pattern
| |
| end
| |
| end
| |
|
| |
| -- Add any custom patterns
| |
| if customPatterns and type(customPatterns) == "table" then
| |
| for _, pattern in ipairs(customPatterns) do
| |
| patternCount = patternCount + 1
| |
| patternsToApply[patternCount] = pattern
| |
| end
| |
| end
| |
|
| |
| -- Fast path if no patterns to apply
| |
| if patternCount == 0 then
| |
| return value
| |
| end
| |
|
| |
| -- Apply each pattern sequentially
| |
| local result = value
| |
| for i = 1, patternCount do
| |
| local patternInfo = patternsToApply[i]
| |
| result = result:gsub(patternInfo.pattern, patternInfo.replacement)
| |
| end
| |
|
| |
| return result
| |
| end)
| |
| end | | end |
|
| |
|
| Line 888: | Line 721: |
|
| |
|
| -- Generic function to split multi-value strings with various delimiters | | -- Generic function to split multi-value strings with various delimiters |
| -- Returns an array of individual values | | -- Now delegates to NormalizationText |
| function p.splitMultiValueString(value, delimiters) | | function p.splitMultiValueString(value, delimiters) |
| if not value or value == "" then return {} end | | return NormalizationText.splitMultiValueString(value, delimiters) |
|
| |
| -- Use provided delimiters or default ones
| |
| delimiters = delimiters or defaultDelimiters
| |
|
| |
| -- Standardize all delimiters to semicolons
| |
| local standardizedInput = value
| |
| for _, delimiter in ipairs(delimiters) do
| |
| standardizedInput = standardizedInput:gsub(delimiter.pattern, delimiter.replacement)
| |
| end
| |
|
| |
| -- Pre-allocate table based on delimiter count
| |
| -- Count semicolons to estimate the number of items
| |
| local count = 0
| |
| for _ in standardizedInput:gmatch(";") do
| |
| count = count + 1
| |
| end
| |
|
| |
| -- Pre-allocate table with estimated size (count+1 for the last item)
| |
| local items = {}
| |
|
| |
| -- Split by semicolons and return the array
| |
| local index = 1
| |
| for item in standardizedInput:gmatch("[^;]+") do
| |
| local trimmed = item:match("^%s*(.-)%s*$")
| |
| if trimmed and trimmed ~= "" then
| |
| items[index] = trimmed
| |
| index = index + 1
| |
| end
| |
| end
| |
|
| |
| return items
| |
| end | | end |
|
| |
|