Module:SemanticCategoryHelpers

Revision as of 18:26, 10 April 2025 by MarkWD (talk | contribs) (// via Wikitext Extension for VSCode)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

Documentation for this module may be created at Module:SemanticCategoryHelpers/doc

-- Module:SemanticCategoryHelpers
-- Provides utilities for semantic property and category handling in templates.
-- Extracted from TemplateHelpers to improve modularity and focus.
--
-- This module combines semantic property and category utilities that are
-- frequently used together in templates. It provides functions for:
-- * Splitting multi-value strings (e.g., "value1; value2 and value3")
-- * Building category tags from category names
-- * Adding categories based on canonical mappings
-- * Processing multi-value semantic properties
-- * Handling specialized semantic properties (countries, regions, languages)
-- * Generating semantic properties based on configuration

-- Export table: the public functions defined below are attached to `p`,
-- which is what this module returns to its callers.
local p = {}

-- Dependencies
local CanonicalForms = require('Module:CanonicalForms')
local SemanticAnnotations = require('Module:SemanticAnnotations')

--------------------------------------------------------------------------------
-- Core Utilities
--------------------------------------------------------------------------------

-- Default delimiter rules for splitMultiValueString.
-- Each rule is a Lua pattern plus its replacement; the rules run in order and
-- rewrite every recognised separator (" and ", ";" with trailing whitespace)
-- to a bare semicolon.
-- Defined once as an upvalue to avoid recreating the table on each call.
local defaultDelimiters = {
    {pattern = "%s+and%s+", replacement = ";"},
    {pattern = ";%s*", replacement = ";"}
}

-- Generic function to split multi-value strings with various delimiters.
-- @param value - String such as "value1; value2 and value3"
-- @param delimiters - Optional array of {pattern = ..., replacement = ...}
--                     rules applied in order via gsub; defaults to the rules
--                     above. The final split is always on ";", so each rule
--                     is expected to rewrite its separator to ";".
-- @return Array of trimmed, non-empty values (empty table for nil or "")
function p.splitMultiValueString(value, delimiters)
    if not value or value == "" then return {} end

    -- Use provided delimiters or default ones
    delimiters = delimiters or defaultDelimiters

    -- Standardize all delimiters to semicolons
    local standardizedInput = value
    for _, delimiter in ipairs(delimiters) do
        -- The parentheses discard gsub's second result (the match count)
        standardizedInput = (standardizedInput:gsub(delimiter.pattern, delimiter.replacement))
    end

    -- Split on semicolons, dropping fragments that are empty after trimming
    local items = {}
    for item in standardizedInput:gmatch("[^;]+") do
        local trimmed = item:match("^%s*(.-)%s*$")
        if trimmed and trimmed ~= "" then
            items[#items + 1] = trimmed
        end
    end

    return items
end

-- Helper function to check if a field contains multiple values.
-- @param value - Field value to inspect
-- @return true when value is a string containing ";" or a
--         whitespace-delimited "and"; false for anything else (nil, "",
--         non-strings, single values). Always a real boolean.
function p.isMultiValueField(value)
    if type(value) ~= "string" or value == "" then return false end

    -- Plain find for the literal semicolon, pattern find for " and "
    return value:find(";", 1, true) ~= nil
        or value:find("%s+and%s+") ~= nil
end

--------------------------------------------------------------------------------
-- Category Utilities
--------------------------------------------------------------------------------

-- Renders a list of category names as wikitext category links.
-- @param categories - Array of category names; entries that already begin
--                     with "[[Category:" are passed through untouched
-- @return Newline-separated category links, or "" when the list is nil/empty
function p.buildCategories(categories)
    if not categories or #categories == 0 then
        return ""
    end

    local links = {}
    for position, name in ipairs(categories) do
        local alreadyWrapped = string.match(name, "^%[%[Category:")
        if alreadyWrapped then
            links[position] = name
        else
            links[position] = string.format("[[Category:%s]]", name)
        end
    end

    return table.concat(links, "\n")
end

-- Looks up the category attached to a value's canonical form.
-- @param value - Raw value, normalized through CanonicalForms.normalize
-- @param mapping - Array of groups; the fields read here are `canonical`
--                  and `category`
-- @return Array holding the category of the first group whose canonical form
--         matches and that defines a category; empty array when value or
--         mapping is missing or nothing matches
function p.addMappingCategories(value, mapping)
    local matched = {}
    if not value or value == "" or not mapping then
        return matched
    end

    -- Only the first result of normalize is used
    local canonicalForm = CanonicalForms.normalize(value, mapping)
    if not canonicalForm then
        return matched
    end

    for _, entry in ipairs(mapping) do
        if entry.canonical == canonicalForm and entry.category then
            matched[#matched + 1] = entry.category
            break
        end
    end

    return matched
end

-- Generic function to add multi-value categories.
-- This is a generalized helper that can be used for any multi-value category field.
-- @param value - Raw field value; split with p.splitMultiValueString unless
--                options.valueGetter is supplied
-- @param processor - Optional function applied to each item; items for which
--                    it returns nil, false or "" are skipped
-- @param categories - Array to append to (modified in place); a new table is
--                     created when nil
-- @param options - Optional table; `valueGetter` (function) replaces the
--                  default splitting and must return an array of items
-- @return The categories table with the new entries appended
function p.addMultiValueCategories(value, processor, categories, options)
    -- Always hand back a table so callers can append to or measure the result
    categories = categories or {}
    if not value or value == "" then return categories end

    options = options or {}

    -- Get the values to process
    local items
    if type(options.valueGetter) == "function" then
        items = options.valueGetter(value)
    else
        items = p.splitMultiValueString(value)
    end

    local hasProcessor = type(processor) == "function"

    -- A custom getter may return nil; treat that as "no values"
    for _, item in ipairs(items or {}) do
        local processedItem = item
        if hasProcessor then
            processedItem = processor(item)
        end

        -- Only add if valid
        if processedItem and processedItem ~= "" then
            categories[#categories + 1] = processedItem
        end
    end

    return categories
end

-- Breaks a region string into individual region names.
-- Kept for backward compatibility: all of the work is delegated to
-- splitMultiValueString with its default delimiters.
-- @param regionValue - Region string, possibly joined with "and" or ";"
-- @return Array of individual region names
function p.splitRegionCategories(regionValue)
    local regions = p.splitMultiValueString(regionValue)
    return regions
end

--------------------------------------------------------------------------------
-- Semantic Property Helpers
--------------------------------------------------------------------------------

-- Generic function to add multi-value semantic properties.
-- This is a generalized helper that can be used for any multi-value property.
-- @param value - Raw field value; split with p.splitMultiValueString unless
--                options.valueGetter is supplied
-- @param propertyName - Name of the semantic property to set
-- @param processor - Optional function applied to each item; items for which
--                    it returns nil, false or "" are skipped
-- @param semanticOutput - Wikitext accumulated so far; nil is treated as ""
--                         when fallback markup has to be appended
-- @param options - Optional table; `valueGetter` (function) replaces the
--                  default splitting and must return an array of items
-- @return semanticOutput. When mw.smw is available each distinct value is
--         stored through mw.smw.set and the output is returned unchanged;
--         otherwise a hidden <div> holding a {{#set:}} call is appended for
--         each distinct value.
function p.addMultiValueSemanticProperties(value, propertyName, processor, semanticOutput, options)
    if not value or value == "" then return semanticOutput end

    options = options or {}

    -- Get the values to process
    local items
    if type(options.valueGetter) == "function" then
        items = options.valueGetter(value)
    else
        items = p.splitMultiValueString(value)
    end

    local hasProcessor = type(processor) == "function"
    local seen = {}          -- processed values already emitted (de-duplication)
    local propertyHtml = {}  -- fallback markup, only filled when mw.smw is absent

    -- A custom getter may return nil; treat that as "no values"
    for _, item in ipairs(items or {}) do
        local processedItem = item
        if hasProcessor then
            processedItem = processor(item)
        end

        -- Only add if valid and not already processed
        if processedItem and processedItem ~= "" and not seen[processedItem] then
            seen[processedItem] = true

            if mw.smw then
                mw.smw.set({[propertyName] = processedItem})
            else
                -- Collect fragments and join once at the end
                propertyHtml[#propertyHtml + 1] = '<div style="display:none;">'
                propertyHtml[#propertyHtml + 1] = '  {{#set: ' .. propertyName .. '=' .. processedItem .. ' }}'
                propertyHtml[#propertyHtml + 1] = '</div>'
            end
        end
    end

    -- propertyHtml is non-empty only on the non-SMW path
    if #propertyHtml > 0 then
        semanticOutput = (semanticOutput or "") .. "\n" .. table.concat(propertyHtml, "\n")
    end

    return semanticOutput
end

-- Records a "Has country" semantic property for every country in a
-- multi-value field.
-- Backward-compatibility wrapper: new code can call
-- addMultiValueSemanticProperties directly with its own processor.
-- @param countryValue - Raw multi-value country string
-- @param semanticOutput - Wikitext accumulated so far
-- @return Result of addMultiValueSemanticProperties
function p.addMultiCountrySemanticProperties(countryValue, semanticOutput)
    local CountryData = require('Module:CountryData')

    return p.addMultiValueSemanticProperties(
        countryValue,
        "Has country",
        function(name)
            -- Names that normalize to the "(Unrecognized)" sentinel are dropped
            local canonicalName = CountryData.normalizeCountryName(name)
            if canonicalName ~= "(Unrecognized)" then
                return canonicalName
            end
            return nil
        end,
        semanticOutput
    )
end

-- Adds "Has ICANN region" semantic properties for multiple regions.
-- This is a wrapper around addMultiValueSemanticProperties for backward compatibility.
-- For new code, prefer using addMultiValueSemanticProperties directly with appropriate options.
-- @param regionValue - Raw multi-value region string; nil or "" is passed
--                      through and results in no properties being added
-- @param semanticOutput - Wikitext accumulated so far
-- @return Result of addMultiValueSemanticProperties
function p.addMultiRegionSemanticProperties(regionValue, semanticOutput)
    -- Region names are recorded as given (after trimming); the only value
    -- filtered out is the "(Unrecognized)" placeholder.
    local function regionProcessor(region)
        local trimmed = region:match("^%s*(.-)%s*$")
        if trimmed == "(Unrecognized)" then
            return nil
        end
        return trimmed
    end

    -- No pre-processing of regionValue is needed: the default splitter used by
    -- addMultiValueSemanticProperties already treats " and " as a separator,
    -- and passing the value through untouched keeps nil input safe.
    return p.addMultiValueSemanticProperties(
        regionValue,
        "Has ICANN region",
        regionProcessor,
        semanticOutput
    )
end

-- Records a "Speaks language" semantic property for every language in a
-- multi-value field, using LanguageNormalization.normalize as the per-item
-- processor.
-- Backward-compatibility wrapper: new code can call
-- addMultiValueSemanticProperties directly with its own processor.
-- @param languagesValue - Raw multi-value language string
-- @param semanticOutput - Wikitext accumulated so far
-- @return Result of addMultiValueSemanticProperties
function p.addMultiLanguageSemanticProperties(languagesValue, semanticOutput)
    local normalizeLanguage = require('Module:LanguageNormalization').normalize
    return p.addMultiValueSemanticProperties(languagesValue, "Speaks language", normalizeLanguage, semanticOutput)
end

-- Helper function to process additional properties with multi-value support.
-- This standardizes how additional properties are handled across templates.
-- @param args - Template parameters, keyed by field name
-- @param semanticConfig - Config table; `additionalProperties` maps a property
--                         name to an array of source field names, and the
--                         optional `transforms` maps a property name to a
--                         function applied to the field value
-- @param semanticOutput - Wikitext accumulated so far; nil is treated as ""
--                         when fallback markup has to be appended
-- @param skipProperties - Optional set (property name -> truthy) of
--                         properties to leave alone
-- @return semanticOutput, extended with hidden {{#set:}} markup when mw.smw
--         is unavailable; with mw.smw present, single values are stored
--         through mw.smw.set instead
function p.processAdditionalProperties(args, semanticConfig, semanticOutput, skipProperties)
    if not args or not semanticConfig or not semanticConfig.additionalProperties then
        return semanticOutput
    end

    skipProperties = skipProperties or {}
    local transforms = semanticConfig.transforms

    -- For non-SMW case, collect property HTML fragments in a table for efficient concatenation
    local propertyHtml = {}

    -- Note: pairs() visits properties in an unspecified order
    for property, sourceFields in pairs(semanticConfig.additionalProperties) do
        -- Skip properties that are handled separately
        if not skipProperties[property] then
            local transform = transforms and transforms[property]

            for _, fieldName in ipairs(sourceFields) do
                local value = args[fieldName]
                if value and value ~= "" then
                    -- Apply transformation if available
                    if transform then
                        value = transform(value)
                    end

                    if value == nil then
                        -- The transform rejected the value: nothing to record
                    elseif type(value) == "string" and p.isMultiValueField(value) then
                        -- NOTE(review): the transform has already been applied to
                        -- the whole string above and is applied again to each
                        -- item here. That looks unintended for non-idempotent
                        -- transforms; confirm before changing.
                        semanticOutput = p.addMultiValueSemanticProperties(
                            value,
                            property,
                            transform,
                            semanticOutput
                        )
                    elseif mw.smw then
                        -- Single value property
                        mw.smw.set({[property] = value})
                    else
                        -- tostring keeps non-string transform results from
                        -- breaking the concatenation
                        propertyHtml[#propertyHtml + 1] = '<div style="display:none;">'
                        propertyHtml[#propertyHtml + 1] = '  {{#set: ' .. property .. '=' .. tostring(value) .. ' }}'
                        propertyHtml[#propertyHtml + 1] = '</div>'
                    end
                end
            end
        end
    end

    -- propertyHtml is non-empty only on the non-SMW path
    if #propertyHtml > 0 then
        semanticOutput = (semanticOutput or "") .. "\n" .. table.concat(propertyHtml, "\n")
    end

    return semanticOutput
end

-- Builds the semantic annotations for a template from its configuration.
-- Basic properties are emitted through SemanticAnnotations.setSemanticProperties;
-- the result is then passed to processAdditionalProperties for the
-- `additionalProperties` section of the config.
-- @param args - Template parameters
-- @param semanticConfig - Config with properties, transforms, additionalProperties
-- @param options - Optional table: `transform` (used only when the config has
--                  no `transforms`), `skipProperties` (properties to exclude)
-- @return Wikitext with semantic annotations; "" when args or semanticConfig
--         is missing
function p.generateSemanticProperties(args, semanticConfig, options)
    if not (args and semanticConfig) then
        return ""
    end

    local Annotations = require('Module:SemanticAnnotations')
    local opts = options or {}

    -- Basic properties first; the config's own transforms win over options.transform
    local annotated = Annotations.setSemanticProperties(
        args,
        semanticConfig.properties,
        {transform = semanticConfig.transforms or opts.transform}
    )

    -- Then the additional properties, with multi-value support
    return p.processAdditionalProperties(
        args,
        semanticConfig,
        annotated,
        opts.skipProperties or {}
    )
end

-- Hand the table of public functions back to whoever loads this module.
return p