Jump to content

Module:SemanticCategoryHelpers: Difference between revisions

// via Wikitext Extension for VSCode
// via Wikitext Extension for VSCode
 
(28 intermediate revisions by the same user not shown)
Line 1: Line 1:
-- Module:SemanticCategoryHelpers
--[[
-- Provides utilities for semantic property and category handling in templates.
* Name: SemanticCategoryHelpers
-- Extracted from TemplateHelpers to improve modularity and focus.
* Author: Mark W. Datysgeld
--
* Description: Utilities for semantic property and category handling in templates, extracted from TemplateHelpers for improved modularity
-- This module combines semantic property and category utilities that are
* Notes: Splitting multi-value strings; building category tags; adding categories based on canonical mappings; processing multi-value semantic properties; generating semantic properties based on configuration; retrieving property descriptions from property pages; includes property description caching and type registry
-- frequently used together in templates. It provides functions for:
]]
-- * Splitting multi-value strings (e.g., "value1; value2 and value3")
-- * Building category tags from category names
-- * Adding categories based on canonical mappings
-- * Processing multi-value semantic properties
-- * Handling specialized semantic properties (countries, regions, languages)
-- * Generating semantic properties based on configuration


local p = {}
local p = {}
Line 17: Line 11:
local CanonicalForms = require('Module:CanonicalForms')
local CanonicalForms = require('Module:CanonicalForms')
local SemanticAnnotations = require('Module:SemanticAnnotations')
local SemanticAnnotations = require('Module:SemanticAnnotations')
local TemplateHelpers = require('Module:TemplateHelpers')


--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
-- Core Utilities
-- Property Description Cache
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------


-- Default delimiters for splitMultiValueString
-- Module-level cache for property descriptions
-- Defined once as an upvalue to avoid recreating on each function call
local propertyDescriptionCache = {}
local defaultDelimiters = {
    {pattern = "%s+and%s+", replacement = ";"},
    {pattern = ";%s*", replacement = ";"}
}


-- Generic function to split multi-value strings with various delimiters
-- Get property description from a property page
-- Returns an array of individual values
-- @param propertyName string The name of the property (e.g., "Has interview format")
function p.splitMultiValueString(value, delimiters)
-- @return string|nil The property description or nil if not found
     if not value or value == "" then return {} end
function p.getPropertyDescription(propertyName)
    -- Check cache first
     if propertyDescriptionCache[propertyName] ~= nil then
        return propertyDescriptionCache[propertyName]
    end
      
      
     -- Use provided delimiters or default ones
     -- Construct the property page title
     delimiters = delimiters or defaultDelimiters
     local propertyPageTitle = "Property:" .. propertyName
      
      
     -- Standardize all delimiters to semicolons
     -- Try to load the property page
     local standardizedInput = value
     local propertyPage = mw.title.new(propertyPageTitle)
     for _, delimiter in ipairs(delimiters) do
     if not propertyPage or not propertyPage.exists then
         standardizedInput = standardizedInput:gsub(delimiter.pattern, delimiter.replacement)
         propertyDescriptionCache[propertyName] = nil
        return nil
     end
     end
      
      
     -- Pre-allocate table based on delimiter count
     -- Extract the description from the page content
    -- Count semicolons to estimate the number of items
     local content = propertyPage:getContent()
     local count = 0
    if not content then
    for _ in standardizedInput:gmatch(";") do
         propertyDescriptionCache[propertyName] = nil
         count = count + 1
        return nil
     end
     end
      
      
     -- Pre-allocate table with estimated size (count+1 for the last item)
     -- Look for the property description in the content
     local items = {}
    -- Pattern matches [[Has property description::description text@en]]
    local description = content:match("%[%[Has property description::(.-)@?e?n?%]%]")
     if not description then
        -- Try canonical description as fallback
        description = content:match("'''Canonical description''': (.-)[%.%?!]")
    end
      
      
     -- Split by semicolons and return the array
     -- Cache the result (even if nil)
    local index = 1
    propertyDescriptionCache[propertyName] = description
    for item in standardizedInput:gmatch("[^;]+") do
        local trimmed = item:match("^%s*(.-)%s*$")
        if trimmed and trimmed ~= "" then
            items[index] = trimmed
            index = index + 1
        end
    end
      
      
     return items
     return description
end
end
--------------------------------------------------------------------------------
-- Property Type Registry
--------------------------------------------------------------------------------
-- Registry of property types with their configurations
-- Each property type has:
-- - getPropertyName: Function that returns the property name from ConfigRepository
-- - processor: Function that processes a value for this property type
local propertyTypes = {
    country = {
        getPropertyName = function()
            return require('Module:ConfigRepository').semanticProperties.country
        end,
        processor = function(value)
            local CountryData = require('Module:CountryData')
            local normalized = CountryData.normalizeCountryName(value)
            if normalized == "(Unrecognized)" then
                return nil
            end
            return normalized
        end
    },
    region = {
        getPropertyName = function()
            return require('Module:ConfigRepository').semanticProperties.region
        end,
        processor = function(value)
            if value == "(Unrecognized)" then
                return nil
            end
            return value:match("^%s*(.-)%s*$") -- Trim whitespace
        end
    },
    language = {
        getPropertyName = function()
            return require('Module:ConfigRepository').semanticProperties.language
        end,
        processor = function(value)
            return require('Module:NormalizationLanguage').normalize(value)
        end
    },
    person = {
        getPropertyName = function()
            return require('Module:ConfigRepository').semanticProperties.person
        end
    }
}
--------------------------------------------------------------------------------
-- Core Utilities
--------------------------------------------------------------------------------
-- Semicolon-only pattern for backward compatibility with splitSemicolonValues
-- Exposed as a module-level constant for use by other modules
p.SEMICOLON_PATTERN = {{pattern = ";%s*", replacement = ";"}}


-- Helper function to check if a field contains multiple values
-- Helper function to check if a field contains multiple values
Line 145: Line 196:
     else
     else
         -- Default to splitting the string
         -- Default to splitting the string
         items = p.splitMultiValueString(value)
         items = TemplateHelpers.splitMultiValueString(value)
     end
     end
      
      
     -- Pre-allocate space in the categories table
     -- Process each item and collect valid categories
    -- Estimate the number of new categories to add
     local newCategories = {}
    local currentSize = #categories
     local estimatedNewSize = currentSize + #items
   
    -- Process each item and add as a category
     for _, item in ipairs(items) do
     for _, item in ipairs(items) do
         -- Apply processor if provided
         -- Apply processor if provided
Line 163: Line 210:
         -- Only add if valid
         -- Only add if valid
         if processedItem and processedItem ~= "" then
         if processedItem and processedItem ~= "" then
             categories[currentSize + 1] = processedItem
             table.insert(newCategories, processedItem)
            currentSize = currentSize + 1
         end
         end
     end
     end
      
      
     return categories
    -- Combine existing categories with new ones
    for _, category in ipairs(newCategories) do
        table.insert(categories, category)
    end
   
    -- Use the centralized removeDuplicates function to deduplicate the combined list
     return TemplateHelpers.removeDuplicates(categories)
end
end


Line 175: Line 227:
-- This is now a wrapper around splitMultiValueString for backward compatibility
-- This is now a wrapper around splitMultiValueString for backward compatibility
function p.splitRegionCategories(regionValue)
function p.splitRegionCategories(regionValue)
     return p.splitMultiValueString(regionValue)
     return TemplateHelpers.splitMultiValueString(regionValue)
end
end


Line 182: Line 234:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------


-- Generic function to add multi-value semantic properties
-- Unified function to add semantic properties for any property type
-- This is a generalized helper that can be used for any multi-value property
-- @param propertyType - The type of property (e.g., "country", "region", "language")
function p.addMultiValueSemanticProperties(value, propertyName, processor, semanticOutput, options)
-- @param value - The value to process
-- @param semanticOutput - The current semantic output to append to
-- @param options - Additional options for processing
-- @return The updated semantic output
function p.addSemanticProperties(propertyType, value, semanticOutput, options)
     if not value or value == "" then return semanticOutput end
     if not value or value == "" then return semanticOutput end
      
      
     options = options or {}
     options = options or {}
     local processedItems = {}
   
    -- Get configuration for this property type
     local config = propertyTypes[propertyType]
    if not config then
        -- Check if propertyType is a key in ConfigRepository.semanticProperties
        local ConfigRepository = require('Module:ConfigRepository')
        local propertyName = ConfigRepository.semanticProperties[propertyType]
       
        if propertyName then
            -- Create a dynamic config for this property
            config = {
                getPropertyName = function() return propertyName end,
                processor = options.processor
            }
        else
            -- If it's a direct property name, use it as is
            config = {
                getPropertyName = function() return propertyType end,
                processor = options.processor
            }
        end
    end
   
    -- Get property name from config
    local propertyName = config.getPropertyName()
      
      
     -- Get the values to process
     -- Get the values to process
Line 197: Line 277:
     else
     else
         -- Default to splitting the string
         -- Default to splitting the string
         items = p.splitMultiValueString(value)
         items = TemplateHelpers.splitMultiValueString(value)
     end
     end
      
      
Line 203: Line 283:
     local propertyHtml = {}
     local propertyHtml = {}
      
      
     -- Process each item and add as a semantic property
    -- Collect properties for batched processing
    local propertyValues = {}
   
     -- Process each item and collect valid values
    local validValues = {}
   
    -- Process each item and collect for batched property setting
     for _, item in ipairs(items) do
     for _, item in ipairs(items) do
         -- Apply processor if provided
         -- Apply processor if provided
         local processedItem = item
         local processedItem = item
         if processor and type(processor) == "function" then
         if config.processor and type(config.processor) == "function" then
             processedItem = processor(item)
             processedItem = config.processor(item)
        end
       
        -- Only add if valid
        if processedItem and processedItem ~= "" then
            table.insert(validValues, processedItem)
        end
    end
   
    -- Deduplicate the values using the centralized function
    validValues = TemplateHelpers.removeDuplicates(validValues)
   
    -- Add the deduplicated values to the property collection
    if #validValues > 0 then
        if #validValues == 1 then
            -- Single value case
            propertyValues[propertyName] = validValues[1]
        else
            -- Multiple values case
            propertyValues[propertyName] = validValues
         end
         end
          
          
         -- Only add if valid and not already processed
         -- For non-SMW fallback, generate HTML fragments for each value
         if processedItem and processedItem ~= "" and not processedItems[processedItem] then
         if not mw.smw then
            processedItems[processedItem] = true
            for _, processedItem in ipairs(validValues) do
           
            -- Add as semantic property
            if mw.smw then
                mw.smw.set({[propertyName] = processedItem})
            else
                -- Collect HTML fragments instead of concatenating strings
                 table.insert(propertyHtml, '<div style="display:none;">')
                 table.insert(propertyHtml, '<div style="display:none;">')
                 table.insert(propertyHtml, '  {{#set: ' .. propertyName .. '=' .. processedItem .. ' }}')
                 table.insert(propertyHtml, '  {{#set: ' .. propertyName .. '=' .. processedItem .. ' }}')
Line 227: Line 326:
     end
     end
      
      
     -- For non-SMW case, concatenate all property HTML fragments at once
     -- Use batched property setting with SemanticAnnotations if properties exist
     if not mw.smw and #propertyHtml > 0 then
    if mw.smw and next(propertyValues) then
        local SemanticAnnotations = require('Module:SemanticAnnotations')
        local dummyArgs = {} -- We're not using args from the template
        local additionalOutput = SemanticAnnotations.setSemanticProperties(
            dummyArgs,
            propertyValues,
            {transform = nil} -- No transforms needed as we've already processed the values
        )
       
        if additionalOutput and additionalOutput ~= "" then
            semanticOutput = semanticOutput .. additionalOutput
        end
     elseif not mw.smw and #propertyHtml > 0 then
        -- For non-SMW case, concatenate all property HTML fragments at once
         semanticOutput = semanticOutput .. "\n" .. table.concat(propertyHtml, "\n")
         semanticOutput = semanticOutput .. "\n" .. table.concat(propertyHtml, "\n")
     end
     end
      
      
     return semanticOutput
     return semanticOutput
end
-- Adds semantic properties for multiple countries
-- This is a wrapper around addMultiValueSemanticProperties for backward compatibility
-- For new code, prefer using addMultiValueSemanticProperties directly with appropriate options
function p.addMultiCountrySemanticProperties(countryValue, semanticOutput)
    local CountryData = require('Module:CountryData')
   
    -- Create a processor function that uses CountryData for normalization
    local function countryProcessor(country)
        local normalized = CountryData.normalizeCountryName(country)
        -- Skip unrecognized countries
        if normalized == "(Unrecognized)" then
            return nil
        end
        return normalized
    end
   
    return p.addMultiValueSemanticProperties(
        countryValue,
        "Has country",
        countryProcessor,
        semanticOutput
    )
end
-- Adds semantic properties for multiple regions
-- This is a wrapper around addMultiValueSemanticProperties for backward compatibility
-- For new code, prefer using addMultiValueSemanticProperties directly with appropriate options
function p.addMultiRegionSemanticProperties(regionValue, semanticOutput)
    -- Use CountryData for region information
    local CountryData = require('Module:CountryData')
   
    -- First, replace "and" with semicolons to standardize the delimiter
    local standardizedInput = regionValue:gsub("%s+and%s+", ";")
   
    -- Define a processor that works directly with the data in CountryData
    local function regionProcessor(region)
        -- Skip unrecognized regions
        if region == "(Unrecognized)" then
            return nil
        end
       
        -- Trim the region and return it - CountryData will handle normalization
        local trimmed = region:match("^%s*(.-)%s*$")
        return trimmed
    end
   
    return p.addMultiValueSemanticProperties(
        standardizedInput,
        "Has ICANN region",
        regionProcessor,
        semanticOutput
    )
end
-- Adds semantic properties for multiple languages
-- This is a wrapper around addMultiValueSemanticProperties for backward compatibility
-- For new code, prefer using addMultiValueSemanticProperties directly with appropriate options
function p.addMultiLanguageSemanticProperties(languagesValue, semanticOutput)
    local LanguageNormalization = require('Module:LanguageNormalization')
   
    return p.addMultiValueSemanticProperties(
        languagesValue,
        "Speaks language",
        LanguageNormalization.normalize,
        semanticOutput
    )
end
end


Line 312: Line 356:
     skipProperties = skipProperties or {}
     skipProperties = skipProperties or {}
      
      
     -- For non-SMW case, collect property HTML fragments in a table for efficient concatenation
     -- Map to collect all values for each property for batch processing
     local propertyHtml = {}
     local allBatchProperties = {}
      
      
     for property, sourceFields in pairs(semanticConfig.additionalProperties) do
     for property, sourceFields in pairs(semanticConfig.additionalProperties) do
         -- Skip properties that are handled separately
         -- Skip properties that are handled separately
         if not skipProperties[property] then
         if not skipProperties[property] then
            -- Find the property type key in ConfigRepository
            local propertyTypeKey = nil
            local ConfigRepository = require('Module:ConfigRepository')
            for key, name in pairs(ConfigRepository.semanticProperties) do
                if name == property then
                    propertyTypeKey = key
                    break
                end
            end
           
            -- If no matching key found, use the property name directly
            if not propertyTypeKey then
                propertyTypeKey = property
            end
           
            -- Get transform function if available
            local transform = nil
            if semanticConfig.transforms and semanticConfig.transforms[property] then
                transform = semanticConfig.transforms[property]
            end
           
            -- Process each source field for this property
             for _, fieldName in ipairs(sourceFields) do
             for _, fieldName in ipairs(sourceFields) do
                 if args[fieldName] and args[fieldName] ~= "" then
                 local _, value = TemplateHelpers.getFieldValue(args, { key = fieldName })
                     local value = args[fieldName]
                if value and value ~= "" then
                    -- Split multi-value fields
                     local values = TemplateHelpers.splitMultiValueString(value)
                      
                      
                     -- Apply transformation if available
                     -- For each value, transform if needed and add to property collection
                    if semanticConfig.transforms and semanticConfig.transforms[property] then
                    for _, singleValue in ipairs(values) do
                        value = semanticConfig.transforms[property](value)
                        -- Apply transform if provided
                    end
                        local transformedValue = singleValue
                   
                        if transform and type(transform) == "function" then
                    -- Check if this is a multi-value field that needs to be split
                            transformedValue = transform(singleValue)
                    if p.isMultiValueField(value) then
                        end
                        -- Use the generic multi-value function
                       
                        semanticOutput = p.addMultiValueSemanticProperties(
                        if transformedValue and transformedValue ~= "" then
                            value,
                             -- Initialize property in batch collection if needed
                            property,
                             allBatchProperties[property] = allBatchProperties[property] or {}
                            semanticConfig.transforms and semanticConfig.transforms[property],
                           
                             semanticOutput
                             -- Add the transformed value to the batch collection
                        )
                             table.insert(allBatchProperties[property], transformedValue)
                    else
                        -- Single value property
                        if mw.smw then
                             mw.smw.set({[property] = value})
                        else
                             -- Collect HTML fragments instead of concatenating strings
                             table.insert(propertyHtml, '<div style="display:none;">')
                            table.insert(propertyHtml, '  {{#set: ' .. property .. '=' .. value .. ' }}')
                            table.insert(propertyHtml, '</div>')
                         end
                         end
                     end
                     end
Line 352: Line 411:
     end
     end
      
      
     -- For non-SMW case, concatenate all property HTML fragments at once
     -- Process all collected properties in one batch
    if not mw.smw and #propertyHtml > 0 then
    if next(allBatchProperties) then
        semanticOutput = semanticOutput .. "\n" .. table.concat(propertyHtml, "\n")
        local SemanticAnnotations = require('Module:SemanticAnnotations')
       
        -- Deduplicate all property values before sending to SemanticAnnotations
        for prop, values in pairs(allBatchProperties) do
            if type(values) == 'table' then
                allBatchProperties[prop] = TemplateHelpers.removeDuplicates(values)
            end
        end
       
        -- Use batched property setting with SemanticAnnotations
        if mw.smw then
            local dummyArgs = {} -- We're not using args from the template
            local additionalOutput = SemanticAnnotations.setSemanticProperties(
                dummyArgs,
                allBatchProperties,
                {transform = nil} -- No transforms needed as we've already processed the values
            )
           
            if additionalOutput and additionalOutput ~= "" then
                semanticOutput = semanticOutput .. additionalOutput
            end
        else
            -- Fallback to HTML generation for non-SMW case
            local propertyHtml = {}
           
            for property, values in pairs(allBatchProperties) do
                for _, value in ipairs(values) do
                    table.insert(propertyHtml, '<div style="display:none;">')
                    table.insert(propertyHtml, '  {{#set: ' .. property .. '=' .. value .. ' }}')
                    table.insert(propertyHtml, '</div>')
                end
            end
           
            -- Concatenate all property HTML fragments at once
            if #propertyHtml > 0 then
                semanticOutput = semanticOutput .. "\n" .. table.concat(propertyHtml, "\n")
            end
        end
     end
     end
      
      
Line 376: Line 472:
     }
     }
      
      
     -- Set basic properties
     -- Collect all properties in a single batch for complete deduplication
    local allProperties = {}
   
    -- Add basic properties directly from properties config
    if semanticConfig.properties then
        for property, param in pairs(semanticConfig.properties) do
            -- Copy to allProperties
            allProperties[property] = param
        end
    end
   
    -- Process additional properties
    local skipProperties = options.skipProperties or {}
   
    -- Create a collector function that gathers properties instead of generating output
    local propertyCollector = {}
   
    -- Process additional properties into the collector
    if semanticConfig.additionalProperties then
        for property, sourceFields in pairs(semanticConfig.additionalProperties) do
            -- Skip properties that are handled separately
            if not skipProperties[property] then
                -- Collect all values for this property
                local allValues = {}
               
                for _, fieldName in ipairs(sourceFields) do
                    if args[fieldName] and args[fieldName] ~= "" then
                        local value = args[fieldName]
                       
                        -- Transform if needed
                        if semanticConfig.transforms and semanticConfig.transforms[property] then
                            local transform = semanticConfig.transforms[property]
                            if transform and type(transform) == "function" then
                                local items = TemplateHelpers.splitMultiValueString(value)
                                for _, item in ipairs(items) do
                                    local transformed = transform(item)
                                    if transformed and transformed ~= "" then
                                        table.insert(allValues, transformed)
                                    end
                                end
                            else
                                -- If no transform, add as is
                                table.insert(allValues, value)
                            end
                        else
                            -- No transform, add as is
                            table.insert(allValues, value)
                        end
                    end
                end
               
                -- Only add if we have values
                if #allValues > 0 then
                    if allProperties[property] then
                        -- Convert to array if needed
                        if type(allProperties[property]) ~= "table" then
                            allProperties[property] = {allProperties[property]}
                        end
                       
                        -- Add all values
                        for _, val in ipairs(allValues) do
                            table.insert(allProperties[property], val)
                        end
                    else
                        -- If only one value, add directly
                        if #allValues == 1 then
                            allProperties[property] = allValues[1]
                        else
                            allProperties[property] = allValues
                        end
                    end
                end
            end
        end
    end
   
    -- Deduplicate all property values before sending to SemanticAnnotations
    for prop, values in pairs(allProperties) do
        if type(values) == 'table' then
            allProperties[prop] = TemplateHelpers.removeDuplicates(values)
        end
    end
   
    -- Now process all collected properties in one batch using setSemanticProperties
     local semanticOutput = SemanticAnnotations.setSemanticProperties(
     local semanticOutput = SemanticAnnotations.setSemanticProperties(
         args,  
         args,  
         semanticConfig.properties,  
         allProperties,  
         semanticOptions
         semanticOptions
     )
     )
   
    -- Process additional properties with multi-value support
    local skipProperties = options.skipProperties or {}
    semanticOutput = p.processAdditionalProperties(args, semanticConfig, semanticOutput, skipProperties)
      
      
     return semanticOutput
     return semanticOutput

Latest revision as of 03:12, 25 August 2025

Documentation for this module may be created at Module:SemanticCategoryHelpers/doc

--[[
* Name: SemanticCategoryHelpers
* Author: Mark W. Datysgeld
* Description: Utilities for semantic property and category handling in templates, extracted from TemplateHelpers for improved modularity
* Notes: Splitting multi-value strings; building category tags; adding categories based on canonical mappings; processing multi-value semantic properties; generating semantic properties based on configuration; retrieving property descriptions from property pages; includes property description caching and type registry
]]

local p = {}

-- Dependencies
local CanonicalForms = require('Module:CanonicalForms')
local SemanticAnnotations = require('Module:SemanticAnnotations')
local TemplateHelpers = require('Module:TemplateHelpers')

--------------------------------------------------------------------------------
-- Property Description Cache
--------------------------------------------------------------------------------

-- Module-level cache for property descriptions
local propertyDescriptionCache = {}

-- Get property description from a property page
-- Reads the wikitext of "Property:<propertyName>" and extracts the text of a
-- "[[Has property description::...]]" annotation (a trailing "@en" language
-- tag is stripped). If no such annotation exists, falls back to the text that
-- follows "'''Canonical description''': " up to the first '.', '?' or '!'.
-- Results, including misses, are memoized in propertyDescriptionCache.
-- @param propertyName string The name of the property (e.g., "Has interview format")
-- @return string|nil The property description or nil if not found
function p.getPropertyDescription(propertyName)
    -- Nothing to look up; also avoids a concatenation error on nil below
    if propertyName == nil or propertyName == "" then
        return nil
    end

    -- Check cache first. Misses are stored as `false`: assigning nil to a
    -- table key removes the key, so a nil "entry" could never be found again
    -- and every miss would repeat the page lookup.
    local cached = propertyDescriptionCache[propertyName]
    if cached ~= nil then
        return cached or nil
    end

    -- Construct the property page title
    local propertyPageTitle = "Property:" .. propertyName

    -- Try to load the property page
    local propertyPage = mw.title.new(propertyPageTitle)
    if not propertyPage or not propertyPage.exists then
        propertyDescriptionCache[propertyName] = false
        return nil
    end

    -- Extract the description from the page content
    local content = propertyPage:getContent()
    if not content then
        propertyDescriptionCache[propertyName] = false
        return nil
    end

    -- Look for the property description in the content.
    -- Capture everything up to the closing "]]" and strip the "@en" tag
    -- afterwards. Folding the tag into the pattern as optional characters
    -- ("@?e?n?") would let it swallow a trailing "e"/"n"/"en" of the
    -- description itself (e.g. "region" -> "regio").
    local description = content:match("%[%[Has property description::(.-)%]%]")
    if description then
        -- Parentheses drop gsub's second return value (the match count)
        description = (description:gsub("@en$", ""))
    else
        -- Try canonical description as fallback
        description = content:match("'''Canonical description''': (.-)[%.%?!]")
    end

    -- Cache the result; `false` marks "looked up, nothing found"
    if description == nil then
        propertyDescriptionCache[propertyName] = false
    else
        propertyDescriptionCache[propertyName] = description
    end

    return description
end

--------------------------------------------------------------------------------
-- Property Type Registry
--------------------------------------------------------------------------------

-- Registry of the built-in property types.
-- Every entry provides:
-- - getPropertyName: zero-argument function returning the property name that
--   ConfigRepository.semanticProperties holds under the entry's key
-- - processor (optional): function mapping one raw value to the value to
--   store, or nil to drop it; `person` defines no processor
-- Supporting modules are required inside the functions, i.e. only when an
-- entry is actually used.
local propertyTypes
do
    -- Returns a getter that reads ConfigRepository.semanticProperties[key]
    -- each time it is invoked.
    local function configuredName(key)
        return function()
            return require('Module:ConfigRepository').semanticProperties[key]
        end
    end

    -- Normalizes through CountryData; the "(Unrecognized)" result is dropped.
    local function processCountry(value)
        local normalized = require('Module:CountryData').normalizeCountryName(value)
        if normalized == "(Unrecognized)" then
            return nil
        end
        return normalized
    end

    -- Drops the literal "(Unrecognized)"; otherwise trims surrounding whitespace.
    local function processRegion(value)
        if value == "(Unrecognized)" then
            return nil
        end
        return value:match("^%s*(.-)%s*$")
    end

    -- Delegates entirely to NormalizationLanguage.normalize.
    local function processLanguage(value)
        return require('Module:NormalizationLanguage').normalize(value)
    end

    propertyTypes = {
        country = {
            getPropertyName = configuredName("country"),
            processor = processCountry,
        },
        region = {
            getPropertyName = configuredName("region"),
            processor = processRegion,
        },
        language = {
            getPropertyName = configuredName("language"),
            processor = processLanguage,
        },
        person = {
            getPropertyName = configuredName("person"),
        },
    }
end

--------------------------------------------------------------------------------
-- Core Utilities
--------------------------------------------------------------------------------

-- Delimiter rule list holding a single semicolon rule, kept for backward
-- compatibility with splitSemicolonValues.
-- Published on the module table so other modules can reuse it.
-- NOTE(review): the consumer of the pattern/replacement pair is not visible
-- here; presumably a splitter rewrites each `pattern` match to `replacement`
-- before splitting - confirm against the splitting helper.
p.SEMICOLON_PATTERN = {
    {
        pattern = ";%s*",
        replacement = ";",
    },
}


-- Helper function to check if a field contains multiple values
-- A value counts as multi-value when it contains a semicolon, or the word
-- "and" with whitespace on both sides.
-- @param value string|nil The raw field value
-- @return boolean true when a delimiter is present; false otherwise
--         (including nil, empty and non-string input)
function p.isMultiValueField(value)
    if type(value) ~= "string" or value == "" then
        return false
    end

    -- Plain (non-pattern) search for the semicolon delimiter
    if value:find(";", 1, true) then
        return true
    end

    -- Always hand back a real boolean rather than the matched text or nil
    return value:find("%s+and%s+") ~= nil
end

--------------------------------------------------------------------------------
-- Category Utilities
--------------------------------------------------------------------------------

-- Wraps a category name in MediaWiki link syntax.
--   "[[Category:X]]" is returned untouched
--   "Category:X"     becomes "[[Category:X]]"
--   "X"              becomes "[[Category:X]]"
-- nil and "" both yield "".
function p.formatCategoryName(categoryName)
    if not categoryName or categoryName == "" then
        return ""
    end

    -- Complete link already: nothing to do
    local isCompleteLink = categoryName:match("^%[%[Category:[^%]]+%]%]$")
    if isCompleteLink then
        return categoryName
    end

    -- Namespace prefix present: only the brackets are missing
    local hasNamespace = categoryName:match("^Category:")
    if hasNamespace then
        return ("[[%s]]"):format(categoryName)
    end

    -- Bare name: add namespace and brackets
    return ("[[Category:%s]]"):format(categoryName)
end

-- Turns a list of category names into newline-separated category links.
-- Each entry goes through p.formatCategoryName, so plain names, "Category:"
-- prefixed names and complete links are all accepted.
-- Returns "" when the list is nil or empty.
function p.buildCategories(categories)
    if not categories or #categories == 0 then
        return ""
    end

    local links = {}
    for _, name in ipairs(categories) do
        links[#links + 1] = p.formatCategoryName(name)
    end

    return table.concat(links, "\n")
end

-- Looks up the category attached to a value's canonical form.
-- The value is normalized with CanonicalForms.normalize; the first mapping
-- group whose `canonical` equals that result and which has a `category`
-- contributes that category.
-- Returns a new list with zero or one entries.
function p.addMappingCategories(value, mapping)
    local matched = {}
    if not value or value == "" or not mapping then
        return matched
    end

    -- Parentheses keep only the first value returned by normalize
    local canonical = (CanonicalForms.normalize(value, mapping))
    if not canonical then
        return matched
    end

    for _, group in ipairs(mapping) do
        if group.canonical == canonical and group.category then
            table.insert(matched, group.category)
            return matched
        end
    end

    return matched
end

-- Generic function to add multi-value categories
-- This is a generalized helper that can be used for any multi-value category field
-- @param value string|nil Raw field value; nil or "" leaves `categories` as is
-- @param processor function|nil Optional mapper applied to each item; items
--        for which it returns nil or "" are dropped
-- @param categories table|nil Existing category list; new entries are
--        appended to it in place. Defaults to an empty list when omitted.
-- @param options table|nil Optional settings:
--        valueGetter (function) - receives `value` and returns the items to
--        process, replacing the default TemplateHelpers.splitMultiValueString
-- @return table `categories` itself when `value` is empty, otherwise the
--         result of TemplateHelpers.removeDuplicates on the combined list
function p.addMultiValueCategories(value, processor, categories, options)
    -- Tolerate callers that start without a list
    categories = categories or {}
    if not value or value == "" then return categories end

    options = options or {}

    -- Get the values to process
    local items
    if type(options.valueGetter) == "function" then
        -- Use custom value getter if provided
        items = options.valueGetter(value)
    else
        -- Default to splitting the string
        items = TemplateHelpers.splitMultiValueString(value)
    end
    -- A getter that yields nothing usable contributes no items
    if type(items) ~= "table" then
        items = {}
    end

    local applyProcessor = type(processor) == "function"

    -- Stage the accepted items first so that an error raised by the processor
    -- leaves `categories` untouched
    local accepted = {}
    for _, item in ipairs(items) do
        local processedItem = item
        if applyProcessor then
            processedItem = processor(item)
        end

        -- Only add if valid
        if processedItem and processedItem ~= "" then
            accepted[#accepted + 1] = processedItem
        end
    end

    -- Combine existing categories with new ones
    for _, category in ipairs(accepted) do
        table.insert(categories, category)
    end

    -- Use the centralized removeDuplicates function to deduplicate the combined list
    return TemplateHelpers.removeDuplicates(categories)
end

-- Splits a region value into its individual entries.
-- Kept for backward compatibility: this only forwards to
-- TemplateHelpers.splitMultiValueString, which owns the actual delimiter rules
-- (presumably ";" and " and " - confirm in Module:TemplateHelpers).
-- @param regionValue The raw region field value, passed through unchanged
-- @return Whatever TemplateHelpers.splitMultiValueString returns; other
--         functions in this module iterate that result with ipairs
function p.splitRegionCategories(regionValue)
    return TemplateHelpers.splitMultiValueString(regionValue)
end

--------------------------------------------------------------------------------
-- Semantic Property Helpers
--------------------------------------------------------------------------------

-- Sets one semantic property from a (possibly multi-value) field value.
-- @param propertyType - A key of the built-in type registry ("country",
--        "region", "language", "person"), a key of
--        ConfigRepository.semanticProperties, or a literal property name
-- @param value - Raw field value; nil or "" returns semanticOutput unchanged
-- @param semanticOutput - Output accumulated so far; new output is appended
-- @param options - Optional table:
--        processor   - per-item mapper, used only when propertyType is not a
--                      built-in type (built-in types use their own processor)
--        valueGetter - function(value) returning the items to process instead
--                      of TemplateHelpers.splitMultiValueString
-- @return The updated semantic output
function p.addSemanticProperties(propertyType, value, semanticOutput, options)
    if not value or value == "" then return semanticOutput end

    local opts = options or {}

    -- Resolve the property name and the per-item processor
    local propertyName, processor
    local builtin = propertyTypes[propertyType]
    if builtin then
        processor = builtin.processor
        propertyName = builtin.getPropertyName()
    else
        -- Configured key if ConfigRepository knows it, literal name otherwise
        local configured = require('Module:ConfigRepository').semanticProperties[propertyType]
        processor = opts.processor
        propertyName = configured or propertyType
    end

    -- Obtain the individual items
    local items
    if type(opts.valueGetter) == "function" then
        items = opts.valueGetter(value)
    else
        items = TemplateHelpers.splitMultiValueString(value)
    end

    -- Run every item through the processor, keeping non-empty results
    local runProcessor = type(processor) == "function"
    local kept = {}
    for _, item in ipairs(items) do
        local result = item
        if runProcessor then
            result = processor(item)
        end
        if result and result ~= "" then
            kept[#kept + 1] = result
        end
    end

    local unique = TemplateHelpers.removeDuplicates(kept)
    if #unique == 0 then
        return semanticOutput
    end

    -- One value is passed as a scalar, several as a list
    local payload = {}
    if #unique == 1 then
        payload[propertyName] = unique[1]
    else
        payload[propertyName] = unique
    end

    if mw.smw then
        -- Values are already processed, so no transforms are handed over and
        -- the args table is left empty
        local SemanticAnnotations = require('Module:SemanticAnnotations')
        local emitted = SemanticAnnotations.setSemanticProperties(payload and {}, payload, {transform = nil})
        if emitted and emitted ~= "" then
            semanticOutput = semanticOutput .. emitted
        end
        return semanticOutput
    end

    -- Without mw.smw: emit one hidden #set fragment per value
    local fragments = {}
    for _, single in ipairs(unique) do
        fragments[#fragments + 1] = '<div style="display:none;">'
        fragments[#fragments + 1] = '  {{#set: ' .. propertyName .. '=' .. single .. ' }}'
        fragments[#fragments + 1] = '</div>'
    end

    return semanticOutput .. "\n" .. table.concat(fragments, "\n")
end

-- Helper function to process additional properties with multi-value support
-- This standardizes how additional properties are handled across templates
-- For every entry of semanticConfig.additionalProperties (property name ->
-- list of source field names) the field values are read via
-- TemplateHelpers.getFieldValue, split with
-- TemplateHelpers.splitMultiValueString, optionally passed through
-- semanticConfig.transforms[property], deduplicated, and then set in a single
-- batch.
-- @param args table Template parameters
-- @param semanticConfig table|nil Config with additionalProperties and,
--        optionally, transforms
-- @param semanticOutput string|nil Output accumulated so far; treated as ""
--        when nil and something has to be appended
-- @param skipProperties table|nil Set of property names to leave out
-- @return string|nil The updated semantic output
function p.processAdditionalProperties(args, semanticConfig, semanticOutput, skipProperties)
    if not semanticConfig or not semanticConfig.additionalProperties then
        return semanticOutput
    end

    skipProperties = skipProperties or {}
    local transforms = semanticConfig.transforms

    -- Map to collect all values for each property for batch processing
    local allBatchProperties = {}

    for property, sourceFields in pairs(semanticConfig.additionalProperties) do
        -- Skip properties that are handled separately
        if not skipProperties[property] then
            -- Only a function-valued transform is applied; anything else is ignored
            local transform = transforms and transforms[property]
            if type(transform) ~= "function" then
                transform = nil
            end

            -- Process each source field for this property
            for _, fieldName in ipairs(sourceFields) do
                local _, value = TemplateHelpers.getFieldValue(args, { key = fieldName })
                if value and value ~= "" then
                    for _, singleValue in ipairs(TemplateHelpers.splitMultiValueString(value)) do
                        local transformedValue = singleValue
                        if transform then
                            transformedValue = transform(singleValue)
                        end

                        if transformedValue and transformedValue ~= "" then
                            local bucket = allBatchProperties[property]
                            if not bucket then
                                bucket = {}
                                allBatchProperties[property] = bucket
                            end
                            bucket[#bucket + 1] = transformedValue
                        end
                    end
                end
            end
        end
    end

    -- Nothing collected: nothing to emit
    if next(allBatchProperties) == nil then
        return semanticOutput
    end

    -- Deduplicate all property values before sending to SemanticAnnotations
    -- (replacing the value of an existing key during pairs() is permitted)
    for prop, values in pairs(allBatchProperties) do
        allBatchProperties[prop] = TemplateHelpers.removeDuplicates(values)
    end

    if mw.smw then
        -- Values are already processed, so the args table stays empty and no
        -- transforms are passed on
        local additionalOutput = SemanticAnnotations.setSemanticProperties(
            {},
            allBatchProperties,
            {transform = nil}
        )

        if additionalOutput and additionalOutput ~= "" then
            semanticOutput = (semanticOutput or "") .. additionalOutput
        end
    else
        -- Fallback to HTML generation for non-SMW case
        local propertyHtml = {}

        for property, values in pairs(allBatchProperties) do
            for _, value in ipairs(values) do
                propertyHtml[#propertyHtml + 1] = '<div style="display:none;">'
                propertyHtml[#propertyHtml + 1] = '  {{#set: ' .. property .. '=' .. value .. ' }}'
                propertyHtml[#propertyHtml + 1] = '</div>'
            end
        end

        -- Concatenate all property HTML fragments at once
        if #propertyHtml > 0 then
            semanticOutput = (semanticOutput or "") .. "\n" .. table.concat(propertyHtml, "\n")
        end
    end

    return semanticOutput
end

-- Generates semantic properties based on configuration
-- Builds one property map from semanticConfig.properties plus the values
-- gathered for semanticConfig.additionalProperties, deduplicates every
-- list-valued entry and hands the map to
-- SemanticAnnotations.setSemanticProperties in a single call.
-- The tables inside semanticConfig are never modified.
-- @param args - Template parameters
-- @param semanticConfig - Config with properties, transforms, additionalProperties
-- @param options - Options: transform (functions, used only when
--        semanticConfig.transforms is absent), skipProperties (to exclude)
-- @return Wikitext with semantic annotations ("" when args or semanticConfig
--         is missing)
function p.generateSemanticProperties(args, semanticConfig, options)
    if not args or not semanticConfig then return "" end

    options = options or {}
    local transforms = semanticConfig.transforms
    local skipProperties = options.skipProperties or {}

    -- Set options
    local semanticOptions = {
        transform = transforms or options.transform
    }

    -- Collect all properties in a single batch for complete deduplication
    local allProperties = {}

    -- Add basic properties directly from properties config
    if semanticConfig.properties then
        for property, param in pairs(semanticConfig.properties) do
            allProperties[property] = param
        end
    end

    if semanticConfig.additionalProperties then
        for property, sourceFields in pairs(semanticConfig.additionalProperties) do
            -- Skip properties that are handled separately
            if not skipProperties[property] then
                -- Only a function-valued transform is applied
                local transform = transforms and transforms[property]
                if type(transform) ~= "function" then
                    transform = nil
                end

                -- Collect all values for this property
                local allValues = {}
                for _, fieldName in ipairs(sourceFields) do
                    local value = args[fieldName]
                    if value and value ~= "" then
                        if transform then
                            -- With a transform the value is split and each
                            -- item transformed individually
                            for _, item in ipairs(TemplateHelpers.splitMultiValueString(value)) do
                                local transformed = transform(item)
                                if transformed and transformed ~= "" then
                                    allValues[#allValues + 1] = transformed
                                end
                            end
                        else
                            -- No transform, add as is
                            allValues[#allValues + 1] = value
                        end
                    end
                end

                -- Only add if we have values
                if #allValues > 0 then
                    local existing = allProperties[property]
                    if existing then
                        -- `existing` comes from semanticConfig.properties.
                        -- Merge into a fresh table so the caller's config is
                        -- not extended on every call.
                        local merged = {}
                        if type(existing) == "table" then
                            for key, val in pairs(existing) do
                                merged[key] = val
                            end
                        else
                            merged[1] = existing
                        end
                        for _, val in ipairs(allValues) do
                            table.insert(merged, val)
                        end
                        allProperties[property] = merged
                    elseif #allValues == 1 then
                        -- If only one value, add directly
                        allProperties[property] = allValues[1]
                    else
                        allProperties[property] = allValues
                    end
                end
            end
        end
    end

    -- Deduplicate all property values before sending to SemanticAnnotations
    -- (replacing the value of an existing key during pairs() is permitted)
    for prop, values in pairs(allProperties) do
        if type(values) == 'table' then
            allProperties[prop] = TemplateHelpers.removeDuplicates(values)
        end
    end

    -- Now process all collected properties in one batch using setSemanticProperties
    return SemanticAnnotations.setSemanticProperties(
        args,
        allProperties,
        semanticOptions
    )
end

return p