-- Wiki page: Module:SemanticCategoryHelpers
-- Documentation for this module may be created at Module:SemanticCategoryHelpers/doc
-- Module:SemanticCategoryHelpers
-- Provides utilities for semantic property and category handling in templates.
-- Extracted from TemplateHelpers to improve modularity and focus.
--
-- This module combines semantic property and category utilities that are
-- frequently used together in templates. It provides functions for:
-- * Splitting multi-value strings (e.g., "value1; value2 and value3")
-- * Building category tags from category names
-- * Adding categories based on canonical mappings
-- * Processing multi-value semantic properties
-- * Handling specialized semantic properties (countries, regions, languages)
-- * Generating semantic properties based on configuration
local p = {}
-- Dependencies
local CanonicalForms = require('Module:CanonicalForms')
local SemanticAnnotations = require('Module:SemanticAnnotations')
--------------------------------------------------------------------------------
-- Core Utilities
--------------------------------------------------------------------------------
-- Delimiter rewrite rules used by splitMultiValueString when the caller
-- supplies none. Each rule pairs a Lua pattern with the text substituted for
-- every match; both rules reduce their delimiter to a bare semicolon.
-- Declared at module level so the table is built once, not on every call.
local defaultDelimiters = {
    { pattern = "%s+and%s+", replacement = ";" }, -- the word "and" surrounded by whitespace
    { pattern = ";%s*", replacement = ";" },      -- a semicolon plus any trailing whitespace
}
-- Splits a multi-value string into an array of trimmed, non-empty values.
--
-- Each delimiter rule is applied in order with string.gsub, rewriting its
-- matches to the rule's replacement (";" when the rule gives none). The
-- rewritten string is then cut on semicolons.
--
-- NOTE: the default rules treat the whitespace-delimited word "and" as a
-- separator, so a single value that contains it (e.g. "Trinidad and Tobago")
-- is split in two. Pass custom delimiters to avoid that.
--
-- @param value       String to split. Numbers are converted with tostring;
--                    nil, false, "" and any other type yield an empty array.
-- @param delimiters  Optional array of {pattern = <Lua pattern>,
--                    replacement = <string>} rules; defaults to defaultDelimiters.
-- @return Array (possibly empty) of values in their original order.
function p.splitMultiValueString(value, delimiters)
    if not value or value == "" then return {} end
    if type(value) == "number" then
        value = tostring(value)
    elseif type(value) ~= "string" then
        -- Tables, booleans, etc. have no string methods; treat them as "no values".
        return {}
    end

    delimiters = delimiters or defaultDelimiters

    -- Standardize every delimiter to a semicolon.
    local normalized = value
    for _, rule in ipairs(delimiters) do
        -- Parenthesised to drop gsub's second result (the substitution count).
        normalized = (normalized:gsub(rule.pattern, rule.replacement or ";"))
    end

    -- Cut on semicolons, trimming each piece and discarding blank ones.
    local items = {}
    for piece in normalized:gmatch("[^;]+") do
        local trimmed = piece:match("^%s*(.-)%s*$")
        if trimmed and trimmed ~= "" then
            items[#items + 1] = trimmed
        end
    end
    return items
end
-- Reports whether a value holds more than one entry, i.e. whether it contains
-- a semicolon or the word "and" surrounded by whitespace.
-- @param value  Value to inspect. Anything that is not a non-empty string
--               (nil, numbers, tables, "") yields false instead of raising.
-- @return true or false. Always a real boolean, never the matched text.
function p.isMultiValueField(value)
    if type(value) ~= "string" or value == "" then return false end
    -- Plain find for the literal semicolon; pattern find for the "and" conjunction.
    return value:find(";", 1, true) ~= nil or value:find("%s+and%s+") ~= nil
end
--------------------------------------------------------------------------------
-- Category Utilities
--------------------------------------------------------------------------------
-- Renders a list of category names as wikitext category links, one per line.
-- Entries that already begin with "[[Category:" are emitted untouched; every
-- other entry is wrapped as [[Category:<name>]].
-- @param categories  Array of category names and/or ready-made category links.
-- @return Newline-joined category links, or "" when the list is nil or empty.
function p.buildCategories(categories)
    if not categories or #categories == 0 then
        return ""
    end

    local links = {}
    for position, name in ipairs(categories) do
        local alreadyWrapped = string.match(name, "^%[%[Category:")
        if alreadyWrapped then
            links[position] = name
        else
            links[position] = string.format("[[Category:%s]]", name)
        end
    end

    return table.concat(links, "\n")
end
-- Looks up the category attached to a value's canonical form.
-- The first result of CanonicalForms.normalize(value, mapping) is taken as the
-- canonical form; the first mapping group whose `canonical` field equals it
-- and which carries a `category` field supplies the category.
-- @param value    Raw value to classify.
-- @param mapping  Array of groups, each read for its `canonical` and `category` fields.
-- @return Array holding the single matching category, or an empty array when
--         the input is missing/empty or nothing matches.
function p.addMappingCategories(value, mapping)
    local found = {}
    if not value or value == "" or not mapping then
        return found
    end

    -- Parentheses keep only the first value returned by normalize.
    local canonical = (CanonicalForms.normalize(value, mapping))
    if not canonical then
        return found
    end

    for _, group in ipairs(mapping) do
        if group.canonical == canonical and group.category then
            found[1] = group.category
            break
        end
    end
    return found
end
-- Appends one category per entry of a multi-value field to `categories`.
--
-- @param value       Raw field value. When nil or "" the function returns
--                    `categories` exactly as it was passed in.
-- @param processor   Optional function(item) mapping each entry to a category
--                    name; a falsy or "" result skips the entry. When omitted,
--                    entries are used as-is.
-- @param categories  Array to append to (mutated in place). A nil value is
--                    treated as an empty array once there is something to process.
-- @param options     Optional table. options.valueGetter, when a function, is
--                    called with `value` and must return the array of entries;
--                    otherwise `value` is split with splitMultiValueString.
-- @return The categories array.
function p.addMultiValueCategories(value, processor, categories, options)
    if not value or value == "" then return categories end
    categories = categories or {}
    options = options or {}

    -- Get the entries to process.
    local items
    if type(options.valueGetter) == "function" then
        items = options.valueGetter(value)
    else
        items = p.splitMultiValueString(value)
    end
    -- A custom getter may yield nothing usable; there is then nothing to add.
    if type(items) ~= "table" then return categories end

    local applyProcessor = type(processor) == "function"
    for _, item in ipairs(items) do
        local category = item
        if applyProcessor then
            category = processor(item)
        end
        -- Only add valid results.
        if category and category ~= "" then
            categories[#categories + 1] = category
        end
    end

    return categories
end
-- Backward-compatibility alias: a region string is split exactly like any
-- other multi-value string, so this delegates to splitMultiValueString.
-- @param regionValue  Region string, e.g. regions joined with ";" or "and".
-- @return Array of individual region names.
function p.splitRegionCategories(regionValue)
    local regions = p.splitMultiValueString(regionValue)
    return regions
end
--------------------------------------------------------------------------------
-- Semantic Property Helpers
--------------------------------------------------------------------------------
-- Records one semantic property value per distinct entry of a multi-value field.
--
-- When mw.smw is available each value is stored through mw.smw.set and
-- `semanticOutput` is returned untouched. Otherwise a hidden <div> holding a
-- {{#set: ...}} call is generated per value and appended to `semanticOutput`.
--
-- @param value           Raw field value. When nil or "" nothing is done.
-- @param propertyName    Name of the semantic property to set.
-- @param processor       Optional function(item) normalising each entry; a
--                        falsy or "" result skips the entry.
-- @param semanticOutput  Wikitext accumulated so far. nil is treated as ""
--                        when fragments have to be appended.
-- @param options         Optional table. options.valueGetter, when a function,
--                        is called with `value` and must return the array of
--                        entries; otherwise splitMultiValueString is used.
-- @return semanticOutput, extended with the generated fragments in the
--         non-SMW case.
function p.addMultiValueSemanticProperties(value, propertyName, processor, semanticOutput, options)
    if not value or value == "" then return semanticOutput end
    options = options or {}

    -- Get the entries to process.
    local items
    if type(options.valueGetter) == "function" then
        items = options.valueGetter(value)
    else
        items = p.splitMultiValueString(value)
    end
    -- A custom getter may yield nothing usable; there is then nothing to set.
    if type(items) ~= "table" then return semanticOutput end

    local smw = mw.smw                       -- looked up once, not per entry
    local applyProcessor = type(processor) == "function"
    local seen = {}                          -- values already recorded (de-duplication)
    local fragments = {}                     -- non-SMW fallback markup, joined at the end

    for _, item in ipairs(items) do
        local processed = item
        if applyProcessor then
            processed = processor(item)
        end

        -- Only record valid values, and each distinct value only once.
        if processed and processed ~= "" and not seen[processed] then
            seen[processed] = true
            if smw then
                smw.set({[propertyName] = processed})
            else
                -- tostring keeps a non-string processor result from raising here.
                fragments[#fragments + 1] = '<div style="display:none;">'
                fragments[#fragments + 1] = ' {{#set: ' .. propertyName .. '=' .. tostring(processed) .. ' }}'
                fragments[#fragments + 1] = '</div>'
            end
        end
    end

    -- Fragments exist only in the non-SMW case; join them in one pass.
    if #fragments > 0 then
        semanticOutput = (semanticOutput or "") .. "\n" .. table.concat(fragments, "\n")
    end

    return semanticOutput
end
-- Sets the "Has country" semantic property for every country named in
-- countryValue. Each name is passed through CountryData.normalizeCountryName;
-- names that come back as "(Unrecognized)" are dropped.
-- Retained for backward compatibility: new code can call
-- addMultiValueSemanticProperties directly with its own processor.
-- @param countryValue    Multi-value country string.
-- @param semanticOutput  Wikitext accumulated so far.
-- @return Whatever addMultiValueSemanticProperties returns for these arguments.
function p.addMultiCountrySemanticProperties(countryValue, semanticOutput)
    local CountryData = require('Module:CountryData')

    -- Maps a raw country name to its normalized form, or nil when unrecognized.
    local function toCanonicalCountry(rawName)
        local name = CountryData.normalizeCountryName(rawName)
        if name ~= "(Unrecognized)" then
            return name
        end
        return nil
    end

    return p.addMultiValueSemanticProperties(countryValue, "Has country", toCanonicalCountry, semanticOutput)
end
-- Sets the "Has ICANN region" semantic property for every region named in
-- regionValue. Region names are recorded verbatim (no normalization is
-- applied here); only the "(Unrecognized)" placeholder is dropped.
-- Retained for backward compatibility: new code can call
-- addMultiValueSemanticProperties directly with its own processor.
--
-- A nil or empty regionValue returns semanticOutput unchanged, like the other
-- multi-value wrappers in this module.
--
-- @param regionValue     Multi-value region string (";" or "and" separated).
-- @param semanticOutput  Wikitext accumulated so far.
-- @return Whatever addMultiValueSemanticProperties returns for these arguments.
function p.addMultiRegionSemanticProperties(regionValue, semanticOutput)
    -- No pre-processing is needed: splitMultiValueString (used by
    -- addMultiValueSemanticProperties) already rewrites "and" to ";" and trims
    -- each entry before the processor sees it.
    local function regionProcessor(region)
        -- Skip the placeholder used for unrecognized regions.
        if region == "(Unrecognized)" then
            return nil
        end
        return region
    end

    return p.addMultiValueSemanticProperties(
        regionValue,
        "Has ICANN region",
        regionProcessor,
        semanticOutput
    )
end
-- Sets the "Speaks language" semantic property for every language named in
-- languagesValue, using LanguageNormalization.normalize as the per-entry
-- processor.
-- Retained for backward compatibility: new code can call
-- addMultiValueSemanticProperties directly with its own processor.
-- @param languagesValue  Multi-value language string.
-- @param semanticOutput  Wikitext accumulated so far.
-- @return Whatever addMultiValueSemanticProperties returns for these arguments.
function p.addMultiLanguageSemanticProperties(languagesValue, semanticOutput)
    local normalizeLanguage = require('Module:LanguageNormalization').normalize
    return p.addMultiValueSemanticProperties(languagesValue, "Speaks language", normalizeLanguage, semanticOutput)
end
-- Sets the "additional" semantic properties declared in a template's semantic
-- configuration, with multi-value support.
--
-- semanticConfig.additionalProperties maps a property name to an array of
-- argument names. For every non-empty argument:
--   * a multi-value argument (see isMultiValueField) is split and each entry
--     is passed through the property's transform exactly once;
--   * a single-value argument is transformed once and recorded; if the
--     transform itself produces a multi-value string, that string is split
--     and its entries recorded without transforming them again.
-- A transform result that is nil, false or "" records nothing.
--
-- @param args            Template parameters. nil returns semanticOutput unchanged.
-- @param semanticConfig  Table read for `additionalProperties` and the
--                        optional `transforms` (property name -> function).
-- @param semanticOutput  Wikitext accumulated so far. nil is treated as ""
--                        when fragments have to be appended here.
-- @param skipProperties  Optional set (property name -> truthy) of properties
--                        that are handled elsewhere and must be skipped.
-- @return semanticOutput, extended with {{#set: ...}} fragments when mw.smw is
--         not available.
function p.processAdditionalProperties(args, semanticConfig, semanticOutput, skipProperties)
    if not args or not semanticConfig or not semanticConfig.additionalProperties then
        return semanticOutput
    end

    skipProperties = skipProperties or {}
    local transforms = semanticConfig.transforms
    local smw = mw.smw
    local fragments = {} -- non-SMW fallback markup for single values, joined at the end

    -- Records one already-transformed single value.
    local function setSingle(property, value)
        if smw then
            smw.set({[property] = value})
        else
            fragments[#fragments + 1] = '<div style="display:none;">'
            fragments[#fragments + 1] = ' {{#set: ' .. property .. '=' .. tostring(value) .. ' }}'
            fragments[#fragments + 1] = '</div>'
        end
    end

    for property, sourceFields in pairs(semanticConfig.additionalProperties) do
        -- Skip properties that are handled separately.
        if not skipProperties[property] then
            local transform = transforms and transforms[property]
            if type(transform) ~= "function" then
                transform = nil
            end

            for _, fieldName in ipairs(sourceFields) do
                local raw = args[fieldName]
                if raw and raw ~= "" then
                    if type(raw) == "string" and p.isMultiValueField(raw) then
                        -- Split first, then transform each entry once. The
                        -- transform is NOT applied to the whole string as well.
                        semanticOutput = p.addMultiValueSemanticProperties(
                            raw,
                            property,
                            transform,
                            semanticOutput
                        )
                    else
                        local value = raw
                        if transform then
                            value = transform(raw)
                        end

                        if type(value) == "string" and p.isMultiValueField(value) then
                            -- The transform expanded the value into a list;
                            -- its entries are already transformed.
                            semanticOutput = p.addMultiValueSemanticProperties(
                                value,
                                property,
                                nil,
                                semanticOutput
                            )
                        elseif value and value ~= "" then
                            setSingle(property, value)
                        end
                    end
                end
            end
        end
    end

    -- Fragments exist only in the non-SMW case; join them in one pass.
    if #fragments > 0 then
        semanticOutput = (semanticOutput or "") .. "\n" .. table.concat(fragments, "\n")
    end

    return semanticOutput
end
-- Produces the semantic annotations for a template from its configuration.
-- Basic properties are delegated to SemanticAnnotations.setSemanticProperties;
-- the result is then handed to processAdditionalProperties for the
-- multi-value-aware additional properties.
-- @param args            Template parameters.
-- @param semanticConfig  Config read for `properties`, `transforms` and
--                        `additionalProperties`.
-- @param options         Optional table: `transform` (used only when
--                        semanticConfig.transforms is absent) and
--                        `skipProperties` (properties to exclude).
-- @return Wikitext with semantic annotations, or "" when args or
--         semanticConfig is missing.
function p.generateSemanticProperties(args, semanticConfig, options)
    if not (args and semanticConfig) then
        return ""
    end

    local annotations = require('Module:SemanticAnnotations')
    options = options or {}

    -- Transforms from the config take precedence over those in options.
    local annotationOptions = {
        transform = semanticConfig.transforms or options.transform
    }

    -- Basic properties first.
    local output = annotations.setSemanticProperties(args, semanticConfig.properties, annotationOptions)

    -- Then the additional properties, with multi-value support.
    output = p.processAdditionalProperties(args, semanticConfig, output, options.skipProperties or {})
    return output
end
return p