Module:SemanticCategoryHelpers: Difference between revisions
// via Wikitext Extension for VSCode |
// via Wikitext Extension for VSCode |
||
| (29 intermediate revisions by the same user not shown) | |||
| Line 1: | Line 1: | ||
-- | --[[ | ||
* Name: SemanticCategoryHelpers | |||
* Author: Mark W. Datysgeld | |||
* Description: Utilities for semantic property and category handling in templates, extracted from TemplateHelpers for improved modularity | |||
* Notes: Splitting multi-value strings; building category tags; adding categories based on canonical mappings; processing multi-value semantic properties; generating semantic properties based on configuration; retrieving property descriptions from property pages; includes property description caching and type registry | |||
]] | |||
local p = {} | local p = {} | ||
| Line 17: | Line 11: | ||
local CanonicalForms = require('Module:CanonicalForms') | local CanonicalForms = require('Module:CanonicalForms') | ||
local SemanticAnnotations = require('Module:SemanticAnnotations') | local SemanticAnnotations = require('Module:SemanticAnnotations') | ||
local TemplateHelpers = require('Module:TemplateHelpers') | |||
-------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | ||
-- | -- Property Description Cache | ||
-------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | ||
-- | -- Module-level cache for property descriptions | ||
local propertyDescriptionCache = {} | |||
local | |||
} | |||
-- Get property description from a property page.
--
-- Loads "Property:<propertyName>", reads its wikitext and extracts the text of
-- a [[Has property description::...]] annotation (a trailing "@en" language
-- tag is dropped). When no such annotation is present, falls back to the text
-- following a '''Canonical description''': label, up to the first ".", "?"
-- or "!".
--
-- Results are memoised in propertyDescriptionCache. Failed lookups are stored
-- as `false`: assigning nil to a table key stores nothing, so a nil "cache
-- entry" would cause the page to be loaded again on every call.
--
-- @param propertyName string The name of the property (e.g., "Has interview format")
-- @return string|nil The property description or nil if not found
function p.getPropertyDescription(propertyName)
    -- Nothing to look up; also avoids a concatenation error on nil below
    if propertyName == nil or propertyName == "" then
        return nil
    end

    -- Check cache first (false marks a lookup that previously found nothing)
    local cached = propertyDescriptionCache[propertyName]
    if cached ~= nil then
        return cached or nil
    end

    local description = nil

    -- Try to load the property page
    local propertyPage = mw.title.new("Property:" .. propertyName)
    if propertyPage and propertyPage.exists then
        -- Extract the description from the page content
        local content = propertyPage:getContent()
        if content then
            -- Pattern matches [[Has property description::description text@en]]
            description = content:match("%[%[Has property description::(.-)@?e?n?%]%]")
            if not description then
                -- Try canonical description as fallback
                description = content:match("'''Canonical description''': (.-)[%.%?!]")
            end
        end
    end

    -- Cache the outcome, using false as the "not found" sentinel
    propertyDescriptionCache[propertyName] = description or false
    return description
end
-------------------------------------------------------------------------------- | |||
-- Property Type Registry | |||
-------------------------------------------------------------------------------- | |||
-- Registry of property types with their configurations.
-- Each property type has:
-- - getPropertyName: Function that returns the property name from ConfigRepository
-- - processor: (optional) Function that processes a single value for this
--   property type; returning nil rejects the value
-- Dependencies are required inside the callbacks, so a module is only loaded
-- when the corresponding property type is actually used.

-- Builds a getPropertyName callback that reads
-- ConfigRepository.semanticProperties[key] at call time.
local function configuredPropertyName(key)
    return function()
        return require('Module:ConfigRepository').semanticProperties[key]
    end
end

local propertyTypes = {
    country = {
        getPropertyName = configuredPropertyName('country'),
        processor = function(value)
            local CountryData = require('Module:CountryData')
            local normalized = CountryData.normalizeCountryName(value)
            if normalized == "(Unrecognized)" then
                return nil
            end
            return normalized
        end
    },
    region = {
        getPropertyName = configuredPropertyName('region'),
        processor = function(value)
            -- Trim before comparing so a padded "(Unrecognized)" is rejected too
            local trimmed = value:match("^%s*(.-)%s*$")
            if trimmed == "(Unrecognized)" then
                return nil
            end
            return trimmed
        end
    },
    language = {
        getPropertyName = configuredPropertyName('language'),
        processor = function(value)
            return require('Module:NormalizationLanguage').normalize(value)
        end
    },
    -- No processor: person values are used as given
    person = {
        getPropertyName = configuredPropertyName('person')
    }
}
-------------------------------------------------------------------------------- | |||
-- Core Utilities | |||
-------------------------------------------------------------------------------- | |||
-- Delimiter list containing a single rule: a semicolon followed by optional
-- whitespace, normalised to a bare ";". Kept for backward compatibility with
-- splitSemicolonValues and published on the module table so other modules can
-- reuse it.
p.SEMICOLON_PATTERN = {
    {
        pattern = ";%s*",
        replacement = ";"
    }
}
-- Helper function to check if a field contains multiple values | -- Helper function to check if a field contains multiple values | ||
| Line 77: | Line 128: | ||
-- Category Utilities | -- Category Utilities | ||
-------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | ||
-- Normalises a category reference to full MediaWiki link syntax.
-- Accepts a bare name ("Foo"), a namespaced name ("Category:Foo") or an
-- already wrapped link ("[[Category:Foo]]").
-- @param categoryName string|nil The category reference to normalise
-- @return string "[[Category:...]]", or "" when the input is nil/false/empty
function p.formatCategoryName(categoryName)
    local isBlank = (not categoryName) or categoryName == ""
    if isBlank then
        return ""
    end

    -- A complete [[Category:...]] link is passed through untouched
    local isWrapped = categoryName:match("^%[%[Category:[^%]]+%]%]$") ~= nil
    if isWrapped then
        return categoryName
    end

    -- Choose the wrapper depending on whether the namespace prefix is present
    local wrapper = "[[Category:%s]]"
    if categoryName:match("^Category:") ~= nil then
        wrapper = "[[%s]]"
    end
    return string.format(wrapper, categoryName)
end
-- Builds a category string from a table of category names | -- Builds a category string from a table of category names | ||
| Line 88: | Line 157: | ||
for _, cat in ipairs(categories) do | for _, cat in ipairs(categories) do | ||
-- | -- Use the formatCategoryName function to ensure proper syntax | ||
formatted[index] = p.formatCategoryName(cat) | |||
index = index + 1 | index = index + 1 | ||
end | end | ||
| Line 131: | Line 196: | ||
else | else | ||
-- Default to splitting the string | -- Default to splitting the string | ||
items = | items = TemplateHelpers.splitMultiValueString(value) | ||
end | end | ||
-- | -- Process each item and collect valid categories | ||
local newCategories = {} | |||
local | |||
for _, item in ipairs(items) do | for _, item in ipairs(items) do | ||
-- Apply processor if provided | -- Apply processor if provided | ||
| Line 149: | Line 210: | ||
-- Only add if valid | -- Only add if valid | ||
if processedItem and processedItem ~= "" then | if processedItem and processedItem ~= "" then | ||
table.insert(newCategories, processedItem) | |||
end | end | ||
end | end | ||
return categories | -- Combine existing categories with new ones | ||
for _, category in ipairs(newCategories) do | |||
table.insert(categories, category) | |||
end | |||
-- Use the centralized removeDuplicates function to deduplicate the combined list | |||
return TemplateHelpers.removeDuplicates(categories) | |||
end | end | ||
| Line 161: | Line 227: | ||
-- Backward-compatibility entry point: forwards the region string unchanged to
-- TemplateHelpers.splitMultiValueString and returns whatever it returns.
function p.splitRegionCategories(regionValue)
    local splitMultiValue = TemplateHelpers.splitMultiValueString
    return splitMultiValue(regionValue)
end
| Line 168: | Line 234: | ||
-------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | ||
-- | -- Unified function to add semantic properties for any property type | ||
-- | -- @param propertyType - The type of property (e.g., "country", "region", "language") | ||
function p. | -- @param value - The value to process | ||
-- @param semanticOutput - The current semantic output to append to | |||
-- @param options - Additional options for processing | |||
-- @return The updated semantic output | |||
function p.addSemanticProperties(propertyType, value, semanticOutput, options) | |||
if not value or value == "" then return semanticOutput end | if not value or value == "" then return semanticOutput end | ||
options = options or {} | options = options or {} | ||
local | |||
-- Get configuration for this property type | |||
local config = propertyTypes[propertyType] | |||
if not config then | |||
-- Check if propertyType is a key in ConfigRepository.semanticProperties | |||
local ConfigRepository = require('Module:ConfigRepository') | |||
local propertyName = ConfigRepository.semanticProperties[propertyType] | |||
if propertyName then | |||
-- Create a dynamic config for this property | |||
config = { | |||
getPropertyName = function() return propertyName end, | |||
processor = options.processor | |||
} | |||
else | |||
-- If it's a direct property name, use it as is | |||
config = { | |||
getPropertyName = function() return propertyType end, | |||
processor = options.processor | |||
} | |||
end | |||
end | |||
-- Get property name from config | |||
local propertyName = config.getPropertyName() | |||
-- Get the values to process | -- Get the values to process | ||
| Line 183: | Line 277: | ||
else | else | ||
-- Default to splitting the string | -- Default to splitting the string | ||
items = | items = TemplateHelpers.splitMultiValueString(value) | ||
end | end | ||
| Line 189: | Line 283: | ||
local propertyHtml = {} | local propertyHtml = {} | ||
-- Process each item and | -- Collect properties for batched processing | ||
local propertyValues = {} | |||
-- Process each item and collect valid values | |||
local validValues = {} | |||
-- Process each item and collect for batched property setting | |||
for _, item in ipairs(items) do | for _, item in ipairs(items) do | ||
-- Apply processor if provided | -- Apply processor if provided | ||
local processedItem = item | local processedItem = item | ||
if processor and type(processor) == "function" then | if config.processor and type(config.processor) == "function" then | ||
processedItem = processor(item) | processedItem = config.processor(item) | ||
end | |||
-- Only add if valid | |||
if processedItem and processedItem ~= "" then | |||
table.insert(validValues, processedItem) | |||
end | |||
end | |||
-- Deduplicate the values using the centralized function | |||
validValues = TemplateHelpers.removeDuplicates(validValues) | |||
-- Add the deduplicated values to the property collection | |||
if #validValues > 0 then | |||
if #validValues == 1 then | |||
-- Single value case | |||
propertyValues[propertyName] = validValues[1] | |||
else | |||
-- Multiple values case | |||
propertyValues[propertyName] = validValues | |||
end | end | ||
-- | -- For non-SMW fallback, generate HTML fragments for each value | ||
if | if not mw.smw then | ||
for _, processedItem in ipairs(validValues) do | |||
table.insert(propertyHtml, '<div style="display:none;">') | table.insert(propertyHtml, '<div style="display:none;">') | ||
table.insert(propertyHtml, ' {{#set: ' .. propertyName .. '=' .. processedItem .. ' }}') | table.insert(propertyHtml, ' {{#set: ' .. propertyName .. '=' .. processedItem .. ' }}') | ||
| Line 213: | Line 326: | ||
end | end | ||
-- | -- Use batched property setting with SemanticAnnotations if properties exist | ||
if mw.smw and next(propertyValues) then | |||
local SemanticAnnotations = require('Module:SemanticAnnotations') | |||
local dummyArgs = {} -- We're not using args from the template | |||
local additionalOutput = SemanticAnnotations.setSemanticProperties( | |||
dummyArgs, | |||
propertyValues, | |||
{transform = nil} -- No transforms needed as we've already processed the values | |||
) | |||
if additionalOutput and additionalOutput ~= "" then | |||
semanticOutput = semanticOutput .. additionalOutput | |||
end | |||
elseif not mw.smw and #propertyHtml > 0 then | |||
-- For non-SMW case, concatenate all property HTML fragments at once | |||
semanticOutput = semanticOutput .. "\n" .. table.concat(propertyHtml, "\n") | semanticOutput = semanticOutput .. "\n" .. table.concat(propertyHtml, "\n") | ||
end | end | ||
return semanticOutput | return semanticOutput | ||
end | end | ||
| Line 298: | Line 356: | ||
skipProperties = skipProperties or {} | skipProperties = skipProperties or {} | ||
-- | -- Map to collect all values for each property for batch processing | ||
local | local allBatchProperties = {} | ||
for property, sourceFields in pairs(semanticConfig.additionalProperties) do | for property, sourceFields in pairs(semanticConfig.additionalProperties) do | ||
-- Skip properties that are handled separately | -- Skip properties that are handled separately | ||
if not skipProperties[property] then | if not skipProperties[property] then | ||
-- Find the property type key in ConfigRepository | |||
local propertyTypeKey = nil | |||
local ConfigRepository = require('Module:ConfigRepository') | |||
for key, name in pairs(ConfigRepository.semanticProperties) do | |||
if name == property then | |||
propertyTypeKey = key | |||
break | |||
end | |||
end | |||
-- If no matching key found, use the property name directly | |||
if not propertyTypeKey then | |||
propertyTypeKey = property | |||
end | |||
-- Get transform function if available | |||
local transform = nil | |||
if semanticConfig.transforms and semanticConfig.transforms[property] then | |||
transform = semanticConfig.transforms[property] | |||
end | |||
-- Process each source field for this property | |||
for _, fieldName in ipairs(sourceFields) do | for _, fieldName in ipairs(sourceFields) do | ||
local _, value = TemplateHelpers.getFieldValue(args, { key = fieldName }) | |||
local value | if value and value ~= "" then | ||
-- Split multi-value fields | |||
local values = TemplateHelpers.splitMultiValueString(value) | |||
-- | -- For each value, transform if needed and add to property collection | ||
for _, singleValue in ipairs(values) do | |||
-- Apply transform if provided | |||
local transformedValue = singleValue | |||
if transform and type(transform) == "function" then | |||
transformedValue = transform(singleValue) | |||
end | |||
if transformedValue and transformedValue ~= "" then | |||
-- Initialize property in batch collection if needed | |||
allBatchProperties[property] = allBatchProperties[property] or {} | |||
-- Add the transformed value to the batch collection | |||
table.insert(allBatchProperties[property], transformedValue) | |||
-- | |||
table.insert( | |||
end | end | ||
end | end | ||
| Line 338: | Line 411: | ||
end | end | ||
-- | -- Process all collected properties in one batch | ||
if next(allBatchProperties) then | |||
local SemanticAnnotations = require('Module:SemanticAnnotations') | |||
-- Deduplicate all property values before sending to SemanticAnnotations | |||
for prop, values in pairs(allBatchProperties) do | |||
if type(values) == 'table' then | |||
allBatchProperties[prop] = TemplateHelpers.removeDuplicates(values) | |||
end | |||
end | |||
-- Use batched property setting with SemanticAnnotations | |||
if mw.smw then | |||
local dummyArgs = {} -- We're not using args from the template | |||
local additionalOutput = SemanticAnnotations.setSemanticProperties( | |||
dummyArgs, | |||
allBatchProperties, | |||
{transform = nil} -- No transforms needed as we've already processed the values | |||
) | |||
if additionalOutput and additionalOutput ~= "" then | |||
semanticOutput = semanticOutput .. additionalOutput | |||
end | |||
else | |||
-- Fallback to HTML generation for non-SMW case | |||
local propertyHtml = {} | |||
for property, values in pairs(allBatchProperties) do | |||
for _, value in ipairs(values) do | |||
table.insert(propertyHtml, '<div style="display:none;">') | |||
table.insert(propertyHtml, ' {{#set: ' .. property .. '=' .. value .. ' }}') | |||
table.insert(propertyHtml, '</div>') | |||
end | |||
end | |||
-- Concatenate all property HTML fragments at once | |||
if #propertyHtml > 0 then | |||
semanticOutput = semanticOutput .. "\n" .. table.concat(propertyHtml, "\n") | |||
end | |||
end | |||
end | end | ||
| Line 362: | Line 472: | ||
} | } | ||
-- | -- Collect all properties in a single batch for complete deduplication | ||
local allProperties = {} | |||
-- Add basic properties directly from properties config | |||
if semanticConfig.properties then | |||
for property, param in pairs(semanticConfig.properties) do | |||
-- Copy to allProperties | |||
allProperties[property] = param | |||
end | |||
end | |||
-- Process additional properties | |||
local skipProperties = options.skipProperties or {} | |||
-- Create a collector function that gathers properties instead of generating output | |||
local propertyCollector = {} | |||
-- Process additional properties into the collector | |||
if semanticConfig.additionalProperties then | |||
for property, sourceFields in pairs(semanticConfig.additionalProperties) do | |||
-- Skip properties that are handled separately | |||
if not skipProperties[property] then | |||
-- Collect all values for this property | |||
local allValues = {} | |||
for _, fieldName in ipairs(sourceFields) do | |||
if args[fieldName] and args[fieldName] ~= "" then | |||
local value = args[fieldName] | |||
-- Transform if needed | |||
if semanticConfig.transforms and semanticConfig.transforms[property] then | |||
local transform = semanticConfig.transforms[property] | |||
if transform and type(transform) == "function" then | |||
local items = TemplateHelpers.splitMultiValueString(value) | |||
for _, item in ipairs(items) do | |||
local transformed = transform(item) | |||
if transformed and transformed ~= "" then | |||
table.insert(allValues, transformed) | |||
end | |||
end | |||
else | |||
-- If no transform, add as is | |||
table.insert(allValues, value) | |||
end | |||
else | |||
-- No transform, add as is | |||
table.insert(allValues, value) | |||
end | |||
end | |||
end | |||
-- Only add if we have values | |||
if #allValues > 0 then | |||
if allProperties[property] then | |||
-- Convert to array if needed | |||
if type(allProperties[property]) ~= "table" then | |||
allProperties[property] = {allProperties[property]} | |||
end | |||
-- Add all values | |||
for _, val in ipairs(allValues) do | |||
table.insert(allProperties[property], val) | |||
end | |||
else | |||
-- If only one value, add directly | |||
if #allValues == 1 then | |||
allProperties[property] = allValues[1] | |||
else | |||
allProperties[property] = allValues | |||
end | |||
end | |||
end | |||
end | |||
end | |||
end | |||
-- Deduplicate all property values before sending to SemanticAnnotations | |||
for prop, values in pairs(allProperties) do | |||
if type(values) == 'table' then | |||
allProperties[prop] = TemplateHelpers.removeDuplicates(values) | |||
end | |||
end | |||
-- Now process all collected properties in one batch using setSemanticProperties | |||
local semanticOutput = SemanticAnnotations.setSemanticProperties( | local semanticOutput = SemanticAnnotations.setSemanticProperties( | ||
args, | args, | ||
allProperties, | |||
semanticOptions | semanticOptions | ||
) | ) | ||
return semanticOutput | return semanticOutput | ||