Jump to content

Module:ConfigRepository

Revision as of 00:44, 24 April 2025 by MarkWD (talk | contribs)

Documentation for this module may be created at Module:ConfigRepository/doc

-- Module:ConfigRepository
-- Single source of truth for all template configurations across the ICANNWiki ecosystem
--
-- This module centralizes configuration data to ensure consistency, eliminate duplication, and provide a standardized structure for all templates. It works in conjunction with the Template Blueprint Framework to power template rendering and semantic processing.
--
-- This module consists of three main parts:
-- 1. Global constants and properties shared across templates
-- 2. Template-specific configurations with standardized structure
-- 3. Configuration access functions for retrieving and standardizing configurations
--
-- Each template configuration provides a standardized structure with the following components:
--
-- meta:        Template metadata including description and rendering options
-- categories:  Base and conditional categories for the template
-- patterns:    Lua string patterns used by the template for validation and parsing
-- fields:      Maps template field names in article source to display labels
-- mappings:    Canonical mappings for normalizing user input values
-- constants:   Template-specific constant values
-- semantics:   Semantic MediaWiki integration with the following sub-components:
--   properties:           Maps semantic properties to template fields (1:1 mapping)
--   additionalProperties: Maps semantic properties to multiple possible template fields
--   transforms:           Functions to transform field values before storing as semantic properties
--   skipProperties:       Properties to exclude from automatic processing

-- Module table: every public constant, template configuration and accessor
-- function is attached to it, and it is returned at the end of the file.
local p = {}

------------------------------------------------------------------------------
-- Global Constants and Properties
------------------------------------------------------------------------------

-- Shared display labels.
-- Template field lists reference these entries instead of repeating the label
-- text, so a label can be reworded here once for every template that uses it.
p.fieldLabels = {
    region  = "Region",   -- geographic
    country = "Country",  -- geographic
    date    = "Date",     -- temporal
    website = "Website",  -- contact
    -- Add further cross-template labels here as they are needed.
}

-- Wiki-wide date display options.
p.dateFormatting = {
    -- true  -> abbreviated month names, e.g. "Apr. 21, 2025"
    -- false -> full month names,        e.g. "April 21, 2025"
    useShortMonthNames = true,
}

-- Names of the semantic properties that are shared by several templates.
-- Using one name per concept lets semantic queries join data that was
-- recorded by different template types.
p.semanticProperties = {
    region   = "Has ICANN region",  -- geographic
    country  = "Has country",       -- geographic
    language = "Knows language",    -- person-related
    person   = "Has person",        -- person-related
    -- Add further cross-template properties here as they are needed.
}

------------------------------------------------------------------------------
-- Global Patterns
------------------------------------------------------------------------------

-- Lua string patterns shared by several templates for validation, parsing
-- and normalization. Template configurations reference these entries rather
-- than repeating the pattern text.
p.patterns = {
    -- Multi-value fields ---------------------------------------------------
    -- A semicolon plus any whitespace that follows it (separator between items).
    itemDelimiter = ";%s*",
    -- Captures a run of characters that contains no semicolon. Despite its
    -- name this matches the items themselves, not the delimiter.
    countryDelimiter = "([^;]+)",

    -- URL validation -------------------------------------------------------
    -- "http://" or "https://" followed by at least one non-whitespace
    -- character. Anchored at the start only, so trailing text is not rejected.
    websitePattern = "^https?://[^%s]+",

    -- Domain and TLD processing --------------------------------------------
    -- Captures whatever follows the last dot, e.g. "com" from "example.com".
    tldExtension = "%.([^%.]+)$",

    -- Event naming ----------------------------------------------------------
    -- Both capture (series name, trailing number). The first accepts any run
    -- of digits (e.g. "ICANN 76"), the second exactly four (e.g. "IGF 2023");
    -- a name ending in four digits therefore satisfies both.
    eventSeriesNumber = "^([%w%s]+)%s+(%d+)$",
    eventSeriesYear = "^([%w%s]+)%s+(%d%d%d%d)$",
}

------------------------------------------------------------------------------
-- Template Definitions (Lazy Loading)
------------------------------------------------------------------------------

-- Cache of template configurations that have already been built, keyed by
-- template name. Read and filled by the p.templates metatable and p.getConfig.
local loadedTemplates = {}

-- Semantic transform shared by the date-valued properties below: passes the
-- value to Module:NormalizationDate.formatDate and coerces the result to a string.
-- NOTE(review): tostring(nil) is the string "nil"; confirm formatDate never
-- returns nil for unparseable input.
local function formatSemanticDate(value)
    local NormalizationDate = require('Module:NormalizationDate')
    return tostring(NormalizationDate.formatDate(value))
end

-- Builders for the lazily constructed template configurations. Each entry is
-- a function, so a configuration table is only created when a caller asks for
-- that template; callers store the result in loadedTemplates.
local templateDefinitions = {
    -- Event template ---------------------------------------------------------
    Event = function()
        -- Infobox rows in display order; `keys` lists alternative parameter names.
        local fields = {
            {key = "logo", label = "Logo"},
            {key = "process", label = "Process"},
            {key = "start", label = "Start Date"},
            {key = "end", label = "End Date"},
            {key = "region", label = p.fieldLabels.region},
            {keys = {"country", "territory"}, label = p.fieldLabels.country},
            {key = "city", label = "City"},
            {key = "venue", label = "Venue"},
            {key = "organizer", label = "Organizer"},
            {keys = {"website", "url"}, label = p.fieldLabels.website},
            {keys = {"subject", "category"}, label = "Subject"} -- REVIEW UNUSED
        }

        local semantics = {
            -- 1:1 property -> field mappings.
            -- "Has country" and "Has ICANN region" are handled by
            -- CountryData.addCountrySemanticProperties; "Has event subject" is unused.
            properties = {
                ["Has start date"] = "start",
                ["Has end date"] = "end",
                ["Part of process"] = "process",
                ["Has city"] = "city",
                ["Has venue"] = "venue",
                ["Has event organizer"] = "organizer"
            },

            -- Properties that may be fed from more than one field
            -- ("Has event subject" is unused).
            additionalProperties = {
                ["Has country"] = {"country", "territory"}
            },

            -- Value transforms applied before a property is stored.
            transforms = {
                ["Has start date"] = formatSemanticDate,
                ["Has end date"] = formatSemanticDate
            },

            -- Properties excluded from automatic processing (handled separately).
            skipProperties = {
                ["Has country"] = true,
                ["Has ICANN region"] = true,
                ["Has event subject"] = true -- REVIEW: UNUSED
            }
        }

        return {
            meta = {description = "Event template"},

            -- Default categories
            categories = {base = {"Events"}},

            -- Patterns for event navigation detection
            patterns = {
                seriesNumber = p.patterns.eventSeriesNumber, -- e.g., "ICANN 76"
                seriesYear = p.patterns.eventSeriesYear -- e.g., "IGF 2023"
            },

            fields = fields,
            semantics = semantics
        }
    end,

    -- Person template --------------------------------------------------------
    Person = function()
        -- Community mappings: each entry maps a set of synonyms to a canonical
        -- form and the category associated with it.
        local communityMappings = {
            -- ICANN-related communities
            {canonical = "ICANN Community", synonyms = {"icann", "community"}, category = "ICANN Community"},
            {canonical = "ICANN Staff", synonyms = {"staff", "icann org"}, category = "ICANN Staff"},

            -- Technical communities
            {canonical = "Root Server Operator Community", synonyms = {"root server operator", "rso"}, category = "Root Server Operator Community"},
            {canonical = "RIR Community", synonyms = {"rir"}, category = "RIR Community"},
            {canonical = "Universal Acceptance Community", synonyms = {"universal acceptance", "ua", "ua member", "idn", "idn community"}, category = "Universal Acceptance Community"},

            -- Internet Governance organizations
            {canonical = "ISOC Community", synonyms = {"isoc", "internet society", "internet society community", "isoc member"}, category = "ISOC Community"},
            {canonical = "IETF Community", synonyms = {"ietf", "ietf member"}, category = "IETF Community"},
            {canonical = "W3C Community", synonyms = {"w3c", "w3c member"}, category = "W3C Community"},
            {canonical = "IGF Community", synonyms = {"igf", "nri", "youth igf"}, category = "IGF Community"},

            -- Government-related
            {canonical = "Governmental", synonyms = {"government"}, category = "Governmental"},
            {canonical = "Intergovernmental", synonyms = {"igo"}, category = "Intergovernmental"}
        }

        -- Infobox rows in display order.
        local fields = {
            {key = "community", label = "Community"},
            {key = "affiliation", label = "ICANN group"},
            {key = "organization", label = "Organization"},
            {key = "region", label = p.fieldLabels.region},
            {keys = {"country", "territory"}, label = p.fieldLabels.country},
            {key = "languages", label = "Languages"},
            {key = "website", label = p.fieldLabels.website},
            {key = "soi", label = "SOI"},
            {key = "userbox", label = "Achievements"}
        }

        local semantics = {
            -- 1:1 property -> field mappings. "Has ICANN region", "Has country"
            -- and "Knows language" are all handled separately.
            properties = {
                ["Has governance community"] = "community",
                ["Has ICANN affiliation"] = "affiliation",
                ["Has organization"] = "organization"
            },

            -- Multiple countries and regions are dealt with by the special-case
            -- handlers.
            -- NOTE(review): the field list above also accepts "territory" for the
            -- country row, but only "country" is listed here — confirm intended.
            additionalProperties = {
                ["Has country"] = {"country"},
                ["Has ICANN region"] = {"region"}
            },

            transforms = {
                ["Has governance community"] = function(value)
                    local CanonicalForms = require('Module:CanonicalForms')
                    -- Prefer the cached configuration; go through p.templates
                    -- only when the cache has not been populated yet.
                    local personConfig = loadedTemplates["Person"] or p.templates.Person
                    local canonical = CanonicalForms.normalize(value, personConfig.mappings.community)
                    -- Fall back to the raw input when normalize yields nothing.
                    return canonical or value
                end,
                ["Knows language"] = function(value)
                    -- The semantic property stores the raw data unchanged.
                    return value
                end
            },

            -- Properties excluded from automatic processing (handled separately).
            skipProperties = {
                ["Has country"] = true,
                ["Has ICANN region"] = true
            }
        }

        return {
            meta = {description = "Person template"},

            -- Default categories
            categories = {base = {"Person"}},

            mappings = {community = communityMappings},

            fields = fields,

            patterns = {
                itemDelimiter = p.patterns.itemDelimiter, -- For splitting multi-value fields
                websitePattern = p.patterns.websitePattern -- For URL validation
            },

            semantics = semantics
        }
    end
}

------------------------------------------------------------------------------
-- TEMPLATE-SPECIFIC CONFIGURATIONS
------------------------------------------------------------------------------

-- Date transform used by the legacy definitions below: passes the value to
-- Module:NormalizationDate.formatDate and coerces the result to a string.
-- NOTE(review): tostring(nil) is the string "nil"; confirm formatDate never
-- returns nil for unparseable input.
local function legacyDateTransform(value)
    return tostring(require('Module:NormalizationDate').formatDate(value))
end

-- Builds a transform that normalizes a value against one of a template's
-- canonical mapping lists (config.mappings[mappingName]) using
-- Module:CanonicalForms.normalize, falling back to the raw value when
-- normalize yields nothing.
local function legacyCanonicalTransform(templateName, mappingName)
    return function(value)
        local CanonicalForms = require('Module:CanonicalForms')
        -- Prefer the cached configuration; p.templates builds and caches it
        -- if it has not been loaded yet.
        local config = loadedTemplates[templateName] or p.templates[templateName]
        local canonical = CanonicalForms.normalize(value, config.mappings[mappingName])
        return canonical or value
    end
end

-- Legacy template definitions, kept for backward compatibility. Like the
-- entries of templateDefinitions they are builder functions, so each
-- configuration is created on first use only.
local legacyTemplateDefinitions = {
    -- TLD template definition (legacy)
    TLD = function()
        return {
            meta = {
                description = "TLD/ccTLD article template"
            },

            categories = {
                base = {}, -- TLDs use type-based categories
                conditional = {
                    rvc = "TLDs with RVCs",
                    idn = "IDN",
                    idn_cctld = "IDN ccTLD"
                }
            },

            mappings = {
                type = {
                    {canonical = "gTLD",
                     synonyms = {"generic", "tld", "generic top level domain", "generic top-level domain", "generic tld"},
                     category = "gTLD"},
                    {canonical = "ccTLD",
                     synonyms = {"country", "cc", "country code top level domain", "country code top-level domain", "country tld"},
                     category = "ccTLD"}
                },
                subtype = {
                    {canonical = "geoTLD",
                     synonyms = {"geo tld", "geo", "geographic", "geographical", "geographic top level domain", "geographic top-level domain", "geographic tld"},
                     css = "tld-template-subtype-geotld",
                     category = "geoTLD"},
                    {canonical = "dotBrand",
                     synonyms = {"brand", "brandtld", "brand tld", "brand top level domain", "brand top-level domain"},
                     css = "tld-template-subtype-brandtld",
                     category = "dotBrand"},
                    {canonical = "Sponsored TLD",
                     synonyms = {"sponsored", "sponsored top level domain", "sponsored top-level domain"},
                     css = "tld-template-subtype-sponsored",
                     category = "Sponsored TLD"},
                    {canonical = "Legacy TLD",
                     synonyms = {"legacy", "legacy top level domain", "legacy top-level domain"},
                     css = "tld-template-subtype-legacytld",
                     category = "Legacy TLD"},
                    {canonical = "2012 gTLD Round",
                     synonyms = {"gtld round 2012", "2012 ngtld round", "2012 ngtld", "ngtld 2012", "ngtld", "2012"},
                     css = "tld-template-subtype-ngtld-round-2012",
                     category = "2012 gTLD Round"}
                }
            },

            -- List of legacy TLDs that are not integrated into the nTLDStats database
            constants = {
                classicTLDs = {
                    -- Generic TLDs
                    com = true, net = true, org = true, info = true,
                    -- Restricted TLDs
                    edu = true, gov = true, mil = true, int = true,
                    -- Sponsored TLDs
                    aero = true, asia = true, cat = true, coop = true,
                    jobs = true, mobi = true, museum = true, post = true,
                    tel = true, travel = true, xxx = true
                }
            },

            patterns = {
                tldExtension = p.patterns.tldExtension, -- For extracting TLD from domain
                countryDelimiter = p.patterns.countryDelimiter -- For country parsing
            },

            fields = {
                {key = "type", label = "Type"},
                {key = "subtype", label = "Subtype"},
                {key = "status", label = "Status"},
                {keys = {"country", "territory"}, label = p.fieldLabels.country},
                {key = "introduced", label = "Introduced"},
                {keys = {"date", "implemented"}, label = "Implemented"},
                {keys = {"script", "language"}, label = "Script"},
                {key = "translation", label = "English version"},
                {key = "ascii", label = "Punycode"},
                {keys = {"registry", "registryprovider"}, label = "Registry"},
                {key = "website", label = p.fieldLabels.website},
                {keys = {"RVC", "PIC"}, label = "PIC/RVC"}
            },

            semantics = {
                properties = {
                    ["Has TLD type"] = "type",
                    ["Has TLD subtype"] = "subtype",
                    ["Has TLD status"] = "status",
                    -- "Has country" is handled separately through addMultiCountrySemanticProperties
                    ["Date introduced"] = "introduced",
                    ["Date implemented"] = "date", -- Will also handle implemented
                    ["Uses writing script"] = "script", -- Will also handle language
                    ["Has registry operator"] = "registry", -- Will also handle registryprovider
                    ["Has PIC or RVC"] = "RVC" -- Will also handle PIC
                    -- "Is IDN" is handled separately as a boolean property
                },

                additionalProperties = {
                    ["Has country"] = {"country", "territory"},
                    ["Date implemented"] = {"date", "implemented"},
                    ["Uses writing script"] = {"script", "language"},
                    ["Has registry operator"] = {"registry", "registryprovider"},
                    ["Has PIC or RVC"] = {"RVC", "PIC"}
                },

                transforms = {
                    ["Has TLD type"] = legacyCanonicalTransform("TLD", "type"),
                    ["Has TLD subtype"] = legacyCanonicalTransform("TLD", "subtype"),
                    ["Date introduced"] = legacyDateTransform,
                    ["Date implemented"] = legacyDateTransform,
                    ["Has PIC or RVC"] = function(value)
                        -- Any non-empty value is stored as the string "true";
                        -- empty or missing values produce no property value.
                        if value and value ~= "" then
                            return "true"
                        end
                        return nil
                    end
                },

                skipProperties = { -- Handled separately
                    ["Has country"] = true
                }
            }
        }
    end,

    -- LibraryInterview template definition (legacy)
    LibraryInterview = function()
        return {
            meta = {
                description = "Module for rendering the Library Interview template with semantics"
            },

            categories = { -- Default categories
                base = {
                    "Internet & Digital Governance Library",
                    "ICANNWiki Interviews"
                }
            },

            constants = {
                title = "Internet & Digital Governance Library",
                tableClass = "library-box"
            },

            fields = {
                {key = "Title", label = "Title"},
                {key = "Format", label = "Format"},
                {key = "Date", label = p.fieldLabels.date},
                {key = "Interviewer", label = "Interviewer", autoWikiLink = true},
                {key = "Interviewee", label = "Interviewee", autoWikiLink = true},
                {key = "ID", label = "Permanent ID"}
            },

            semantics = {
                -- Direct property mappings (1:1 relationship between property and field)
                properties = {
                    ["Has interview format"] = "Format",
                    ["Has date"] = "Date",
                    ["Has interviewer"] = "Interviewer",
                    ["Has interviewee"] = "Interviewee",
                    ["Permanent ID"] = "ID"
                },

                -- Properties that map to multiple possible fields
                additionalProperties = {
                    ["Has person"] = {"Interviewer", "Interviewee"} -- Links interviews to person pages
                },

                -- Transform functions to standardize property values
                transforms = {
                    ["Has date"] = legacyDateTransform,
                    ["Permanent ID"] = function(value)
                        -- Return the value as a string; a missing value becomes "".
                        return tostring(value or "")
                    end
                },

                skipProperties = {
                    -- No properties to skip
                }
            }
        }
    end
}

-- Template configurations, loaded lazily.
--
-- p.templates itself stays empty; indexing it with a template name goes
-- through __index, which returns the cached configuration or builds it from
-- templateDefinitions / legacyTemplateDefinitions and stores it in
-- loadedTemplates. Legacy configurations are cached like the others, so
-- repeated lookups return the same table and the TLD transforms no longer
-- rebuild the whole configuration for every value they normalize.
-- Unknown names yield a new empty table on each lookup and are not cached.
p.templates = setmetatable({}, {
    __index = function(_, key)
        local cached = loadedTemplates[key]
        if cached then
            return cached
        end

        -- Regular definitions take precedence over the legacy ones.
        local build = templateDefinitions[key] or legacyTemplateDefinitions[key]
        if build then
            cached = build()
            loadedTemplates[key] = cached
            return cached
        end

        -- Return empty table if template not found
        return {}
    end
})

------------------------------------------------------------------------------
-- CONFIGURATION ACCESS FUNCTIONS
------------------------------------------------------------------------------

-- Returns the configuration table for the given template type.
--
-- Lookup order: the loadedTemplates cache, then a builder from
-- templateDefinitions (whose result is cached), then p.templates, whose
-- metatable supplies the remaining templates.
--
-- @param templateType Template name, e.g. "Event" or "Person".
-- @return The configuration table. Cached configurations are returned by
--         reference, so every caller receives the same table.
function p.getConfig(templateType)
    local config = loadedTemplates[templateType]
    if not config then
        local build = templateDefinitions[templateType]
        if not build then
            -- No lazy definition under this name: defer to p.templates.
            return p.templates[templateType] or {}
        end
        config = build()
        loadedTemplates[templateType] = config
    end
    return config
end

-- Creates a standardized configuration structure for template modules.
--
-- Every top-level section (meta, mappings, fields, semantics, constants,
-- patterns, categories) is guaranteed to exist in the result. Sections
-- present in `config` are passed through by reference, except `semantics`,
-- which is shallow-copied so that missing sub-tables can be filled in without
-- modifying the caller's (possibly cached) table.
--
-- @param config Optional partial configuration table; nil is treated as {}.
-- @return A new table with all standard sections present.
function p.createStandardConfig(config)
    config = config or {}

    -- Guarantee all four semantic sub-tables, including when the caller
    -- supplied only some of them.
    local semantics = {}
    if type(config.semantics) == "table" then
        for name, value in pairs(config.semantics) do
            semantics[name] = value
        end
    end
    semantics.properties = semantics.properties or {}
    semantics.additionalProperties = semantics.additionalProperties or {}
    semantics.transforms = semantics.transforms or {}
    semantics.skipProperties = semantics.skipProperties or {}

    -- Initialize with defaults
    local standardConfig = {
        meta = config.meta or {
            description = "Template module configuration"
        },
        mappings = config.mappings or {},
        fields = config.fields or {},
        semantics = semantics,
        constants = config.constants or {},
        patterns = config.patterns or {},
        categories = config.categories or {} -- Preserves base categories
    }

    return standardConfig
end

-- Get a standard config object for use with existing template modules.
--
-- @param templateType    Name of the template whose configuration is wanted.
-- @param customOverrides Optional table. Each top-level key (for example
--                        "fields" or "constants") replaces the corresponding
--                        section of the standard config; sections are
--                        replaced whole, not merged.
-- @return A new standard config table. Overrides are applied to this new
--         table only, so the stored template configuration is not modified.
function p.getStandardConfig(templateType, customOverrides)
    -- Get the base configuration and wrap it in the standard structure
    local standardConfig = p.createStandardConfig(p.getConfig(templateType))

    -- createStandardConfig takes a single argument, so the overrides have to
    -- be applied here; passing them along would silently discard them.
    if type(customOverrides) == "table" then
        for section, value in pairs(customOverrides) do
            standardConfig[section] = value
        end
    end

    return standardConfig
end

-- Export the module table.
return p