Module:CountryData: Difference between revisions

// via Wikitext Extension for VSCode
 
// via Wikitext Extension for VSCode
 
(90 intermediate revisions by the same user not shown)
Line 1: Line 1:
-- Module:CountryData
--[[
-- Unified module for country data management, providing a single source of truth
* Name: CountryData
-- for country names, codes, regions, and variations.
* Author: Mark W. Datysgeld
--
* Description: Unified module for country data management with JSON loading, normalization, region mapping, and Semantic MediaWiki integration
-- Features:
* Notes: Loads from Data:CountryDataset.json; normalizes country names to canonical forms; maps countries to ICANN regions; provides extensible property access; formats country lists with region-specific emoji styling; processes countries for category assignment
--  * Loads country data from JSON stored in MediaWiki
]]
--  * Normalizes country names to canonical forms
--  * Maps countries to ICANN regions
--  * Provides extensible property access
--  * Integrates with Semantic MediaWiki
--
-- Dependencies:
--  * Module:DiacriticNormalization - For diacritic removal


local DiacriticNormalization = require('Module:DiacriticNormalization')
-- Dependencies
local DiacriticNormalization = require('Module:NormalizationDiacritic')
local NormalizationText = require('Module:NormalizationText')
local loader = require('Module:DatasetLoader')


-- Module-level cache tables for improved performance
-- Module-level cache tables for improved performance
Line 33: Line 29:
-- Helper Functions
-- Helper Functions
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
-- Turn free-form text into a canonical lookup key.
-- Note: this helper does NOT strip diacritics; it only performs the plain
-- string clean-up listed below.
--   text: value to normalize; nil, false and "" are returned unchanged.
--   returns: exactly one value, the normalized string.
local function normalizeText(text)
    if text and text ~= "" then
        -- Each assignment keeps only gsub's first result (the string) and
        -- drops the substitution count.
        local key = text:gsub("^%s*(.-)%s*$", "%1")  -- trim both ends
        key = key:lower()                            -- case-fold
        key = key:gsub("['`]", "'")                  -- backtick -> apostrophe
        key = key:gsub("%.", "")                     -- drop periods
        key = key:gsub("%s+", " ")                   -- collapse whitespace runs
        return key
    end

    -- Nothing to normalize: hand the input back as-is.
    return text
end


-- Create a cache key from a function name and arguments
-- Create a cache key from a function name and arguments
Line 60: Line 40:
end
end


-- Function to safely check if a table has a property
-- Reset the module-level caches (useful for testing)
local function hasProperty(tbl, property)
local function resetCaches()
     return tbl and type(tbl) == "table" and tbl[property] ~= nil
     dataCache = nil
    nameLookupCache = nil
    regionLookupCache = nil
    propertyCache = {}
    functionCache = {}
end
end


--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
-- Data Loading Layer
-- Data Loading and Cache Building Layer (Refactored)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------


-- Get name lookup cache - builds if not already cached
-- Data loading function using DatasetLoader
local function getNameLookup(data)
local function loadData()
    -- Serve the module-level cache once it has been built.
    if dataCache then
        return dataCache
    end

    -- NOTE(review): the contract of loader.get is not visible from here; if it
    -- can return nil (or any non-table) on a missing/invalid dataset, indexing
    -- it below would raise "attempt to index a nil value" for every caller.
    -- Fall back to an empty dataset instead so lookups simply find nothing.
    local raw = loader.get('CountryDataset')
    if type(raw) ~= "table" then
        raw = {}
    end

    -- Build a fresh wrapper rather than returning `raw` itself, so the
    -- defaults never have to be written into the loader's table.
    dataCache = {
        countries      = raw.countries      or {},
        icann_regions  = raw.icann_regions  or {},
        schema_version = raw.schema_version,
        last_updated   = raw.last_updated
    }
    return dataCache
end
 
-- Builds the primary name-to-code lookup cache.
-- This is the core of the refactoring, ensuring correct normalization order.
local function buildNameLookupCache(data)
     if nameLookupCache then
     if nameLookupCache then
         return nameLookupCache
         return nameLookupCache
     end
     end
   
 
    -- If no data provided, return empty lookup
     if not data or not data.countries then
     if not data or not data.countries then
         nameLookupCache = {}
         nameLookupCache = {}
         return nameLookupCache
         return nameLookupCache
     end
     end
   
 
     local lookup = {}
     local lookup = {}
   
    -- Pre-count how many mappings we'll create to optimize memory allocation
    local mappingCount = 0
     for code, country in pairs(data.countries) do
     for code, country in pairs(data.countries) do
         -- Count canonical name
         -- Ensure the country has a name to process
         mappingCount = mappingCount + 1
         local canonicalName = country.name or country.canonical_name
          
         if canonicalName then
        -- Count variations if they exist
            -- 1. Add the canonical name itself
        if country.variations and type(country.variations) == "table" then
            local normalizedName = NormalizationText.normalizeText(canonicalName)
             mappingCount = mappingCount + #country.variations
            lookup[normalizedName] = code
           
            -- 2. Add the diacritic-stripped version of the canonical name
            local strippedName = DiacriticNormalization.removeDiacritics(canonicalName)
             if strippedName ~= canonicalName then -- only add if it's different
                lookup[NormalizationText.normalizeText(strippedName)] = code
            end
         end
         end
    end
 
   
         -- 3. Process all variations
    -- Now build the lookup table with pre-counted size
    for code, country in pairs(data.countries) do
        -- Add name field as primary display name
        local displayName = country.name or country.canonical_name
        if displayName then
            lookup[normalizeText(displayName)] = code
        end
       
         -- Add canonical_name if different from name
        if country.canonical_name and country.canonical_name ~= country.name then
            lookup[normalizeText(country.canonical_name)] = code
        end
       
        -- Add variations
         if country.variations and type(country.variations) == "table" then
         if country.variations and type(country.variations) == "table" then
             for _, variation in ipairs(country.variations) do
             for _, variation in pairs(country.variations) do
                 lookup[normalizeText(variation)] = code
                -- Add the variation
                local normalizedVariation = NormalizationText.normalizeText(variation)
                 lookup[normalizedVariation] = code
               
                -- Add the diacritic-stripped version of the variation
                local strippedVariation = DiacriticNormalization.removeDiacritics(variation)
                if strippedVariation ~= variation then
                    lookup[NormalizationText.normalizeText(strippedVariation)] = code
                end
             end
             end
         end
         end
     end
     end
   
 
     nameLookupCache = lookup
     nameLookupCache = lookup
     return lookup
     return lookup
end
end


-- Get region lookup cache - builds if not already cached
-- Builds the region lookup cache.
local function getRegionLookup(data)
local function buildRegionLookupCache(data)
     if regionLookupCache then
     if regionLookupCache then
         return regionLookupCache
         return regionLookupCache
     end
     end
   
 
    -- If no data provided, return empty lookup
     if not data or not data.icann_regions then
     if not data or not data.icann_regions then
         regionLookupCache = {}
         regionLookupCache = {}
         return regionLookupCache
         return regionLookupCache
     end
     end
   
 
     local lookup = {}
     local lookup = {}
   
    -- Pre-count how many mappings we'll create to optimize memory allocation
    local mappingCount = 0
     for code, region in pairs(data.icann_regions) do
     for code, region in pairs(data.icann_regions) do
        -- Count canonical name
        mappingCount = mappingCount + 1
       
        -- Count variations if they exist
        if region.variations and type(region.variations) == "table" then
            mappingCount = mappingCount + #region.variations
        end
    end
   
    -- Now build the lookup table with pre-counted size
    for code, region in pairs(data.icann_regions) do
        -- Add canonical name
         if region.name then
         if region.name then
             lookup[normalizeText(region.name)] = code
             lookup[NormalizationText.normalizeText(region.name)] = code
         end
         end
       
        -- Add variations
         if region.variations and type(region.variations) == "table" then
         if region.variations and type(region.variations) == "table" then
             for _, variation in ipairs(region.variations) do
             for _, variation in pairs(region.variations) do
                 lookup[normalizeText(variation)] = code
                 lookup[NormalizationText.normalizeText(variation)] = code
             end
             end
         end
         end
     end
     end
   
 
     regionLookupCache = lookup
     regionLookupCache = lookup
     return lookup
     return lookup
end
-- Title of the wiki page holding the country dataset.
local COUNTRY_JSON_PAGE = 'MediaWiki:CountryData.json'

-- Decode JSON text into a table: first with default flags, then with
-- JSON_TRY_FIXING. Returns nil when no attempt yields a table.
local function decodeCountryJson(jsonText)
    if not (jsonText and mw.text and mw.text.jsonDecode) then
        return nil
    end

    local ok, decoded = pcall(mw.text.jsonDecode, jsonText)
    if ok and type(decoded) == "table" then
        return decoded
    end

    ok, decoded = pcall(mw.text.jsonDecode, jsonText, mw.text.JSON_TRY_FIXING)
    if ok and type(decoded) == "table" then
        return decoded
    end

    return nil
end

-- Read the raw text of the dataset page, with any UTF-8 BOM and leading
-- whitespace removed. Returns nil when the page is missing or empty.
local function readCountryJsonPage()
    local pageTitle = mw.title.new(COUNTRY_JSON_PAGE)
    if not pageTitle or not pageTitle.exists then
        return nil
    end

    local ok, content = pcall(function()
        return pageTitle:getContent()
    end)
    if not ok or not content or content == "" then
        return nil
    end

    -- Strip the BOM (EF BB BF) first: its bytes are not whitespace, so
    -- trimming whitespace beforehand would leave any whitespace that
    -- follows the BOM in place.
    if content:byte(1) == 239 and content:byte(2) == 187 and content:byte(3) == 191 then
        content = content:sub(4)
    end
    content = content:gsub("^%s+", "")

    return content
end

-- Try each data source in turn and return the first one that yields a table,
-- or nil when all of them fail.
local function fetchCountryData(frame)
    -- 1. Transclude the JSON page through the frame, when one is available.
    if frame and type(frame) == "table" and frame.preprocess then
        local ok, text = pcall(function()
            return frame:preprocess('{{MediaWiki:CountryData.json}}')
        end)
        if ok and text then
            local decoded = decodeCountryJson(text)
            if decoded then
                return decoded
            end
            -- Undecodable output (e.g. the page does not exist): keep going
            -- with the other loaders instead of giving up here.
        end
    end

    -- 2. Scribunto's native JSON loader.
    if mw.loadJsonData then
        local ok, jsonData = pcall(mw.loadJsonData, COUNTRY_JSON_PAGE)
        if ok and type(jsonData) == 'table' then
            return jsonData
        end
    end

    -- 3. Raw page content decoded by hand.
    return decodeCountryJson(readCountryJsonPage())
end

-- Main data loading function with multiple fallback methods.
--   frame: optional Scribunto frame; used only for the transclusion attempt.
--   returns: a table that always has `countries` and `icann_regions` fields.
-- The result is stored in the module-level `dataCache` and reused afterwards.
local function loadData(frame)
    -- Use the module-level cache if we already loaded data once
    if dataCache then
        return dataCache
    end

    -- The outer pcall keeps any unexpected error in the loaders from
    -- propagating; such a failure degrades to the default dataset.
    local success, data = pcall(fetchCountryData, frame)
    if not success or type(data) ~= "table" then
        -- DEFAULT_DATA is defined elsewhere in this module.
        data = DEFAULT_DATA
    end
    if type(data) ~= "table" then
        data = {}
    end

    -- Ensure minimum data structure. Tables returned by mw.loadJsonData are
    -- read-only and DEFAULT_DATA is shared, so fill the gaps on a shallow
    -- copy instead of assigning into `data` itself.
    if not data.countries or not data.icann_regions then
        local patched = {}
        for key, value in pairs(data) do
            patched[key] = value
        end
        patched.countries = patched.countries or {}
        patched.icann_regions = patched.icann_regions or {}
        data = patched
    end

    dataCache = data
    return data
end
-- Discard every module-level cache so the next call rebuilds it.
-- Mainly useful for tests.
local function resetCaches()
    -- Caches that are tested for truthiness before being built return to nil.
    dataCache, nameLookupCache, regionLookupCache = nil, nil, nil
    -- Memo tables are replaced with fresh empty tables.
    propertyCache, functionCache = {}, {}
end
end


--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
-- Core API Functions
-- Core API Functions (Public Interface)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------


Line 281: Line 151:
-- Load data and initialize caches
-- Load data and initialize caches
function CountryData.loadData(frame)
function CountryData.loadData(frame)
     return loadData(frame)
     return loadData()
end
end


Line 290: Line 160:
end
end


-- Get country data by ISO code
-- Get country data by ISO code (Refactored to use new cache logic)
function CountryData.getCountryByCode(code)
function CountryData.getCountryByCode(code)
     if not code or code == "" then
     if not code or code == "" then
Line 296: Line 166:
     end
     end
      
      
    -- Check function cache first
     local cacheKey = createCacheKey("getCountryByCode", code)
     local cacheKey = createCacheKey("getCountryByCode", code)
     if functionCache[cacheKey] ~= nil then
     if functionCache[cacheKey] ~= nil then
Line 303: Line 172:
      
      
     local data = loadData()
     local data = loadData()
    code = code:upper() -- Standardize code
      
      
    -- Standardize code to uppercase for consistency
     local result = data and data.countries and data.countries[code] or nil
    code = code:upper()
   
     local result = nil
    if data and data.countries and data.countries[code] then
        result = data.countries[code]
    end
      
      
    -- Cache the result (including nil)
     functionCache[cacheKey] = result
     functionCache[cacheKey] = result
     return result
     return result
end
end


-- Get country data by name (including variations)
-- Get country data by name (Refactored to use new cache logic)
function CountryData.getCountryByName(name)
function CountryData.getCountryByName(name)
     if not name or name == "" then
     if not name or name == "" then
         return nil
         return nil
     end
     end
   
 
    -- Check function cache first
     local cacheKey = createCacheKey("getCountryByName", name)
     local cacheKey = createCacheKey("getCountryByName", name)
     if functionCache[cacheKey] ~= nil then
     if functionCache[cacheKey] ~= nil then
         return functionCache[cacheKey]
         return functionCache[cacheKey]
     end
     end
   
 
     local data = loadData()
     local data = loadData()
     local nameLookup = getNameLookup(data)
     local nameLookup = buildNameLookupCache(data)
 
    -- Normalize the input name in one go (text normalization includes lowercasing)
    local normalized = NormalizationText.normalizeText(name)
      
      
     -- Normalize the input
     -- First, try a direct lookup with the normalized name
    local normalized = normalizeText(name)
   
    -- Look up the code
     local code = nameLookup[normalized]
     local code = nameLookup[normalized]
      
 
     -- If not found, try looking up the diacritic-stripped version
    if not code then
        local stripped = DiacriticNormalization.removeDiacritics(name)
        if stripped ~= name then
            code = nameLookup[NormalizationText.normalizeText(stripped)]
        end
    end
 
     local result = nil
     local result = nil
     if code and data.countries[code] then
     if code then
         result = data.countries[code]
         result = data.countries[code]
    else
        -- Try with diacritics removed
        local stripped = DiacriticNormalization.removeDiacritics(normalized)
        if stripped ~= normalized then
            code = nameLookup[stripped]
            if code and data.countries[code] then
                result = data.countries[code]
            end
        end
     end
     end
   
 
    -- Cache the result (including nil)
     functionCache[cacheKey] = result
     functionCache[cacheKey] = result
     return result
     return result
Line 363: Line 223:
     end
     end
      
      
    -- Check function cache first
     local cacheKey = createCacheKey("getCountryCodeByName", name)
     local cacheKey = createCacheKey("getCountryCodeByName", name)
     if functionCache[cacheKey] ~= nil then
     if functionCache[cacheKey] ~= nil then
Line 369: Line 228:
     end
     end
      
      
     local data = loadData()
     local country = CountryData.getCountryByName(name)
    local nameLookup = getNameLookup(data)
      
      
     -- Normalize the input
     -- The country object from the dataset doesn't inherently contain its own ISO code key.
    local normalized = normalizeText(name)
     -- We must iterate through the dataset to find the key corresponding to the found country object.
   
     if country then
     -- Look up the code
         local data = loadData()
    local code = nameLookup[normalized]
         for code, countryData in pairs(data.countries) do
   
            if countryData == country then
     if not code then
                functionCache[cacheKey] = code
        -- Try with diacritics removed
                return code
         local stripped = DiacriticNormalization.removeDiacritics(normalized)
            end
         if stripped ~= normalized then
            code = nameLookup[stripped]
         end
         end
     end
     end
   
 
     -- Cache the result (including nil)
     -- If no country was found, or no matching code was found, cache and return nil.
     functionCache[cacheKey] = code
     functionCache[cacheKey] = nil
     return code
     return nil
end
end


-- Normalize country name to canonical form with "(Unrecognized)" fallback
-- (The rest of the functions remain unchanged for now, but will be updated in subsequent phases)
 
function CountryData.normalizeCountryName(name)
function CountryData.normalizeCountryName(name)
     if not name or name == "" then
     if not name or name == "" then
         return name
         return "(Unrecognized)"
     end
     end
      
      
    -- Check function cache first
     local cacheKey = createCacheKey("normalizeCountryName", name)
     local cacheKey = createCacheKey("normalizeCountryName", name)
     if functionCache[cacheKey] ~= nil then
     if functionCache[cacheKey] ~= nil then
Line 406: Line 262:
      
      
     local result
     local result
     if country then
     if country and country.name then
        -- Return name as the primary display name
         result = country.name
         result = country.name or country.canonical_name
     else
     else
        -- If no match, return "(Unrecognized)"
         result = "(Unrecognized)"
         result = "(Unrecognized)"
     end
     end
      
      
    -- Cache the result
     functionCache[cacheKey] = result
     functionCache[cacheKey] = result
     return result
     return result
end
end


-- Get ICANN region for a country
function CountryData.getRegionByCountry(name)
function CountryData.getRegionByCountry(name)
     if not name or name == "" then
     if not name or name == "" then
         return nil
         return "(Unrecognized)"
     end
     end
      
      
    -- Check function cache first
     local cacheKey = createCacheKey("getRegionByCountry", name)
     local cacheKey = createCacheKey("getRegionByCountry", name)
     if functionCache[cacheKey] ~= nil then
     if functionCache[cacheKey] ~= nil then
Line 437: Line 288:
         result = country.icann_region
         result = country.icann_region
     else
     else
        -- Return "(Unrecognized)" for consistency with normalizeCountryName
         result = "(Unrecognized)"
         result = "(Unrecognized)"
     end
     end
      
      
    -- Cache the result
     functionCache[cacheKey] = result
     functionCache[cacheKey] = result
     return result
     return result
end
end


-- Get all countries in a specific region
function CountryData.getCountriesByRegion(region)
function CountryData.getCountriesByRegion(region)
     if not region or region == "" then
     if not region or region == "" then
Line 452: Line 300:
     end
     end
      
      
    -- Check function cache first
     local cacheKey = createCacheKey("getCountriesByRegion", region)
     local cacheKey = createCacheKey("getCountriesByRegion", region)
     if functionCache[cacheKey] ~= nil then
     if functionCache[cacheKey] ~= nil then
Line 459: Line 306:
      
      
     local data = loadData()
     local data = loadData()
     local regionLookup = getRegionLookup(data)
     local regionLookup = buildRegionLookupCache(data)
   
    -- Normalize the input
    local normalized = normalizeText(region)
      
      
     -- Look up the region code
     local normalized = NormalizationText.normalizeText(region)
     local regionCode = regionLookup[normalized]
     local regionCode = regionLookup[normalized]
      
      
     local result = {}
     local result = {}
     if regionCode and data.countries then
     if regionCode and data.countries then
        -- Pre-count number of countries in region for allocation
        local countryCount = 0
        for _, country in pairs(data.countries) do
            if country.icann_region == regionCode then
                countryCount = countryCount + 1
            end
        end
       
        -- Now populate the result with the pre-allocated size
        local index = 1
         for code, country in pairs(data.countries) do
         for code, country in pairs(data.countries) do
             if country.icann_region == regionCode then
             if country.icann_region == regionCode then
                 result[index] = {
                 table.insert(result, {
                     code = code,
                     code = code,
                     name = country.name or country.canonical_name
                     name = country.name
                 }
                 })
                index = index + 1
             end
             end
         end
         end
     end
     end
      
      
    -- Cache the result
     functionCache[cacheKey] = result
     functionCache[cacheKey] = result
     return result
     return result
end
end


-- Get list of all country codes
function CountryData.getAllCountryCodes()
function CountryData.getAllCountryCodes()
    -- Check function cache first
     local cacheKey = "getAllCountryCodes"
     local cacheKey = "getAllCountryCodes"
     if functionCache[cacheKey] ~= nil then
     if functionCache[cacheKey] ~= nil then
Line 505: Line 335:
     local data = loadData()
     local data = loadData()
     local result = {}
     local result = {}
   
     if data and data.countries then
     if data and data.countries then
        -- Pre-allocate the array to the number of countries
        local countryCount = 0
        for _ in pairs(data.countries) do
            countryCount = countryCount + 1
        end
       
        -- Now populate the array
        local index = 1
         for code in pairs(data.countries) do
         for code in pairs(data.countries) do
             result[index] = code
             table.insert(result, code)
            index = index + 1
         end
         end
     end
     end
      
      
    -- Cache the result
     functionCache[cacheKey] = result
     functionCache[cacheKey] = result
     return result
     return result
end
end


-- Get list of all canonical country names
function CountryData.getAllCountryNames()
function CountryData.getAllCountryNames()
    -- Check function cache first
     local cacheKey = "getAllCountryNames"
     local cacheKey = "getAllCountryNames"
     if functionCache[cacheKey] ~= nil then
     if functionCache[cacheKey] ~= nil then
Line 536: Line 353:
     local data = loadData()
     local data = loadData()
     local result = {}
     local result = {}
   
     if data and data.countries then
     if data and data.countries then
        -- Pre-allocate the array to the number of countries
        local countryCount = 0
        for _ in pairs(data.countries) do
            countryCount = countryCount + 1
        end
       
        -- Now populate the array
        local index = 1
         for _, country in pairs(data.countries) do
         for _, country in pairs(data.countries) do
             local name = country.name or country.canonical_name
             table.insert(result, country.name)
            result[index] = name
            index = index + 1
         end
         end
     end
     end
      
      
    -- Cache the result
     functionCache[cacheKey] = result
     functionCache[cacheKey] = result
     return result
     return result
end
end


-- Get any property for a country by code
function CountryData.getCountryProperty(code, property)
function CountryData.getCountryProperty(code, property)
     if not code or code == "" or not property or property == "" then
     if not code or code == "" or not property or property == "" then
Line 564: Line 368:
     end
     end
      
      
    -- Check property cache first
     local cacheKey = createCacheKey("getCountryProperty", code, property)
     local cacheKey = createCacheKey("getCountryProperty", code, property)
     if propertyCache[cacheKey] ~= nil then
     if propertyCache[cacheKey] ~= nil then
Line 571: Line 374:
      
      
     local country = CountryData.getCountryByCode(code)
     local country = CountryData.getCountryByCode(code)
    local result = country and country[property] or nil
      
      
    local result = nil
    if country and country[property] ~= nil then
        result = country[property]
    end
   
    -- Cache the result (including nil)
     propertyCache[cacheKey] = result
     propertyCache[cacheKey] = result
     return result
     return result
end
end


-- Get any property for a country by name
function CountryData.getCountryPropertyByName(name, property)
function CountryData.getCountryPropertyByName(name, property)
     if not name or name == "" or not property or property == "" then
     if not name or name == "" or not property or property == "" then
Line 588: Line 385:
     end
     end
      
      
    -- Check property cache first
     local cacheKey = createCacheKey("getCountryPropertyByName", name, property)
     local cacheKey = createCacheKey("getCountryPropertyByName", name, property)
     if propertyCache[cacheKey] ~= nil then
     if propertyCache[cacheKey] ~= nil then
Line 595: Line 391:
      
      
     local code = CountryData.getCountryCodeByName(name)
     local code = CountryData.getCountryCodeByName(name)
   
     local result = nil
     local result = nil
     if code then
     if code then
Line 601: Line 396:
     end
     end
      
      
    -- Cache the result (including nil)
     propertyCache[cacheKey] = result
     propertyCache[cacheKey] = result
     return result
     return result
end
end


-- List all available properties for a country
function CountryData.getAvailableProperties(code)
function CountryData.getAvailableProperties(code)
     if not code or code == "" then
     if not code or code == "" then
Line 612: Line 405:
     end
     end
      
      
    -- Check function cache first
     local cacheKey = createCacheKey("getAvailableProperties", code)
     local cacheKey = createCacheKey("getAvailableProperties", code)
     if functionCache[cacheKey] ~= nil then
     if functionCache[cacheKey] ~= nil then
Line 618: Line 410:
     end
     end
      
      
     local data = loadData()
     local country = CountryData.getCountryByCode(code)
    if not data or not data.countries or not data.countries[code] then
        return {}
    end
   
     local properties = {}
     local properties = {}
      
     if country then
    -- Pre-allocate the table based on the number of properties
        for property in pairs(country) do
    local propertyCount = 0
            table.insert(properties, property)
    for _ in pairs(data.countries[code]) do
         end
        propertyCount = propertyCount + 1
    end
   
    -- Fill the table with property names
    local index = 1
    for property in pairs(data.countries[code]) do
        properties[index] = property
         index = index + 1
     end
     end
      
      
    -- Cache the result
     functionCache[cacheKey] = properties
     functionCache[cacheKey] = properties
     return properties
     return properties
end
end


-- Get all unique property names across all countries
function CountryData.getAllPropertyNames()
function CountryData.getAllPropertyNames()
    -- Check function cache first
     local cacheKey = "getAllPropertyNames"
     local cacheKey = "getAllPropertyNames"
     if functionCache[cacheKey] ~= nil then
     if functionCache[cacheKey] ~= nil then
Line 652: Line 429:
      
      
     local data = loadData()
     local data = loadData()
    if not data or not data.countries then
        return {}
    end
   
     local properties = {}
     local properties = {}
     local seen = {}
     local seen = {}
      
     if data and data.countries then
    -- First pass: count unique properties for pre-allocation
        for _, country in pairs(data.countries) do
    local propertyCount = 0
            for property in pairs(country) do
    for _, country in pairs(data.countries) do
                if not seen[property] then
        for property in pairs(country) do
                    seen[property] = true
            if not seen[property] then
                    table.insert(properties, property)
                seen[property] = true
                 end
                 propertyCount = propertyCount + 1
             end
             end
         end
         end
     end
     end
      
      
    -- Reset seen table
    seen = {}
   
    -- Second pass: fill the pre-allocated table
    local index = 1
    for _, country in pairs(data.countries) do
        for property in pairs(country) do
            if not seen[property] then
                seen[property] = true
                properties[index] = property
                index = index + 1
            end
        end
    end
   
    -- Cache the result
     functionCache[cacheKey] = properties
     functionCache[cacheKey] = properties
     return properties
     return properties
end
end


-- Get semantic property for a country
function CountryData.getSemanticCountryRegionProperties(countryValue)
function CountryData.getCountrySemanticProperty(name, property)
     local properties = {}
     local code = CountryData.getCountryCodeByName(name)
     if not countryValue or countryValue == "" then
     if not code then
         return properties
         return "(Unrecognized)"
     end
     end
      
      
     local value = CountryData.getCountryProperty(code, property)
     local ConfigRepository = require('Module:ConfigRepository')
     if value == nil then
     local countryPropertyName = ConfigRepository.getSemanticPropertyName("Has country")
        return "(Unrecognized)"
     local regionPropertyName = ConfigRepository.getSemanticPropertyName("Has ICANN region")
     end
      
      
    return value
     if not countryPropertyName or not regionPropertyName then
end
         return properties
 
-- Add country semantic properties to a page
function CountryData.addCountrySemanticProperties(countryValue, semanticOutput)
     if not countryValue or countryValue == "" then
         return semanticOutput
     end
     end
      
      
    -- For non-SMW case, collect property HTML fragments in a table for efficient concatenation
    local propertyHtml = {}
   
    -- Split multi-value country strings
     local countries = {}
     local countries = {}
     for country in string.gmatch(countryValue, "[^;]+") do
     for country in string.gmatch(countryValue, "[^;]+") do
Line 723: Line 468:
     end
     end
      
      
    -- Process each country
     for _, countryName in ipairs(countries) do
     for _, country in ipairs(countries) do
         local normalizedCountry = CountryData.normalizeCountryName(countryName)
         local normalizedCountry = CountryData.normalizeCountryName(country)
       
        -- Only process recognized countries
         if normalizedCountry ~= "(Unrecognized)" then
         if normalizedCountry ~= "(Unrecognized)" then
             -- Add as semantic property
             -- Initialize property tables if they don't exist
             if mw.smw then
             properties[countryPropertyName] = properties[countryPropertyName] or {}
                mw.smw.set({["Has country"] = normalizedCountry})
             table.insert(properties[countryPropertyName], normalizedCountry)
             else
                -- Collect HTML fragments instead of concatenating strings
                table.insert(propertyHtml, '<div style="display:none;">')
                table.insert(propertyHtml, '  {{#set: Has country=' .. normalizedCountry .. ' }}')
                table.insert(propertyHtml, '</div>')
            end
              
              
            -- Add region as semantic property
             local region = CountryData.getRegionByCountry(normalizedCountry)
             local region = CountryData.getRegionByCountry(country)
             if region and region ~= "(Unrecognized)" then
             if region and region ~= "(Unrecognized)" then
                 if mw.smw then
                 properties[regionPropertyName] = properties[regionPropertyName] or {}
                    mw.smw.set({["Has region"] = region})
                 table.insert(properties[regionPropertyName], region)
                 else
                    -- Collect HTML fragments instead of concatenating strings
                    table.insert(propertyHtml, '<div style="display:none;">')
                    table.insert(propertyHtml, '  {{#set: Has region=' .. region .. ' }}')
                    table.insert(propertyHtml, '</div>')
                end
             end
             end
         end
         end
        -- No goto needed, just continue to next iteration
     end
     end
      
      
    -- For non-SMW case, concatenate all property HTML fragments at once
     return properties
    if not mw.smw and #propertyHtml > 0 then
        semanticOutput = semanticOutput .. "\n" .. table.concat(propertyHtml, "\n")
    end
   
     return semanticOutput
end
end


-- Export the loaded country dataset as a JSON string (for JavaScript usage).
--
-- Returns the encoded dataset on success. Returns the literal string '{}'
-- when the dataset is missing or has no `countries` field, when
-- mw.text.jsonEncode is not available, or when encoding raises an error or
-- yields a falsy result.
function CountryData.exportAsJson()
    local data = loadData()

    -- Ensure we have valid data
    if not data or not data.countries then
        return '{}'
    end

    -- Use MediaWiki's JSON encoder; pcall keeps an encoder error from
    -- propagating to the caller.
    if mw.text and mw.text.jsonEncode then
        local success, result = pcall(mw.text.jsonEncode, data)

        if success and result then
            return result
        end
    end

    -- Fallback to an empty JSON object if encoding is unavailable or fails
    return '{}'
end
-- Map a region code to the CSS class used for list styling.
--   nil / false / "(Unrecognized)" -> "region-default"
--   "NA" or "LAC"                  -> "region-americas"
--   "AP"                           -> "region-asia-pacific"
--   anything else                  -> "region-europe-africa"
local function getRegionClass(region)
    if not region then
        return "region-default"
    end

    -- Explicitly mapped region codes; every other value falls through
    -- to the Europe/Africa class.
    local classByRegion = {
        ["(Unrecognized)"] = "region-default",
        ["NA"] = "region-americas",
        ["LAC"] = "region-americas",
        ["AP"] = "region-asia-pacific",
    }

    return classByRegion[region] or "region-europe-africa"
end
-- Render a country value as a bullet list via Module:ListGeneration.
--
-- `value` is either a single country name or a semicolon-separated list.
-- Returns "" for a nil/false/empty value; otherwise returns the result of
-- ListGeneration.createList. Each list item is the normalized country name
-- paired with a CSS class chosen by getRegionClass; names that normalize to
-- "(Unrecognized)" are dropped by the item hook.
function CountryData.formatCountryList(value)
    if (value or "") == "" then
        return ""
    end

    local ListGeneration = require('Module:ListGeneration')
    local UNRECOGNIZED = "(Unrecognized)"
    local pending = {}

    -- Try the whole string as one country first, so names that contain
    -- separator-like words (e.g. "Trinidad and Tobago") stay intact.
    if CountryData.normalizeCountryName(value) ~= UNRECOGNIZED then
        pending[#pending + 1] = value
    else
        -- Otherwise treat it as a list, splitting on semicolons only rather
        -- than letting ListGeneration guess the delimiter.
        for piece in string.gmatch(value, "[^;]+") do
            local stripped = piece:match("^%s*(.-)%s*$")
            if stripped and stripped ~= "" then
                pending[#pending + 1] = stripped
            end
        end
    end

    -- Per-item hook: returns the content/class pair for the <li>, or nil to
    -- exclude an unrecognized country from the list.
    local function decorateCountry(countryName)
        local canonical = CountryData.normalizeCountryName(countryName)
        if canonical == UNRECOGNIZED then
            return nil
        end

        local regionCode = CountryData.getRegionByCountry(canonical)
        return {
            content = canonical,
            class = getRegionClass(regionCode)
        }
    end

    -- Hand the pre-split items to the list generator.
    return ListGeneration.createList(pending, {
        mode = 'bullet',
        listClass = 'template-list-country',
        itemHook = decorateCountry
    })
end
-- Alias for CountryData.formatCountryList: forwards `value` unchanged and
-- returns whatever that function returns.
function CountryData.formatCountries(value)
    return CountryData.formatCountryList(value)
end
-- Split a semicolon-separated country string into a list of normalized
-- country names, for use in category assignment.
--
-- Returns an empty table for a nil/false/empty `value`. Each segment is
-- trimmed of surrounding whitespace; empty segments and names that
-- normalize to "(Unrecognized)" are omitted. Order follows the input.
function CountryData.getCountriesForCategories(value)
    if not value or value == "" then return {} end

    local countries = {}
    for countryName in string.gmatch(value, "[^;]+") do
        local trimmed = countryName:match("^%s*(.-)%s*$")
        if trimmed and trimmed ~= "" then
            local normalized = CountryData.normalizeCountryName(trimmed)
            if normalized ~= "(Unrecognized)" then
                table.insert(countries, normalized)
            end
        end
    end

    return countries
end
 
-- Build the flag image file name for a country name or 2-letter code.
--
-- Underscores in the input are treated as spaces. Returns a string of the
-- form 'Flag-xx.svg' (xx = lower-cased 2-character code), or nil when the
-- input is nil/empty or no 2-character code can be resolved.
function CountryData.getFlagFileName(countryNameOrCode)
    if not countryNameOrCode or countryNameOrCode == '' then return nil end

    local inputName = countryNameOrCode:gsub('_', ' ')
    local isoCode

    -- First, try to get the ISO code by treating inputName as a country name.
    isoCode = CountryData.getCountryCodeByName(inputName)

    -- If no code was found by name, and the inputName itself is 2 characters long,
    -- it might be an ISO code already. Let's validate it.
    if not isoCode and #inputName == 2 then
        if CountryData.getCountryByCode(inputName) then
            isoCode = inputName:upper()
        end
    end

    if not isoCode or #isoCode ~= 2 then return nil end

    return 'Flag-' .. string.lower(isoCode) .. '.svg'
end


-- Return the module table so callers can require() it
return CountryData