Jump to content

Module:CountryData: Difference between revisions

// via Wikitext Extension for VSCode
// via Wikitext Extension for VSCode
Line 45: Line 45:
end
end


-- Function to safely check if a table has a property
-- Reset the module-level caches (useful for testing)
local function hasProperty(tbl, property)
local function resetCaches()
     return tbl and type(tbl) == "table" and tbl[property] ~= nil
     dataCache = nil
    nameLookupCache = nil
    regionLookupCache = nil
    propertyCache = {}
    functionCache = {}
end
end


--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
-- Data Loading Layer
-- Data Loading and Cache Building Layer (Refactored)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------


-- Get name lookup cache: builds if not already cached
-- Data loading function using DatasetLoader
local function getNameLookup(data)
--- Return the country dataset, loading it on first use.
-- On the first call the result of loader.get('CountryDataset') is reduced to
-- the four fields kept below and stored in dataCache; while dataCache holds a
-- value, later calls return that same table without calling the loader again.
-- @return table with countries, icann_regions, schema_version and last_updated
local function loadData()
    if not dataCache then
        local dataset = loader.get('CountryDataset')
        dataCache = {
            -- Missing collections fall back to empty tables.
            countries      = dataset.countries or {},
            icann_regions  = dataset.icann_regions or {},
            -- Metadata fields are copied as-is and may be nil.
            schema_version = dataset.schema_version,
            last_updated   = dataset.last_updated,
        }
    end
    return dataCache
end
 
-- Builds the primary name-to-code lookup cache.
-- This is the core of the refactoring, ensuring correct normalization order.
local function buildNameLookupCache(data)
     if nameLookupCache then
     if nameLookupCache then
         return nameLookupCache
         return nameLookupCache
     end
     end
   
 
    -- If no data provided, return empty lookup
     if not data or not data.countries then
     if not data or not data.countries then
         nameLookupCache = {}
         nameLookupCache = {}
         return nameLookupCache
         return nameLookupCache
     end
     end
   
 
     local lookup = {}
     local lookup = {}
   
    -- Optimization: Pre-count number of mappings
    local mappingCount = 0
     for code, country in pairs(data.countries) do
     for code, country in pairs(data.countries) do
         -- Count canonical name
         -- Ensure the country has a name to process
         mappingCount = mappingCount + 1
         local canonicalName = country.name or country.canonical_name
          
         if canonicalName then
        -- Count variations if they exist
            -- 1. Add the canonical name itself
        if country.variations and type(country.variations) == "table" then
            local normalizedName = NormalizationText.normalizeText(canonicalName)
             mappingCount = mappingCount + #country.variations
            lookup[normalizedName] = code
           
            -- 2. Add the diacritic-stripped version of the canonical name
            local strippedName = DiacriticNormalization.removeDiacritics(canonicalName)
             if strippedName ~= canonicalName then -- only add if it's different
                lookup[NormalizationText.normalizeText(strippedName)] = code
            end
         end
         end
    end
 
   
         -- 3. Process all variations
    -- Build the lookup table with pre-counted size
    for code, country in pairs(data.countries) do
        -- Add name field as primary display name
        local displayName = country.name or country.canonical_name
        if displayName then
            lookup[NormalizationText.normalizeText(displayName)] = code
        end
       
         -- Add canonical_name if different from name
        if country.canonical_name and country.canonical_name ~= country.name then
            lookup[NormalizationText.normalizeText(country.canonical_name)] = code
        end
       
        -- Add variations
         if country.variations and type(country.variations) == "table" then
         if country.variations and type(country.variations) == "table" then
            -- Use pairs instead of ipairs to handle both array and object structures
             for _, variation in pairs(country.variations) do
             for _, variation in pairs(country.variations) do
                 lookup[NormalizationText.normalizeText(variation)] = code
                 -- Add the variation
                local normalizedVariation = NormalizationText.normalizeText(variation)
                lookup[normalizedVariation] = code
               
                -- Add the diacritic-stripped version of the variation
                local strippedVariation = DiacriticNormalization.removeDiacritics(variation)
                if strippedVariation ~= variation then
                    lookup[NormalizationText.normalizeText(strippedVariation)] = code
                end
             end
             end
         end
         end
     end
     end
   
 
     nameLookupCache = lookup
     nameLookupCache = lookup
     return lookup
     return lookup
end
end


-- Get region lookup cache: builds if not already cached
-- Builds the region lookup cache.
local function getRegionLookup(data)
local function buildRegionLookupCache(data)
     if regionLookupCache then
     if regionLookupCache then
         return regionLookupCache
         return regionLookupCache
     end
     end
   
 
    -- If no data provided, return empty lookup
     if not data or not data.icann_regions then
     if not data or not data.icann_regions then
         regionLookupCache = {}
         regionLookupCache = {}
         return regionLookupCache
         return regionLookupCache
     end
     end
   
 
     local lookup = {}
     local lookup = {}
   
    -- Optimization: Pre-count number of mappings
    local mappingCount = 0
    for code, region in pairs(data.icann_regions) do
        -- Count canonical name
        mappingCount = mappingCount + 1
       
        -- Count variations if they exist
        if region.variations and type(region.variations) == "table" then
            mappingCount = mappingCount + #region.variations
        end
    end
   
    -- Build the lookup table with pre-counted size
     for code, region in pairs(data.icann_regions) do
     for code, region in pairs(data.icann_regions) do
        -- Add canonical name
         if region.name then
         if region.name then
             lookup[NormalizationText.normalizeText(region.name)] = code
             lookup[NormalizationText.normalizeText(region.name)] = code
         end
         end
       
        -- Add variations
         if region.variations and type(region.variations) == "table" then
         if region.variations and type(region.variations) == "table" then
            -- Use pairs instead of ipairs to handle both array and object structures
             for _, variation in pairs(region.variations) do
             for _, variation in pairs(region.variations) do
                 lookup[NormalizationText.normalizeText(variation)] = code
                 lookup[NormalizationText.normalizeText(variation)] = code
Line 147: Line 143:
         end
         end
     end
     end
   
 
     regionLookupCache = lookup
     regionLookupCache = lookup
     return lookup
     return lookup
end
-- Reset the module-level caches (useful for testing).
-- The dataset cache and both lookup caches are cleared to nil, while the
-- property and function caches are replaced with fresh empty tables.
local function resetCaches()
    dataCache, nameLookupCache, regionLookupCache = nil, nil, nil
    propertyCache, functionCache = {}, {}
end
-- Data loading function using DatasetLoader.
-- Returns the cached dataset when dataCache is already set; otherwise fetches
-- 'CountryDataset' through loader.get, keeps only the four fields below,
-- stores the result in dataCache and returns it.
-- The `frame` parameter is accepted but never read inside this function.
local function loadData(frame)
    if dataCache then
        return dataCache
    end

    local source = loader.get('CountryDataset')
    local loaded = {}
    -- Missing collections default to empty tables; metadata may stay nil.
    loaded.countries = source.countries or {}
    loaded.icann_regions = source.icann_regions or {}
    loaded.schema_version = source.schema_version
    loaded.last_updated = source.last_updated

    dataCache = loaded
    return loaded
end
end


--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
-- Core API Functions
-- Core API Functions (Public Interface)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------


Line 184: Line 156:
-- Load data and initialize caches
-- Load data and initialize caches
function CountryData.loadData(frame)
function CountryData.loadData(frame)
     return loadData(frame)
     return loadData()
end
end


Line 193: Line 165:
end
end


-- Get country data by ISO code
-- Get country data by ISO code (Refactored to use new cache logic)
function CountryData.getCountryByCode(code)
function CountryData.getCountryByCode(code)
     if not code or code == "" then
     if not code or code == "" then
Line 199: Line 171:
     end
     end
      
      
    -- Check function cache first
     local cacheKey = createCacheKey("getCountryByCode", code)
     local cacheKey = createCacheKey("getCountryByCode", code)
     if functionCache[cacheKey] ~= nil then
     if functionCache[cacheKey] ~= nil then
Line 206: Line 177:
      
      
     local data = loadData()
     local data = loadData()
    code = code:upper() -- Standardize code
      
      
    -- Standardize code to uppercase for consistency
     local result = data and data.countries and data.countries[code] or nil
    code = code:upper()
   
     local result = nil
    if data and data.countries and data.countries[code] then
        result = data.countries[code]
    end
      
      
    -- Cache the result (including nil)
     functionCache[cacheKey] = result
     functionCache[cacheKey] = result
     return result
     return result
end
end


-- Get country data by name (including variations)
-- Get country data by name (Refactored to use new cache logic)
function CountryData.getCountryByName(name)
function CountryData.getCountryByName(name)
     if not name or name == "" then
     if not name or name == "" then
         return nil
         return nil
     end
     end
   
 
    -- Check function cache first
     local cacheKey = createCacheKey("getCountryByName", name)
     local cacheKey = createCacheKey("getCountryByName", name)
     if functionCache[cacheKey] ~= nil then
     if functionCache[cacheKey] ~= nil then
         return functionCache[cacheKey]
         return functionCache[cacheKey]
     end
     end
   
 
     local data = loadData()
     local data = loadData()
     local nameLookup = getNameLookup(data)
     local nameLookup = buildNameLookupCache(data)
   
 
     -- Normalize the input
     -- Normalize the input name in one go (text normalization includes lowercasing)
     local normalized = NormalizationText.normalizeText(name)
     local normalized = NormalizationText.normalizeText(name)
      
      
     -- Look up the code
     -- First, try a direct lookup with the normalized name
     local code = nameLookup[normalized]
     local code = nameLookup[normalized]
      
 
     -- If not found, try looking up the diacritic-stripped version
    if not code then
        local stripped = DiacriticNormalization.removeDiacritics(name)
        -- Check if stripping made a difference to avoid redundant normalization
        if stripped ~= name then
            local normalizedStripped = NormalizationText.normalizeText(stripped)
            code = nameLookup[normalizedStripped]
        end
    end
 
     local result = nil
     local result = nil
     if code and data.countries[code] then
     if code then
         result = data.countries[code]
         result = data.countries[code]
    else
        -- Try with diacritics removed
        local stripped = DiacriticNormalization.removeDiacritics(normalized)
        if stripped ~= normalized then
            code = nameLookup[stripped]
            if code and data.countries[code] then
                result = data.countries[code]
            end
        end
     end
     end
   
 
    -- Cache the result (including nil)
     functionCache[cacheKey] = result
     functionCache[cacheKey] = result
     return result
     return result
Line 266: Line 230:
     end
     end
      
      
    -- Check function cache first
     local cacheKey = createCacheKey("getCountryCodeByName", name)
     local cacheKey = createCacheKey("getCountryCodeByName", name)
     if functionCache[cacheKey] ~= nil then
     if functionCache[cacheKey] ~= nil then
Line 272: Line 235:
     end
     end
      
      
     local data = loadData()
     local country = CountryData.getCountryByName(name)
     local nameLookup = getNameLookup(data)
     local code = nil
      
      
     -- Normalize the input
     -- To get the code, we need to find which key in the main data table matches the found country object.
    local normalized = NormalizationText.normalizeText(name)
     -- This is less efficient but necessary if the country object itself doesn't store its code.
   
     if country then
     -- Look up the code
         local data = loadData()
    local code = nameLookup[normalized]
         for c, countryData in pairs(data.countries) do
   
            if countryData == country then
     if not code then
                code = c
        -- Try with diacritics removed
                break
         local stripped = DiacriticNormalization.removeDiacritics(normalized)
            end
         if stripped ~= normalized then
            code = nameLookup[stripped]
         end
         end
     end
     end
   
 
    -- Cache the result (including nil)
     functionCache[cacheKey] = code
     functionCache[cacheKey] = code
     return code
     return code
end
end
-- (The rest of the functions remain unchanged for now, but will be updated in subsequent phases)


function CountryData.normalizeCountryName(name)
function CountryData.normalizeCountryName(name)
Line 299: Line 261:
     end
     end
      
      
    -- Check function cache first
     local cacheKey = createCacheKey("normalizeCountryName", name)
     local cacheKey = createCacheKey("normalizeCountryName", name)
     if functionCache[cacheKey] ~= nil then
     if functionCache[cacheKey] ~= nil then
Line 309: Line 270:
     local result
     local result
     if country then
     if country then
        -- Return name as the primary display name
         result = country.name or country.canonical_name
         result = country.name or country.canonical_name
        -- make it category‑friendly:
         result = result
         result = result
             :gsub(",%s*", "")                 -- drop any commas
             :gsub(",%s*", "")
             :gsub("%sand the%s+", " and ")   -- turn “ and the ” into “ and ”
             :gsub("%sand the%s+", " and ")
     else
     else
        -- If no match, return "(Unrecognized)"
         result = "(Unrecognized)"
         result = "(Unrecognized)"
     end
     end
      
      
    -- Cache the result
     functionCache[cacheKey] = result
     functionCache[cacheKey] = result
     return result
     return result
end
end


-- Get ICANN region for a country
function CountryData.getRegionByCountry(name)
function CountryData.getRegionByCountry(name)
     if not name or name == "" then
     if not name or name == "" then
Line 331: Line 287:
     end
     end
      
      
    -- Check function cache first
     local cacheKey = createCacheKey("getRegionByCountry", name)
     local cacheKey = createCacheKey("getRegionByCountry", name)
     if functionCache[cacheKey] ~= nil then
     if functionCache[cacheKey] ~= nil then
Line 343: Line 298:
         result = country.icann_region
         result = country.icann_region
     else
     else
        -- Return "(Unrecognized)" for consistency with normalizeCountryName
         result = "(Unrecognized)"
         result = "(Unrecognized)"
     end
     end
      
      
    -- Cache the result
     functionCache[cacheKey] = result
     functionCache[cacheKey] = result
     return result
     return result
end
end


-- Get all countries in a specific region
function CountryData.getCountriesByRegion(region)
function CountryData.getCountriesByRegion(region)
     if not region or region == "" then
     if not region or region == "" then
Line 358: Line 310:
     end
     end
      
      
    -- Check function cache first
     local cacheKey = createCacheKey("getCountriesByRegion", region)
     local cacheKey = createCacheKey("getCountriesByRegion", region)
     if functionCache[cacheKey] ~= nil then
     if functionCache[cacheKey] ~= nil then
Line 365: Line 316:
      
      
     local data = loadData()
     local data = loadData()
     local regionLookup = getRegionLookup(data)
     local regionLookup = buildRegionLookupCache(data)
      
      
    -- Normalize the input
     local normalized = NormalizationText.normalizeText(region)
     local normalized = NormalizationText.normalizeText(region)
   
    -- Look up the region code
     local regionCode = regionLookup[normalized]
     local regionCode = regionLookup[normalized]
      
      
     local result = {}
     local result = {}
     if regionCode and data.countries then
     if regionCode and data.countries then
        -- Pre-count number of countries in region for allocation
        local countryCount = 0
        for _, country in pairs(data.countries) do
            if country.icann_region == regionCode then
                countryCount = countryCount + 1
            end
        end
       
        -- Populate the result with the pre-allocated size
        local index = 1
         for code, country in pairs(data.countries) do
         for code, country in pairs(data.countries) do
             if country.icann_region == regionCode then
             if country.icann_region == regionCode then
                 result[index] = {
                 table.insert(result, {
                     code = code,
                     code = code,
                     name = country.name or country.canonical_name
                     name = country.name or country.canonical_name
                 }
                 })
                index = index + 1
             end
             end
         end
         end
     end
     end
      
      
    -- Cache the result
     functionCache[cacheKey] = result
     functionCache[cacheKey] = result
     return result
     return result
end
end


-- Get list of all country codes
function CountryData.getAllCountryCodes()
function CountryData.getAllCountryCodes()
    -- Check function cache first
     local cacheKey = "getAllCountryCodes"
     local cacheKey = "getAllCountryCodes"
     if functionCache[cacheKey] ~= nil then
     if functionCache[cacheKey] ~= nil then
Line 411: Line 345:
     local data = loadData()
     local data = loadData()
     local result = {}
     local result = {}
   
     if data and data.countries then
     if data and data.countries then
        -- Pre-allocate the array to the number of countries
        local countryCount = 0
        for _ in pairs(data.countries) do
            countryCount = countryCount + 1
        end
       
        -- Now populate the array
        local index = 1
         for code in pairs(data.countries) do
         for code in pairs(data.countries) do
             result[index] = code
             table.insert(result, code)
            index = index + 1
         end
         end
     end
     end
      
      
    -- Cache the result
     functionCache[cacheKey] = result
     functionCache[cacheKey] = result
     return result
     return result
end
end


-- Get list of all canonical country names
function CountryData.getAllCountryNames()
function CountryData.getAllCountryNames()
    -- Check function cache first
     local cacheKey = "getAllCountryNames"
     local cacheKey = "getAllCountryNames"
     if functionCache[cacheKey] ~= nil then
     if functionCache[cacheKey] ~= nil then
Line 442: Line 363:
     local data = loadData()
     local data = loadData()
     local result = {}
     local result = {}
   
     if data and data.countries then
     if data and data.countries then
        -- Pre-allocate the array to the number of countries
        local countryCount = 0
        for _ in pairs(data.countries) do
            countryCount = countryCount + 1
        end
       
        -- Populate the array
        local index = 1
         for _, country in pairs(data.countries) do
         for _, country in pairs(data.countries) do
             local name = country.name or country.canonical_name
             table.insert(result, country.name or country.canonical_name)
            result[index] = name
            index = index + 1
         end
         end
     end
     end
      
      
    -- Cache the result
     functionCache[cacheKey] = result
     functionCache[cacheKey] = result
     return result
     return result
end
end


-- Get any property for a country by code
function CountryData.getCountryProperty(code, property)
function CountryData.getCountryProperty(code, property)
     if not code or code == "" or not property or property == "" then
     if not code or code == "" or not property or property == "" then
Line 470: Line 378:
     end
     end
      
      
    -- Check property cache first
     local cacheKey = createCacheKey("getCountryProperty", code, property)
     local cacheKey = createCacheKey("getCountryProperty", code, property)
     if propertyCache[cacheKey] ~= nil then
     if propertyCache[cacheKey] ~= nil then
Line 477: Line 384:
      
      
     local country = CountryData.getCountryByCode(code)
     local country = CountryData.getCountryByCode(code)
    local result = country and country[property] or nil
      
      
    local result = nil
    if country and country[property] ~= nil then
        result = country[property]
    end
   
    -- Cache the result (including nil)
     propertyCache[cacheKey] = result
     propertyCache[cacheKey] = result
     return result
     return result
end
end


-- Get any property for a country by name
function CountryData.getCountryPropertyByName(name, property)
function CountryData.getCountryPropertyByName(name, property)
     if not name or name == "" or not property or property == "" then
     if not name or name == "" or not property or property == "" then
Line 494: Line 395:
     end
     end
      
      
    -- Check property cache first
     local cacheKey = createCacheKey("getCountryPropertyByName", name, property)
     local cacheKey = createCacheKey("getCountryPropertyByName", name, property)
     if propertyCache[cacheKey] ~= nil then
     if propertyCache[cacheKey] ~= nil then
Line 501: Line 401:
      
      
     local code = CountryData.getCountryCodeByName(name)
     local code = CountryData.getCountryCodeByName(name)
   
     local result = nil
     local result = nil
     if code then
     if code then
Line 507: Line 406:
     end
     end
      
      
    -- Cache the result (including nil)
     propertyCache[cacheKey] = result
     propertyCache[cacheKey] = result
     return result
     return result
end
end


-- List all available properties for a country
function CountryData.getAvailableProperties(code)
function CountryData.getAvailableProperties(code)
     if not code or code == "" then
     if not code or code == "" then
Line 518: Line 415:
     end
     end
      
      
    -- Check function cache first
     local cacheKey = createCacheKey("getAvailableProperties", code)
     local cacheKey = createCacheKey("getAvailableProperties", code)
     if functionCache[cacheKey] ~= nil then
     if functionCache[cacheKey] ~= nil then
Line 524: Line 420:
     end
     end
      
      
     local data = loadData()
     local country = CountryData.getCountryByCode(code)
    if not data or not data.countries or not data.countries[code] then
        return {}
    end
   
     local properties = {}
     local properties = {}
      
     if country then
    -- Pre-allocate the table based on the number of properties
        for property in pairs(country) do
    local propertyCount = 0
            table.insert(properties, property)
    for _ in pairs(data.countries[code]) do
         end
        propertyCount = propertyCount + 1
    end
   
    -- Fill the table with property names
    local index = 1
    for property in pairs(data.countries[code]) do
        properties[index] = property
         index = index + 1
     end
     end
      
      
    -- Cache the result
     functionCache[cacheKey] = properties
     functionCache[cacheKey] = properties
     return properties
     return properties
end
end


-- Get all unique property names across all countries
function CountryData.getAllPropertyNames()
function CountryData.getAllPropertyNames()
    -- Check function cache first
     local cacheKey = "getAllPropertyNames"
     local cacheKey = "getAllPropertyNames"
     if functionCache[cacheKey] ~= nil then
     if functionCache[cacheKey] ~= nil then
Line 558: Line 439:
      
      
     local data = loadData()
     local data = loadData()
    if not data or not data.countries then
        return {}
    end
   
     local properties = {}
     local properties = {}
     local seen = {}
     local seen = {}
      
     if data and data.countries then
    -- First pass: count unique properties for pre-allocation
        for _, country in pairs(data.countries) do
    local propertyCount = 0
            for property in pairs(country) do
    for _, country in pairs(data.countries) do
                if not seen[property] then
        for property in pairs(country) do
                    seen[property] = true
            if not seen[property] then
                    table.insert(properties, property)
                seen[property] = true
                 end
                 propertyCount = propertyCount + 1
             end
             end
         end
         end
     end
     end
      
      
    -- Reset seen table
    seen = {}
   
    -- Second pass: fill the pre-allocated table
    local index = 1
    for _, country in pairs(data.countries) do
        for property in pairs(country) do
            if not seen[property] then
                seen[property] = true
                properties[index] = property
                index = index + 1
            end
        end
    end
   
    -- Cache the result
     functionCache[cacheKey] = properties
     functionCache[cacheKey] = properties
     return properties
     return properties
end
end


-- Get semantic property name from ConfigRepository
function CountryData.getSemanticPropertyName(propertyKey)
function CountryData.getSemanticPropertyName(propertyKey)
     local ConfigRepository = require('Module:ConfigRepository')
     local ConfigRepository = require('Module:ConfigRepository')
   
    -- Look through all template configurations
     for templateName, templateConfig in pairs(ConfigRepository.templates) do
     for templateName, templateConfig in pairs(ConfigRepository.templates) do
        -- Check if this template has semantics configuration
         if templateConfig.semantics and templateConfig.semantics.additionalProperties then
         if templateConfig.semantics and templateConfig.semantics.additionalProperties then
            -- Check if the property key exists in additionalProperties
             if templateConfig.semantics.additionalProperties[propertyKey] then
             if templateConfig.semantics.additionalProperties[propertyKey] then
                 return propertyKey
                 return propertyKey
Line 611: Line 465:
         end
         end
     end
     end
   
    -- If not found, return nil
     return nil
     return nil
end
end


-- Get semantic properties for countries and regions
-- Returns a table of properties that can be integrated with the batch processing system
function CountryData.getSemanticCountryRegionProperties(countryValue)
function CountryData.getSemanticCountryRegionProperties(countryValue)
    -- Initialize return table
     local properties = {}
     local properties = {}
   
     if not countryValue or countryValue == "" then
     if not countryValue or countryValue == "" then
         return properties
         return properties
     end
     end
      
      
    -- Get property names from ConfigRepository
     local countryPropertyName = CountryData.getSemanticPropertyName("Has country")
     local countryPropertyName = CountryData.getSemanticPropertyName("Has country")
     local regionPropertyName = CountryData.getSemanticPropertyName("Has ICANN region")
     local regionPropertyName = CountryData.getSemanticPropertyName("Has ICANN region")
      
      
    -- If property names are not found in ConfigRepository, we can't proceed
     if not countryPropertyName or not regionPropertyName then
     if not countryPropertyName or not regionPropertyName then
         return properties
         return properties
     end
     end
      
      
    -- Split multi-value country strings
     local countries = {}
     local countries = {}
     for country in string.gmatch(countryValue, "[^;]+") do
     for country in string.gmatch(countryValue, "[^;]+") do
Line 644: Line 489:
     end
     end
      
      
    -- Process each country
     for _, country in ipairs(countries) do
     for _, country in ipairs(countries) do
         local normalizedCountry = CountryData.normalizeCountryName(country)
         local normalizedCountry = CountryData.normalizeCountryName(country)
       
        -- Only process recognized countries
         if normalizedCountry ~= "(Unrecognized)" then
         if normalizedCountry ~= "(Unrecognized)" then
            -- Add country to properties table
             properties[countryPropertyName] = properties[countryPropertyName] or {}
             properties[countryPropertyName] = properties[countryPropertyName] or {}
             table.insert(properties[countryPropertyName], normalizedCountry)
             table.insert(properties[countryPropertyName], normalizedCountry)
              
              
            -- Add region to properties table
             local region = CountryData.getRegionByCountry(country)
             local region = CountryData.getRegionByCountry(country)
             if region and region ~= "(Unrecognized)" then
             if region and region ~= "(Unrecognized)" then
Line 666: Line 506:
end
end


-- Export country data as JSON string (for JavaScript usage)
function CountryData.exportAsJson()
function CountryData.exportAsJson()
     local data = loadData()
     local data = loadData()
   
    -- Ensure we have valid data
     if not data or not data.countries then
     if not data or not data.countries then
         return '{}'
         return '{}'
     end
     end
      
      
    -- Use MediaWiki's JSON encoder
     if mw.text and mw.text.jsonEncode then
     if mw.text and mw.text.jsonEncode then
         local success, result = pcall(function()
         local success, result = pcall(function()
             return mw.text.jsonEncode(data)
             return mw.text.jsonEncode(data)
         end)
         end)
       
         if success and result then
         if success and result then
             return result
             return result
         end
         end
     end
     end
   
    -- Fallback to simple string if JSON encoding fails
     return '{}'
     return '{}'
end
end


--------------------------------------------------------------------------------
-- Country Display Functions with contextual emoji
--------------------------------------------------------------------------------
-- Get region-specific CSS class for country display
local function getRegionClass(region)
local function getRegionClass(region)
     if not region or region == "(Unrecognized)" then
     if not region or region == "(Unrecognized)" then
         return "region-default"
         return "region-default"
     end
     end
   
     if region == "NA" or region == "LAC" then
     if region == "NA" or region == "LAC" then
         return "region-americas"
         return "region-americas"
Line 708: Line 536:
end
end


-- Format a list of countries from a semicolon-separated string
-- Returns either plain text (single country) or bullet points (multiple countries)
-- Each country gets its own region-specific class for styling
function CountryData.formatCountryList(value)
function CountryData.formatCountryList(value)
     if not value or value == "" then return "" end
     if not value or value == "" then return "" end
      
      
    -- Split and normalize countries
     local countries = {}
     local countries = {}
     for country in string.gmatch(value, "[^;]+") do
     for country in string.gmatch(value, "[^;]+") do
Line 724: Line 548:
      
      
     local normalizedCountries = {}
     local normalizedCountries = {}
    local validCountriesCount = 0
   
     for _, country in ipairs(countries) do
     for _, country in ipairs(countries) do
         local normalized = CountryData.normalizeCountryName(country)
         local normalized = CountryData.normalizeCountryName(country)
        -- Only include recognized countries
         if normalized ~= "(Unrecognized)" then
         if normalized ~= "(Unrecognized)" then
             validCountriesCount = validCountriesCount + 1
             table.insert(normalizedCountries, normalized)
            normalizedCountries[validCountriesCount] = normalized
         end
         end
     end
     end
      
      
    -- Generate output based on number of countries
     if #normalizedCountries > 0 then
     if validCountriesCount > 1 then
         local listItems = {}
         local listItems = {}
       
         for _, country in ipairs(normalizedCountries) do
         for _, country in ipairs(normalizedCountries) do
            -- Get the region for this specific country
             local countryRegion = CountryData.getRegionByCountry(country)
             local countryRegion = CountryData.getRegionByCountry(country)
             local regionClass = getRegionClass(countryRegion)
             local regionClass = getRegionClass(countryRegion)
           
            -- Create a list item with region-specific class
             table.insert(listItems, string.format("<li class=\"%s\">%s</li>", regionClass, country))
             table.insert(listItems, string.format("<li class=\"%s\">%s</li>", regionClass, country))
         end
         end
       
         return string.format("<ul class=\"template-list template-list-country\">%s</ul>", table.concat(listItems, ""))
         return string.format("<ul class=\"template-list template-list-country\">%s</ul>",  
                            table.concat(listItems, ""))
    elseif validCountriesCount == 1 then
        -- For a single country, create a similar list with just one item
        local countryRegion = CountryData.getRegionByCountry(normalizedCountries[1])
        local regionClass = getRegionClass(countryRegion)
       
        -- Single item list with the same styling
        return string.format("<ul class=\"template-list template-list-country\"><li class=\"%s\">%s</li></ul>",
                            regionClass, normalizedCountries[1])
     end
     end
      
      
Line 763: Line 568:
end
end


-- Alias for backward compatibility
function CountryData.formatCountries(value)
function CountryData.formatCountries(value)
     return CountryData.formatCountryList(value)
     return CountryData.formatCountryList(value)
end
end


-- Get a list of normalized countries for category assignment
function CountryData.getCountriesForCategories(value)
function CountryData.getCountriesForCategories(value)
     if not value or value == "" then return {} end
     if not value or value == "" then return {} end
Line 781: Line 584:
      
      
     local normalizedCountries = {}
     local normalizedCountries = {}
    local validCount = 0
   
     for _, country in ipairs(countries) do
     for _, country in ipairs(countries) do
         local normalized = CountryData.normalizeCountryName(country)
         local normalized = CountryData.normalizeCountryName(country)
        -- Only include recognized countries
         if normalized ~= "(Unrecognized)" then
         if normalized ~= "(Unrecognized)" then
             validCount = validCount + 1
             table.insert(normalizedCountries, normalized)
            normalizedCountries[validCount] = normalized
         end
         end
     end
     end
Line 795: Line 594:
end
end


-- Return the module for use
-- Adds flag filename lookup
function CountryData.getFlagFileName(countryNameOrCode)
function CountryData.getFlagFileName(countryNameOrCode)
     if not countryNameOrCode or countryNameOrCode == '' then return nil end
     if not countryNameOrCode or countryNameOrCode == '' then return nil end
      
      
     local inputName = countryNameOrCode:gsub('_', ' ') -- Clean the input
     local inputName = countryNameOrCode:gsub('_', ' ')
     local isoCode
     local isoCode = CountryData.getCountryCodeByName(inputName)  
   
    -- First, try to get the ISO code by treating inputName as a country name.
    -- CountryData.getCountryCodeByName handles internal normalization.
    isoCode = CountryData.getCountryCodeByName(inputName)  
      
      
    -- If no code was found by name, and the inputName itself is 2 characters long,
    -- it might be an ISO code already. Let's validate it.
     if not isoCode and #inputName == 2 then
     if not isoCode and #inputName == 2 then
        -- Check if this 2-char string is a valid country code by attempting to fetch country data.
        -- We use getCountryByCode because it directly uses the code.
         if CountryData.getCountryByCode(inputName) then  
         if CountryData.getCountryByCode(inputName) then  
             isoCode = inputName -- It's a valid code
             isoCode = inputName:upper()
         end
         end
     end
     end
      
      
    -- If we still don't have a valid ISO code, we can't proceed.
     if not isoCode or #isoCode ~= 2 then return nil end
     if not isoCode or isoCode == '' then return nil end
   
    -- Ensure the code is indeed 2 letters long (as a final sanity check).
    if #isoCode ~= 2 then return nil end


    -- Construct the filename in the format "Flag-xx.svg" (e.g., "Flag-ad.svg")
     return 'Flag-' .. string.lower(isoCode) .. '.svg'  
     return 'Flag-' .. string.lower(isoCode) .. '.svg'  
end
end


return CountryData
return CountryData

Revision as of 20:02, 3 July 2025

Documentation for this module may be created at Module:CountryData/doc

-- Module:CountryData
-- Unified module for country data management.
--
-- Features:
--   * Loads country data from JSON stored in Data:CountryDataset.json
--   * Normalizes country names to canonical forms
--   * Maps countries to ICANN regions
--   * Provides extensible property access
--   * Integrates with Semantic MediaWiki
--   * Formats country lists with region-specific emoji styling
--   * Processes countries for category assignment

-- Dependencies
local DiacriticNormalization = require('Module:NormalizationDiacritic')
local NormalizationText = require('Module:NormalizationText')
local loader = require('Module:DatasetLoader')

-- Module-level cache tables for improved performance
-- Dataset table produced by loadData(); nil until the first load
local dataCache = nil
-- Normalized name -> country code table produced by buildNameLookupCache(); nil until built
local nameLookupCache = nil
-- Normalized region name -> region code table produced by buildRegionLookupCache(); nil until built
local regionLookupCache = nil
-- Memoized results of getCountryProperty / getCountryPropertyByName, keyed by string
local propertyCache = {}
-- Memoized results of the other public functions, keyed by string
local functionCache = {}

-- Default data structure to use if JSON loading fails
local DEFAULT_DATA = {
    schema_version = 1,
    -- Evaluated once, when this module is loaded; the leading '!' makes
    -- os.date format the timestamp in UTC.
    last_updated = os.date('!%Y-%m-%dT%H:%M:%SZ'),
    -- Empty placeholders under the same keys that loadData() exposes
    countries = {},
    icann_regions = {}
}

--------------------------------------------------------------------------------
-- Helper Functions
--------------------------------------------------------------------------------

-- Create a cache key from a function name and its call arguments.
--   funcName: name of the calling function; becomes the first key part.
--   ...:      the call arguments; each one is converted with tostring().
-- Returns the parts joined with ":" (for example "getCountryProperty:US:name").
local function createCacheKey(funcName, ...)
    -- select("#", ...) counts every argument, nil ones included. The length
    -- operator on a packed {...} is unspecified once an argument is nil, which
    -- could silently drop arguments from the key.
    local argCount = select("#", ...)
    local args = {...}
    local keyParts = {funcName}
    for i = 1, argCount do
        -- tostring(nil) is "nil", so a nil argument still occupies its slot
        keyParts[i + 1] = tostring(args[i])
    end
    return table.concat(keyParts, ":")
end

-- Drop every module-level cache so later calls reload the dataset and rebuild
-- the lookups from scratch (useful for testing).
local function resetCaches()
    -- Memoized call results start over as empty tables
    functionCache = {}
    propertyCache = {}
    -- Lazily built lookups and the loaded dataset go back to "not built yet"
    regionLookupCache = nil
    nameLookupCache = nil
    dataCache = nil
end

--------------------------------------------------------------------------------
-- Data Loading and Cache Building Layer (Refactored)
--------------------------------------------------------------------------------

-- Load the country dataset through DatasetLoader and memoize it in dataCache.
-- Returns a table with the keys countries, icann_regions, schema_version and
-- last_updated. When the dataset cannot be loaded the values come from
-- DEFAULT_DATA, so callers always receive iterable (possibly empty) tables.
local function loadData()
    if dataCache then
        return dataCache
    end

    -- Guard both failure shapes: loader.get raising an error, and loader.get
    -- returning something that is not a table. Without this the field reads
    -- below would index a nil value.
    -- NOTE(review): confirm which of the two DatasetLoader actually does.
    local ok, raw = pcall(loader.get, 'CountryDataset')
    if not ok or type(raw) ~= "table" then
        raw = DEFAULT_DATA
    end

    dataCache = {
        countries      = raw.countries      or {},
        icann_regions  = raw.icann_regions  or {},
        schema_version = raw.schema_version,
        last_updated   = raw.last_updated
    }
    return dataCache
end

-- Build (once) the table that maps every normalized country spelling to its
-- country code, store it in nameLookupCache and return it.
--   data: dataset table as returned by loadData(); a missing table or missing
--         `countries` field yields an empty lookup.
-- Each spelling (the country's name and every entry of `variations`) is
-- registered under its text-normalized form and, when removing diacritics
-- changes the raw spelling, under the normalized diacritic-free form too.
local function buildNameLookupCache(data)
    if nameLookupCache then
        return nameLookupCache
    end

    local index = {}

    -- Register one spelling for `code`; normalization is applied after the
    -- diacritic comparison, which is done on the raw spelling.
    local function register(spelling, code)
        index[NormalizationText.normalizeText(spelling)] = code

        local plain = DiacriticNormalization.removeDiacritics(spelling)
        if plain ~= spelling then
            index[NormalizationText.normalizeText(plain)] = code
        end
    end

    if data and data.countries then
        for code, entry in pairs(data.countries) do
            -- A country may lack a name and still be reachable via variations
            local primary = entry.name or entry.canonical_name
            if primary then
                register(primary, code)
            end

            if entry.variations and type(entry.variations) == "table" then
                for _, alias in pairs(entry.variations) do
                    register(alias, code)
                end
            end
        end
    end

    nameLookupCache = index
    return index
end

-- Build (once) the table that maps normalized ICANN region names and their
-- variations to region codes, store it in regionLookupCache and return it.
--   data: dataset table as returned by loadData(); a missing table or missing
--         `icann_regions` field yields an empty lookup.
local function buildRegionLookupCache(data)
    if regionLookupCache then
        return regionLookupCache
    end

    local index = {}
    local regions = data and data.icann_regions

    if regions then
        for code, entry in pairs(regions) do
            if entry.name then
                index[NormalizationText.normalizeText(entry.name)] = code
            end

            if entry.variations and type(entry.variations) == "table" then
                for _, alias in pairs(entry.variations) do
                    index[NormalizationText.normalizeText(alias)] = code
                end
            end
        end
    end

    regionLookupCache = index
    return index
end

--------------------------------------------------------------------------------
-- Core API Functions (Public Interface)
--------------------------------------------------------------------------------

-- Public module table: the exported functions below are attached to it and it
-- is returned at the end of the file.
local CountryData = {}

-- Public accessor for the dataset.
--   frame: accepted but not used.
-- Returns the table produced by the module-level loadData().
function CountryData.loadData(frame)
    local dataset = loadData()
    return dataset
end

-- Public wrapper around the module-level resetCaches() (primarily for testing).
-- Always returns true.
function CountryData.resetCaches()
    local done = true
    resetCaches()
    return done
end

-- Look up a country entry by its code.
--   code: country code in any letter case; nil or "" yields nil.
-- Returns the entry stored in the dataset under the upper-cased code, or nil.
-- Hits are memoized in functionCache under the code exactly as it was passed.
function CountryData.getCountryByCode(code)
    local usable = code and code ~= ""
    if not usable then
        return nil
    end

    local key = createCacheKey("getCountryByCode", code)
    local cached = functionCache[key]
    if cached ~= nil then
        return cached
    end

    local dataset = loadData()
    -- Dataset keys are matched against the upper-cased code
    local isoCode = code:upper()

    local entry = nil
    if dataset and dataset.countries then
        entry = dataset.countries[isoCode] or nil
    end

    functionCache[key] = entry
    return entry
end

-- Look up a country entry by name or by any registered variation.
--   name: spelling to resolve; nil or "" yields nil.
-- Returns the dataset entry, or nil when the spelling is not in the lookup.
-- Hits are memoized in functionCache under the name as passed.
function CountryData.getCountryByName(name)
    if not name or name == "" then
        return nil
    end

    local key = createCacheKey("getCountryByName", name)
    local cached = functionCache[key]
    if cached ~= nil then
        return cached
    end

    local dataset = loadData()
    local index = buildNameLookupCache(dataset)

    -- First attempt: the text-normalized spelling as given
    local code = index[NormalizationText.normalizeText(name)]

    -- Second attempt: the diacritic-free spelling, tried only when removing
    -- diacritics actually changes the input
    if not code then
        local plain = DiacriticNormalization.removeDiacritics(name)
        if plain ~= name then
            code = index[NormalizationText.normalizeText(plain)]
        end
    end

    local entry = nil
    if code then
        entry = dataset.countries[code]
    end

    functionCache[key] = entry
    return entry
end

-- Resolve a country name (or variation) to its code.
--   name: spelling to resolve; nil or "" yields nil.
-- Returns the dataset key whose entry is the one getCountryByName() found, or
-- nil when the name is not recognized. Hits are memoized in functionCache.
function CountryData.getCountryCodeByName(name)
    if not name or name == "" then
        return nil
    end

    local key = createCacheKey("getCountryCodeByName", name)
    local cached = functionCache[key]
    if cached ~= nil then
        return cached
    end

    local matchedCode = nil
    local entry = CountryData.getCountryByName(name)

    if entry then
        -- Entries are not read for a code field here, so the dataset is
        -- scanned for the key whose value is this very entry.
        local dataset = loadData()
        for candidate, candidateEntry in pairs(dataset.countries) do
            if candidateEntry == entry then
                matchedCode = candidate
                break
            end
        end
    end

    functionCache[key] = matchedCode
    return matchedCode
end

-- (The rest of the functions remain unchanged for now, but will be updated in subsequent phases)

-- Map any recognized spelling of a country to its display name.
--   name: spelling to resolve; nil or "" is returned unchanged.
-- Returns the country's `name` (or `canonical_name`) with commas removed and
-- " and the " shortened to " and ", or "(Unrecognized)" when the spelling is
-- unknown or the matched entry carries no usable name. Results are memoized
-- in functionCache.
function CountryData.normalizeCountryName(name)
    if not name or name == "" then
        return name
    end

    local cacheKey = createCacheKey("normalizeCountryName", name)
    if functionCache[cacheKey] ~= nil then
        return functionCache[cacheKey]
    end

    local result = "(Unrecognized)"
    local country = CountryData.getCountryByName(name)
    local displayName = country and (country.name or country.canonical_name)

    -- The name lookup also registers countries that only have variations, so a
    -- match may carry no name at all; treat that as unrecognized instead of
    -- calling gsub on nil.
    if type(displayName) == "string" then
        -- NOTE(review): the comma pattern also consumes the whitespace after
        -- the comma, so "A, B" becomes "AB" -- confirm that is intended.
        result = displayName
            :gsub(",%s*", "")
            :gsub("%sand the%s+", " and ")
    end

    functionCache[cacheKey] = result
    return result
end

-- Resolve a country spelling to the value of its `icann_region` field.
--   name: spelling to resolve; nil or "" yields nil.
-- Returns the region value, or "(Unrecognized)" when the country is unknown or
-- has no region. Results are memoized in functionCache.
function CountryData.getRegionByCountry(name)
    if not name or name == "" then
        return nil
    end

    local key = createCacheKey("getRegionByCountry", name)
    local cached = functionCache[key]
    if cached ~= nil then
        return cached
    end

    local entry = CountryData.getCountryByName(name)
    -- Safe and/or chain: the fallback is used exactly when the entry or its
    -- region is missing
    local region = entry and entry.icann_region or "(Unrecognized)"

    functionCache[key] = region
    return region
end

-- List the countries whose `icann_region` equals the given region.
--   region: region name or variation; nil or "" yields an empty table.
-- Returns an array of { code = ..., name = ... } tables in pairs() order
-- (unspecified); empty when the region is not recognized. The array is
-- memoized in functionCache, so repeated calls return the same table.
function CountryData.getCountriesByRegion(region)
    if not region or region == "" then
        return {}
    end

    local key = createCacheKey("getCountriesByRegion", region)
    local cached = functionCache[key]
    if cached ~= nil then
        return cached
    end

    local dataset = loadData()
    local index = buildRegionLookupCache(dataset)
    local wanted = index[NormalizationText.normalizeText(region)]

    local matches = {}
    if wanted and dataset.countries then
        for code, entry in pairs(dataset.countries) do
            if entry.icann_region == wanted then
                matches[#matches + 1] = {
                    code = code,
                    name = entry.name or entry.canonical_name
                }
            end
        end
    end

    functionCache[key] = matches
    return matches
end

-- Collect every key of the dataset's `countries` table.
-- Returns an array of codes in pairs() order (unspecified); the array is
-- memoized in functionCache, so repeated calls return the same table.
function CountryData.getAllCountryCodes()
    local key = "getAllCountryCodes"
    local cached = functionCache[key]
    if cached ~= nil then
        return cached
    end

    local codes = {}
    local dataset = loadData()
    if dataset and dataset.countries then
        for code in pairs(dataset.countries) do
            codes[#codes + 1] = code
        end
    end

    functionCache[key] = codes
    return codes
end

-- Collect the `name` (or, failing that, `canonical_name`) of every country.
-- Returns an array of names in pairs() order (unspecified); countries without
-- either field contribute nothing. The array is memoized in functionCache.
function CountryData.getAllCountryNames()
    local key = "getAllCountryNames"
    local cached = functionCache[key]
    if cached ~= nil then
        return cached
    end

    local names = {}
    local dataset = loadData()
    if dataset and dataset.countries then
        for _, entry in pairs(dataset.countries) do
            -- Assigning nil to the next slot leaves the array unchanged
            names[#names + 1] = entry.name or entry.canonical_name
        end
    end

    functionCache[key] = names
    return names
end

-- Read one property of a country identified by code.
--   code:     country code; nil or "" yields nil.
--   property: field name to read; nil or "" yields nil.
-- Returns the field's value exactly as stored (a stored `false` included), or
-- nil when the country or the field does not exist. Non-nil results are
-- memoized in propertyCache.
function CountryData.getCountryProperty(code, property)
    if not code or code == "" or not property or property == "" then
        return nil
    end

    local cacheKey = createCacheKey("getCountryProperty", code, property)
    if propertyCache[cacheKey] ~= nil then
        return propertyCache[cacheKey]
    end

    local country = CountryData.getCountryByCode(code)

    -- Explicit branch rather than `country and country[property] or nil`:
    -- that idiom turns a legitimately stored `false` into nil.
    local result = nil
    if country then
        result = country[property]
    end

    propertyCache[cacheKey] = result
    return result
end

-- Read one property of a country identified by name or variation.
--   name:     spelling to resolve; nil or "" yields nil.
--   property: field name to read; nil or "" yields nil.
-- Returns whatever getCountryProperty() returns for the resolved code, or nil
-- when the name is not recognized. Non-nil results are memoized in
-- propertyCache.
function CountryData.getCountryPropertyByName(name, property)
    local missingName = not name or name == ""
    if missingName or not property or property == "" then
        return nil
    end

    local key = createCacheKey("getCountryPropertyByName", name, property)
    local cached = propertyCache[key]
    if cached ~= nil then
        return cached
    end

    local value = nil
    local code = CountryData.getCountryCodeByName(name)
    if code then
        value = CountryData.getCountryProperty(code, property)
    end

    propertyCache[key] = value
    return value
end

-- List the field names present on one country entry.
--   code: country code; nil or "" yields an empty table.
-- Returns an array of keys in pairs() order (unspecified); empty when the code
-- is unknown. The array is memoized in functionCache.
function CountryData.getAvailableProperties(code)
    if not code or code == "" then
        return {}
    end

    local key = createCacheKey("getAvailableProperties", code)
    local cached = functionCache[key]
    if cached ~= nil then
        return cached
    end

    local fieldNames = {}
    local entry = CountryData.getCountryByCode(code)
    if entry then
        for fieldName in pairs(entry) do
            fieldNames[#fieldNames + 1] = fieldName
        end
    end

    functionCache[key] = fieldNames
    return fieldNames
end

-- List every distinct field name used by any country entry.
-- Returns an array without duplicates, in first-seen pairs() order
-- (unspecified). The array is memoized in functionCache.
function CountryData.getAllPropertyNames()
    local key = "getAllPropertyNames"
    local cached = functionCache[key]
    if cached ~= nil then
        return cached
    end

    local fieldNames = {}
    -- Set of names already emitted, used to keep the array free of duplicates
    local emitted = {}

    local dataset = loadData()
    if dataset and dataset.countries then
        for _, entry in pairs(dataset.countries) do
            for fieldName in pairs(entry) do
                if not emitted[fieldName] then
                    emitted[fieldName] = true
                    fieldNames[#fieldNames + 1] = fieldName
                end
            end
        end
    end

    functionCache[key] = fieldNames
    return fieldNames
end

-- Check whether any template in Module:ConfigRepository declares
-- `propertyKey` under semantics.additionalProperties.
-- Returns propertyKey itself when at least one template declares it with a
-- truthy value, otherwise nil.
function CountryData.getSemanticPropertyName(propertyKey)
    local ConfigRepository = require('Module:ConfigRepository')

    for _, config in pairs(ConfigRepository.templates) do
        local semantics = config.semantics
        local declared = semantics and semantics.additionalProperties
        if declared and declared[propertyKey] then
            return propertyKey
        end
    end

    return nil
end

-- Build the semantic property values for a semicolon-separated country list.
--   countryValue: e.g. "France; Japan"; nil or "" yields an empty table.
-- Returns a table that maps the "Has country" property name to an array of
-- normalized country names and the "Has ICANN region" property name to an
-- array of region values (one per recognized country that has a region).
-- The table is empty when either property name is not configured.
function CountryData.getSemanticCountryRegionProperties(countryValue)
    local properties = {}
    if not countryValue or countryValue == "" then
        return properties
    end

    local countryProp = CountryData.getSemanticPropertyName("Has country")
    local regionProp = CountryData.getSemanticPropertyName("Has ICANN region")
    if not (countryProp and regionProp) then
        return properties
    end

    for piece in string.gmatch(countryValue, "[^;]+") do
        -- Trim surrounding whitespace; blank pieces are skipped
        local entry = piece:match("^%s*(.-)%s*$")
        if entry and entry ~= "" then
            local canonical = CountryData.normalizeCountryName(entry)
            if canonical ~= "(Unrecognized)" then
                local countryList = properties[countryProp] or {}
                properties[countryProp] = countryList
                countryList[#countryList + 1] = canonical

                -- The region is resolved from the spelling as entered
                local region = CountryData.getRegionByCountry(entry)
                if region and region ~= "(Unrecognized)" then
                    local regionList = properties[regionProp] or {}
                    properties[regionProp] = regionList
                    regionList[#regionList + 1] = region
                end
            end
        end
    end

    return properties
end

-- Serialize the loaded dataset to a JSON string with mw.text.jsonEncode.
-- Returns the encoded string, or '{}' when the dataset is missing, the encoder
-- is unavailable, or encoding raises an error.
function CountryData.exportAsJson()
    local EMPTY_JSON = '{}'

    local dataset = loadData()
    if not (dataset and dataset.countries) then
        return EMPTY_JSON
    end

    local encode = mw.text and mw.text.jsonEncode
    if encode then
        -- pcall keeps an encoder error from propagating to the caller
        local ok, json = pcall(encode, dataset)
        if ok and json then
            return json
        end
    end

    return EMPTY_JSON
end

-- Map an ICANN region value to the CSS class used for country list items.
--   region: region value; nil/false or "(Unrecognized)" selects the default.
-- Returns "region-americas" for "NA" and "LAC", "region-asia-pacific" for
-- "AP", "region-default" for missing/unrecognized input and
-- "region-europe-africa" for everything else.
local function getRegionClass(region)
    if not region or region == "(Unrecognized)" then
        return "region-default"
    end

    local classByRegion = {
        NA = "region-americas",
        LAC = "region-americas",
        AP = "region-asia-pacific"
    }
    return classByRegion[region] or "region-europe-africa"
end

-- Render a semicolon-separated country list as an HTML <ul>.
--   value: e.g. "France; Japan"; nil or "" yields "".
-- Returns a <ul class="template-list template-list-country"> with one <li> per
-- recognized country (in input order), each carrying its region-specific CSS
-- class, or "" when no country is recognized.
function CountryData.formatCountryList(value)
    if not value or value == "" then return "" end

    local listItems = {}
    for piece in string.gmatch(value, "[^;]+") do
        -- Trim surrounding whitespace; blank pieces are skipped
        local entered = piece:match("^%s*(.-)%s*$")
        if entered and entered ~= "" then
            local normalized = CountryData.normalizeCountryName(entered)
            if normalized ~= "(Unrecognized)" then
                -- Resolve the region from the spelling that was just
                -- recognized. The display name has its commas and "and the"
                -- rewritten, so it is not guaranteed to be in the name lookup
                -- and could fall back to "region-default".
                local countryRegion = CountryData.getRegionByCountry(entered)
                local regionClass = getRegionClass(countryRegion)
                listItems[#listItems + 1] =
                    string.format("<li class=\"%s\">%s</li>", regionClass, normalized)
            end
        end
    end

    if #listItems == 0 then
        return ""
    end

    return string.format("<ul class=\"template-list template-list-country\">%s</ul>", table.concat(listItems, ""))
end

-- Alias of CountryData.formatCountryList, kept for backward compatibility.
function CountryData.formatCountries(value)
    local html = CountryData.formatCountryList(value)
    return html
end

-- Turn a semicolon-separated country list into normalized country names.
--   value: e.g. "France; Japan"; nil or "" yields an empty table.
-- Returns an array of normalized names in input order; unrecognized and blank
-- entries are dropped.
function CountryData.getCountriesForCategories(value)
    local recognized = {}
    if not value or value == "" then
        return recognized
    end

    for piece in string.gmatch(value, "[^;]+") do
        -- Trim surrounding whitespace; blank pieces are skipped
        local entry = piece:match("^%s*(.-)%s*$")
        if entry and entry ~= "" then
            local canonical = CountryData.normalizeCountryName(entry)
            if canonical ~= "(Unrecognized)" then
                recognized[#recognized + 1] = canonical
            end
        end
    end

    return recognized
end

-- Build the flag file name for a country given by name or by 2-letter code.
--   countryNameOrCode: name, variation or code; underscores are treated as
--                      spaces; nil or '' yields nil.
-- Returns 'Flag-xx.svg' with the lower-cased 2-character code, or nil when no
-- 2-character code can be determined.
function CountryData.getFlagFileName(countryNameOrCode)
    if not countryNameOrCode or countryNameOrCode == '' then return nil end

    -- Only the first gsub result (the string) is kept
    local cleaned = countryNameOrCode:gsub('_', ' ')

    -- Try the input as a name first; if that fails and it is two characters
    -- long, accept it as a code provided the dataset knows it.
    local isoCode = CountryData.getCountryCodeByName(cleaned)
    if not isoCode and #cleaned == 2 and CountryData.getCountryByCode(cleaned) then
        isoCode = cleaned:upper()
    end

    if isoCode and #isoCode == 2 then
        return 'Flag-' .. string.lower(isoCode) .. '.svg'
    end
    return nil
end

return CountryData