Module:CountryData: Difference between revisions

// via Wikitext Extension for VSCode
// via Wikitext Extension for VSCode
Line 67: Line 67:
      
      
     local lookup = {}
     local lookup = {}
   
      
      
     -- Optimization: Pre-count number of mappings
     -- Optimization: Pre-count number of mappings
Line 83: Line 82:
     -- Build the lookup table with pre-counted size
     -- Build the lookup table with pre-counted size
     for code, country in pairs(data.countries) do
     for code, country in pairs(data.countries) do
        local names_to_process = {}
       
         -- Add name field as primary display name
         -- Add name field as primary display name
         local displayName = country.name or country.canonical_name
         local displayName = country.name or country.canonical_name
         if displayName then
         if displayName then
             table.insert(names_to_process, displayName)
             lookup[NormalizationText.normalizeText(displayName)] = code
         end
         end
          
          
         -- Add canonical_name if different from name
         -- Add canonical_name if different from name
         if country.canonical_name and country.canonical_name ~= country.name then
         if country.canonical_name and country.canonical_name ~= country.name then
             table.insert(names_to_process, country.canonical_name)
             lookup[NormalizationText.normalizeText(country.canonical_name)] = code
         end
         end
          
          
         -- Add variations
         -- Add variations
         if country.variations and type(country.variations) == "table" then
         if country.variations and type(country.variations) == "table" then
            -- Use pairs instead of ipairs to handle both array and object structures
             for _, variation in pairs(country.variations) do
             for _, variation in pairs(country.variations) do
                 table.insert(names_to_process, variation)
                 lookup[NormalizationText.normalizeText(variation)] = code
            end
        end
 
        for _, name in ipairs(names_to_process) do
            -- First, create a key for the original name, just normalized
            local normalized_original = NormalizationText.normalizeText(name)
            lookup[normalized_original] = code
 
            -- Second, strip diacritics from the ORIGINAL name (preserving case)
            local stripped = DiacriticNormalization.removeDiacritics(name)
 
            -- Only proceed if stripping actually changed the string
            if stripped ~= name then
                -- Now, normalize the stripped string (which will lowercase it)
                local normalized_stripped = NormalizationText.normalizeText(stripped)
                lookup[normalized_stripped] = code
             end
             end
         end
         end
Line 614: Line 596:
end
end


-- Confirm that a semantic property key is declared in ConfigRepository.
-- Scans every entry of ConfigRepository.templates and checks whether the
-- entry's semantics.additionalProperties table holds a truthy value under
-- propertyKey. Only the presence of the key matters; the stored value is
-- not used.
-- @param propertyKey key to look for in each template's additionalProperties
-- @return propertyKey itself when at least one template declares it, else nil
function CountryData.getSemanticPropertyName(propertyKey)
    local templates = require('Module:ConfigRepository').templates

    for _, config in pairs(templates) do
        -- Templates without a semantics section (or without
        -- additionalProperties) are skipped.
        local semantics = config.semantics
        local declared = semantics and semantics.additionalProperties
        if declared and declared[propertyKey] then
            return propertyKey
        end
    end

    -- No template declares this property
    return nil
end


-- Get semantic properties for countries and regions
-- Get semantic properties for countries and regions
-- Returns a table of properties that can be integrated with the batch processing system
-- Returns a table of properties that can be integrated with the batch processing system
function CountryData.getSemanticCountryRegionProperties(countryValue, errorContext)
function CountryData.getSemanticCountryRegionProperties(countryValue)
    -- Debug: Function entry - use the passed errorContext if available
    local ErrorHandling = require('Module:ErrorHandling')
    local debugContext = errorContext or ErrorHandling.createContext("CountryData")
    ErrorHandling.addError(debugContext, "FunctionEntryDebug",
        string.format("getSemanticCountryRegionProperties called with: '%s'", countryValue or "nil"),
        "", false)
   
     -- Initialize return table
     -- Initialize return table
     local properties = {}
     local properties = {}
      
      
     if not countryValue or countryValue == "" then
     if not countryValue or countryValue == "" then
        ErrorHandling.addError(debugContext, "EarlyReturnDebug",
            "Returning early: countryValue is nil or empty",
            "", false)
         return properties
         return properties
     end
     end
      
      
     -- Use standard semantic property names directly
     -- Get property names from ConfigRepository
     local countryPropertyName = "Has country"
     local countryPropertyName = CountryData.getSemanticPropertyName("Has country")
     local regionPropertyName = "Has ICANN region"
     local regionPropertyName = CountryData.getSemanticPropertyName("Has ICANN region")
   
    -- If property names are not found in ConfigRepository, we can't proceed
    if not countryPropertyName or not regionPropertyName then
        return properties
    end
      
      
     -- Split multi-value country strings
     -- Split multi-value country strings
Line 647: Line 643:
         end
         end
     end
     end
   
    -- Debug: Track country normalization flow
    ErrorHandling.addError(debugContext, "CountryDebug",
        string.format("Input='%s' Countries=%d FirstCountry='%s'",
            countryValue or "nil",
            #countries,
            countries[1] or "none"),
        "", false)
      
      
     -- Process each country
     -- Process each country
     for _, country in ipairs(countries) do
     for _, country in ipairs(countries) do
        -- Debug: Show each country being processed
        ErrorHandling.addError(debugContext, "ProcessingCountryDebug",
            string.format("Processing country: '%s'", country),
            "", false)
       
        -- STEP 1: Direct lookup with normalization
         local normalizedCountry = CountryData.normalizeCountryName(country)
         local normalizedCountry = CountryData.normalizeCountryName(country)
        -- STEP 2: If that fails, try a direct diacritic-stripped lookup
        if normalizedCountry == "(Unrecognized)" then
            local stripped = DiacriticNormalization.removeDiacritics(country)
            if stripped ~= country then
                normalizedCountry = CountryData.normalizeCountryName(stripped)
            end
        end
       
        -- Debug: Show normalization result
        ErrorHandling.addError(debugContext, "NormalizationDebug",
            string.format("'%s' normalized to '%s'", country, normalizedCountry or "nil"),
            "", false)
          
          
         -- Only process recognized countries
         -- Only process recognized countries
         if normalizedCountry ~= "(Unrecognized)" then
         if normalizedCountry ~= "(Unrecognized)" then
            -- Debug: Show successful recognition
            ErrorHandling.addError(debugContext, "RecognizedCountryDebug",
                string.format("Country '%s' recognized as '%s'", country, normalizedCountry),
                "", false)
           
             -- Add country to properties table
             -- Add country to properties table
             properties[countryPropertyName] = properties[countryPropertyName] or {}
             properties[countryPropertyName] = properties[countryPropertyName] or {}
Line 695: Line 659:
                 properties[regionPropertyName] = properties[regionPropertyName] or {}
                 properties[regionPropertyName] = properties[regionPropertyName] or {}
                 table.insert(properties[regionPropertyName], region)
                 table.insert(properties[regionPropertyName], region)
               
                -- Debug: Show region assignment
                ErrorHandling.addError(debugContext, "RegionDebug",
                    string.format("Country '%s' assigned to region '%s'", normalizedCountry, region),
                    "", false)
             end
             end
        else
            -- Debug: Show unrecognized country
            ErrorHandling.addError(debugContext, "UnrecognizedCountryDebug",
                string.format("Country '%s' not recognized", country),
                "", false)
         end
         end
     end
     end
   
    -- Debug: Show final properties before return
    local propCount = 0
    for _ in pairs(properties) do propCount = propCount + 1 end
    ErrorHandling.addError(debugContext, "FinalPropertiesDebug",
        string.format("Returning %d properties", propCount),
        "", false)
      
      
     return properties
     return properties