Module:CountryData: Difference between revisions
Appearance
// via Wikitext Extension for VSCode |
// via Wikitext Extension for VSCode |
||
| Line 45: | Line 45: | ||
end | end | ||
-- | -- Reset the module-level caches (useful for testing) | ||
local function | local function resetCaches() | ||
dataCache = nil | |||
nameLookupCache = nil | |||
regionLookupCache = nil | |||
propertyCache = {} | |||
functionCache = {} | |||
end | end | ||
-------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | ||
-- Data Loading Layer | -- Data Loading and Cache Building Layer (Refactored) | ||
-------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | ||
-- | -- Data loading function using DatasetLoader | ||
local function | local function loadData() | ||
if dataCache then | |||
return dataCache | |||
end | |||
local raw = loader.get('CountryDataset') | |||
dataCache = { | |||
countries = raw.countries or {}, | |||
icann_regions = raw.icann_regions or {}, | |||
schema_version = raw.schema_version, | |||
last_updated = raw.last_updated | |||
} | |||
return dataCache | |||
end | |||
-- Builds the primary name-to-code lookup cache. | |||
-- This is the core of the refactoring, ensuring correct normalization order. | |||
local function buildNameLookupCache(data) | |||
if nameLookupCache then | if nameLookupCache then | ||
return nameLookupCache | return nameLookupCache | ||
end | end | ||
if not data or not data.countries then | if not data or not data.countries then | ||
nameLookupCache = {} | nameLookupCache = {} | ||
return nameLookupCache | return nameLookupCache | ||
end | end | ||
local lookup = {} | local lookup = {} | ||
for code, country in pairs(data.countries) do | for code, country in pairs(data.countries) do | ||
-- | -- Ensure the country has a name to process | ||
local canonicalName = country.name or country.canonical_name | |||
if canonicalName then | |||
-- 1. Add the canonical name itself | |||
local normalizedName = NormalizationText.normalizeText(canonicalName) | |||
lookup[normalizedName] = code | |||
-- 2. Add the diacritic-stripped version of the canonical name | |||
local strippedName = DiacriticNormalization.removeDiacritics(canonicalName) | |||
if strippedName ~= canonicalName then -- only add if it's different | |||
lookup[NormalizationText.normalizeText(strippedName)] = code | |||
end | |||
end | end | ||
-- 3. Process all variations | |||
-- | |||
if country.variations and type(country.variations) == "table" then | if country.variations and type(country.variations) == "table" then | ||
for _, variation in pairs(country.variations) do | for _, variation in pairs(country.variations) do | ||
lookup[NormalizationText.normalizeText( | -- Add the variation | ||
local normalizedVariation = NormalizationText.normalizeText(variation) | |||
lookup[normalizedVariation] = code | |||
-- Add the diacritic-stripped version of the variation | |||
local strippedVariation = DiacriticNormalization.removeDiacritics(variation) | |||
if strippedVariation ~= variation then | |||
lookup[NormalizationText.normalizeText(strippedVariation)] = code | |||
end | |||
end | end | ||
end | end | ||
end | end | ||
nameLookupCache = lookup | nameLookupCache = lookup | ||
return lookup | return lookup | ||
end | end | ||
-- | -- Builds the region lookup cache. | ||
local function | local function buildRegionLookupCache(data) | ||
if regionLookupCache then | if regionLookupCache then | ||
return regionLookupCache | return regionLookupCache | ||
end | end | ||
if not data or not data.icann_regions then | if not data or not data.icann_regions then | ||
regionLookupCache = {} | regionLookupCache = {} | ||
return regionLookupCache | return regionLookupCache | ||
end | end | ||
local lookup = {} | local lookup = {} | ||
for code, region in pairs(data.icann_regions) do | for code, region in pairs(data.icann_regions) do | ||
if region.name then | if region.name then | ||
lookup[NormalizationText.normalizeText(region.name)] = code | lookup[NormalizationText.normalizeText(region.name)] = code | ||
end | end | ||
if region.variations and type(region.variations) == "table" then | if region.variations and type(region.variations) == "table" then | ||
for _, variation in pairs(region.variations) do | for _, variation in pairs(region.variations) do | ||
lookup[NormalizationText.normalizeText(variation)] = code | lookup[NormalizationText.normalizeText(variation)] = code | ||
| Line 147: | Line 143: | ||
end | end | ||
end | end | ||
regionLookupCache = lookup | regionLookupCache = lookup | ||
return lookup | return lookup | ||
end | end | ||
-------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | ||
-- Core API Functions | -- Core API Functions (Public Interface) | ||
-------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | ||
| Line 184: | Line 156: | ||
-- Load data and initialize caches | -- Load data and initialize caches | ||
function CountryData.loadData(frame) | function CountryData.loadData(frame) | ||
return loadData( | return loadData() | ||
end | end | ||
| Line 193: | Line 165: | ||
end | end | ||
-- Get country data by ISO code | -- Get country data by ISO code (Refactored to use new cache logic) | ||
function CountryData.getCountryByCode(code) | function CountryData.getCountryByCode(code) | ||
if not code or code == "" then | if not code or code == "" then | ||
| Line 199: | Line 171: | ||
end | end | ||
local cacheKey = createCacheKey("getCountryByCode", code) | local cacheKey = createCacheKey("getCountryByCode", code) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 206: | Line 177: | ||
local data = loadData() | local data = loadData() | ||
code = code:upper() -- Standardize code | |||
local result = data and data.countries and data.countries[code] or nil | |||
local result = | |||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
-- Get country data by name ( | -- Get country data by name (Refactored to use new cache logic) | ||
function CountryData.getCountryByName(name) | function CountryData.getCountryByName(name) | ||
if not name or name == "" then | if not name or name == "" then | ||
return nil | return nil | ||
end | end | ||
local cacheKey = createCacheKey("getCountryByName", name) | local cacheKey = createCacheKey("getCountryByName", name) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
return functionCache[cacheKey] | return functionCache[cacheKey] | ||
end | end | ||
local data = loadData() | local data = loadData() | ||
local nameLookup = | local nameLookup = buildNameLookupCache(data) | ||
-- Normalize the input | -- Normalize the input name in one go (text normalization includes lowercasing) | ||
local normalized = NormalizationText.normalizeText(name) | local normalized = NormalizationText.normalizeText(name) | ||
-- | -- First, try a direct lookup with the normalized name | ||
local code = nameLookup[normalized] | local code = nameLookup[normalized] | ||
-- If not found, try looking up the diacritic-stripped version | |||
if not code then | |||
local stripped = DiacriticNormalization.removeDiacritics(name) | |||
-- Check if stripping made a difference to avoid redundant normalization | |||
if stripped ~= name then | |||
local normalizedStripped = NormalizationText.normalizeText(stripped) | |||
code = nameLookup[normalizedStripped] | |||
end | |||
end | |||
local result = nil | local result = nil | ||
if code | if code then | ||
result = data.countries[code] | result = data.countries[code] | ||
end | end | ||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
| Line 266: | Line 230: | ||
end | end | ||
local cacheKey = createCacheKey("getCountryCodeByName", name) | local cacheKey = createCacheKey("getCountryCodeByName", name) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 272: | Line 235: | ||
end | end | ||
local | local country = CountryData.getCountryByName(name) | ||
local | local code = nil | ||
-- | -- To get the code, we need to find which key in the main data table matches the found country object. | ||
-- This is less efficient but necessary if the country object itself doesn't store its code. | |||
if country then | |||
-- | local data = loadData() | ||
for c, countryData in pairs(data.countries) do | |||
if countryData == country then | |||
if | code = c | ||
break | |||
local | end | ||
if | |||
end | end | ||
end | end | ||
functionCache[cacheKey] = code | functionCache[cacheKey] = code | ||
return code | return code | ||
end | end | ||
-- (The rest of the functions remain unchanged for now, but will be updated in subsequent phases) | |||
function CountryData.normalizeCountryName(name) | function CountryData.normalizeCountryName(name) | ||
| Line 299: | Line 261: | ||
end | end | ||
local cacheKey = createCacheKey("normalizeCountryName", name) | local cacheKey = createCacheKey("normalizeCountryName", name) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 309: | Line 270: | ||
local result | local result | ||
if country then | if country then | ||
result = country.name or country.canonical_name | result = country.name or country.canonical_name | ||
result = result | result = result | ||
:gsub(",%s*", "") | :gsub(",%s*", "") | ||
:gsub("%sand the%s+", " and ") | :gsub("%sand the%s+", " and ") | ||
else | else | ||
result = "(Unrecognized)" | result = "(Unrecognized)" | ||
end | end | ||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getRegionByCountry(name) | function CountryData.getRegionByCountry(name) | ||
if not name or name == "" then | if not name or name == "" then | ||
| Line 331: | Line 287: | ||
end | end | ||
local cacheKey = createCacheKey("getRegionByCountry", name) | local cacheKey = createCacheKey("getRegionByCountry", name) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 343: | Line 298: | ||
result = country.icann_region | result = country.icann_region | ||
else | else | ||
result = "(Unrecognized)" | result = "(Unrecognized)" | ||
end | end | ||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getCountriesByRegion(region) | function CountryData.getCountriesByRegion(region) | ||
if not region or region == "" then | if not region or region == "" then | ||
| Line 358: | Line 310: | ||
end | end | ||
local cacheKey = createCacheKey("getCountriesByRegion", region) | local cacheKey = createCacheKey("getCountriesByRegion", region) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 365: | Line 316: | ||
local data = loadData() | local data = loadData() | ||
local regionLookup = | local regionLookup = buildRegionLookupCache(data) | ||
local normalized = NormalizationText.normalizeText(region) | local normalized = NormalizationText.normalizeText(region) | ||
local regionCode = regionLookup[normalized] | local regionCode = regionLookup[normalized] | ||
local result = {} | local result = {} | ||
if regionCode and data.countries then | if regionCode and data.countries then | ||
for code, country in pairs(data.countries) do | for code, country in pairs(data.countries) do | ||
if country.icann_region == regionCode then | if country.icann_region == regionCode then | ||
result | table.insert(result, { | ||
code = code, | code = code, | ||
name = country.name or country.canonical_name | name = country.name or country.canonical_name | ||
} | }) | ||
end | end | ||
end | end | ||
end | end | ||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getAllCountryCodes() | function CountryData.getAllCountryCodes() | ||
local cacheKey = "getAllCountryCodes" | local cacheKey = "getAllCountryCodes" | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 411: | Line 345: | ||
local data = loadData() | local data = loadData() | ||
local result = {} | local result = {} | ||
if data and data.countries then | if data and data.countries then | ||
for code in pairs(data.countries) do | for code in pairs(data.countries) do | ||
result | table.insert(result, code) | ||
end | end | ||
end | end | ||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getAllCountryNames() | function CountryData.getAllCountryNames() | ||
local cacheKey = "getAllCountryNames" | local cacheKey = "getAllCountryNames" | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 442: | Line 363: | ||
local data = loadData() | local data = loadData() | ||
local result = {} | local result = {} | ||
if data and data.countries then | if data and data.countries then | ||
for _, country in pairs(data.countries) do | for _, country in pairs(data.countries) do | ||
table.insert(result, country.name or country.canonical_name) | |||
end | end | ||
end | end | ||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getCountryProperty(code, property) | function CountryData.getCountryProperty(code, property) | ||
if not code or code == "" or not property or property == "" then | if not code or code == "" or not property or property == "" then | ||
| Line 470: | Line 378: | ||
end | end | ||
local cacheKey = createCacheKey("getCountryProperty", code, property) | local cacheKey = createCacheKey("getCountryProperty", code, property) | ||
if propertyCache[cacheKey] ~= nil then | if propertyCache[cacheKey] ~= nil then | ||
| Line 477: | Line 384: | ||
local country = CountryData.getCountryByCode(code) | local country = CountryData.getCountryByCode(code) | ||
local result = country and country[property] or nil | |||
propertyCache[cacheKey] = result | propertyCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getCountryPropertyByName(name, property) | function CountryData.getCountryPropertyByName(name, property) | ||
if not name or name == "" or not property or property == "" then | if not name or name == "" or not property or property == "" then | ||
| Line 494: | Line 395: | ||
end | end | ||
local cacheKey = createCacheKey("getCountryPropertyByName", name, property) | local cacheKey = createCacheKey("getCountryPropertyByName", name, property) | ||
if propertyCache[cacheKey] ~= nil then | if propertyCache[cacheKey] ~= nil then | ||
| Line 501: | Line 401: | ||
local code = CountryData.getCountryCodeByName(name) | local code = CountryData.getCountryCodeByName(name) | ||
local result = nil | local result = nil | ||
if code then | if code then | ||
| Line 507: | Line 406: | ||
end | end | ||
propertyCache[cacheKey] = result | propertyCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getAvailableProperties(code) | function CountryData.getAvailableProperties(code) | ||
if not code or code == "" then | if not code or code == "" then | ||
| Line 518: | Line 415: | ||
end | end | ||
local cacheKey = createCacheKey("getAvailableProperties", code) | local cacheKey = createCacheKey("getAvailableProperties", code) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 524: | Line 420: | ||
end | end | ||
local | local country = CountryData.getCountryByCode(code) | ||
local properties = {} | local properties = {} | ||
if country then | |||
for property in pairs(country) do | |||
table.insert(properties, property) | |||
end | |||
end | end | ||
functionCache[cacheKey] = properties | functionCache[cacheKey] = properties | ||
return properties | return properties | ||
end | end | ||
function CountryData.getAllPropertyNames() | function CountryData.getAllPropertyNames() | ||
local cacheKey = "getAllPropertyNames" | local cacheKey = "getAllPropertyNames" | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 558: | Line 439: | ||
local data = loadData() | local data = loadData() | ||
local properties = {} | local properties = {} | ||
local seen = {} | local seen = {} | ||
if data and data.countries then | |||
for _, country in pairs(data.countries) do | |||
for property in pairs(country) do | |||
if not seen[property] then | |||
seen[property] = true | |||
table.insert(properties, property) | |||
end | |||
end | end | ||
end | end | ||
end | end | ||
functionCache[cacheKey] = properties | functionCache[cacheKey] = properties | ||
return properties | return properties | ||
end | end | ||
function CountryData.getSemanticPropertyName(propertyKey) | function CountryData.getSemanticPropertyName(propertyKey) | ||
local ConfigRepository = require('Module:ConfigRepository') | local ConfigRepository = require('Module:ConfigRepository') | ||
for templateName, templateConfig in pairs(ConfigRepository.templates) do | for templateName, templateConfig in pairs(ConfigRepository.templates) do | ||
if templateConfig.semantics and templateConfig.semantics.additionalProperties then | if templateConfig.semantics and templateConfig.semantics.additionalProperties then | ||
if templateConfig.semantics.additionalProperties[propertyKey] then | if templateConfig.semantics.additionalProperties[propertyKey] then | ||
return propertyKey | return propertyKey | ||
| Line 611: | Line 465: | ||
end | end | ||
end | end | ||
return nil | return nil | ||
end | end | ||
function CountryData.getSemanticCountryRegionProperties(countryValue) | function CountryData.getSemanticCountryRegionProperties(countryValue) | ||
local properties = {} | local properties = {} | ||
if not countryValue or countryValue == "" then | if not countryValue or countryValue == "" then | ||
return properties | return properties | ||
end | end | ||
local countryPropertyName = CountryData.getSemanticPropertyName("Has country") | local countryPropertyName = CountryData.getSemanticPropertyName("Has country") | ||
local regionPropertyName = CountryData.getSemanticPropertyName("Has ICANN region") | local regionPropertyName = CountryData.getSemanticPropertyName("Has ICANN region") | ||
if not countryPropertyName or not regionPropertyName then | if not countryPropertyName or not regionPropertyName then | ||
return properties | return properties | ||
end | end | ||
local countries = {} | local countries = {} | ||
for country in string.gmatch(countryValue, "[^;]+") do | for country in string.gmatch(countryValue, "[^;]+") do | ||
| Line 644: | Line 489: | ||
end | end | ||
for _, country in ipairs(countries) do | for _, country in ipairs(countries) do | ||
local normalizedCountry = CountryData.normalizeCountryName(country) | local normalizedCountry = CountryData.normalizeCountryName(country) | ||
if normalizedCountry ~= "(Unrecognized)" then | if normalizedCountry ~= "(Unrecognized)" then | ||
properties[countryPropertyName] = properties[countryPropertyName] or {} | properties[countryPropertyName] = properties[countryPropertyName] or {} | ||
table.insert(properties[countryPropertyName], normalizedCountry) | table.insert(properties[countryPropertyName], normalizedCountry) | ||
local region = CountryData.getRegionByCountry(country) | local region = CountryData.getRegionByCountry(country) | ||
if region and region ~= "(Unrecognized)" then | if region and region ~= "(Unrecognized)" then | ||
| Line 666: | Line 506: | ||
end | end | ||
function CountryData.exportAsJson() | function CountryData.exportAsJson() | ||
local data = loadData() | local data = loadData() | ||
if not data or not data.countries then | if not data or not data.countries then | ||
return '{}' | return '{}' | ||
end | end | ||
if mw.text and mw.text.jsonEncode then | if mw.text and mw.text.jsonEncode then | ||
local success, result = pcall(function() | local success, result = pcall(function() | ||
return mw.text.jsonEncode(data) | return mw.text.jsonEncode(data) | ||
end) | end) | ||
if success and result then | if success and result then | ||
return result | return result | ||
end | end | ||
end | end | ||
return '{}' | return '{}' | ||
end | end | ||
local function getRegionClass(region) | local function getRegionClass(region) | ||
if not region or region == "(Unrecognized)" then | if not region or region == "(Unrecognized)" then | ||
return "region-default" | return "region-default" | ||
end | end | ||
if region == "NA" or region == "LAC" then | if region == "NA" or region == "LAC" then | ||
return "region-americas" | return "region-americas" | ||
| Line 708: | Line 536: | ||
end | end | ||
function CountryData.formatCountryList(value) | function CountryData.formatCountryList(value) | ||
if not value or value == "" then return "" end | if not value or value == "" then return "" end | ||
local countries = {} | local countries = {} | ||
for country in string.gmatch(value, "[^;]+") do | for country in string.gmatch(value, "[^;]+") do | ||
| Line 724: | Line 548: | ||
local normalizedCountries = {} | local normalizedCountries = {} | ||
for _, country in ipairs(countries) do | for _, country in ipairs(countries) do | ||
local normalized = CountryData.normalizeCountryName(country) | local normalized = CountryData.normalizeCountryName(country) | ||
if normalized ~= "(Unrecognized)" then | if normalized ~= "(Unrecognized)" then | ||
table.insert(normalizedCountries, normalized) | |||
end | end | ||
end | end | ||
if #normalizedCountries > 0 then | |||
if | |||
local listItems = {} | local listItems = {} | ||
for _, country in ipairs(normalizedCountries) do | for _, country in ipairs(normalizedCountries) do | ||
local countryRegion = CountryData.getRegionByCountry(country) | local countryRegion = CountryData.getRegionByCountry(country) | ||
local regionClass = getRegionClass(countryRegion) | local regionClass = getRegionClass(countryRegion) | ||
table.insert(listItems, string.format("<li class=\"%s\">%s</li>", regionClass, country)) | table.insert(listItems, string.format("<li class=\"%s\">%s</li>", regionClass, country)) | ||
end | end | ||
return string.format("<ul class=\"template-list template-list-country\">%s</ul>", table.concat(listItems, "")) | |||
return string.format("<ul class=\"template-list template-list-country\">%s</ul>", | |||
end | end | ||
| Line 763: | Line 568: | ||
end | end | ||
function CountryData.formatCountries(value) | function CountryData.formatCountries(value) | ||
return CountryData.formatCountryList(value) | return CountryData.formatCountryList(value) | ||
end | end | ||
function CountryData.getCountriesForCategories(value) | function CountryData.getCountriesForCategories(value) | ||
if not value or value == "" then return {} end | if not value or value == "" then return {} end | ||
| Line 781: | Line 584: | ||
local normalizedCountries = {} | local normalizedCountries = {} | ||
for _, country in ipairs(countries) do | for _, country in ipairs(countries) do | ||
local normalized = CountryData.normalizeCountryName(country) | local normalized = CountryData.normalizeCountryName(country) | ||
if normalized ~= "(Unrecognized)" then | if normalized ~= "(Unrecognized)" then | ||
table.insert(normalizedCountries, normalized) | |||
end | end | ||
end | end | ||
| Line 795: | Line 594: | ||
end | end | ||
function CountryData.getFlagFileName(countryNameOrCode) | function CountryData.getFlagFileName(countryNameOrCode) | ||
if not countryNameOrCode or countryNameOrCode == '' then return nil end | if not countryNameOrCode or countryNameOrCode == '' then return nil end | ||
local inputName = countryNameOrCode:gsub('_', ' ') | local inputName = countryNameOrCode:gsub('_', ' ') | ||
local | local isoCode = CountryData.getCountryCodeByName(inputName) | ||
if not isoCode and #inputName == 2 then | if not isoCode and #inputName == 2 then | ||
if CountryData.getCountryByCode(inputName) then | if CountryData.getCountryByCode(inputName) then | ||
isoCode = inputName | isoCode = inputName:upper() | ||
end | end | ||
end | end | ||
if not isoCode or #isoCode ~= 2 then return nil end | |||
if not isoCode or | |||
return 'Flag-' .. string.lower(isoCode) .. '.svg' | return 'Flag-' .. string.lower(isoCode) .. '.svg' | ||
end | end | ||
return CountryData | return CountryData | ||
Revision as of 20:02, 3 July 2025
Documentation for this module may be created at Module:CountryData/doc
-- Module:CountryData
-- Unified module for country data management.
--
-- Features:
-- * Loads country data from JSON stored in Data:CountryDataset.json
-- * Normalizes country names to canonical forms
-- * Maps countries to ICANN regions
-- * Provides extensible property access
-- * Integrates with Semantic MediaWiki
-- * Formats country lists with region-specific emoji styling
-- * Processes countries for category assignment
-- Dependencies
local DiacriticNormalization = require('Module:NormalizationDiacritic')
local NormalizationText = require('Module:NormalizationText')
local loader = require('Module:DatasetLoader')
-- Module-level cache tables for improved performance.
-- All of these are cleared together by resetCaches().
-- dataCache: the table assembled by loadData(); nil until the first load.
local dataCache = nil
-- nameLookupCache: normalized country name/variation -> country code,
-- built by buildNameLookupCache(); nil until first built.
local nameLookupCache = nil
-- regionLookupCache: normalized region name/variation -> region code,
-- built by buildRegionLookupCache(); nil until first built.
local regionLookupCache = nil
-- propertyCache / functionCache: memoized results of the public API,
-- keyed by the strings produced by createCacheKey().
local propertyCache = {}
local functionCache = {}
-- Default data structure to use if JSON loading fails.
-- It mirrors the four top-level fields that loadData() copies from the dataset.
local DEFAULT_DATA = {
schema_version = 1,
-- Evaluated once, when the module is loaded; the leading '!' makes os.date
-- format the timestamp in UTC.
last_updated = os.date('!%Y-%m-%dT%H:%M:%SZ'),
countries = {},
icann_regions = {}
}
--------------------------------------------------------------------------------
-- Helper Functions
--------------------------------------------------------------------------------
-- Create a cache key from a function name and its arguments.
--
-- The key is the function name followed by tostring() of every argument,
-- joined with ":". For example createCacheKey("f", "US", "name") yields
-- "f:US:name".
--
-- @param funcName string  Name of the function being memoized.
-- @param ...              Arguments of the call; nil arguments are allowed.
-- @return string          The cache key.
local function createCacheKey(funcName, ...)
    -- select("#", ...) is the true argument count. The length operator on
    -- {...} is unspecified when the arguments contain nil and silently drops
    -- trailing nils, which would let different calls share one key.
    local argCount = select("#", ...)
    local keyParts = { funcName }
    for i = 1, argCount do
        -- The extra parentheses keep only the i-th argument.
        -- tostring(nil) is already "nil", so no fallback is needed.
        keyParts[i + 1] = tostring((select(i, ...)))
    end
    return table.concat(keyParts, ":")
end
-- Drop every module-level cache so that the next API call reloads the
-- dataset and rebuilds the lookups (useful for testing).
local function resetCaches()
    -- Lazily built caches go back to their "not built yet" state.
    dataCache, nameLookupCache, regionLookupCache = nil, nil, nil
    -- Memoization tables are replaced by fresh, empty tables.
    propertyCache, functionCache = {}, {}
end
--------------------------------------------------------------------------------
-- Data Loading and Cache Building Layer (Refactored)
--------------------------------------------------------------------------------
-- Data loading function using DatasetLoader.
--
-- Fetches the 'CountryDataset' dataset once, keeps the four fields this
-- module uses, and memoizes the resulting table in dataCache.
--
-- @return table  A table with `countries` and `icann_regions` (always
--                tables) plus `schema_version` and `last_updated` as found
--                in the dataset.
local function loadData()
    if dataCache then
        return dataCache
    end

    local raw = loader.get('CountryDataset')
    -- Nothing visible here guarantees that loader.get returns a table.
    -- Fall back to the module's default structure instead of raising on
    -- `raw.countries` when the dataset is missing or malformed.
    if type(raw) ~= "table" then
        raw = DEFAULT_DATA
    end

    dataCache = {
        countries = raw.countries or {},
        icann_regions = raw.icann_regions or {},
        schema_version = raw.schema_version,
        last_updated = raw.last_updated
    }
    return dataCache
end
-- Builds (once) the primary lookup from normalized country names to codes.
--
-- For the canonical name and for every variation of every country, two keys
-- may be registered: the text-normalized form, and - when stripping
-- diacritics changes the string - the text-normalized form of the stripped
-- string. The finished table is memoized in nameLookupCache.
--
-- @param data table|nil  Dataset as returned by loadData().
-- @return table          Map of normalized name -> country code (empty when
--                        `data` or `data.countries` is missing).
local function buildNameLookupCache(data)
    if nameLookupCache then
        return nameLookupCache
    end

    local index = {}
    local countries = data and data.countries

    if countries then
        -- Registers one label under its normalized form and, if different,
        -- under the normalized form of its diacritic-free spelling.
        local function register(label, isoCode)
            index[NormalizationText.normalizeText(label)] = isoCode
            local bare = DiacriticNormalization.removeDiacritics(label)
            if bare ~= label then
                index[NormalizationText.normalizeText(bare)] = isoCode
            end
        end

        for isoCode, entry in pairs(countries) do
            -- A country may lack a name and be reachable only via variations.
            local primary = entry.name or entry.canonical_name
            if primary then
                register(primary, isoCode)
            end

            local aliases = entry.variations
            if aliases and type(aliases) == "table" then
                for _, alias in pairs(aliases) do
                    register(alias, isoCode)
                end
            end
        end
    end

    nameLookupCache = index
    return index
end
-- Builds (once) the lookup from normalized region names to region codes.
--
-- Every region contributes its `name` and each entry of its `variations`
-- table, all passed through NormalizationText.normalizeText. The finished
-- table is memoized in regionLookupCache.
--
-- @param data table|nil  Dataset as returned by loadData().
-- @return table          Map of normalized region name -> region code (empty
--                        when `data` or `data.icann_regions` is missing).
local function buildRegionLookupCache(data)
    if regionLookupCache then
        return regionLookupCache
    end

    local index = {}
    local regions = data and data.icann_regions

    if regions then
        for regionCode, entry in pairs(regions) do
            local label = entry.name
            if label then
                index[NormalizationText.normalizeText(label)] = regionCode
            end

            local aliases = entry.variations
            if aliases and type(aliases) == "table" then
                for _, alias in pairs(aliases) do
                    index[NormalizationText.normalizeText(alias)] = regionCode
                end
            end
        end
    end

    regionLookupCache = index
    return index
end
--------------------------------------------------------------------------------
-- Core API Functions (Public Interface)
--------------------------------------------------------------------------------
local CountryData = {}

-- Return the dataset table, loading and caching it on first use.
-- The `frame` argument is accepted but not used by this function.
function CountryData.loadData(frame)
    local data = loadData()
    return data
end

-- Clear all module-level caches (primarily for testing).
-- Always returns true.
function CountryData.resetCaches()
    resetCaches()
    return true
end
-- Get country data by ISO code.
--
-- The code is upper-cased before it is used as a key into `countries`.
-- Successful lookups are memoized in functionCache under the code exactly
-- as it was passed in.
--
-- @param code string  Country code in any letter case.
-- @return table|nil   The country record, or nil for an empty or unknown code.
function CountryData.getCountryByCode(code)
    if not (code and code ~= "") then
        return nil
    end

    local cacheKey = createCacheKey("getCountryByCode", code)
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    local data = loadData()
    local isoCode = code:upper()

    local record = nil
    if data and data.countries then
        record = data.countries[isoCode] or nil
    end

    functionCache[cacheKey] = record
    return record
end
-- Resolve a country name (canonical name or known variation) to its code.
-- Private helper shared by getCountryByName and getCountryCodeByName so the
-- name -> code step lives in one place.
--
-- @param name string  Non-empty country name as supplied by the caller.
-- @return string|nil  The country code, or nil when the name is unknown.
-- @return table       The dataset returned by loadData().
local function resolveCountryCode(name)
    local data = loadData()
    local nameLookup = buildNameLookupCache(data)

    -- First, try a direct lookup with the text-normalized name.
    local code = nameLookup[NormalizationText.normalizeText(name)]

    -- If not found, retry with the diacritic-stripped spelling, but only
    -- when stripping actually changed the string.
    if not code then
        local stripped = DiacriticNormalization.removeDiacritics(name)
        if stripped ~= name then
            code = nameLookup[NormalizationText.normalizeText(stripped)]
        end
    end

    return code, data
end

-- Get country data by name.
--
-- @param name string  Canonical country name or one of its variations.
-- @return table|nil   The country record, or nil for an empty or unknown name.
function CountryData.getCountryByName(name)
    if not name or name == "" then
        return nil
    end

    local cacheKey = createCacheKey("getCountryByName", name)
    if functionCache[cacheKey] ~= nil then
        return functionCache[cacheKey]
    end

    local code, data = resolveCountryCode(name)

    local result = nil
    if code then
        result = data.countries[code]
    end

    functionCache[cacheKey] = result
    return result
end

-- Get country code by name.
--
-- The name lookup already maps every known name to its code, so the code is
-- read from it directly rather than found by scanning every country record
-- for the matching table.
--
-- @param name string  Canonical country name or one of its variations.
-- @return string|nil  The country code, or nil for an empty or unknown name.
function CountryData.getCountryCodeByName(name)
    if not name or name == "" then
        return nil
    end

    local cacheKey = createCacheKey("getCountryCodeByName", name)
    if functionCache[cacheKey] ~= nil then
        return functionCache[cacheKey]
    end

    local code, data = resolveCountryCode(name)

    -- Only report a code that still has a record behind it, matching the
    -- contract "code of the country that getCountryByName would return".
    if code and data.countries[code] == nil then
        code = nil
    end

    functionCache[cacheKey] = code
    return code
end
-- (The rest of the functions remain unchanged for now, but will be updated in subsequent phases)
-- Turn an arbitrary country name into the display name used by this module.
--
-- For a recognized country the record's `name` (falling back to
-- `canonical_name`) is taken, every comma plus any whitespace following it
-- is removed, and " and the " is collapsed to " and ". Unrecognized names
-- yield the literal "(Unrecognized)".
--
-- @param name  Country name to normalize.
-- @return The display name, "(Unrecognized)", or `name` itself when it is
--         nil/empty.
function CountryData.normalizeCountryName(name)
    if not (name and name ~= "") then
        return name
    end

    local cacheKey = createCacheKey("normalizeCountryName", name)
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    local displayName = "(Unrecognized)"
    local record = CountryData.getCountryByName(name)
    if record then
        local text = record.name or record.canonical_name
        -- A single-target assignment keeps only gsub's first result (the
        -- string) and drops the substitution count.
        text = text:gsub(",%s*", "")
        text = text:gsub("%sand the%s+", " and ")
        displayName = text
    end

    functionCache[cacheKey] = displayName
    return displayName
end
-- Return the `icann_region` field of the country matching `name`.
--
-- @param name  Country name, resolved through CountryData.getCountryByName.
-- @return The record's icann_region; "(Unrecognized)" when the country is
--         unknown or has no icann_region; nil when `name` is nil/empty.
function CountryData.getRegionByCountry(name)
    if not (name and name ~= "") then
        return nil
    end

    local cacheKey = createCacheKey("getRegionByCountry", name)
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    local record = CountryData.getCountryByName(name)
    -- The fallback string is always truthy, so the and/or form is safe here.
    local region = record and record.icann_region or "(Unrecognized)"

    functionCache[cacheKey] = region
    return region
end
-- List the countries whose `icann_region` equals the code that `region`
-- resolves to.
--
-- `region` is normalized with NormalizationText.normalizeText and mapped to a
-- region code through the table returned by buildRegionLookupCache.
--
-- @param region  Region identifier to resolve.
-- @return A list of { code = ..., name = ... } tables, in `pairs` traversal
--         order (unspecified). Empty when `region` is nil/empty or does not
--         resolve. Apart from the nil/empty case, the returned table is the
--         cached instance shared with later callers.
function CountryData.getCountriesByRegion(region)
    if not (region and region ~= "") then
        return {}
    end

    local cacheKey = createCacheKey("getCountriesByRegion", region)
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    local data = loadData()
    local regionLookup = buildRegionLookupCache(data)
    local regionCode = regionLookup[NormalizationText.normalizeText(region)]

    local matches = {}
    if regionCode and data.countries then
        for countryCode, record in pairs(data.countries) do
            if record.icann_region == regionCode then
                matches[#matches + 1] = {
                    code = countryCode,
                    name = record.name or record.canonical_name
                }
            end
        end
    end

    functionCache[cacheKey] = matches
    return matches
end
-- Collect every key of data.countries into a list.
--
-- @return A list of country codes in `pairs` traversal order (unspecified).
--         The list is memoized in functionCache, so callers receive the same
--         table on every call.
function CountryData.getAllCountryCodes()
    local cacheKey = "getAllCountryCodes"
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    local codes = {}
    local data = loadData()
    if data and data.countries then
        for countryCode in pairs(data.countries) do
            codes[#codes + 1] = countryCode
        end
    end

    functionCache[cacheKey] = codes
    return codes
end
-- Collect the display name of every entry in data.countries.
--
-- Each record contributes its `name`, or its `canonical_name` when `name`
-- is absent.
--
-- @return A list of names in `pairs` traversal order (unspecified). The list
--         is memoized in functionCache, so callers receive the same table on
--         every call.
function CountryData.getAllCountryNames()
    local cacheKey = "getAllCountryNames"
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    local names = {}
    local data = loadData()
    local countries = data and data.countries
    if countries then
        for _, record in pairs(countries) do
            table.insert(names, record.name or record.canonical_name)
        end
    end

    functionCache[cacheKey] = names
    return names
end
-- Read a single field from the country record stored under `code`.
--
-- The record is fetched with CountryData.getCountryByCode and the result is
-- memoized in propertyCache. A nil result is not retained by the cache,
-- because assigning nil to a table field stores nothing.
--
-- @param code      Country code identifying the record.
-- @param property  Name of the field to read.
-- @return The field's value (including `false`, if that is what is stored),
--         or nil when either argument is nil/empty, the country is unknown,
--         or the field is absent.
function CountryData.getCountryProperty(code, property)
    if not code or code == "" or not property or property == "" then
        return nil
    end

    local cacheKey = createCacheKey("getCountryProperty", code, property)
    local cached = propertyCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    local country = CountryData.getCountryByCode(code)

    -- Use an explicit branch rather than `country and country[property] or nil`:
    -- the and/or idiom would turn a stored `false` into nil.
    local result = nil
    if country then
        result = country[property]
    end

    propertyCache[cacheKey] = result
    return result
end
-- Read a single field from the country record matching `name`.
--
-- The name is resolved to a code with CountryData.getCountryCodeByName and
-- the field is then read with CountryData.getCountryProperty. Results are
-- memoized in propertyCache.
--
-- @param name      Country name to resolve.
-- @param property  Name of the field to read.
-- @return The value returned by getCountryProperty, or nil when either
--         argument is nil/empty or the name does not resolve to a code.
function CountryData.getCountryPropertyByName(name, property)
    local missingName = not name or name == ""
    local missingProperty = not property or property == ""
    if missingName or missingProperty then
        return nil
    end

    local cacheKey = createCacheKey("getCountryPropertyByName", name, property)
    local cached = propertyCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    local value = nil
    local code = CountryData.getCountryCodeByName(name)
    if code then
        value = CountryData.getCountryProperty(code, property)
    end

    propertyCache[cacheKey] = value
    return value
end
-- List the field names present on the country record stored under `code`.
--
-- @param code  Country code, resolved through CountryData.getCountryByCode.
-- @return A list of the record's keys in `pairs` traversal order
--         (unspecified); empty when `code` is nil/empty or unknown. Apart
--         from the nil/empty case, the list is memoized in functionCache.
function CountryData.getAvailableProperties(code)
    if not (code and code ~= "") then
        return {}
    end

    local cacheKey = createCacheKey("getAvailableProperties", code)
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    local fieldNames = {}
    local record = CountryData.getCountryByCode(code)
    if record then
        for fieldName in pairs(record) do
            fieldNames[#fieldNames + 1] = fieldName
        end
    end

    functionCache[cacheKey] = fieldNames
    return fieldNames
end
-- List every distinct field name that appears on any record in
-- data.countries.
--
-- @return A duplicate-free list of field names, in first-seen order under
--         `pairs` traversal (unspecified). The list is memoized in
--         functionCache, so callers receive the same table on every call.
function CountryData.getAllPropertyNames()
    local cacheKey = "getAllPropertyNames"
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    local fieldNames = {}
    local alreadyListed = {} -- set of field names already appended

    local data = loadData()
    local countries = data and data.countries
    if countries then
        for _, record in pairs(countries) do
            for fieldName in pairs(record) do
                if not alreadyListed[fieldName] then
                    alreadyListed[fieldName] = true
                    fieldNames[#fieldNames + 1] = fieldName
                end
            end
        end
    end

    functionCache[cacheKey] = fieldNames
    return fieldNames
end
-- Check whether `propertyKey` is declared as an additional semantic property
-- by any template in Module:ConfigRepository.
--
-- Every entry of ConfigRepository.templates is inspected for a truthy
-- `semantics.additionalProperties[propertyKey]`.
--
-- @param propertyKey  Property key to look for.
-- @return `propertyKey` itself when some template declares it, otherwise nil.
function CountryData.getSemanticPropertyName(propertyKey)
    local ConfigRepository = require('Module:ConfigRepository')

    for _, templateConfig in pairs(ConfigRepository.templates) do
        local semantics = templateConfig.semantics
        local additional = semantics and semantics.additionalProperties
        if additional and additional[propertyKey] then
            return propertyKey
        end
    end

    return nil
end
-- Build the semantic "Has country" / "Has ICANN region" property lists for a
-- semicolon-separated string of country names.
--
-- Each entry is trimmed and blank entries are skipped. A recognized country
-- contributes its normalized name to the country list, and its region to the
-- region list when the region is recognized. The region list is not
-- de-duplicated.
--
-- @param countryValue  Semicolon-separated country names.
-- @return A table mapping property name to a list of values. Empty when
--         `countryValue` is nil/empty or when either property name is not
--         reported by CountryData.getSemanticPropertyName.
function CountryData.getSemanticCountryRegionProperties(countryValue)
    local properties = {}
    if not (countryValue and countryValue ~= "") then
        return properties
    end

    local countryPropertyName = CountryData.getSemanticPropertyName("Has country")
    local regionPropertyName = CountryData.getSemanticPropertyName("Has ICANN region")
    if not (countryPropertyName and regionPropertyName) then
        return properties
    end

    for rawEntry in string.gmatch(countryValue, "[^;]+") do
        local entry = rawEntry:match("^%s*(.-)%s*$")
        if entry and entry ~= "" then
            local displayName = CountryData.normalizeCountryName(entry)
            if displayName ~= "(Unrecognized)" then
                local countryList = properties[countryPropertyName] or {}
                properties[countryPropertyName] = countryList
                countryList[#countryList + 1] = displayName

                -- The region is resolved from the entry as typed, not from
                -- the display name.
                local region = CountryData.getRegionByCountry(entry)
                if region and region ~= "(Unrecognized)" then
                    local regionList = properties[regionPropertyName] or {}
                    properties[regionPropertyName] = regionList
                    regionList[#regionList + 1] = region
                end
            end
        end
    end

    return properties
end
-- Serialize the loaded dataset to a JSON string.
--
-- Encoding uses mw.text.jsonEncode when it is available and runs under
-- pcall, so an encoder error produces the fallback instead of propagating.
--
-- @return The JSON text of the table returned by loadData, or the literal
--         '{}' when there is no country data, no encoder, or encoding fails.
function CountryData.exportAsJson()
    local EMPTY_JSON = '{}'

    local data = loadData()
    if not (data and data.countries) then
        return EMPTY_JSON
    end

    local encode = mw.text and mw.text.jsonEncode
    if encode then
        local ok, json = pcall(encode, data)
        if ok and json then
            return json
        end
    end

    return EMPTY_JSON
end
-- Map a region code to the CSS class used for country list items.
--
-- @param region  Region code, or nil / "(Unrecognized)".
-- @return "region-default" for a missing or unrecognized region,
--         "region-americas" for "NA" and "LAC",
--         "region-asia-pacific" for "AP",
--         "region-europe-africa" for anything else.
local function getRegionClass(region)
    if not region then
        return "region-default"
    end
    if region == "(Unrecognized)" then
        return "region-default"
    end
    if region == "AP" then
        return "region-asia-pacific"
    end
    if region == "NA" or region == "LAC" then
        return "region-americas"
    end
    return "region-europe-africa"
end
-- Render a semicolon-separated string of country names as an HTML list.
--
-- Each entry is trimmed and blank entries are skipped. Entries that
-- CountryData.normalizeCountryName reports as "(Unrecognized)" are dropped.
-- Every remaining entry becomes an <li> whose text is the normalized name
-- and whose class comes from getRegionClass.
--
-- @param value  Semicolon-separated country names.
-- @return The <ul class="template-list template-list-country"> markup, or ""
--         when `value` is nil/empty or contains no recognized country.
function CountryData.formatCountryList(value)
    if not value or value == "" then
        return ""
    end

    local listItems = {}
    for rawEntry in string.gmatch(value, "[^;]+") do
        local entry = rawEntry:match("^%s*(.-)%s*$")
        if entry and entry ~= "" then
            local displayName = CountryData.normalizeCountryName(entry)
            if displayName ~= "(Unrecognized)" then
                -- Resolve the region from the entry as typed, not from the
                -- display name: normalizeCountryName rewrites the name
                -- (removes commas, collapses "and the"), and the rewritten
                -- form is not guaranteed to resolve in the name lookup, which
                -- would silently yield the "region-default" class.
                local region = CountryData.getRegionByCountry(entry)
                listItems[#listItems + 1] = string.format(
                    "<li class=\"%s\">%s</li>",
                    getRegionClass(region),
                    displayName
                )
            end
        end
    end

    if #listItems == 0 then
        return ""
    end

    return string.format(
        "<ul class=\"template-list template-list-country\">%s</ul>",
        table.concat(listItems, "")
    )
end
-- Alias for CountryData.formatCountryList.
--
-- @param value  Semicolon-separated country names.
-- @return Whatever formatCountryList returns for `value`.
function CountryData.formatCountries(value)
    local rendered = CountryData.formatCountryList(value)
    return rendered
end
-- Split a semicolon-separated string of country names and return the
-- normalized names of the recognized ones.
--
-- Each entry is trimmed and blank entries are skipped. Entries that
-- CountryData.normalizeCountryName reports as "(Unrecognized)" are dropped.
--
-- @param value  Semicolon-separated country names.
-- @return A list of normalized country names in input order; empty when
--         `value` is nil/empty or nothing is recognized.
function CountryData.getCountriesForCategories(value)
    if not (value and value ~= "") then
        return {}
    end

    local recognized = {}
    for rawEntry in string.gmatch(value, "[^;]+") do
        local entry = rawEntry:match("^%s*(.-)%s*$")
        if entry and entry ~= "" then
            local displayName = CountryData.normalizeCountryName(entry)
            if displayName ~= "(Unrecognized)" then
                recognized[#recognized + 1] = displayName
            end
        end
    end

    return recognized
end
-- Build the flag file name ("Flag-xx.svg") for a country given by name or
-- by two-character code.
--
-- Underscores in the input are replaced by spaces. The result is first
-- treated as a country name; if that does not resolve and it is exactly two
-- bytes long, it is tried as a country code.
--
-- @param countryNameOrCode  Country name or two-character code.
-- @return 'Flag-' .. lowercased code .. '.svg', or nil when the input is
--         nil/empty, does not resolve, or the resolved code is not two
--         bytes long.
function CountryData.getFlagFileName(countryNameOrCode)
    if not countryNameOrCode or countryNameOrCode == '' then
        return nil
    end

    -- A single-target `local` keeps only gsub's first result (the string).
    local query = countryNameOrCode:gsub('_', ' ')

    local isoCode = CountryData.getCountryCodeByName(query)
    if not isoCode and #query == 2 and CountryData.getCountryByCode(query) then
        isoCode = query:upper()
    end

    if isoCode and #isoCode == 2 then
        return 'Flag-' .. isoCode:lower() .. '.svg'
    end
    return nil
end
-- Module export: hand the CountryData table to whoever loads this module.
return CountryData