Module:CountryData: Difference between revisions
Appearance
// via Wikitext Extension for VSCode |
// via Wikitext Extension for VSCode |
||
| (22 intermediate revisions by the same user not shown) | |||
| Line 1: | Line 1: | ||
-- | --[[ | ||
* Name: CountryData | |||
* Author: Mark W. Datysgeld | |||
* Description: Unified module for country data management with JSON loading, normalization, region mapping, and Semantic MediaWiki integration | |||
* Notes: Loads from Data:CountryDataset.json; normalizes country names to canonical forms; maps countries to ICANN regions; provides extensible property access; formats country lists with region-specific emoji styling; processes countries for category assignment | |||
]] | |||
-- Dependencies | -- Dependencies | ||
| Line 45: | Line 40: | ||
end | end | ||
-- | -- Reset the module-level caches (useful for testing) | ||
local function | local function resetCaches() | ||
dataCache = nil | |||
nameLookupCache = nil | |||
regionLookupCache = nil | |||
propertyCache = {} | |||
functionCache = {} | |||
end | end | ||
-------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | ||
-- Data Loading Layer | -- Data Loading and Cache Building Layer (Refactored) | ||
-------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | ||
-- | -- Data loading function using DatasetLoader | ||
local function | local function loadData() | ||
if dataCache then | |||
return dataCache | |||
end | |||
local raw = loader.get('CountryDataset') | |||
dataCache = { | |||
countries = raw.countries or {}, | |||
icann_regions = raw.icann_regions or {}, | |||
schema_version = raw.schema_version, | |||
last_updated = raw.last_updated | |||
} | |||
return dataCache | |||
end | |||
-- Builds the primary name-to-code lookup cache. | |||
-- This is the core of the refactoring, ensuring correct normalization order. | |||
local function buildNameLookupCache(data) | |||
if nameLookupCache then | if nameLookupCache then | ||
return nameLookupCache | return nameLookupCache | ||
end | end | ||
if not data or not data.countries then | if not data or not data.countries then | ||
nameLookupCache = {} | nameLookupCache = {} | ||
return nameLookupCache | return nameLookupCache | ||
end | end | ||
local lookup = {} | local lookup = {} | ||
for code, country in pairs(data.countries) do | for code, country in pairs(data.countries) do | ||
-- | -- Ensure the country has a name to process | ||
local canonicalName = country.name or country.canonical_name | |||
if canonicalName then | |||
-- 1. Add the canonical name itself | |||
local normalizedName = NormalizationText.normalizeText(canonicalName) | |||
lookup[normalizedName] = code | |||
-- 2. Add the diacritic-stripped version of the canonical name | |||
local strippedName = DiacriticNormalization.removeDiacritics(canonicalName) | |||
if strippedName ~= canonicalName then -- only add if it's different | |||
lookup[NormalizationText.normalizeText(strippedName)] = code | |||
end | |||
end | end | ||
-- 3. Process all variations | |||
-- | |||
if country.variations and type(country.variations) == "table" then | if country.variations and type(country.variations) == "table" then | ||
for _, variation in pairs(country.variations) do | for _, variation in pairs(country.variations) do | ||
-- Add the variation | |||
local normalizedVariation = NormalizationText.normalizeText(variation) | |||
lookup[normalizedVariation] = code | |||
-- Add the diacritic-stripped version of the variation | |||
local strippedVariation = DiacriticNormalization.removeDiacritics(variation) | |||
if strippedVariation ~= variation then | |||
lookup[NormalizationText.normalizeText(strippedVariation)] = code | |||
end | |||
end | end | ||
end | end | ||
end | |||
nameLookupCache = lookup | nameLookupCache = lookup | ||
return lookup | return lookup | ||
end | end | ||
-- | -- Builds the region lookup cache. | ||
local function | local function buildRegionLookupCache(data) | ||
if regionLookupCache then | if regionLookupCache then | ||
return regionLookupCache | return regionLookupCache | ||
end | end | ||
if not data or not data.icann_regions then | if not data or not data.icann_regions then | ||
regionLookupCache = {} | regionLookupCache = {} | ||
return regionLookupCache | return regionLookupCache | ||
end | end | ||
local lookup = {} | local lookup = {} | ||
for code, region in pairs(data.icann_regions) do | for code, region in pairs(data.icann_regions) do | ||
if region.name then | if region.name then | ||
lookup[NormalizationText.normalizeText(region.name)] = code | lookup[NormalizationText.normalizeText(region.name)] = code | ||
end | end | ||
if region.variations and type(region.variations) == "table" then | if region.variations and type(region.variations) == "table" then | ||
for _, variation in pairs(region.variations) do | for _, variation in pairs(region.variations) do | ||
lookup[NormalizationText.normalizeText(variation)] = code | lookup[NormalizationText.normalizeText(variation)] = code | ||
| Line 158: | Line 138: | ||
end | end | ||
end | end | ||
regionLookupCache = lookup | regionLookupCache = lookup | ||
return lookup | return lookup | ||
end | end | ||
-------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | ||
-- Core API Functions | -- Core API Functions (Public Interface) | ||
-------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | ||
| Line 195: | Line 151: | ||
-- Load data and initialize caches | -- Load data and initialize caches | ||
function CountryData.loadData(frame) | function CountryData.loadData(frame) | ||
return loadData( | return loadData() | ||
end | end | ||
| Line 204: | Line 160: | ||
end | end | ||
-- Get country data by ISO code | -- Get country data by ISO code (Refactored to use new cache logic) | ||
function CountryData.getCountryByCode(code) | function CountryData.getCountryByCode(code) | ||
if not code or code == "" then | if not code or code == "" then | ||
| Line 210: | Line 166: | ||
end | end | ||
local cacheKey = createCacheKey("getCountryByCode", code) | local cacheKey = createCacheKey("getCountryByCode", code) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 217: | Line 172: | ||
local data = loadData() | local data = loadData() | ||
code = code:upper() -- Standardize code | |||
local result = data and data.countries and data.countries[code] or nil | |||
local result = | |||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
-- Get country data by name ( | -- Get country data by name (Refactored to use new cache logic) | ||
function CountryData.getCountryByName(name) | function CountryData.getCountryByName(name) | ||
if not name or name == "" then | if not name or name == "" then | ||
return nil | return nil | ||
end | end | ||
local cacheKey = createCacheKey("getCountryByName", name) | local cacheKey = createCacheKey("getCountryByName", name) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
return functionCache[cacheKey] | return functionCache[cacheKey] | ||
end | end | ||
local data = loadData() | local data = loadData() | ||
local nameLookup = | local nameLookup = buildNameLookupCache(data) | ||
-- Normalize the input | -- Normalize the input name in one go (text normalization includes lowercasing) | ||
local normalized = NormalizationText.normalizeText(name) | local normalized = NormalizationText.normalizeText(name) | ||
-- | -- First, try a direct lookup with the normalized name | ||
local code = nameLookup[normalized] | local code = nameLookup[normalized] | ||
-- If not found, try looking up the diacritic-stripped version | |||
if not code then | |||
local stripped = DiacriticNormalization.removeDiacritics(name) | |||
if stripped ~= name then | |||
code = nameLookup[NormalizationText.normalizeText(stripped)] | |||
end | |||
end | |||
local result = nil | local result = nil | ||
if code | if code then | ||
result = data.countries[code] | result = data.countries[code] | ||
end | end | ||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
| Line 277: | Line 223: | ||
end | end | ||
local cacheKey = createCacheKey("getCountryCodeByName", name) | local cacheKey = createCacheKey("getCountryCodeByName", name) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 283: | Line 228: | ||
end | end | ||
local | local country = CountryData.getCountryByName(name) | ||
-- | -- The country object from the dataset doesn't inherently contain its own ISO code key. | ||
-- We must iterate through the dataset to find the key corresponding to the found country object. | |||
if country then | |||
-- | local data = loadData() | ||
for code, countryData in pairs(data.countries) do | |||
if countryData == country then | |||
if | functionCache[cacheKey] = code | ||
return code | |||
local | end | ||
if | |||
end | end | ||
end | end | ||
-- | -- If no country was found, or no matching code was found, cache and return nil. | ||
functionCache[cacheKey] = | functionCache[cacheKey] = nil | ||
return | return nil | ||
end | end | ||
-- (The rest of the functions remain unchanged for now, but will be updated in subsequent phases) | |||
function CountryData.normalizeCountryName(name) | function CountryData.normalizeCountryName(name) | ||
if not name or name == "" then | if not name or name == "" then | ||
return | return "(Unrecognized)" | ||
end | end | ||
local cacheKey = createCacheKey("normalizeCountryName", name) | local cacheKey = createCacheKey("normalizeCountryName", name) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 319: | Line 262: | ||
local result | local result | ||
if country then | if country and country.name then | ||
result = country.name | |||
result = country.name | |||
else | else | ||
result = "(Unrecognized)" | result = "(Unrecognized)" | ||
end | end | ||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getRegionByCountry(name) | function CountryData.getRegionByCountry(name) | ||
if not name or name == "" then | if not name or name == "" then | ||
return | return "(Unrecognized)" | ||
end | end | ||
local cacheKey = createCacheKey("getRegionByCountry", name) | local cacheKey = createCacheKey("getRegionByCountry", name) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 354: | Line 288: | ||
result = country.icann_region | result = country.icann_region | ||
else | else | ||
result = "(Unrecognized)" | result = "(Unrecognized)" | ||
end | end | ||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getCountriesByRegion(region) | function CountryData.getCountriesByRegion(region) | ||
if not region or region == "" then | if not region or region == "" then | ||
| Line 369: | Line 300: | ||
end | end | ||
local cacheKey = createCacheKey("getCountriesByRegion", region) | local cacheKey = createCacheKey("getCountriesByRegion", region) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 376: | Line 306: | ||
local data = loadData() | local data = loadData() | ||
local regionLookup = | local regionLookup = buildRegionLookupCache(data) | ||
local normalized = NormalizationText.normalizeText(region) | local normalized = NormalizationText.normalizeText(region) | ||
local regionCode = regionLookup[normalized] | local regionCode = regionLookup[normalized] | ||
local result = {} | local result = {} | ||
if regionCode and data.countries then | if regionCode and data.countries then | ||
for code, country in pairs(data.countries) do | for code, country in pairs(data.countries) do | ||
if country.icann_region == regionCode then | if country.icann_region == regionCode then | ||
result | table.insert(result, { | ||
code = code, | code = code, | ||
name = country.name | name = country.name | ||
} | }) | ||
end | end | ||
end | end | ||
end | end | ||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getAllCountryCodes() | function CountryData.getAllCountryCodes() | ||
local cacheKey = "getAllCountryCodes" | local cacheKey = "getAllCountryCodes" | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 422: | Line 335: | ||
local data = loadData() | local data = loadData() | ||
local result = {} | local result = {} | ||
if data and data.countries then | if data and data.countries then | ||
for code in pairs(data.countries) do | for code in pairs(data.countries) do | ||
result | table.insert(result, code) | ||
end | end | ||
end | end | ||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getAllCountryNames() | function CountryData.getAllCountryNames() | ||
local cacheKey = "getAllCountryNames" | local cacheKey = "getAllCountryNames" | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 453: | Line 353: | ||
local data = loadData() | local data = loadData() | ||
local result = {} | local result = {} | ||
if data and data.countries then | if data and data.countries then | ||
for _, country in pairs(data.countries) do | for _, country in pairs(data.countries) do | ||
table.insert(result, country.name) | |||
end | end | ||
end | end | ||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getCountryProperty(code, property) | function CountryData.getCountryProperty(code, property) | ||
if not code or code == "" or not property or property == "" then | if not code or code == "" or not property or property == "" then | ||
| Line 481: | Line 368: | ||
end | end | ||
local cacheKey = createCacheKey("getCountryProperty", code, property) | local cacheKey = createCacheKey("getCountryProperty", code, property) | ||
if propertyCache[cacheKey] ~= nil then | if propertyCache[cacheKey] ~= nil then | ||
| Line 488: | Line 374: | ||
local country = CountryData.getCountryByCode(code) | local country = CountryData.getCountryByCode(code) | ||
local result = country and country[property] or nil | |||
propertyCache[cacheKey] = result | propertyCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getCountryPropertyByName(name, property) | function CountryData.getCountryPropertyByName(name, property) | ||
if not name or name == "" or not property or property == "" then | if not name or name == "" or not property or property == "" then | ||
| Line 505: | Line 385: | ||
end | end | ||
local cacheKey = createCacheKey("getCountryPropertyByName", name, property) | local cacheKey = createCacheKey("getCountryPropertyByName", name, property) | ||
if propertyCache[cacheKey] ~= nil then | if propertyCache[cacheKey] ~= nil then | ||
| Line 512: | Line 391: | ||
local code = CountryData.getCountryCodeByName(name) | local code = CountryData.getCountryCodeByName(name) | ||
local result = nil | local result = nil | ||
if code then | if code then | ||
| Line 518: | Line 396: | ||
end | end | ||
propertyCache[cacheKey] = result | propertyCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getAvailableProperties(code) | function CountryData.getAvailableProperties(code) | ||
if not code or code == "" then | if not code or code == "" then | ||
| Line 529: | Line 405: | ||
end | end | ||
local cacheKey = createCacheKey("getAvailableProperties", code) | local cacheKey = createCacheKey("getAvailableProperties", code) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 535: | Line 410: | ||
end | end | ||
local | local country = CountryData.getCountryByCode(code) | ||
local properties = {} | local properties = {} | ||
if country then | |||
for property in pairs(country) do | |||
table.insert(properties, property) | |||
end | |||
end | end | ||
functionCache[cacheKey] = properties | functionCache[cacheKey] = properties | ||
return properties | return properties | ||
end | end | ||
function CountryData.getAllPropertyNames() | function CountryData.getAllPropertyNames() | ||
local cacheKey = "getAllPropertyNames" | local cacheKey = "getAllPropertyNames" | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 569: | Line 429: | ||
local data = loadData() | local data = loadData() | ||
local properties = {} | local properties = {} | ||
local seen = {} | local seen = {} | ||
if data and data.countries then | |||
for _, country in pairs(data.countries) do | |||
for property in pairs(country) do | |||
if not seen[property] then | |||
seen[property] = true | |||
table.insert(properties, property) | |||
end | |||
end | end | ||
end | end | ||
end | end | ||
functionCache[cacheKey] = properties | functionCache[cacheKey] = properties | ||
return properties | return properties | ||
end | end | ||
function CountryData.getSemanticCountryRegionProperties(countryValue) | |||
function CountryData.getSemanticCountryRegionProperties(countryValue | |||
local properties = {} | local properties = {} | ||
if not countryValue or countryValue == "" then | if not countryValue or countryValue == "" then | ||
return properties | return properties | ||
end | end | ||
local ConfigRepository = require('Module:ConfigRepository') | |||
local countryPropertyName = "Has country" | local countryPropertyName = ConfigRepository.getSemanticPropertyName("Has country") | ||
local regionPropertyName = "Has ICANN region" | local regionPropertyName = ConfigRepository.getSemanticPropertyName("Has ICANN region") | ||
if not countryPropertyName or not regionPropertyName then | |||
return properties | |||
end | |||
local countries = {} | local countries = {} | ||
for country in string.gmatch(countryValue, "[^;]+") do | for country in string.gmatch(countryValue, "[^;]+") do | ||
| Line 641: | Line 468: | ||
end | end | ||
for _, countryName in ipairs(countries) do | |||
local normalizedCountry = CountryData.normalizeCountryName(countryName) | |||
for _, | |||
local normalizedCountry = CountryData.normalizeCountryName( | |||
if normalizedCountry ~= "(Unrecognized)" then | if normalizedCountry ~= "(Unrecognized)" then | ||
-- | -- Initialize property tables if they don't exist | ||
properties[countryPropertyName] = properties[countryPropertyName] or {} | properties[countryPropertyName] = properties[countryPropertyName] or {} | ||
table.insert(properties[countryPropertyName], normalizedCountry) | table.insert(properties[countryPropertyName], normalizedCountry) | ||
local region = CountryData.getRegionByCountry(normalizedCountry) | |||
local region = CountryData.getRegionByCountry( | |||
if region and region ~= "(Unrecognized)" then | if region and region ~= "(Unrecognized)" then | ||
properties[regionPropertyName] = properties[regionPropertyName] or {} | properties[regionPropertyName] = properties[regionPropertyName] or {} | ||
table.insert(properties[regionPropertyName], region) | table.insert(properties[regionPropertyName], region) | ||
end | end | ||
end | end | ||
end | end | ||
return properties | return properties | ||
end | end | ||
function CountryData.exportAsJson() | function CountryData.exportAsJson() | ||
local data = loadData() | local data = loadData() | ||
if not data or not data.countries then | if not data or not data.countries then | ||
return '{}' | return '{}' | ||
end | end | ||
if mw.text and mw.text.jsonEncode then | if mw.text and mw.text.jsonEncode then | ||
local success, result = pcall(function() | local success, result = pcall(function() | ||
return mw.text.jsonEncode(data) | return mw.text.jsonEncode(data) | ||
end) | end) | ||
if success and result then | if success and result then | ||
return result | return result | ||
end | end | ||
end | end | ||
return '{}' | return '{}' | ||
end | end | ||
local function getRegionClass(region) | local function getRegionClass(region) | ||
if not region or region == "(Unrecognized)" then | if not region or region == "(Unrecognized)" then | ||
return "region-default" | return "region-default" | ||
end | end | ||
if region == "NA" or region == "LAC" then | if region == "NA" or region == "LAC" then | ||
return "region-americas" | return "region-americas" | ||
| Line 745: | Line 516: | ||
end | end | ||
function CountryData.formatCountryList(value) | function CountryData.formatCountryList(value) | ||
if not value or value == "" then return "" end | if not value or value == "" then return "" end | ||
-- | local ListGeneration = require('Module:ListGeneration') | ||
local | local itemsToProcess = {} | ||
for | |||
-- First, check if the entire string is a single, valid country. | |||
-- This correctly handles names like "Trinidad and Tobago". | |||
local singleCountryName = CountryData.normalizeCountryName(value) | |||
if singleCountryName ~= "(Unrecognized)" then | |||
-- If it's a valid country, treat it as a single item. | |||
table.insert(itemsToProcess, value) | |||
else | |||
-- If not a single country, assume it's a list and split ONLY by semicolon. | |||
-- This is safer than letting ListGeneration guess the delimiter. | |||
for item in string.gmatch(value, "[^;]+") do | |||
local trimmed = item:match("^%s*(.-)%s*$") | |||
if trimmed and trimmed ~= "" then | |||
table.insert(itemsToProcess, trimmed) | |||
end | |||
end | end | ||
end | end | ||
-- Define the item hook for country-specific formatting | |||
local | local function countryItemHook(countryName) | ||
local normalized = CountryData.normalizeCountryName(countryName) | |||
local normalized = CountryData.normalizeCountryName( | |||
if normalized ~= "(Unrecognized)" then | if normalized ~= "(Unrecognized)" then | ||
local countryRegion = CountryData.getRegionByCountry(normalized) | |||
-- Return a table with content and class for the li element | |||
return { | |||
content = normalized, | |||
class = getRegionClass(countryRegion) | |||
} | |||
end | end | ||
return nil -- Exclude unrecognized countries from the list | |||
end | end | ||
-- | -- Set the options for the list generation | ||
local options = { | |||
mode = 'bullet', | |||
listClass = 'template-list-country', | |||
itemHook = countryItemHook | |||
} | |||
-- Pass the pre-processed table of items to the list generator. | |||
return ListGeneration.createList(itemsToProcess, options) | |||
end | end | ||
function CountryData.formatCountries(value) | function CountryData.formatCountries(value) | ||
return CountryData.formatCountryList(value) | return CountryData.formatCountryList(value) | ||
end | end | ||
function CountryData.getCountriesForCategories(value) | function CountryData.getCountriesForCategories(value) | ||
if not value or value == "" then return {} end | if not value or value == "" then return {} end | ||
local countries = {} | local countries = {} | ||
for | for countryName in string.gmatch(value, "[^;]+") do | ||
local trimmed = | local trimmed = countryName:match("^%s*(.-)%s*$") | ||
if trimmed and trimmed ~= "" then | if trimmed and trimmed ~= "" then | ||
table.insert(countries, | local normalized = CountryData.normalizeCountryName(trimmed) | ||
if normalized ~= "(Unrecognized)" then | |||
table.insert(countries, normalized) | |||
end | |||
end | end | ||
end | end | ||
return countries | |||
end | end | ||
function CountryData.getFlagFileName(countryNameOrCode) | function CountryData.getFlagFileName(countryNameOrCode) | ||
if not countryNameOrCode or countryNameOrCode == '' then return nil end | if not countryNameOrCode or countryNameOrCode == '' then return nil end | ||
local inputName = countryNameOrCode:gsub('_', ' ') | local inputName = countryNameOrCode:gsub('_', ' ') | ||
local isoCode | local isoCode | ||
-- First, try to get the ISO code by treating inputName as a country name. | -- First, try to get the ISO code by treating inputName as a country name. | ||
isoCode = CountryData.getCountryCodeByName(inputName) | isoCode = CountryData.getCountryCodeByName(inputName) | ||
| Line 847: | Line 597: | ||
-- it might be an ISO code already. Let's validate it. | -- it might be an ISO code already. Let's validate it. | ||
if not isoCode and #inputName == 2 then | if not isoCode and #inputName == 2 then | ||
if CountryData.getCountryByCode(inputName) then | if CountryData.getCountryByCode(inputName) then | ||
isoCode = inputName | isoCode = inputName:upper() | ||
end | end | ||
end | end | ||
if not isoCode or #isoCode ~= 2 then return nil end | |||
if not isoCode or | |||
return 'Flag-' .. string.lower(isoCode) .. '.svg' | return 'Flag-' .. string.lower(isoCode) .. '.svg' | ||
end | end | ||
return CountryData | return CountryData | ||
Latest revision as of 02:57, 25 August 2025
Documentation for this module may be created at Module:CountryData/doc
--[[
* Name: CountryData
* Author: Mark W. Datysgeld
* Description: Unified module for country data management with JSON loading, normalization, region mapping, and Semantic MediaWiki integration
* Notes: Loads from Data:CountryDataset.json; normalizes country names to canonical forms; maps countries to ICANN regions; provides extensible property access; formats country lists with region-specific emoji styling; processes countries for category assignment
]]
-- Dependencies
local DiacriticNormalization = require('Module:NormalizationDiacritic')
local NormalizationText = require('Module:NormalizationText')
local loader = require('Module:DatasetLoader')
-- Module-level cache tables for improved performance.
-- These are file-scope locals; the functions in this file read and write them
-- directly, and resetCaches() puts them back to the initial values shown here.
-- Dataset table assembled by loadData(); nil until the first successful load.
local dataCache = nil
-- Normalized country name -> country code map from buildNameLookupCache();
-- nil until it has been built.
local nameLookupCache = nil
-- Normalized region name -> region code map from buildRegionLookupCache();
-- nil until it has been built.
local regionLookupCache = nil
-- Memo table for property lookups (emptied by resetCaches()).
local propertyCache = {}
-- Memo table for function results, keyed by strings from createCacheKey().
local functionCache = {}
-- Default data structure to use if JSON loading fails.
-- It carries the same four top-level fields that loadData() copies out of the
-- loaded dataset: schema_version, last_updated, countries, icann_regions.
local DEFAULT_DATA = {
schema_version = 1,
-- Evaluated once, when this module is loaded; the leading "!" makes os.date
-- format the timestamp in UTC.
last_updated = os.date('!%Y-%m-%dT%H:%M:%SZ'),
countries = {},
icann_regions = {}
}
--------------------------------------------------------------------------------
-- Helper Functions
--------------------------------------------------------------------------------
-- Create a cache key from a function name and arguments.
-- Each argument is converted with tostring() and the pieces are joined with
-- ":", so createCacheKey("f", "US", "name") yields "f:US:name".
--
-- The argument count is taken from select("#", ...) instead of #{...}: the
-- length of a table with nil holes is unspecified, which could silently drop
-- nil arguments and make two different calls share one key. A nil argument is
-- now always rendered as the text "nil".
--
-- @param funcName Name of the memoized function (used as the key prefix)
-- @param ...      Arguments of the memoized call (any type)
-- @return string  The ":"-joined key
local function createCacheKey(funcName, ...)
    local argCount = select("#", ...)
    local keyParts = { funcName }
    for i = 1, argCount do
        -- select(i, ...) returns arguments i..n; the extra parentheses keep
        -- only the first of them.
        keyParts[#keyParts + 1] = tostring((select(i, ...)))
    end
    return table.concat(keyParts, ":")
end
-- Return every module-level cache to its initial state (useful for testing).
-- After this call the dataset and both lookup tables are rebuilt on next use,
-- and the two memo tables start out empty.
local function resetCaches()
    -- Lazily built values: nil means "not loaded / not built yet".
    dataCache, nameLookupCache, regionLookupCache = nil, nil, nil
    -- Memo tables are swapped for fresh empty tables rather than cleared.
    propertyCache, functionCache = {}, {}
end
--------------------------------------------------------------------------------
-- Data Loading and Cache Building Layer (Refactored)
--------------------------------------------------------------------------------
-- Data loading function using DatasetLoader.
-- Fetches the 'CountryDataset' dataset once, copies the four fields this
-- module uses into a fresh table, and memoizes that table in dataCache so
-- later calls return the same table without touching the loader again.
--
-- If the loader hands back something that is not a table, the empty
-- DEFAULT_DATA skeleton is used instead of failing on `raw.countries`.
-- Likewise, a `countries` or `icann_regions` field that is not a table is
-- replaced by an empty table so callers can always iterate them with pairs().
-- Errors raised by loader.get itself are not caught here.
--
-- @return table { countries, icann_regions, schema_version, last_updated }
local function loadData()
    if dataCache then
        return dataCache
    end

    local raw = loader.get('CountryDataset')
    if type(raw) ~= "table" then
        raw = DEFAULT_DATA
    end

    local countries = raw.countries
    if type(countries) ~= "table" then
        countries = {}
    end

    local regions = raw.icann_regions
    if type(regions) ~= "table" then
        regions = {}
    end

    dataCache = {
        countries = countries,
        icann_regions = regions,
        schema_version = raw.schema_version,
        last_updated = raw.last_updated
    }
    return dataCache
end
-- Builds the primary name-to-code lookup cache.
-- The result maps text-normalized spellings to the key under which the
-- country is stored in data.countries. For every country the canonical name
-- (`name`, else `canonical_name`) and each entry of `variations` is
-- registered, together with its diacritic-stripped form when stripping
-- changes the spelling. The table is built once and memoized in
-- nameLookupCache; missing data yields (and memoizes) an empty table.
--
-- @param data table|nil Dataset table with a `countries` field
-- @return table Map of normalized name -> country code
local function buildNameLookupCache(data)
    if nameLookupCache then
        return nameLookupCache
    end

    local index = {}
    local countries = data and data.countries

    if countries then
        -- Register one spelling: first as written, then without diacritics
        -- (only when that actually differs from the original spelling).
        local function register(spelling, isoCode)
            index[NormalizationText.normalizeText(spelling)] = isoCode
            local plain = DiacriticNormalization.removeDiacritics(spelling)
            if plain ~= spelling then
                index[NormalizationText.normalizeText(plain)] = isoCode
            end
        end

        for isoCode, entry in pairs(countries) do
            -- Canonical name first, then every listed variation
            local primary = entry.name or entry.canonical_name
            if primary then
                register(primary, isoCode)
            end

            local aliases = entry.variations
            if type(aliases) == "table" then
                for _, alias in pairs(aliases) do
                    register(alias, isoCode)
                end
            end
        end
    end

    nameLookupCache = index
    return index
end
-- Builds the region lookup cache.
-- Maps the text-normalized `name` of every entry in data.icann_regions, and
-- each of its `variations`, to the key that entry is stored under. Built once
-- and memoized in regionLookupCache; missing data yields (and memoizes) an
-- empty table.
--
-- @param data table|nil Dataset table with an `icann_regions` field
-- @return table Map of normalized region name -> region code
local function buildRegionLookupCache(data)
    if regionLookupCache then
        return regionLookupCache
    end

    local index = {}
    local regions = data and data.icann_regions

    if regions then
        for regionCode, entry in pairs(regions) do
            local label = entry.name
            if label then
                index[NormalizationText.normalizeText(label)] = regionCode
            end

            local aliases = entry.variations
            if type(aliases) == "table" then
                for _, alias in pairs(aliases) do
                    index[NormalizationText.normalizeText(alias)] = regionCode
                end
            end
        end
    end

    regionLookupCache = index
    return index
end
--------------------------------------------------------------------------------
-- Core API Functions (Public Interface)
--------------------------------------------------------------------------------
local CountryData = {}
-- Load data and initialize caches.
-- Public wrapper around the file-local loadData(); returns the memoized
-- dataset table.
-- @param frame Accepted but not read by this function
-- @return table The dataset table produced by loadData()
function CountryData.loadData(frame)
    local dataset = loadData()
    return dataset
end
-- Reset all caches (primarily for testing).
-- Public wrapper around the file-local resetCaches().
-- @return boolean Always true
function CountryData.resetCaches()
    resetCaches()
    return true
end
-- Get country data by ISO code.
-- The code is upper-cased before it is looked up in the dataset's `countries`
-- table. Found entries are memoized in functionCache under the code exactly
-- as it was passed in.
-- @param code string Country code in any letter case
-- @return table|nil The country entry, or nil for empty input / unknown code
function CountryData.getCountryByCode(code)
    if not code or code == "" then
        return nil
    end

    local memoKey = createCacheKey("getCountryByCode", code)
    local memoized = functionCache[memoKey]
    if memoized ~= nil then
        return memoized
    end

    local dataset = loadData()
    local isoCode = code:upper() -- Standardize code

    local entry = nil
    if dataset and dataset.countries then
        -- `or nil` folds a falsy stored value into nil
        entry = dataset.countries[isoCode] or nil
    end

    functionCache[memoKey] = entry
    return entry
end
-- Get country data by name.
-- Resolution order:
--   1. the text-normalized input, looked up in the name lookup cache;
--   2. if that misses and stripping diacritics changes the input, the
--      text-normalized stripped form.
-- Found entries are memoized in functionCache under the name as passed in.
-- @param name string Country name or known variation
-- @return table|nil The country entry, or nil for empty input / no match
function CountryData.getCountryByName(name)
    if not name or name == "" then
        return nil
    end

    local memoKey = createCacheKey("getCountryByName", name)
    local memoized = functionCache[memoKey]
    if memoized ~= nil then
        return memoized
    end

    local dataset = loadData()
    local index = buildNameLookupCache(dataset)

    -- Direct hit on the normalized spelling
    local isoCode = index[NormalizationText.normalizeText(name)]

    -- Fallback: retry without diacritics, but only when that is a new spelling
    if not isoCode then
        local plain = DiacriticNormalization.removeDiacritics(name)
        if plain ~= name then
            isoCode = index[NormalizationText.normalizeText(plain)]
        end
    end

    local entry
    if isoCode then
        entry = dataset.countries[isoCode]
    end

    functionCache[memoKey] = entry
    return entry
end
-- Get country code by name.
-- The name lookup cache already maps normalized names to codes, so the code
-- is read from it directly instead of fetching the country entry and then
-- scanning every country to find which key holds that entry.
--
-- Resolution order (the same two steps as getCountryByName):
--   1. the text-normalized input;
--   2. if that misses and stripping diacritics changes the input, the
--      text-normalized stripped form.
--
-- Both hits and misses are memoized in functionCache. Assigning nil to a
-- table slot stores nothing, so a miss is recorded as `false` and turned back
-- into nil when it is read.
--
-- @param name string Country name or known variation
-- @return string|nil The country code, or nil for empty input / no match
function CountryData.getCountryCodeByName(name)
    if not name or name == "" then
        return nil
    end

    local cacheKey = createCacheKey("getCountryCodeByName", name)
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        -- `false` marks a remembered miss
        return cached or nil
    end

    local data = loadData()
    local nameLookup = buildNameLookupCache(data)

    local code = nameLookup[NormalizationText.normalizeText(name)]
    if not code then
        local stripped = DiacriticNormalization.removeDiacritics(name)
        if stripped ~= name then
            code = nameLookup[NormalizationText.normalizeText(stripped)]
        end
    end

    -- Only report a code that still resolves to an entry in the dataset
    if code and not data.countries[code] then
        code = nil
    end

    functionCache[cacheKey] = code or false
    return code
end
-- Map any recognised spelling of a country to the `name` field of its record.
-- @param name  country name as entered
-- @return the record's name, or "(Unrecognized)" when the input is missing,
--         empty, unknown, or its record has no name
function CountryData.normalizeCountryName(name)
    if name and name ~= "" then
        local key = createCacheKey("normalizeCountryName", name)
        local cached = functionCache[key]
        if cached ~= nil then
            return cached
        end

        local canonical = "(Unrecognized)"
        local record = CountryData.getCountryByName(name)
        if record and record.name then
            canonical = record.name
        end

        functionCache[key] = canonical
        return canonical
    end
    return "(Unrecognized)"
end
-- Return the `icann_region` field of the country matching the given name.
-- @param name  country name as entered
-- @return the record's icann_region, or "(Unrecognized)" when the input is
--         missing, empty, unknown, or its record has no icann_region
function CountryData.getRegionByCountry(name)
    if name and name ~= "" then
        local key = createCacheKey("getRegionByCountry", name)
        local cached = functionCache[key]
        if cached ~= nil then
            return cached
        end

        local regionValue = "(Unrecognized)"
        local record = CountryData.getCountryByName(name)
        if record and record.icann_region then
            regionValue = record.icann_region
        end

        functionCache[key] = regionValue
        return regionValue
    end
    return "(Unrecognized)"
end
-- List the countries whose icann_region equals the code the given region
-- name resolves to in the region lookup cache.
-- @param region  region name or variation
-- @return array of { code = ..., name = ... } tables (order follows pairs(),
--         so it is unspecified); empty when the region is missing or unknown
function CountryData.getCountriesByRegion(region)
    if not region or region == "" then
        return {}
    end

    local key = createCacheKey("getCountriesByRegion", region)
    local cached = functionCache[key]
    if cached ~= nil then
        return cached
    end

    local dataset = loadData()
    local lookup = buildRegionLookupCache(dataset)
    local wantedCode = lookup[NormalizationText.normalizeText(region)]

    local matches = {}
    if wantedCode and dataset.countries then
        for isoCode, record in pairs(dataset.countries) do
            if record.icann_region == wantedCode then
                matches[#matches + 1] = { code = isoCode, name = record.name }
            end
        end
    end

    functionCache[key] = matches
    return matches
end
-- Collect every key of data.countries into an array.
-- The array is cached and returned as-is on later calls.
-- @return array of country codes (order follows pairs(), so it is unspecified)
function CountryData.getAllCountryCodes()
    local key = "getAllCountryCodes"
    local cached = functionCache[key]
    if cached ~= nil then
        return cached
    end

    local codes = {}
    local dataset = loadData()
    local countries = dataset and dataset.countries
    if countries then
        for isoCode in pairs(countries) do
            codes[#codes + 1] = isoCode
        end
    end

    functionCache[key] = codes
    return codes
end
-- Collect the `name` field of every record in data.countries into an array.
-- The array is cached and returned as-is on later calls.
-- @return array of country names (order follows pairs(), so it is unspecified)
function CountryData.getAllCountryNames()
    local key = "getAllCountryNames"
    local cached = functionCache[key]
    if cached ~= nil then
        return cached
    end

    local names = {}
    local dataset = loadData()
    local countries = dataset and dataset.countries
    if countries then
        for _, record in pairs(countries) do
            table.insert(names, record.name)
        end
    end

    functionCache[key] = names
    return names
end
-- Read a single property from the country record stored under a code.
-- @param code      country code (resolved through getCountryByCode)
-- @param property  key to read from the country record
-- @return the stored value (including a boolean false), or nil when either
--         argument is missing/empty, the code is unknown, or the key is absent
function CountryData.getCountryProperty(code, property)
    if not code or code == "" or not property or property == "" then
        return nil
    end

    local cacheKey = createCacheKey("getCountryProperty", code, property)
    if propertyCache[cacheKey] ~= nil then
        return propertyCache[cacheKey]
    end

    local country = CountryData.getCountryByCode(code)

    -- Explicit branch instead of `country and country[property] or nil`:
    -- the and/or idiom would turn a stored `false` into nil.
    local result = nil
    if country then
        result = country[property]
    end

    propertyCache[cacheKey] = result
    return result
end
-- Read a single property from the country record matching a name.
-- The name is first resolved to a code, then the lookup is handed to
-- getCountryProperty.
-- @param name      country name as entered
-- @param property  key to read from the country record
-- @return the property value, or nil when an argument is missing/empty or the
--         name cannot be resolved
function CountryData.getCountryPropertyByName(name, property)
    local haveName = name and name ~= ""
    local haveProperty = property and property ~= ""
    if not (haveName and haveProperty) then
        return nil
    end

    local key = createCacheKey("getCountryPropertyByName", name, property)
    local cached = propertyCache[key]
    if cached ~= nil then
        return cached
    end

    local value = nil
    local isoCode = CountryData.getCountryCodeByName(name)
    if isoCode then
        value = CountryData.getCountryProperty(isoCode, property)
    end

    propertyCache[key] = value
    return value
end
-- List the keys present on the country record stored under a code.
-- @param code  country code (resolved through getCountryByCode)
-- @return array of property names (order follows pairs(), so it is
--         unspecified); empty when the code is missing, empty or unknown
function CountryData.getAvailableProperties(code)
    if not code or code == "" then
        return {}
    end

    local key = createCacheKey("getAvailableProperties", code)
    local cached = functionCache[key]
    if cached ~= nil then
        return cached
    end

    local names = {}
    local record = CountryData.getCountryByCode(code)
    if record then
        for fieldName in pairs(record) do
            names[#names + 1] = fieldName
        end
    end

    functionCache[key] = names
    return names
end
-- Build the union of property names used by any record in data.countries.
-- Each name appears once; the array is cached and returned as-is afterwards.
-- @return array of distinct property names (order unspecified)
function CountryData.getAllPropertyNames()
    local key = "getAllPropertyNames"
    local cached = functionCache[key]
    if cached ~= nil then
        return cached
    end

    local names = {}
    local alreadyListed = {}
    local dataset = loadData()
    local countries = dataset and dataset.countries
    if countries then
        for _, record in pairs(countries) do
            for fieldName in pairs(record) do
                if not alreadyListed[fieldName] then
                    alreadyListed[fieldName] = true
                    names[#names + 1] = fieldName
                end
            end
        end
    end

    functionCache[key] = names
    return names
end
-- Build a table of semantic property values from a semicolon-separated
-- country string.
-- Property names are fetched from Module:ConfigRepository for "Has country"
-- and "Has ICANN region". Each recognised country contributes its normalized
-- name, plus its region when that is recognised too.
-- @param countryValue  semicolon-separated list of country names
-- @return table mapping property name -> array of values; empty when the
--         input is missing/empty or either property name is unavailable
function CountryData.getSemanticCountryRegionProperties(countryValue)
    local properties = {}
    if not countryValue or countryValue == "" then
        return properties
    end

    local ConfigRepository = require('Module:ConfigRepository')
    local countryPropertyName = ConfigRepository.getSemanticPropertyName("Has country")
    local regionPropertyName = ConfigRepository.getSemanticPropertyName("Has ICANN region")
    if not (countryPropertyName and regionPropertyName) then
        return properties
    end

    -- Append a value to the array stored under `key`, creating it on first use.
    local function append(key, item)
        local bucket = properties[key]
        if not bucket then
            bucket = {}
            properties[key] = bucket
        end
        bucket[#bucket + 1] = item
    end

    for piece in string.gmatch(countryValue, "[^;]+") do
        local entry = piece:match("^%s*(.-)%s*$")
        if entry and entry ~= "" then
            local canonical = CountryData.normalizeCountryName(entry)
            if canonical ~= "(Unrecognized)" then
                append(countryPropertyName, canonical)

                local region = CountryData.getRegionByCountry(canonical)
                if region and region ~= "(Unrecognized)" then
                    append(regionPropertyName, region)
                end
            end
        end
    end

    return properties
end
-- Serialise the loaded dataset with mw.text.jsonEncode.
-- @return the JSON string, or '{}' when the data is unavailable, the encoder
--         is missing, or encoding raises an error
function CountryData.exportAsJson()
    local dataset = loadData()
    local encodable = dataset and dataset.countries
        and mw.text and mw.text.jsonEncode

    if encodable then
        -- pcall keeps an encoder error from propagating to the caller.
        local ok, json = pcall(mw.text.jsonEncode, dataset)
        if ok and json then
            return json
        end
    end

    return '{}'
end
-- Pick the CSS class used to style a list item for a region code.
-- "NA" and "LAC" share the Americas class, "AP" gets the Asia-Pacific class,
-- a missing or "(Unrecognized)" region gets the default class, and every
-- other value falls through to the Europe/Africa class.
local function getRegionClass(region)
    if region == "NA" or region == "LAC" then
        return "region-americas"
    end
    if region == "AP" then
        return "region-asia-pacific"
    end
    if not region or region == "(Unrecognized)" then
        return "region-default"
    end
    return "region-europe-africa"
end
-- Render a country value as a bullet list via Module:ListGeneration.
-- If the whole value is itself a recognised country (e.g. a name containing
-- "and"), it is kept as one item; otherwise it is split on semicolons only.
-- Each item is normalized by the item hook; unrecognised items are dropped.
-- @param value  single country name or semicolon-separated list
-- @return the result of ListGeneration.createList, or "" for missing/empty input
function CountryData.formatCountryList(value)
    if not value or value == "" then
        return ""
    end

    local ListGeneration = require('Module:ListGeneration')

    -- Decide between "one country" and "semicolon list" before any splitting,
    -- so multi-word names are never broken apart by a guessed delimiter.
    local entries = {}
    if CountryData.normalizeCountryName(value) == "(Unrecognized)" then
        for piece in string.gmatch(value, "[^;]+") do
            local entry = piece:match("^%s*(.-)%s*$")
            if entry and entry ~= "" then
                entries[#entries + 1] = entry
            end
        end
    else
        entries[#entries + 1] = value
    end

    -- Item hook: returns the li content and class for a recognised country,
    -- or nil so the list generator leaves the item out.
    local function renderCountryItem(countryName)
        local canonical = CountryData.normalizeCountryName(countryName)
        if canonical == "(Unrecognized)" then
            return nil
        end
        local regionCode = CountryData.getRegionByCountry(canonical)
        return {
            content = canonical,
            class = getRegionClass(regionCode)
        }
    end

    return ListGeneration.createList(entries, {
        mode = 'bullet',
        listClass = 'template-list-country',
        itemHook = renderCountryItem
    })
end
-- Alias for CountryData.formatCountryList; forwards `value` unchanged and
-- returns its result.
function CountryData.formatCountries(value)
return CountryData.formatCountryList(value)
end
-- Turn a semicolon-separated country string into an array of normalized
-- country names. Blank entries and unrecognised names are left out.
-- @param value  semicolon-separated list of country names
-- @return array of normalized names, in input order; empty for missing/empty input
function CountryData.getCountriesForCategories(value)
    local recognised = {}
    if not value or value == "" then
        return recognised
    end

    for piece in string.gmatch(value, "[^;]+") do
        local entry = piece:match("^%s*(.-)%s*$")
        if entry and entry ~= "" then
            local canonical = CountryData.normalizeCountryName(entry)
            if canonical ~= "(Unrecognized)" then
                recognised[#recognised + 1] = canonical
            end
        end
    end

    return recognised
end
-- Build the flag file name ('Flag-xx.svg') for a country name or code.
-- Underscores in the input are treated as spaces. The input is tried as a
-- country name first; a two-character input that is not a known name is then
-- checked as a code.
-- @param countryNameOrCode  country name or two-character code
-- @return the file name, or nil when no two-character code can be determined
function CountryData.getFlagFileName(countryNameOrCode)
    if not countryNameOrCode or countryNameOrCode == '' then
        return nil
    end

    -- Parentheses keep only the first gsub result (the substituted string).
    local candidate = (countryNameOrCode:gsub('_', ' '))

    local isoCode = CountryData.getCountryCodeByName(candidate)
    if not isoCode and #candidate == 2 and CountryData.getCountryByCode(candidate) then
        isoCode = candidate:upper()
    end

    if isoCode and #isoCode == 2 then
        return 'Flag-' .. string.lower(isoCode) .. '.svg'
    end
    return nil
end
-- Export the public API table as the module's value.
return CountryData