Module:CountryData: Difference between revisions
// via Wikitext Extension for VSCode |
// via Wikitext Extension for VSCode |
||
| (25 intermediate revisions by the same user not shown) | |||
| Line 1: | Line 1: | ||
-- | --[[ | ||
* Name: CountryData | |||
* Author: Mark W. Datysgeld | |||
* Description: Unified module for country data management with JSON loading, normalization, region mapping, and Semantic MediaWiki integration | |||
* Notes: Loads from Data:CountryDataset.json; normalizes country names to canonical forms; maps countries to ICANN regions; provides extensible property access; formats country lists with region-specific emoji styling; processes countries for category assignment | |||
]] | |||
-- Dependencies | -- Dependencies | ||
| Line 45: | Line 40: | ||
end | end | ||
-- Clear every module-level cache (useful for testing).
-- The lazily built values go back to nil, so loadData and the lookup
-- builders recompute them on their next call; the two memoisation tables
-- are replaced with fresh empty tables.
local function resetCaches()
    dataCache, nameLookupCache, regionLookupCache = nil, nil, nil
    propertyCache, functionCache = {}, {}
end
-------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | ||
-- Data Loading Layer | -- Data Loading and Cache Building Layer (Refactored) | ||
-------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | ||
-- Load the country dataset via loader.get('CountryDataset'), memoised in
-- dataCache so the loader is only consulted on the first call.
-- Returns a table with the fields `countries`, `icann_regions`,
-- `schema_version` and `last_updated`; the first two default to empty
-- tables when the raw dataset does not provide them.
local function loadData()
    if not dataCache then
        local raw = loader.get('CountryDataset')
        dataCache = {
            countries = raw.countries or {},
            icann_regions = raw.icann_regions or {},
            schema_version = raw.schema_version,
            last_updated = raw.last_updated
        }
    end
    return dataCache
end
-- Register one spelling of a country in `lookup`.
-- Adds the normalized spelling and, when stripping diacritics changes the
-- text, the normalized diacritic-stripped spelling as well; both map to `code`.
-- Entries that are not non-empty strings are skipped, so a malformed dataset
-- value (number, boolean, nested table, "") never reaches the normalizers.
local function addNameToLookup(lookup, name, code)
    if type(name) ~= "string" or name == "" then
        return
    end

    lookup[NormalizationText.normalizeText(name)] = code

    local stripped = DiacriticNormalization.removeDiacritics(name)
    if stripped ~= name then -- only add if it's different
        lookup[NormalizationText.normalizeText(stripped)] = code
    end
end

-- Builds the primary name-to-code lookup cache.
-- @param data  dataset table as returned by loadData (needs `countries`)
-- @return      table mapping normalized country spellings to the key that
--              country has in data.countries; memoised in nameLookupCache
--
-- For every country the canonical name (`name`, falling back to
-- `canonical_name`) is registered first, then each entry of `variations`.
-- NOTE: countries are visited with pairs(), whose order is unspecified, so
-- if two countries share a normalized spelling the winner is unspecified.
local function buildNameLookupCache(data)
    if nameLookupCache then
        return nameLookupCache
    end

    if not data or not data.countries then
        nameLookupCache = {}
        return nameLookupCache
    end

    local lookup = {}
    for code, country in pairs(data.countries) do
        -- 1 + 2. Canonical name and its diacritic-stripped form
        addNameToLookup(lookup, country.name or country.canonical_name, code)

        -- 3. All variations, each with its diacritic-stripped form
        if type(country.variations) == "table" then
            for _, variation in pairs(country.variations) do
                addNameToLookup(lookup, variation, code)
            end
        end
    end

    nameLookupCache = lookup
    return lookup
end
-- | -- Builds the region lookup cache. | ||
local function | local function buildRegionLookupCache(data) | ||
if regionLookupCache then | if regionLookupCache then | ||
return regionLookupCache | return regionLookupCache | ||
end | end | ||
if not data or not data.icann_regions then | if not data or not data.icann_regions then | ||
regionLookupCache = {} | regionLookupCache = {} | ||
return regionLookupCache | return regionLookupCache | ||
end | end | ||
local lookup = {} | local lookup = {} | ||
for code, region in pairs(data.icann_regions) do | for code, region in pairs(data.icann_regions) do | ||
if region.name then | if region.name then | ||
lookup[NormalizationText.normalizeText(region.name)] = code | lookup[NormalizationText.normalizeText(region.name)] = code | ||
end | end | ||
if region.variations and type(region.variations) == "table" then | if region.variations and type(region.variations) == "table" then | ||
for _, variation in pairs(region.variations) do | for _, variation in pairs(region.variations) do | ||
lookup[NormalizationText.normalizeText(variation)] = code | lookup[NormalizationText.normalizeText(variation)] = code | ||
| Line 158: | Line 138: | ||
end | end | ||
end | end | ||
regionLookupCache = lookup | regionLookupCache = lookup | ||
return lookup | return lookup | ||
end | end | ||
-------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | ||
-- Core API Functions | -- Core API Functions (Public Interface) | ||
-------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | ||
| Line 195: | Line 151: | ||
-- Public entry point for loading the dataset and priming the data cache.
-- Delegates to the module-private loadData; the `frame` argument is accepted
-- but not used.
function CountryData.loadData(frame)
    local dataset = loadData()
    return dataset
end
| Line 204: | Line 160: | ||
end | end | ||
-- Get country data by ISO code | -- Get country data by ISO code (Refactored to use new cache logic) | ||
function CountryData.getCountryByCode(code) | function CountryData.getCountryByCode(code) | ||
if not code or code == "" then | if not code or code == "" then | ||
| Line 210: | Line 166: | ||
end | end | ||
local cacheKey = createCacheKey("getCountryByCode", code) | local cacheKey = createCacheKey("getCountryByCode", code) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 217: | Line 172: | ||
local data = loadData() | local data = loadData() | ||
code = code:upper() -- Standardize code | |||
local result = data and data.countries and data.countries[code] or nil | |||
local result = | |||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
-- Get country data by name ( | -- Get country data by name (Refactored to use new cache logic) | ||
function CountryData.getCountryByName(name) | function CountryData.getCountryByName(name) | ||
if not name or name == "" then | if not name or name == "" then | ||
return nil | return nil | ||
end | end | ||
local cacheKey = createCacheKey("getCountryByName", name) | local cacheKey = createCacheKey("getCountryByName", name) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
return functionCache[cacheKey] | return functionCache[cacheKey] | ||
end | end | ||
local data = loadData() | local data = loadData() | ||
local nameLookup = | local nameLookup = buildNameLookupCache(data) | ||
-- Normalize the input | -- Normalize the input name in one go (text normalization includes lowercasing) | ||
local normalized = NormalizationText.normalizeText(name) | local normalized = NormalizationText.normalizeText(name) | ||
-- | -- First, try a direct lookup with the normalized name | ||
local code = nameLookup[normalized] | local code = nameLookup[normalized] | ||
-- If not found, try looking up the diacritic-stripped version | |||
if not code then | |||
local stripped = DiacriticNormalization.removeDiacritics(name) | |||
if stripped ~= name then | |||
code = nameLookup[NormalizationText.normalizeText(stripped)] | |||
end | |||
end | |||
local result = nil | local result = nil | ||
if code | if code then | ||
result = data.countries[code] | result = data.countries[code] | ||
end | end | ||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
| Line 277: | Line 223: | ||
end | end | ||
local cacheKey = createCacheKey("getCountryCodeByName", name) | local cacheKey = createCacheKey("getCountryCodeByName", name) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 283: | Line 228: | ||
end | end | ||
local | local country = CountryData.getCountryByName(name) | ||
-- | -- The country object from the dataset doesn't inherently contain its own ISO code key. | ||
-- We must iterate through the dataset to find the key corresponding to the found country object. | |||
if country then | |||
-- | local data = loadData() | ||
for code, countryData in pairs(data.countries) do | |||
if countryData == country then | |||
if | functionCache[cacheKey] = code | ||
return code | |||
local | end | ||
if | |||
end | end | ||
end | end | ||
-- | -- If no country was found, or no matching code was found, cache and return nil. | ||
functionCache[cacheKey] = | functionCache[cacheKey] = nil | ||
return | return nil | ||
end | end | ||
-- (The rest of the functions remain unchanged for now, but will be updated in subsequent phases) | |||
function CountryData.normalizeCountryName(name) | function CountryData.normalizeCountryName(name) | ||
if not name or name == "" then | if not name or name == "" then | ||
return | return "(Unrecognized)" | ||
end | end | ||
local cacheKey = createCacheKey("normalizeCountryName", name) | local cacheKey = createCacheKey("normalizeCountryName", name) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 319: | Line 262: | ||
local result | local result | ||
if country then | if country and country.name then | ||
result = country.name | |||
result = country.name | |||
else | else | ||
result = "(Unrecognized)" | result = "(Unrecognized)" | ||
end | end | ||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getRegionByCountry(name) | function CountryData.getRegionByCountry(name) | ||
if not name or name == "" then | if not name or name == "" then | ||
return | return "(Unrecognized)" | ||
end | end | ||
local cacheKey = createCacheKey("getRegionByCountry", name) | local cacheKey = createCacheKey("getRegionByCountry", name) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 354: | Line 288: | ||
result = country.icann_region | result = country.icann_region | ||
else | else | ||
result = "(Unrecognized)" | result = "(Unrecognized)" | ||
end | end | ||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getCountriesByRegion(region) | function CountryData.getCountriesByRegion(region) | ||
if not region or region == "" then | if not region or region == "" then | ||
| Line 369: | Line 300: | ||
end | end | ||
local cacheKey = createCacheKey("getCountriesByRegion", region) | local cacheKey = createCacheKey("getCountriesByRegion", region) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 376: | Line 306: | ||
local data = loadData() | local data = loadData() | ||
local regionLookup = | local regionLookup = buildRegionLookupCache(data) | ||
local normalized = NormalizationText.normalizeText(region) | local normalized = NormalizationText.normalizeText(region) | ||
local regionCode = regionLookup[normalized] | local regionCode = regionLookup[normalized] | ||
local result = {} | local result = {} | ||
if regionCode and data.countries then | if regionCode and data.countries then | ||
for code, country in pairs(data.countries) do | for code, country in pairs(data.countries) do | ||
if country.icann_region == regionCode then | if country.icann_region == regionCode then | ||
result | table.insert(result, { | ||
code = code, | code = code, | ||
name = country.name | name = country.name | ||
} | }) | ||
end | end | ||
end | end | ||
end | end | ||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getAllCountryCodes() | function CountryData.getAllCountryCodes() | ||
local cacheKey = "getAllCountryCodes" | local cacheKey = "getAllCountryCodes" | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 422: | Line 335: | ||
local data = loadData() | local data = loadData() | ||
local result = {} | local result = {} | ||
if data and data.countries then | if data and data.countries then | ||
for code in pairs(data.countries) do | for code in pairs(data.countries) do | ||
result | table.insert(result, code) | ||
end | end | ||
end | end | ||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getAllCountryNames() | function CountryData.getAllCountryNames() | ||
local cacheKey = "getAllCountryNames" | local cacheKey = "getAllCountryNames" | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 453: | Line 353: | ||
local data = loadData() | local data = loadData() | ||
local result = {} | local result = {} | ||
if data and data.countries then | if data and data.countries then | ||
for _, country in pairs(data.countries) do | for _, country in pairs(data.countries) do | ||
table.insert(result, country.name) | |||
end | end | ||
end | end | ||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getCountryProperty(code, property) | function CountryData.getCountryProperty(code, property) | ||
if not code or code == "" or not property or property == "" then | if not code or code == "" or not property or property == "" then | ||
| Line 481: | Line 368: | ||
end | end | ||
local cacheKey = createCacheKey("getCountryProperty", code, property) | local cacheKey = createCacheKey("getCountryProperty", code, property) | ||
if propertyCache[cacheKey] ~= nil then | if propertyCache[cacheKey] ~= nil then | ||
| Line 488: | Line 374: | ||
local country = CountryData.getCountryByCode(code) | local country = CountryData.getCountryByCode(code) | ||
local result = country and country[property] or nil | |||
propertyCache[cacheKey] = result | propertyCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getCountryPropertyByName(name, property) | function CountryData.getCountryPropertyByName(name, property) | ||
if not name or name == "" or not property or property == "" then | if not name or name == "" or not property or property == "" then | ||
| Line 505: | Line 385: | ||
end | end | ||
local cacheKey = createCacheKey("getCountryPropertyByName", name, property) | local cacheKey = createCacheKey("getCountryPropertyByName", name, property) | ||
if propertyCache[cacheKey] ~= nil then | if propertyCache[cacheKey] ~= nil then | ||
| Line 512: | Line 391: | ||
local code = CountryData.getCountryCodeByName(name) | local code = CountryData.getCountryCodeByName(name) | ||
local result = nil | local result = nil | ||
if code then | if code then | ||
| Line 518: | Line 396: | ||
end | end | ||
propertyCache[cacheKey] = result | propertyCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getAvailableProperties(code) | function CountryData.getAvailableProperties(code) | ||
if not code or code == "" then | if not code or code == "" then | ||
| Line 529: | Line 405: | ||
end | end | ||
local cacheKey = createCacheKey("getAvailableProperties", code) | local cacheKey = createCacheKey("getAvailableProperties", code) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 535: | Line 410: | ||
end | end | ||
local | local country = CountryData.getCountryByCode(code) | ||
local properties = {} | local properties = {} | ||
if country then | |||
for property in pairs(country) do | |||
table.insert(properties, property) | |||
end | |||
end | end | ||
functionCache[cacheKey] = properties | functionCache[cacheKey] = properties | ||
return properties | return properties | ||
end | end | ||
function CountryData.getAllPropertyNames() | function CountryData.getAllPropertyNames() | ||
local cacheKey = "getAllPropertyNames" | local cacheKey = "getAllPropertyNames" | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 569: | Line 429: | ||
local data = loadData() | local data = loadData() | ||
local properties = {} | local properties = {} | ||
local seen = {} | local seen = {} | ||
if data and data.countries then | |||
for _, country in pairs(data.countries) do | |||
for property in pairs(country) do | |||
if not seen[property] then | |||
seen[property] = true | |||
table.insert(properties, property) | |||
end | |||
end | end | ||
end | end | ||
end | end | ||
functionCache[cacheKey] = properties | functionCache[cacheKey] = properties | ||
return properties | return properties | ||
end | end | ||
function CountryData.getSemanticCountryRegionProperties(countryValue) | |||
function CountryData.getSemanticCountryRegionProperties(countryValue | |||
local properties = {} | local properties = {} | ||
if not countryValue or countryValue == "" then | if not countryValue or countryValue == "" then | ||
return properties | return properties | ||
end | end | ||
local ConfigRepository = require('Module:ConfigRepository') | |||
local countryPropertyName = "Has country" | local countryPropertyName = ConfigRepository.getSemanticPropertyName("Has country") | ||
local regionPropertyName = "Has ICANN region" | local regionPropertyName = ConfigRepository.getSemanticPropertyName("Has ICANN region") | ||
if not countryPropertyName or not regionPropertyName then | |||
return properties | |||
end | |||
local countries = {} | local countries = {} | ||
for country in string.gmatch(countryValue, "[^;]+") do | for country in string.gmatch(countryValue, "[^;]+") do | ||
| Line 631: | Line 468: | ||
end | end | ||
for _, countryName in ipairs(countries) do | |||
local normalizedCountry = CountryData.normalizeCountryName(countryName) | |||
for _, | |||
local normalizedCountry = CountryData.normalizeCountryName( | |||
if normalizedCountry ~= "(Unrecognized)" then | if normalizedCountry ~= "(Unrecognized)" then | ||
-- | -- Initialize property tables if they don't exist | ||
properties[countryPropertyName] = properties[countryPropertyName] or {} | properties[countryPropertyName] = properties[countryPropertyName] or {} | ||
table.insert(properties[countryPropertyName], normalizedCountry) | table.insert(properties[countryPropertyName], normalizedCountry) | ||
local region = CountryData.getRegionByCountry(normalizedCountry) | |||
local region = CountryData.getRegionByCountry( | |||
if region and region ~= "(Unrecognized)" then | if region and region ~= "(Unrecognized)" then | ||
properties[regionPropertyName] = properties[regionPropertyName] or {} | properties[regionPropertyName] = properties[regionPropertyName] or {} | ||
| Line 672: | Line 486: | ||
end | end | ||
-- Serialise the loaded dataset to a JSON string.
-- Returns the literal '{}' when no country data is loaded, when
-- mw.text.jsonEncode is not available, or when encoding raises an error or
-- yields a falsy result.
function CountryData.exportAsJson()
    local dataset = loadData()
    if not (dataset and dataset.countries) then
        return '{}'
    end

    local encode = mw.text and mw.text.jsonEncode
    if not encode then
        return '{}'
    end

    -- pcall keeps an encoder failure from propagating to the caller.
    local ok, encoded = pcall(encode, dataset)
    if ok and encoded then
        return encoded
    end

    return '{}'
end
local function getRegionClass(region) | local function getRegionClass(region) | ||
if not region or region == "(Unrecognized)" then | if not region or region == "(Unrecognized)" then | ||
return "region-default" | return "region-default" | ||
end | end | ||
if region == "NA" or region == "LAC" then | if region == "NA" or region == "LAC" then | ||
return "region-americas" | return "region-americas" | ||
| Line 714: | Line 516: | ||
end | end | ||
-- Render a country value as a bullet list through Module:ListGeneration.
-- @param value  a single country name or a semicolon-separated list of names
-- @return       "" for nil/empty input, otherwise whatever
--               ListGeneration.createList returns for the prepared items
function CountryData.formatCountryList(value)
    if not value or value == "" then return "" end

    local ListGeneration = require('Module:ListGeneration')

    local items = {}
    if CountryData.normalizeCountryName(value) == "(Unrecognized)" then
        -- Not a single known country: treat the value as a list and split on
        -- semicolons ONLY, rather than letting ListGeneration guess a delimiter.
        for piece in string.gmatch(value, "[^;]+") do
            local trimmed = piece:match("^%s*(.-)%s*$")
            if trimmed and trimmed ~= "" then
                items[#items + 1] = trimmed
            end
        end
    else
        -- The whole string is one valid country (this is what keeps names
        -- such as "Trinidad and Tobago" intact), so pass it through as-is.
        items[1] = value
    end

    -- Per-item hook: emit the canonical name plus a region CSS class, or nil
    -- for names that are not recognized (intended to exclude them from the list).
    local function formatItem(countryName)
        local canonical = CountryData.normalizeCountryName(countryName)
        if canonical == "(Unrecognized)" then
            return nil
        end

        local regionCode = CountryData.getRegionByCountry(canonical)
        return {
            content = canonical,
            class = getRegionClass(regionCode)
        }
    end

    -- Hand the pre-split items to the list generator.
    return ListGeneration.createList(items, {
        mode = 'bullet',
        listClass = 'template-list-country',
        itemHook = formatItem
    })
end
-- Alias of CountryData.formatCountryList: forwards `value` unchanged and
-- returns exactly what that function returns (tail call, so every return
-- value is passed through).
function CountryData.formatCountries(value)
    return CountryData.formatCountryList(value)
end
-- Turn a semicolon-separated country value into a list of canonical names
-- for category assignment.
-- @param value  semicolon-separated country names (nil or "" allowed)
-- @return       array of names as returned by normalizeCountryName, in input
--               order; entries that normalize to "(Unrecognized)" and blank
--               entries are left out. Empty table for nil/empty input.
function CountryData.getCountriesForCategories(value)
    local recognized = {}
    if not value or value == "" then
        return recognized
    end

    for entry in string.gmatch(value, "[^;]+") do
        -- Strip surrounding whitespace left over from "A; B" style input.
        local trimmed = entry:match("^%s*(.-)%s*$")
        if trimmed and trimmed ~= "" then
            local canonical = CountryData.normalizeCountryName(trimmed)
            if canonical ~= "(Unrecognized)" then
                recognized[#recognized + 1] = canonical
            end
        end
    end

    return recognized
end
function CountryData.getFlagFileName(countryNameOrCode) | function CountryData.getFlagFileName(countryNameOrCode) | ||
if not countryNameOrCode or countryNameOrCode == '' then return nil end | if not countryNameOrCode or countryNameOrCode == '' then return nil end | ||
local inputName = countryNameOrCode:gsub('_', ' ') | local inputName = countryNameOrCode:gsub('_', ' ') | ||
local isoCode | local isoCode | ||
-- First, try to get the ISO code by treating inputName as a country name. | -- First, try to get the ISO code by treating inputName as a country name. | ||
isoCode = CountryData.getCountryCodeByName(inputName) | isoCode = CountryData.getCountryCodeByName(inputName) | ||
| Line 816: | Line 597: | ||
-- it might be an ISO code already. Let's validate it. | -- it might be an ISO code already. Let's validate it. | ||
if not isoCode and #inputName == 2 then | if not isoCode and #inputName == 2 then | ||
if CountryData.getCountryByCode(inputName) then | if CountryData.getCountryByCode(inputName) then | ||
isoCode = inputName | isoCode = inputName:upper() | ||
end | end | ||
end | end | ||
if not isoCode or #isoCode ~= 2 then return nil end | |||
if not isoCode or | |||
return 'Flag-' .. string.lower(isoCode) .. '.svg' | return 'Flag-' .. string.lower(isoCode) .. '.svg' | ||
end | end | ||
return CountryData | return CountryData | ||