Module:CountryData: Difference between revisions
// via Wikitext Extension for VSCode |
// via Wikitext Extension for VSCode |
||
| Line 45: | Line 45: | ||
end | end | ||
-- | -- Reset the module-level caches (useful for testing) | ||
local function | local function resetCaches() | ||
dataCache = nil | |||
nameLookupCache = nil | |||
regionLookupCache = nil | |||
propertyCache = {} | |||
functionCache = {} | |||
end | end | ||
-------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | ||
-- Data Loading Layer | -- Data Loading and Cache Building Layer (Refactored) | ||
-------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | ||
-- | -- Data loading function using DatasetLoader | ||
local function | local function loadData() | ||
if dataCache then | |||
return dataCache | |||
end | |||
local raw = loader.get('CountryDataset') | |||
dataCache = { | |||
countries = raw.countries or {}, | |||
icann_regions = raw.icann_regions or {}, | |||
schema_version = raw.schema_version, | |||
last_updated = raw.last_updated | |||
} | |||
return dataCache | |||
end | |||
-- Builds the primary name-to-code lookup cache. | |||
-- This is the core of the refactoring, ensuring correct normalization order. | |||
local function buildNameLookupCache(data) | |||
if nameLookupCache then | if nameLookupCache then | ||
return nameLookupCache | return nameLookupCache | ||
end | end | ||
if not data or not data.countries then | if not data or not data.countries then | ||
nameLookupCache = {} | nameLookupCache = {} | ||
return nameLookupCache | return nameLookupCache | ||
end | end | ||
local lookup = {} | local lookup = {} | ||
for code, country in pairs(data.countries) do | for code, country in pairs(data.countries) do | ||
-- | -- Ensure the country has a name to process | ||
local canonicalName = country.name or country.canonical_name | |||
if canonicalName then | |||
-- 1. Add the canonical name itself | |||
local normalizedName = NormalizationText.normalizeText(canonicalName) | |||
lookup[normalizedName] = code | |||
-- 2. Add the diacritic-stripped version of the canonical name | |||
local strippedName = DiacriticNormalization.removeDiacritics(canonicalName) | |||
if strippedName ~= canonicalName then -- only add if it's different | |||
lookup[NormalizationText.normalizeText(strippedName)] = code | |||
end | |||
end | end | ||
-- 3. Process all variations | |||
-- | |||
if country.variations and type(country.variations) == "table" then | if country.variations and type(country.variations) == "table" then | ||
for _, variation in pairs(country.variations) do | for _, variation in pairs(country.variations) do | ||
lookup[NormalizationText.normalizeText( | -- Add the variation | ||
local normalizedVariation = NormalizationText.normalizeText(variation) | |||
lookup[normalizedVariation] = code | |||
-- Add the diacritic-stripped version of the variation | |||
local strippedVariation = DiacriticNormalization.removeDiacritics(variation) | |||
if strippedVariation ~= variation then | |||
lookup[NormalizationText.normalizeText(strippedVariation)] = code | |||
end | |||
end | end | ||
end | end | ||
end | end | ||
nameLookupCache = lookup | nameLookupCache = lookup | ||
return lookup | return lookup | ||
end | end | ||
-- | -- Builds the region lookup cache. | ||
local function | local function buildRegionLookupCache(data) | ||
if regionLookupCache then | if regionLookupCache then | ||
return regionLookupCache | return regionLookupCache | ||
end | end | ||
if not data or not data.icann_regions then | if not data or not data.icann_regions then | ||
regionLookupCache = {} | regionLookupCache = {} | ||
return regionLookupCache | return regionLookupCache | ||
end | end | ||
local lookup = {} | local lookup = {} | ||
for code, region in pairs(data.icann_regions) do | for code, region in pairs(data.icann_regions) do | ||
if region.name then | if region.name then | ||
lookup[NormalizationText.normalizeText(region.name)] = code | lookup[NormalizationText.normalizeText(region.name)] = code | ||
end | end | ||
if region.variations and type(region.variations) == "table" then | if region.variations and type(region.variations) == "table" then | ||
for _, variation in pairs(region.variations) do | for _, variation in pairs(region.variations) do | ||
lookup[NormalizationText.normalizeText(variation)] = code | lookup[NormalizationText.normalizeText(variation)] = code | ||
| Line 147: | Line 143: | ||
end | end | ||
end | end | ||
regionLookupCache = lookup | regionLookupCache = lookup | ||
return lookup | return lookup | ||
end | end | ||
-------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | ||
-- Core API Functions | -- Core API Functions (Public Interface) | ||
-------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | ||
| Line 184: | Line 156: | ||
-- Load data and initialize caches | -- Load data and initialize caches | ||
function CountryData.loadData(frame) | function CountryData.loadData(frame) | ||
return loadData( | return loadData() | ||
end | end | ||
| Line 193: | Line 165: | ||
end | end | ||
-- Get country data by ISO code | -- Get country data by ISO code (Refactored to use new cache logic) | ||
function CountryData.getCountryByCode(code) | function CountryData.getCountryByCode(code) | ||
if not code or code == "" then | if not code or code == "" then | ||
| Line 199: | Line 171: | ||
end | end | ||
local cacheKey = createCacheKey("getCountryByCode", code) | local cacheKey = createCacheKey("getCountryByCode", code) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 206: | Line 177: | ||
local data = loadData() | local data = loadData() | ||
code = code:upper() -- Standardize code | |||
local result = data and data.countries and data.countries[code] or nil | |||
local result = | |||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
-- Get country data by name ( | -- Get country data by name (Refactored to use new cache logic) | ||
function CountryData.getCountryByName(name) | function CountryData.getCountryByName(name) | ||
if not name or name == "" then | if not name or name == "" then | ||
return nil | return nil | ||
end | end | ||
local cacheKey = createCacheKey("getCountryByName", name) | local cacheKey = createCacheKey("getCountryByName", name) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
return functionCache[cacheKey] | return functionCache[cacheKey] | ||
end | end | ||
local data = loadData() | local data = loadData() | ||
local nameLookup = | local nameLookup = buildNameLookupCache(data) | ||
-- Normalize the input | -- Normalize the input name in one go (text normalization includes lowercasing) | ||
local normalized = NormalizationText.normalizeText(name) | local normalized = NormalizationText.normalizeText(name) | ||
-- | -- First, try a direct lookup with the normalized name | ||
local code = nameLookup[normalized] | local code = nameLookup[normalized] | ||
-- If not found, try looking up the diacritic-stripped version | |||
if not code then | |||
local stripped = DiacriticNormalization.removeDiacritics(name) | |||
-- Check if stripping made a difference to avoid redundant normalization | |||
if stripped ~= name then | |||
local normalizedStripped = NormalizationText.normalizeText(stripped) | |||
code = nameLookup[normalizedStripped] | |||
end | |||
end | |||
local result = nil | local result = nil | ||
if code | if code then | ||
result = data.countries[code] | result = data.countries[code] | ||
end | end | ||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
| Line 266: | Line 230: | ||
end | end | ||
local cacheKey = createCacheKey("getCountryCodeByName", name) | local cacheKey = createCacheKey("getCountryCodeByName", name) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 272: | Line 235: | ||
end | end | ||
local | local country = CountryData.getCountryByName(name) | ||
local | local code = nil | ||
-- | -- To get the code, we need to find which key in the main data table matches the found country object. | ||
-- This is less efficient but necessary if the country object itself doesn't store its code. | |||
if country then | |||
-- | local data = loadData() | ||
for c, countryData in pairs(data.countries) do | |||
if countryData == country then | |||
if | code = c | ||
break | |||
local | end | ||
if | |||
end | end | ||
end | end | ||
functionCache[cacheKey] = code | functionCache[cacheKey] = code | ||
return code | return code | ||
end | end | ||
-- (The rest of the functions remain unchanged for now, but will be updated in subsequent phases) | |||
function CountryData.normalizeCountryName(name) | function CountryData.normalizeCountryName(name) | ||
| Line 299: | Line 261: | ||
end | end | ||
local cacheKey = createCacheKey("normalizeCountryName", name) | local cacheKey = createCacheKey("normalizeCountryName", name) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 309: | Line 270: | ||
local result | local result | ||
if country then | if country then | ||
result = country.name or country.canonical_name | result = country.name or country.canonical_name | ||
result = result | result = result | ||
:gsub(",%s*", "") | :gsub(",%s*", "") | ||
:gsub("%sand the%s+", " and ") | :gsub("%sand the%s+", " and ") | ||
else | else | ||
result = "(Unrecognized)" | result = "(Unrecognized)" | ||
end | end | ||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getRegionByCountry(name) | function CountryData.getRegionByCountry(name) | ||
if not name or name == "" then | if not name or name == "" then | ||
| Line 331: | Line 287: | ||
end | end | ||
local cacheKey = createCacheKey("getRegionByCountry", name) | local cacheKey = createCacheKey("getRegionByCountry", name) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 343: | Line 298: | ||
result = country.icann_region | result = country.icann_region | ||
else | else | ||
result = "(Unrecognized)" | result = "(Unrecognized)" | ||
end | end | ||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getCountriesByRegion(region) | function CountryData.getCountriesByRegion(region) | ||
if not region or region == "" then | if not region or region == "" then | ||
| Line 358: | Line 310: | ||
end | end | ||
local cacheKey = createCacheKey("getCountriesByRegion", region) | local cacheKey = createCacheKey("getCountriesByRegion", region) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 365: | Line 316: | ||
local data = loadData() | local data = loadData() | ||
local regionLookup = | local regionLookup = buildRegionLookupCache(data) | ||
local normalized = NormalizationText.normalizeText(region) | local normalized = NormalizationText.normalizeText(region) | ||
local regionCode = regionLookup[normalized] | local regionCode = regionLookup[normalized] | ||
local result = {} | local result = {} | ||
if regionCode and data.countries then | if regionCode and data.countries then | ||
for code, country in pairs(data.countries) do | for code, country in pairs(data.countries) do | ||
if country.icann_region == regionCode then | if country.icann_region == regionCode then | ||
result | table.insert(result, { | ||
code = code, | code = code, | ||
name = country.name or country.canonical_name | name = country.name or country.canonical_name | ||
} | }) | ||
end | end | ||
end | end | ||
end | end | ||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getAllCountryCodes() | function CountryData.getAllCountryCodes() | ||
local cacheKey = "getAllCountryCodes" | local cacheKey = "getAllCountryCodes" | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 411: | Line 345: | ||
local data = loadData() | local data = loadData() | ||
local result = {} | local result = {} | ||
if data and data.countries then | if data and data.countries then | ||
for code in pairs(data.countries) do | for code in pairs(data.countries) do | ||
result | table.insert(result, code) | ||
end | end | ||
end | end | ||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getAllCountryNames() | function CountryData.getAllCountryNames() | ||
local cacheKey = "getAllCountryNames" | local cacheKey = "getAllCountryNames" | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 442: | Line 363: | ||
local data = loadData() | local data = loadData() | ||
local result = {} | local result = {} | ||
if data and data.countries then | if data and data.countries then | ||
for _, country in pairs(data.countries) do | for _, country in pairs(data.countries) do | ||
table.insert(result, country.name or country.canonical_name) | |||
end | end | ||
end | end | ||
functionCache[cacheKey] = result | functionCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getCountryProperty(code, property) | function CountryData.getCountryProperty(code, property) | ||
if not code or code == "" or not property or property == "" then | if not code or code == "" or not property or property == "" then | ||
| Line 470: | Line 378: | ||
end | end | ||
local cacheKey = createCacheKey("getCountryProperty", code, property) | local cacheKey = createCacheKey("getCountryProperty", code, property) | ||
if propertyCache[cacheKey] ~= nil then | if propertyCache[cacheKey] ~= nil then | ||
| Line 477: | Line 384: | ||
local country = CountryData.getCountryByCode(code) | local country = CountryData.getCountryByCode(code) | ||
local result = country and country[property] or nil | |||
propertyCache[cacheKey] = result | propertyCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getCountryPropertyByName(name, property) | function CountryData.getCountryPropertyByName(name, property) | ||
if not name or name == "" or not property or property == "" then | if not name or name == "" or not property or property == "" then | ||
| Line 494: | Line 395: | ||
end | end | ||
local cacheKey = createCacheKey("getCountryPropertyByName", name, property) | local cacheKey = createCacheKey("getCountryPropertyByName", name, property) | ||
if propertyCache[cacheKey] ~= nil then | if propertyCache[cacheKey] ~= nil then | ||
| Line 501: | Line 401: | ||
local code = CountryData.getCountryCodeByName(name) | local code = CountryData.getCountryCodeByName(name) | ||
local result = nil | local result = nil | ||
if code then | if code then | ||
| Line 507: | Line 406: | ||
end | end | ||
propertyCache[cacheKey] = result | propertyCache[cacheKey] = result | ||
return result | return result | ||
end | end | ||
function CountryData.getAvailableProperties(code) | function CountryData.getAvailableProperties(code) | ||
if not code or code == "" then | if not code or code == "" then | ||
| Line 518: | Line 415: | ||
end | end | ||
local cacheKey = createCacheKey("getAvailableProperties", code) | local cacheKey = createCacheKey("getAvailableProperties", code) | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 524: | Line 420: | ||
end | end | ||
local | local country = CountryData.getCountryByCode(code) | ||
local properties = {} | local properties = {} | ||
if country then | |||
for property in pairs(country) do | |||
table.insert(properties, property) | |||
end | |||
end | end | ||
functionCache[cacheKey] = properties | functionCache[cacheKey] = properties | ||
return properties | return properties | ||
end | end | ||
function CountryData.getAllPropertyNames() | function CountryData.getAllPropertyNames() | ||
local cacheKey = "getAllPropertyNames" | local cacheKey = "getAllPropertyNames" | ||
if functionCache[cacheKey] ~= nil then | if functionCache[cacheKey] ~= nil then | ||
| Line 558: | Line 439: | ||
local data = loadData() | local data = loadData() | ||
local properties = {} | local properties = {} | ||
local seen = {} | local seen = {} | ||
if data and data.countries then | |||
for _, country in pairs(data.countries) do | |||
for property in pairs(country) do | |||
if not seen[property] then | |||
seen[property] = true | |||
table.insert(properties, property) | |||
end | |||
end | end | ||
end | end | ||
end | end | ||
functionCache[cacheKey] = properties | functionCache[cacheKey] = properties | ||
return properties | return properties | ||
end | end | ||
function CountryData.getSemanticPropertyName(propertyKey) | function CountryData.getSemanticPropertyName(propertyKey) | ||
local ConfigRepository = require('Module:ConfigRepository') | local ConfigRepository = require('Module:ConfigRepository') | ||
for templateName, templateConfig in pairs(ConfigRepository.templates) do | for templateName, templateConfig in pairs(ConfigRepository.templates) do | ||
if templateConfig.semantics and templateConfig.semantics.additionalProperties then | if templateConfig.semantics and templateConfig.semantics.additionalProperties then | ||
if templateConfig.semantics.additionalProperties[propertyKey] then | if templateConfig.semantics.additionalProperties[propertyKey] then | ||
return propertyKey | return propertyKey | ||
| Line 611: | Line 465: | ||
end | end | ||
end | end | ||
return nil | return nil | ||
end | end | ||
function CountryData.getSemanticCountryRegionProperties(countryValue) | function CountryData.getSemanticCountryRegionProperties(countryValue) | ||
local properties = {} | local properties = {} | ||
if not countryValue or countryValue == "" then | if not countryValue or countryValue == "" then | ||
return properties | return properties | ||
end | end | ||
local countryPropertyName = CountryData.getSemanticPropertyName("Has country") | local countryPropertyName = CountryData.getSemanticPropertyName("Has country") | ||
local regionPropertyName = CountryData.getSemanticPropertyName("Has ICANN region") | local regionPropertyName = CountryData.getSemanticPropertyName("Has ICANN region") | ||
if not countryPropertyName or not regionPropertyName then | if not countryPropertyName or not regionPropertyName then | ||
return properties | return properties | ||
end | end | ||
local countries = {} | local countries = {} | ||
for country in string.gmatch(countryValue, "[^;]+") do | for country in string.gmatch(countryValue, "[^;]+") do | ||
| Line 644: | Line 489: | ||
end | end | ||
for _, country in ipairs(countries) do | for _, country in ipairs(countries) do | ||
local normalizedCountry = CountryData.normalizeCountryName(country) | local normalizedCountry = CountryData.normalizeCountryName(country) | ||
if normalizedCountry ~= "(Unrecognized)" then | if normalizedCountry ~= "(Unrecognized)" then | ||
properties[countryPropertyName] = properties[countryPropertyName] or {} | properties[countryPropertyName] = properties[countryPropertyName] or {} | ||
table.insert(properties[countryPropertyName], normalizedCountry) | table.insert(properties[countryPropertyName], normalizedCountry) | ||
local region = CountryData.getRegionByCountry(country) | local region = CountryData.getRegionByCountry(country) | ||
if region and region ~= "(Unrecognized)" then | if region and region ~= "(Unrecognized)" then | ||
| Line 666: | Line 506: | ||
end | end | ||
function CountryData.exportAsJson() | function CountryData.exportAsJson() | ||
local data = loadData() | local data = loadData() | ||
if not data or not data.countries then | if not data or not data.countries then | ||
return '{}' | return '{}' | ||
end | end | ||
if mw.text and mw.text.jsonEncode then | if mw.text and mw.text.jsonEncode then | ||
local success, result = pcall(function() | local success, result = pcall(function() | ||
return mw.text.jsonEncode(data) | return mw.text.jsonEncode(data) | ||
end) | end) | ||
if success and result then | if success and result then | ||
return result | return result | ||
end | end | ||
end | end | ||
return '{}' | return '{}' | ||
end | end | ||
local function getRegionClass(region) | local function getRegionClass(region) | ||
if not region or region == "(Unrecognized)" then | if not region or region == "(Unrecognized)" then | ||
return "region-default" | return "region-default" | ||
end | end | ||
if region == "NA" or region == "LAC" then | if region == "NA" or region == "LAC" then | ||
return "region-americas" | return "region-americas" | ||
| Line 708: | Line 536: | ||
end | end | ||
function CountryData.formatCountryList(value) | function CountryData.formatCountryList(value) | ||
if not value or value == "" then return "" end | if not value or value == "" then return "" end | ||
local countries = {} | local countries = {} | ||
for country in string.gmatch(value, "[^;]+") do | for country in string.gmatch(value, "[^;]+") do | ||
| Line 724: | Line 548: | ||
local normalizedCountries = {} | local normalizedCountries = {} | ||
for _, country in ipairs(countries) do | for _, country in ipairs(countries) do | ||
local normalized = CountryData.normalizeCountryName(country) | local normalized = CountryData.normalizeCountryName(country) | ||
if normalized ~= "(Unrecognized)" then | if normalized ~= "(Unrecognized)" then | ||
table.insert(normalizedCountries, normalized) | |||
end | end | ||
end | end | ||
if #normalizedCountries > 0 then | |||
if | |||
local listItems = {} | local listItems = {} | ||
for _, country in ipairs(normalizedCountries) do | for _, country in ipairs(normalizedCountries) do | ||
local countryRegion = CountryData.getRegionByCountry(country) | local countryRegion = CountryData.getRegionByCountry(country) | ||
local regionClass = getRegionClass(countryRegion) | local regionClass = getRegionClass(countryRegion) | ||
table.insert(listItems, string.format("<li class=\"%s\">%s</li>", regionClass, country)) | table.insert(listItems, string.format("<li class=\"%s\">%s</li>", regionClass, country)) | ||
end | end | ||
return string.format("<ul class=\"template-list template-list-country\">%s</ul>", table.concat(listItems, "")) | |||
return string.format("<ul class=\"template-list template-list-country\">%s</ul>", | |||
end | end | ||
| Line 763: | Line 568: | ||
end | end | ||
function CountryData.formatCountries(value) | function CountryData.formatCountries(value) | ||
return CountryData.formatCountryList(value) | return CountryData.formatCountryList(value) | ||
end | end | ||
function CountryData.getCountriesForCategories(value) | function CountryData.getCountriesForCategories(value) | ||
if not value or value == "" then return {} end | if not value or value == "" then return {} end | ||
| Line 781: | Line 584: | ||
local normalizedCountries = {} | local normalizedCountries = {} | ||
for _, country in ipairs(countries) do | for _, country in ipairs(countries) do | ||
local normalized = CountryData.normalizeCountryName(country) | local normalized = CountryData.normalizeCountryName(country) | ||
if normalized ~= "(Unrecognized)" then | if normalized ~= "(Unrecognized)" then | ||
table.insert(normalizedCountries, normalized) | |||
end | end | ||
end | end | ||
| Line 795: | Line 594: | ||
end | end | ||
function CountryData.getFlagFileName(countryNameOrCode) | function CountryData.getFlagFileName(countryNameOrCode) | ||
if not countryNameOrCode or countryNameOrCode == '' then return nil end | if not countryNameOrCode or countryNameOrCode == '' then return nil end | ||
local inputName = countryNameOrCode:gsub('_', ' ') | local inputName = countryNameOrCode:gsub('_', ' ') | ||
local | local isoCode = CountryData.getCountryCodeByName(inputName) | ||
if not isoCode and #inputName == 2 then | if not isoCode and #inputName == 2 then | ||
if CountryData.getCountryByCode(inputName) then | if CountryData.getCountryByCode(inputName) then | ||
isoCode = inputName | isoCode = inputName:upper() | ||
end | end | ||
end | end | ||
if not isoCode or #isoCode ~= 2 then return nil end | |||
if not isoCode or | |||
return 'Flag-' .. string.lower(isoCode) .. '.svg' | return 'Flag-' .. string.lower(isoCode) .. '.svg' | ||
end | end | ||
return CountryData | return CountryData | ||