Module:LinkParser: Difference between revisions

// via Wikitext Extension for VSCode
Tag: Reverted
// via Wikitext Extension for VSCode
 
(7 intermediate revisions by the same user not shown)
Line 1: Line 1:
-- Module:LinkParser
--[[
-- Parses links inserted by editors in templates, strips them of the protocol part, leading "www.", and any trailing "/", while otherwise printing exactly what was entered to minimize spoofing.
* Name: LinkParser
* Author: Mark W. Datysgeld
* Description: Comprehensive link handling module for both external links and internal wiki links with caching
* Notes: URL normalization (protocol stripping, www removal); wiki link extraction, preservation, and automatic application; caching for both link types; supports autoWikiLink field configuration; preserves wiki links during processing
]]


local p = {}
local p = {}


-- Cache for previously processed URLs (persists during a single page render)
--------------------------------------------------------------------------------
-- Caching and Utilities
--------------------------------------------------------------------------------
 
-- Cache for processed URLs and wiki links (non-persistent)
local stripCache = {}
local stripCache = {}
local wikiLinkCache = {}


----------------------------------
--------------------------------------------------------------------------------
-- Remove protocol, leading www., and trailing /
-- External Link Processing
----------------------------------
--------------------------------------------------------------------------------
 
-- Strip URL protocol, www prefix, and trailing slash
function p.strip(link)
function p.strip(link)
-- Input validation
-- Input validation
Line 21: Line 32:
end
end
-- Combine all operations into a single chain to reduce string allocations
-- Chain operations to minimize string allocations
local result = link:gsub("^%a+://", ""):gsub("^www%.", ""):gsub("/$", "")
local result = link:gsub("^%a+://", ""):gsub("^www%.", ""):gsub("/$", "")
Line 29: Line 40:
end
end


----------------------------------
--------------------------------------------------------------------------------
-- Return a MediaWiki link with:
-- Creates MediaWiki link: [ originalURL strippedDisplayText ]
--  [ originalURL strippedDisplayText ]
--------------------------------------------------------------------------------
----------------------------------
function p.render(frame)
function p.render(frame)
-- Input validation
-- Input validation
Line 47: Line 57:
local rawLink = args["link"] or ""
local rawLink = args["link"] or ""
-- Early return for empty links
-- Skip empty links
if not rawLink or rawLink == "" then
if not rawLink or rawLink == "" then
return ""
return ""
end
end


-- Strip protocol, leading 'www.', and trailing '/'
-- Clean URL for display
local displayText = p.strip(rawLink)
local displayText = p.strip(rawLink)


-- Construct an external link of the form:
-- Format as [ rawLink displayText ]
--   [ rawLink displayText ]
return string.format("[%s %s]", rawLink, displayText)
-- Use mw.text.nowiki to prevent the link from being interpreted as wikitext
end
-- when it's included in HTML contexts
 
return mw.text.nowiki(string.format("[%s %s]", rawLink, displayText))
--------------------------------------------------------------------------------
-- Wiki Link Processing
--------------------------------------------------------------------------------
 
-- Process a wiki link in one of three modes (extract, strip, check).
-- A value counts as a wiki link only when the whole string has the form
-- [[PageName]] or [[PageName|Display text]].
-- Results are memoised in the module-level wikiLinkCache table, keyed by
-- value and mode.
-- @param value Wiki link to process
-- @param mode Processing mode: "extract" (default) returns the page name,
--             "strip" returns the display text (or the page name when no
--             display text is given), "check" returns a boolean telling
--             whether value is a wiki link. Unknown modes act like "extract".
-- @return Processed value or boolean based on mode; values that are not wiki
--         links are returned unchanged in "extract"/"strip" mode
function p.processWikiLink(value, mode)
    mode = mode or "extract"

    -- Empty or non-string input can never be a wiki link.
    -- "check" must answer with a real boolean: the shorthand
    -- `mode == "check" and false or value` always falls through to `value`
    -- (the middle operand is falsy), and "" is truthy in Lua.
    if type(value) ~= "string" or value == "" then
        if mode == "check" then
            return false
        end
        return value
    end

    local cacheKey = value .. ":" .. mode
    -- Compare against nil explicitly so a cached `false` ("check" mode) is honoured
    if wikiLinkCache[cacheKey] ~= nil then
        return wikiLinkCache[cacheKey]
    end

    -- A single Lua pattern captures the page name and the optional display text
    local pageName, displayText = value:match("^%[%[([^|]+)|?(.*)%]%]$")

    if mode == "check" then
        local isWikiLink = (pageName ~= nil)
        wikiLinkCache[cacheKey] = isWikiLink
        return isWikiLink
    end

    -- Not a wiki link: hand the value back untouched
    if not pageName then
        wikiLinkCache[cacheKey] = value
        return value
    end

    -- An empty display text means the format was [[PageName]], so use pageName
    if displayText == "" then
        displayText = pageName
    end

    local result
    if mode == "strip" then
        result = displayText
    else
        -- "extract", and the fallback for unknown modes
        result = pageName
    end

    wikiLinkCache[cacheKey] = result
    return result
end
 
-- Extract the page name from a wiki link.
-- Convenience wrapper: delegates to p.processWikiLink with mode "extract".
-- @param value Wiki link to process
-- @return Whatever p.processWikiLink returns for "extract" mode
function p.extractFromWikiLink(value)
    return p.processWikiLink(value, "extract")
end
 
-- Wrap a value in [[...]] when the field configuration requests it.
-- @param value Value to process; non-string or empty values are returned as-is
-- @param field Field configuration; only its autoWikiLink flag is read.
--              May be nil, in which case no link is added.
-- @return "[[value]]" when field.autoWikiLink is truthy and value does not
--         already contain a wiki link; otherwise value unchanged
function p.applyWikiLinkHandling(value, field)
    -- Skip invalid values
    if type(value) ~= "string" or value == "" then
        return value
    end

    -- Without a field config there is nothing to apply; indexing a nil
    -- field would otherwise raise "attempt to index a nil value".
    if not field or not field.autoWikiLink then
        return value
    end

    -- Leave values that already carry a [[...]] link alone
    if value:match("%[%[.-%]%]") then
        return value
    end

    return "[[" .. value .. "]]"
end
 
-- Decide whether the processed value or the original should be kept.
-- The original wins only when preservation is requested, the original
-- contained a [[...]] link, and the processed value no longer contains one.
-- @param originalValue Value before processing
-- @param processedValue Value after processing
-- @param preserveWikiLinks Whether to keep wiki links
-- @return originalValue when its links were lost and must be preserved,
--         otherwise processedValue
function p.preserveWikiLinks(originalValue, processedValue, preserveWikiLinks)
    -- Anything other than two strings is passed through untouched
    local bothAreStrings = type(originalValue) == "string"
        and type(processedValue) == "string"
    if not bothAreStrings then
        return processedValue
    end

    -- Preservation not requested: nothing to restore
    if not preserveWikiLinks then
        return processedValue
    end

    local linkPattern = "%[%[.-%]%]"
    local hadLink = originalValue:find(linkPattern) ~= nil
    local keptLink = processedValue:find(linkPattern) ~= nil

    -- Fall back to the original only if processing dropped the link(s)
    if hadLink and not keptLink then
        return originalValue
    end
    return processedValue
end
end


return p
return p