Jump to content

Module:LinkParser: Difference between revisions

// via Wikitext Extension for VSCode
// via Wikitext Extension for VSCode
 
(4 intermediate revisions by the same user not shown)
Line 1: Line 1:
-- Module:LinkParser
--[[
-- Handles link processing for both external links and wiki links
* Name: LinkParser
--
* Author: Mark W. Datysgeld
-- This module contains the following sections:
* Description: Comprehensive link handling module for both external links and internal wiki links with caching
-- * External Link Processing - For handling URLs and external links
* Notes: URL normalization (protocol stripping, www removal); wiki link extraction, preservation, and automatic application; caching for both link types; supports autoWikiLink field configuration; preserves wiki links during processing
-- * Wiki Link Processing - For handling internal wiki links
]]
-- * Caching and Utilities - Shared functionality for link handling


local p = {}
local p = {}
Line 13: Line 12:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------


-- Cache for previously processed URLs and wiki links (no persistence)
-- Cache for processed URLs and wiki links (non-persistent)
local stripCache = {}
local stripCache = {}
local wikiLinkCache = {}
local wikiLinkCache = {}
Line 21: Line 20:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------


-- Remove protocol, leading www., and trailing /
-- Strip URL protocol, www prefix, and trailing slash
function p.strip(link)
function p.strip(link)
-- Input validation
-- Input validation
Line 33: Line 32:
end
end
-- Combine all operations into a single chain to reduce string allocations
-- Chain operations to minimize string allocations
local result = link:gsub("^%a+://", ""):gsub("^www%.", ""):gsub("/$", "")
local result = link:gsub("^%a+://", ""):gsub("^www%.", ""):gsub("/$", "")
Line 42: Line 41:


--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
-- Return a MediaWiki link with:
-- Creates MediaWiki link: [ originalURL strippedDisplayText ]
--  [ originalURL strippedDisplayText ]
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
function p.render(frame)
function p.render(frame)
Line 59: Line 57:
local rawLink = args["link"] or ""
local rawLink = args["link"] or ""
-- Early return for empty links
-- Skip empty links
if not rawLink or rawLink == "" then
if not rawLink or rawLink == "" then
return ""
return ""
end
end


-- Strip protocol, leading 'www.', and trailing '/'
-- Clean URL for display
local displayText = p.strip(rawLink)
local displayText = p.strip(rawLink)


-- Construct an external link of the form:
-- Format as [ rawLink displayText ]
--  [ rawLink displayText ]
return string.format("[%s %s]", rawLink, displayText)
return string.format("[%s %s]", rawLink, displayText)
end
end
Line 76: Line 73:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------


-- Process wiki links with different modes: extract, strip, or check
-- Process wiki links in different modes (extract, strip, check)
-- @param value The wiki link to process
-- @param value Wiki link to process
-- @param mode The processing mode: "extract", "strip", or "check"
-- @param mode Processing mode: "extract", "strip", or "check"
-- @return Processed value or boolean depending on mode
-- @return Processed value or boolean based on mode
function p.processWikiLink(value, mode)
function p.processWikiLink(value, mode)
    -- Default mode is "extract"
     mode = mode or "extract"
     mode = mode or "extract"
   
    -- Early return for nil or empty values
     if not value or value == "" then
     if not value or value == "" then
         return mode == "check" and false or value
         return mode == "check" and false or value
     end
     end
   
 
    -- Create cache key combining value and mode
     local cacheKey = value .. ":" .. mode
     local cacheKey = value .. ":" .. mode
   
    -- Check cache first
     if wikiLinkCache[cacheKey] ~= nil then
     if wikiLinkCache[cacheKey] ~= nil then
         return wikiLinkCache[cacheKey]
         return wikiLinkCache[cacheKey]
     end
     end
   
 
     -- Check if the value is a wiki link
     -- A single regex to capture the page name and optional display text
     local isWikiLink = value:match("^%[%[.-%]%]$") ~= nil
     local pageName, displayText = value:match("^%[%[([^|]+)|?(.*)%]%]$")
   
 
    -- For check mode, just return whether it's a wiki link
     if mode == "check" then
     if mode == "check" then
        local isWikiLink = (pageName ~= nil)
         wikiLinkCache[cacheKey] = isWikiLink
         wikiLinkCache[cacheKey] = isWikiLink
         return isWikiLink
         return isWikiLink
     end
     end
   
 
    -- If it's not a wiki link, return the original value
     if not pageName then
     if not isWikiLink then
         wikiLinkCache[cacheKey] = value
         wikiLinkCache[cacheKey] = value
         return value
         return value
     end
     end
   
 
     -- Extract components from the wiki link
     -- If displayText is empty, it means the format was [[PageName]], so use pageName
    local pageName, displayText
     if displayText == "" then
   
         displayText = pageName
    -- Try to match [[PageName|DisplayText]] format
     pageName, displayText = value:match("^%[%[([^%|%]]+)%|([^%]]+)%]%]$")
   
    if not pageName then
        -- Try to match [[PageName]] format
        pageName = value:match("^%[%[([^%|%]]+)%]%]$")
         displayText = pageName -- In this case, display text is the same as page name
     end
     end
   
 
    -- Determine result based on mode
     local result
     local result
     if mode == "extract" then
     if mode == "extract" then
         result = pageName or value
         result = pageName
     elseif mode == "strip" then
     elseif mode == "strip" then
         result = displayText or value
         result = displayText
     else
     else
         -- Default to extract mode for unknown modes
         result = pageName -- Default to extract for unknown modes
        result = pageName or value
     end
     end
   
 
    -- Store result in cache
     wikiLinkCache[cacheKey] = result
     wikiLinkCache[cacheKey] = result
   
     return result
     return result
end
end


-- Extract page name from wiki link [[Name]] or [[Name|Text]]
-- Extract page name from wiki link
-- @param value The wiki link to extract from
-- @param value Wiki link to process
-- @return The extracted page name
-- @return Extracted page name
function p.extractFromWikiLink(value)
function p.extractFromWikiLink(value)
     return p.processWikiLink(value, "extract")
     return p.processWikiLink(value, "extract")
end
end


-- Adds wiki links to a value if needed based on field configuration
-- Add wiki links based on field config
-- @param value The value to process
-- @param value Value to process
-- @param field The field configuration
-- @param field Field configuration
-- @return The processed value with wiki links added if needed
-- @return Value with wiki links if needed
function p.applyWikiLinkHandling(value, field)
function p.applyWikiLinkHandling(value, field)
     -- Skip if value is not a string or is nil/empty
     -- Skip invalid values
     if not value or type(value) ~= "string" or value == "" then
     if not value or type(value) ~= "string" or value == "" then
         return value
         return value
     end
     end
      
      
     -- Add wiki links at the very end of processing if autoWikiLink is true
     -- Add wiki links if autoWikiLink is true
     -- This ensures they won't be stripped by any subsequent processing
     -- Prevents stripping by later processing
     if field.autoWikiLink and not value:match("%[%[.-%]%]") then
     if field.autoWikiLink and not value:match("%[%[.-%]%]") then
         return "[[" .. value .. "]]"
         return "[[" .. value .. "]]"
Line 167: Line 146:
end
end


-- Helper function to preserve wiki links in processed values
-- Preserve wiki links in processed values
-- If the original value had wiki links but the processed value doesn't,
-- Keeps original if links were lost during processing
-- returns the original value to preserve the wiki links
-- @param originalValue Value before processing
-- @param originalValue The original value before processing
-- @param processedValue Value after processing
-- @param processedValue The value after processing
-- @param preserveWikiLinks Whether to keep wiki links
-- @param preserveWikiLinks Whether to preserve wiki links
-- @return Value with links preserved if needed
-- @return The value with wiki links preserved if needed
function p.preserveWikiLinks(originalValue, processedValue, preserveWikiLinks)
function p.preserveWikiLinks(originalValue, processedValue, preserveWikiLinks)
     -- Skip if any value is not a string
     -- Skip non-string values
     if type(originalValue) ~= "string" or type(processedValue) ~= "string" then
     if type(originalValue) ~= "string" or type(processedValue) ~= "string" then
         return processedValue
         return processedValue
     end
     end
      
      
     -- If preserveWikiLinks is true and the processor stripped wiki links,
     -- Restore original if links were lost
    -- use the original value with wiki links
     if preserveWikiLinks and  
     if preserveWikiLinks and  
       originalValue:match("%[%[.-%]%]") and  
       originalValue:match("%[%[.-%]%]") and  

Latest revision as of 03:09, 25 August 2025

Documentation for this module may be created at Module:LinkParser/doc

--[[
* Name: LinkParser
* Author: Mark W. Datysgeld
* Description: Comprehensive link handling module for both external links and internal wiki links with caching
* Notes: URL normalization (protocol stripping, www removal); wiki link extraction, preservation, and automatic application; caching for both link types; supports autoWikiLink field configuration; preserves wiki links during processing
]]

-- Module table: the functions defined below are attached to it, and it is
-- returned at the end of the file.
local p = {}

--------------------------------------------------------------------------------
-- Caching and Utilities
--------------------------------------------------------------------------------

-- Memo tables for already-processed inputs. Both are plain file-level locals;
-- this module does not persist them anywhere.
--   stripCache:    raw link passed to p.strip -> stripped display text
--   wikiLinkCache: value .. ":" .. mode (see p.processWikiLink) -> result
local stripCache = {}
local wikiLinkCache = {}

--------------------------------------------------------------------------------
-- External Link Processing
--------------------------------------------------------------------------------

-- Produce a shortened, display-friendly form of a URL.
-- Removes, in this order: a leading scheme made of letters followed by "://",
-- a leading "www.", and a single trailing "/".
-- @param link URL string to shorten
-- @return The shortened string; nil, false and "" are returned unchanged
function p.strip(link)
	-- Nothing to do for missing or empty input
	if link == "" or not link then
		return link
	end

	-- Reuse an earlier answer for the same input when there is one
	local cached = stripCache[link]
	if cached then
		return cached
	end

	-- Each gsub also returns a match count; assigning to a single local
	-- keeps only the resulting string.
	local withoutScheme = link:gsub("^%a+://", "")
	local withoutWww = withoutScheme:gsub("^www%.", "")
	local shortened = withoutWww:gsub("/$", "")

	-- Remember the answer for the next call with the same input
	stripCache[link] = shortened
	return shortened
end

--------------------------------------------------------------------------------
-- Builds an external link in wikitext form: [originalURL strippedDisplayText]
-- The URL is read from the "link" argument of the parent of the given frame.
-- @param frame Object exposing getParent(); the parent must carry an args table
-- @return The link markup, or "" when the frame, its parent, its args or the
--         "link" argument is missing or empty
--------------------------------------------------------------------------------
function p.render(frame)
	-- Walk frame -> parent -> args, stopping at the first missing piece
	local parent = frame and frame.getParent and frame:getParent()
	local args = parent and parent.args
	if not args then
		return ""
	end

	-- No URL supplied: render nothing
	local rawLink = args.link or ""
	if rawLink == "" then
		return ""
	end

	-- The full URL is the link target; the shortened form is the label
	local displayText = p.strip(rawLink)
	return ("[%s %s]"):format(rawLink, displayText)
end

--------------------------------------------------------------------------------
-- Wiki Link Processing
--------------------------------------------------------------------------------

-- Process a value that may be a single wiki link, [[Page]] or [[Page|Text]].
-- Results are memoised in wikiLinkCache under the key value .. ":" .. mode.
-- @param value Candidate wiki link
-- @param mode Processing mode (defaults to "extract"):
--   "extract" - return the page name
--   "strip"   - return the display text (the page name when there is no pipe
--               or the text after the pipe is empty)
--   "check"   - return true/false: is the whole value exactly one wiki link?
--   any other mode behaves like "extract"
-- @return In "check" mode always a boolean. In the other modes the extracted
--         text, or the value unchanged when it is nil, empty, not a string,
--         or not a single wiki link (e.g. "[[A]] and [[B]]").
function p.processWikiLink(value, mode)
    mode = mode or "extract"

    -- Nil, empty and non-string inputs can never be wiki links.
    -- An explicit if is required here: `cond and false or value` always
    -- evaluates to value, so check mode could never answer false.
    if type(value) ~= "string" or value == "" then
        if mode == "check" then
            return false
        end
        return value
    end

    local cacheKey = value .. ":" .. mode
    local cached = wikiLinkCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    -- Take everything between the outermost [[ and ]]. If that text still
    -- contains "]]", the first link closed before the end of the value, so
    -- the value is not one single link and is left unparsed.
    local pageName, displayText
    local inner = value:match("^%[%[(.+)%]%]$")
    if inner and not inner:find("]]", 1, true) then
        -- Page name runs up to the first pipe; the rest is the display text
        pageName, displayText = inner:match("^([^|]+)|?(.*)$")
    end

    if mode == "check" then
        local isWikiLink = (pageName ~= nil)
        wikiLinkCache[cacheKey] = isWikiLink
        return isWikiLink
    end

    -- Not a single wiki link: hand the value back untouched
    if not pageName then
        wikiLinkCache[cacheKey] = value
        return value
    end

    -- [[PageName]] and [[PageName|]] carry no display text; use the page name
    if displayText == "" then
        displayText = pageName
    end

    local result
    if mode == "strip" then
        result = displayText
    else
        -- "extract" and any unknown mode
        result = pageName
    end

    wikiLinkCache[cacheKey] = result
    return result
end

-- Convenience wrapper: run p.processWikiLink in "extract" mode.
-- @param value Candidate wiki link
-- @return Whatever p.processWikiLink returns for the "extract" mode
function p.extractFromWikiLink(value)
    local extracted = p.processWikiLink(value, "extract")
    return extracted
end

-- Wrap a plain value in [[ ]] when the field configuration asks for it.
-- @param value Value to process
-- @param field Field configuration; only field.autoWikiLink is read. A nil
--              (or false) field is treated as "no auto-linking".
-- @return "[[value]]" when autoWikiLink is truthy and the value contains no
--         [[...]] yet; otherwise the value unchanged (including nil, empty
--         and non-string values)
function p.applyWikiLinkHandling(value, field)
    -- Only non-empty strings can be linked
    if type(value) ~= "string" or value == "" then
        return value
    end

    -- Guard the lookup so a missing field configuration does not raise
    local autoWikiLink = field and field.autoWikiLink

    -- Do not double-wrap values that already contain a wiki link
    if autoWikiLink and not value:find("%[%[.-%]%]") then
        return "[[" .. value .. "]]"
    end

    return value
end

-- Choose between a value and its processed form so wiki links are not lost.
-- The original is returned only when all of these hold: both values are
-- strings, preserveWikiLinks is truthy, the original contains a [[...]] link
-- and the processed value contains none. In every other case the processed
-- value is returned.
-- @param originalValue Value before processing
-- @param processedValue Value after processing
-- @param preserveWikiLinks Whether lost wiki links should be restored
-- @return originalValue or processedValue, as described above
function p.preserveWikiLinks(originalValue, processedValue, preserveWikiLinks)
    local bothStrings = type(originalValue) == "string"
        and type(processedValue) == "string"

    -- Nothing to restore for non-strings or when preservation is switched off
    if not (bothStrings and preserveWikiLinks) then
        return processedValue
    end

    local linkPattern = "%[%[.-%]%]"
    local hadLink = originalValue:find(linkPattern) ~= nil
    local hasLink = processedValue:find(linkPattern) ~= nil

    -- Links existed before processing and are gone now: fall back
    if hadLink and not hasLink then
        return originalValue
    end

    return processedValue
end

-- Hand the module table back to whatever loads this file.
return p