-- Permanently protected module
-- From Wikipedia, the free encyclopedia


require('strict');

local getArgs = require ('Module:Arguments').getArgs;



-- Lua patterns for parenthetical annotations that may trail a citation title.
-- Used by cite() to detect extraneous title text and by make_cite_iucn() to extract and strip the annotations.

-- '(amended version of YYYY assessment)'; captures the four-digit year
local amendment_pattern = '%s*%(amended version of (%d%d%d%d) assessment%)';

-- '(errata version published in YYYY)'; captures the four-digit year
local errata_pattern = '%s*%(errata version published in (%d%d%d%d)%)';

-- '(Green Status assessment)'; captures the text between the parentheses
local green_status_pattern = '%s*%((Green Status assessment)%)';





--[[--------------------------< I U C N _ I D E N T I F I E R S _ G E T >--------------------------------------

cs1|2 templates cite single sources;  when the identifiers in |doi=, |page=, and |url= are different from each
other then the template is attempting to cite multiple sources.  This function evaluates the identifier portions
of these parameters.

<args> is the table of template parameters; <error_msgs_t> is a sequence to which error messages are appended.

returns four values: taxon ID and assessment ID extracted from |doi=, then taxon ID and assessment ID extracted
from |page=.  Each value is nil when the parameter is not used or when its identifier pattern does not match.
Error messages are not returned; they are appended to <error_msgs_t>.

the identifier portions of the several parameters must be properly formed:
	|doi=	ends with T<taxon ID>A<assessment ID>.<two-letter language tag>; tag must be one of en, es, fr, pt
	|page=	e.T<taxon ID>A<assessment ID>
	|url=	only new-form urls (https://www.iucnredlist.org/species/<taxon ID>/<assessment ID>) are evaluated

the cross-parameter comparisons are made only when <error_msgs_t> is empty; that includes messages that the
caller added before calling this function.  Differing assessment IDs are tolerated when |errata= is set.

]]

local function iucn_identifiers_get (args, error_msgs_t)
	local doi_taxon_ID, doi_assesment_ID
	local page_taxon_ID, page_assesment_ID
	local url_taxon_ID, url_assesment_ID

	if args.doi then
		local lang_tag
		doi_taxon_ID, doi_assesment_ID, lang_tag = args.doi:match ('[Tt](%d+)[Aa](%d+)%.(%l%l)$')
		if not doi_taxon_ID or not ({['en'] = true, ['es'] = true, ['fr'] = true, ['pt'] = true})[lang_tag] then
			table.insert (error_msgs_t, 'malformed |doi= identifier');
		end
	end

	if args.page then
		page_taxon_ID, page_assesment_ID = args.page:match ('^[eE]%.[Tt](%d+)[Aa](%d+)$')
		if not page_taxon_ID then
			table.insert (error_msgs_t, 'malformed |page= identifier');
		end
	end

	if args.url then
		if args.url:match ('https://www.iucnredlist.org/species/') then			-- must be a 'new-form' url
			url_taxon_ID, url_assesment_ID = args.url:match ('/species/(%d+)/(%d+)')
			if not url_taxon_ID then
				table.insert (error_msgs_t, 'malformed |url= identifier');
			end
		end
	end

	if not error_msgs_t[1] then													-- compare only when there are no error messages so far
		if doi_taxon_ID and page_taxon_ID then
			if (doi_taxon_ID ~= page_taxon_ID or ((doi_assesment_ID ~= page_assesment_ID) and not args.errata)) then
				table.insert (error_msgs_t, '|doi= / |page= mismatch');
			end
		end

		if doi_taxon_ID and url_taxon_ID then
			if (doi_taxon_ID ~= url_taxon_ID or ((doi_assesment_ID ~= url_assesment_ID) and not args.errata)) then
				table.insert (error_msgs_t, '|doi= / |url= mismatch');
			end
		end

		if page_taxon_ID and url_taxon_ID then
			if (page_taxon_ID ~= url_taxon_ID or ((page_assesment_ID ~= url_assesment_ID) and not args.errata)) then
				table.insert (error_msgs_t, '|page= / |url= mismatch');
			end
		end
	end

	return doi_taxon_ID, doi_assesment_ID, page_taxon_ID, page_assesment_ID;		-- url identifiers are used for comparison only
end





--[[--------------------------< I U C N _ V O L U M E _ C H E C K >--------------------------------------------

cross-checks the year-like values that a citation can carry:
	|volume=			(if present)
	|date= or |year=	(if present; |date= is preferred)
	the four digits that follow 'IUCN.UK.' in |doi= (if present)

returns nothing; for each pair of present values that disagree, a message is appended to <maint_msgs_t>.  The two
comparisons against the |doi= year are skipped when |amends= is set.

]]

local function iucn_volume_check (args, maint_msgs_t)
	local volume = args.volume;
	local pub_year = args.date or args.year;
	local doi_year;

	if args.doi then
		doi_year = args.doi:match ('[Ii][Uu][Cc][Nn]%.[Uu][Kk]%.(%d%d%d%d)');	-- nil when |doi= does not have the expected form
	end

	if volume and pub_year then
		if volume ~= pub_year then
			table.insert (maint_msgs_t, '|volume= / |date= mismatch');
		end
	end

	if doi_year and not args.amends then										-- amended assessments may legitimately carry a different doi year
		if volume and (volume ~= doi_year) then
			table.insert (maint_msgs_t, '|volume= / |doi= mismatch');
		end

		if pub_year and (pub_year ~= doi_year) then
			table.insert (maint_msgs_t, '|date= / |doi= mismatch');
		end
	end
end





--[[--------------------------< C I T E >----------------------------------------------------------------------

Wraps {{cite journal}}:
     takes cite journal parameters but updates old style url using electronic page number
     page should be in format e.T13922A45199653
     the url uses                13922/45199653
     so we need to extract the number between T and A (taxon ID) and the number after A (assessment ID)
     the target url is https://www.iucnredlist.org/species/13922/45199653
     usage: {{#invoke:iucn|cite}}
     template: {{Template:Cite iucn}}

<frame> is the Scribunto frame object; template parameters are read from it with getArgs().

returns the expanded {{cite journal}} template followed by any error message, hidden maintenance message, and
(in namespace 0 only) tracking categories.

]]

local function cite (frame)
	local error_msgs_t = {};													-- holds error messages for rendering
	local maint_msgs_t = {};													-- holds hidden maint messages for rendering
	local namespace = mw.title.getCurrentTitle().namespace;						-- used for categorization
	local args = getArgs (frame);												-- local copy of template arguments

	if args.title and (args.title:match (errata_pattern) or args.title:match (amendment_pattern)) then
		table.insert (error_msgs_t, 'title has extraneous text');				-- announce that this template has errata or amendment text
	end

	local doi_taxon_ID, doi_assesment_ID;										-- all of these contain the same identifying info in slightly
	local page_taxon_ID, page_assesment_ID;										-- different forms; iucn_identifiers_get() checks that they agree

	doi_taxon_ID, doi_assesment_ID, page_taxon_ID, page_assesment_ID = iucn_identifiers_get (args, error_msgs_t);

	args.id = nil																-- unset; not supported

	local url_taxon_ID = page_taxon_ID or doi_taxon_ID;							-- select for use in url that we will create
	local url_assesment_ID = page_assesment_ID or doi_assesment_ID;

	local url = args.url;
	if url then
		if url:find ('iucnredlist.org/details/', 1, true) then					-- old-form url
			if url_taxon_ID then												-- when there is an identifier
				url = nil														-- unset; we'll create new url below
			else																-- here when old-form but no identifier that we can use to create new url
				args.url = args.url:gsub ("http:", "https:")					-- sometimes works with redirect on iucn site
			end
			table.insert (maint_msgs_t, 'old-form url')							-- announce that this template has an old-form url
		elseif url:find ('iucnredlist.org/species/', 1, true) then				-- new-form url
--			table.insert (maint_msgs_t, 'new-form url')				--TODO: restore this line when most new-form urls have been removed from article space		-- announce that this template has a new-form url
		else
			table.insert (error_msgs_t, 'unknown url')							-- emit error message
		end
	end

	if not url then																-- when no url or unset old-form url
		if url_taxon_ID then													-- taxon and assessment IDs are captured together so both are set here
			args.url = "https://www.iucnredlist.org/species/" .. url_taxon_ID .. '/' .. url_assesment_ID
		else
			table.insert (error_msgs_t, 'no identifier')						-- emit error message
		end
	end

	-- add journal if not provided (TODO decide if this should override provided value)
	if not args['journal'] and not args['work'] then
		args['journal'] = "[[IUCN Red List|IUCN Red List of Threatened Species]]"
	end

	iucn_volume_check (args, maint_msgs_t);										-- |volume=, |year= (|date=), |doi= must all refer to the same volume

	if not args.volume and (args.year or args.date) then
		args.volume = args.year or args.date
	end

	if args.errata then
		local assessment_year = args.year or args.date or args.volume;			-- year of the assessment that the errata version corrects
		if assessment_year then													-- guard: concatenating nil would raise a script error
			args['orig-date'] = 'errata version of ' .. assessment_year .. ' assessment';
		end
		args.date = args.errata;												-- update publication data to errata year
		args.year = nil;														-- unset these as no longer needed
		args.errata = nil;
	elseif args.amends then
		args['orig-date'] = 'amended version of ' .. args.amends .. ' assessment';
		args.amends = nil;														-- unset as no longer needed
	end
																				-- add free-to-read icon to mark a correctly formed doi
	args['doi-access'] = args.doi and args.doi:match ('10%.2305/[Ii][Uu][Cc][Nn].+[Tt]%d+[Aa]%d+%.%a%a') and 'free' or nil

	local out_t = {};
	if error_msgs_t[1] then
		table.insert (out_t, ' <span class="error" style="font-size:100%">{{[[Template:cite iucn|cite iucn]]}}: error: ');
		table.insert (out_t, table.concat (error_msgs_t, ', '));
		table.insert (out_t, ' ([[Template:Cite iucn#Error messages|help]])</span>');
		if (0 == namespace) then
			table.insert (out_t, '[[Category:cite IUCN errors]]');
		end
	end

	-- maintenance messages are rendered only when there are no error messages.  The opening <span> is emitted
	-- under the same condition as its content and closing tag so that the markup is always balanced.
	if maint_msgs_t[1] and not error_msgs_t[1] then
		table.insert (out_t, '<span class="citation-comment" style="display: none; color: #33aa33; margin-left: 0.3em;">');
		table.insert (out_t, '{{[[Template:cite iucn|cite iucn]]}}: ')
		table.insert (out_t, table.concat (maint_msgs_t, ', '));
		table.insert (out_t, ' ([[Template:Cite iucn#Maintenance messages|help]])</span>');
		if (0 == namespace) then
			table.insert (out_t, '[[Category:cite IUCN maint]]');
		end
	end

	if (not args['doi-access']) and (0 == namespace) then						-- |doi= missing or not of the expected form
		table.insert (out_t, '[[Category:cite IUCN without doi]]');
	end

	return frame:expandTemplate{ title = 'cite journal', args = args } ..		-- the template
		table.concat (out_t);													-- error and maint messages and categories
end





--[=[-------------------------< E T _ A L _ P A T T E R N S >--------------------------------------------------

This is adapted from Module:Citation/CS1/Configuration

This sequence provides Lua patterns for the phrase "et al" and variants in a name.  name_has_etal() tests a name
against each pattern in turn.  The first two patterns are anchored to the end of the name; the third is not.

]=]

local et_al_patterns = {

	"[;,]? *[\"']*%f[%a][Ee][Tt]%.? *[Aa][Ll][%.;,\"']*$",						-- variations on the 'et al' theme

	"[;,]? *[\"']*%f[%a][Ee][Tt]%.? *[Aa][Ll][Ii][AaIi][Ee]?[%.;,\"']*$",		-- variations on the 'et alia', 'et alii' and 'et aliae' themes (false positive 'et aliie' unlikely to match)

	"[;,]? *%f[%a]and [Oo]thers",												-- an alternative to et al.

	}





--[[---------------------< N A M E _ H A S _ E T A L >--------------------------

This is adapted from Module:Citation/CS1

Evaluates the content of <name> for variations on the theme of et al.  html-like
markup (anything between matching angle brackets) is removed before testing.

returns true when <name> matches any pattern in et_al_patterns; returns nothing
(nil) when there is no match or when <name> is nil

]]

local function name_has_etal (name)
	if name then																-- name can be nil in which case just return
		name = name:gsub ('%b<>', '');											-- remove any html markup (typically <i>...</i>)
		for _, pattern in ipairs (et_al_patterns) do							-- loop through all of the patterns
			if name:match (pattern) then										-- if this 'et al' pattern is found in name
				return true;													-- has etal, so return true
			end
		end
	end
end





--[[--------------------------< A U T H O R _ N A M E S _ G E T >----------------------------------------------

creates a list of individual |authorn= parameters from the list of names provided in the raw iucn citation.  names
must have the form: Surname, I. (more than one 'I.' pair allowed but no spaces between I. pairs)

assumes that parenthetical text at the end of the author-name-list is a collaboration
	Name, I.I., & Name, I.I. (Collaboration name)

assumes that <i>et al.</i> is the last name in a list of names

<raw_iucn_cite> is the citation text as copied from the iucn website; the author-name-list is everything that
precedes the first four-digit number and must not itself contain digits.

<params_t> is a table supplied by the caller (EXPERIMENT); the same information that is returned as a string is
also written into it as individual entries keyed 'author1', 'author2', ..., 'display-authors', 'collaboration'.

returns a single string of space-separated |author=, |authorn=, |display-authors=, and |collaboration= parameters;
returns an empty string when no author-name-list can be found in <raw_iucn_cite>.

]]

local function author_names_get (raw_iucn_cite, params_t)						-- EXPERIMENT: <params_t> added
	local author_names = raw_iucn_cite:match ('^([^%d]-)%s+%d%d%d%d');			-- extract author name-list from raw iucn citation

	if not author_names then													-- no recognizable name-list; nothing to add
		return '';
	end

	local collaboration = author_names:match ('%s*(%b())$');					-- get collaboration name if it exists

	if collaboration then														-- when there is a collaboration
		collaboration = collaboration:gsub ('[%(%)]', '');						-- remove bounding parentheses
		author_names = author_names:gsub ('%s*(%b())$', '');					-- and remove collaboration from author-name-list
	end

	local names = author_names:gsub ('%.?,?%s+&%s+', '.|');						-- replace 'separators' (<optional dot><optional comma><space><ampersand><space>) with <dot><pipe>
	names = names:gsub ('%.,%s*', '.|');										-- replace 'separators' (<dot><comma><optional space>) with <dot><pipe>
	names = names:gsub ('(%.%u),', '%1.|');										-- special case for when last initial is missing its trailing dot
	local list = mw.text.split (names, '|');									-- split the string on the pipes into entries in list{}

	if 0 == #list then															-- no 'names' of the proper form; return the original as a single |author= parameter
		params_t['author'] = author_names;										-- EXPERIMENT
		return table.concat ({'|author=', author_names});						-- one return value only; the caller passes the result to table.insert()
	end

	for i, name in ipairs (list) do												-- spin through the list and
		if name_has_etal (name) then											-- if this name has some form of 'et al'
			params_t['display-authors'] = 'etal';								-- EXPERIMENT
			list[i] = '|display-authors=etal';									-- add |display-authors=etal parameter and
			break;																-- assume that the etal was the last 'name' so stop processing names
		else
			params_t['author' .. i] = name;										-- EXPERIMENT
			list[i] = table.concat ({'|author', (i == 1) and '' or i, '=', name});	-- add |authorn= parameter names; create |author= instead of |author1=
		end
	end

	if collaboration then
		params_t['collaboration'] = collaboration;								-- EXPERIMENT
		table.insert (list, table.concat ({'|collaboration', '=', collaboration}));	-- add |collaboration= parameter
	end

	return table.concat (list, ' ');											-- make a big string and return that
end





--[[--------------------------< T I T L E _ G E T >------------------------------------------------------------

extract and format citation title; attempts to get the italic right

''binomen'' (amended or errata title)
''binomen''
''binomen'' ssp. ''subspecies''
''binomen'' subsp. ''subspecies''
''binomen'' var. ''variety''
''binomen'' subvar. ''subvariety''

all of the above may have trailing amended or errata text in parentheses

TODO: are there others?

<raw_iucn_cite> is the citation text as copied from the iucn website.  The title is the text between the first
'<four digits>.' and '. The IUCN Red List of Threatened Species'.

returns the title with wiki italic markup added; returns nil when no title can be found in <raw_iucn_cite>

]]

local function title_get (raw_iucn_cite)
	local title = raw_iucn_cite:match ('%d%d%d%d%.%s+(.-)%s*%. The IUCN Red List of Threatened Species');

	if not title then															-- citation does not have the expected form
		return nil;
	end

	local patterns = {															-- tables of string.match patterns [1] and string.gsub replacements [2]; most specific first
		{'(.-)%sssp%.%s+(.-)%s(%b())$', "''%1'' ssp. ''%2'' %3"},				-- binomen ssp. subspecies (zoology) with errata or amended text
		{'(.-)%sssp%.%s+(.+)', "''%1'' ssp. ''%2''"},							-- binomen ssp. subspecies (zoology)
		{'(.-)%ssubsp%.%s+(.-)%s(%b())$', "''%1'' subsp. ''%2'' %3"},			-- binomen subsp. subspecies (botany) with errata or amended text
		{'(.-)%ssubsp%.%s+(.+)', "''%1'' subsp. ''%2''"},						-- binomen subsp. subspecies (botany)
		{'(.-)%svar%.%s+(.-)%s+(%b())$', "''%1'' var. ''%2'' %3"},				-- binomen var. variety (botany) with errata or amended text
		{'(.-)%svar%.%s+(.+)', "''%1'' var. ''%2''"},							-- binomen var. variety (botany)
		{'(.-)%ssubvar%.%s+(.-)%s(%b())$', "''%1'' subvar. ''%2'' %3"},			-- binomen subvar. subvariety (botany) with errata or amended text
		{'(.-)%ssubvar%.%s+(.+)', "''%1'' subvar. ''%2''"},						-- binomen subvar. subvariety (botany)
		{'(.-)%s*(%b())$', "''%1'' %2"},										-- binomen with errata or amended text
		{'(.+)', "''%1''"},														-- binomen
		}

	for _, v in ipairs (patterns) do											-- spin through the patterns
		if title:match (v[1]) then												-- when a match
			title = title:gsub (v[1], v[2]);									-- add italics
			break;																-- and done
		end
	end

	return title;																-- return the formatted title
end





--[[--------------------------< M A K E _ C I T E _ I U C N >--------------------------------------------------

parses apart an iucn-format citation copied from their webpage and reformats that into a {{cite iucn}} template for substing

automatic substing by User:AnomieBOT/docs/TemplateSubster

parameters read from <frame> with getArgs():
	{{{1}}}		the raw iucn citation text; required
	{{{2}}}		when set to anything, the assembled template is returned as syntax-highlighted wikitext, not rendered
	|ref=		when set, the assembled template is wrapped in <ref name=...>...</ref> tags

returns the preprocessed wikitext.  Raises an error when {{{1}}} is missing.

]]

local function make_cite_iucn (frame)
	local args_t = getArgs (frame);
	local raw_iucn_cite = args_t[1];

	if not raw_iucn_cite then													-- nothing to parse; fail with a message that names the problem
		error ('make_cite_iucn: no IUCN citation text supplied in positional parameter 1', 0);
	end

	local template_t = {'{{cite iucn '};										-- sequence that holds the {{cite iucn}} template as it is being assembled; for nowiki'd output
	local params_t = {};														-- table of parameter/value pairs for substing

	local year, volume, page, doi, accessdate;

	year = raw_iucn_cite:match ('^%D+(%d%d%d%d)');
	volume, page = raw_iucn_cite:match ('(%d%d%d%d):%s+(e%.T%d+A+%d+)%.%s?');
	doi = raw_iucn_cite:match ('10%.2305/IUCN%.UK%.[%d%-]+%.RLTS%.T%d+A%d+%.%a%a');

	accessdate = raw_iucn_cite:match ('Accessed on (.-)%.?$') or raw_iucn_cite:match ('Downloaded on (.-)%.?$');	-- 'Downloaded' → 'Accessed' change occurred December 2021;
	if accessdate then															-- nil when the citation has neither phrase
		accessdate = accessdate:gsub ('^0', '');								-- strips leading 0 in day 01 January 2020 -> 1 January 2020
	end

	table.insert (template_t, author_names_get (raw_iucn_cite, params_t));		-- add author name parameters; as a single string to <template_t>; as individual entries to <params_t>

	table.insert (template_t, table.concat ({' |year=', year}));				-- add formatted year
	params_t.year = year;

	local title = title_get (raw_iucn_cite) or '';								-- empty string when no title could be extracted
	local type_p = title:match (green_status_pattern);
	if type_p then
		title = title:match ('^([^%(]+)%s*%(');									-- keep only the text that precedes the parenthetical annotation
		table.insert (template_t, table.concat ({' |type=', type_p}));			-- add formatted type
		params_t.type = type_p;
	end

	local errata = title:match (errata_pattern);								-- nil unless IUCN citation has errata annotation; else year that this errata published (|date=)
	if errata then
		table.insert (template_t, table.concat ({' |errata=', errata}));		-- add formatted errata
		params_t.errata = errata;
		title = title:gsub (errata_pattern, '');								-- remove errata annotation
	end

	local amends = title:match (amendment_pattern);								-- nil unless IUCN citation has amendment annotation; else year that this assessment amends (|orig-date=)
	if amends then
		table.insert (template_t, table.concat ({' |amends=', amends}));		-- add year of assessment that this assessment amends
		params_t.amends = amends;
		title = title:gsub (amendment_pattern, '');								-- remove amendment annotation
	end

	table.insert (template_t, table.concat ({' |title=', title}));				-- add formatted title
	params_t.title = title;
	table.insert (template_t, table.concat ({' |volume=', volume}));			-- add formatted volume
	params_t.volume = volume;
	table.insert (template_t, table.concat ({' |page=', page}));				-- add formatted page
	params_t.page = page;
	table.insert (template_t, table.concat ({' |doi=', doi}));					-- add formatted doi
	params_t.doi = doi;

	table.insert (template_t, table.concat ({' |access-date=', accessdate}));	-- add formatted access-date
	params_t['access-date'] = accessdate;

	table.insert (template_t, '}}');											-- close the template

	if args_t[2] then															-- if anything in args_t[2], write a nowiki'd version that editors can copy into <ref> tags
		return frame:preprocess (table.concat ({'<syntaxhighlight lang="wikitext" inline="1">', table.concat (template_t), '</syntaxhighlight>'})); -- caveat lector: if left long enough anomiebot will subst this
	end

	if args_t['ref'] then														-- enable subst of ref tags with name
		return frame:preprocess ('<ref name=' .. args_t['ref'] .. '>' .. table.concat (template_t) .. '</ref>')
	end

	return frame:preprocess (table.concat (template_t));						-- render {{cite iucn}} template; substable
end





--[[--------------------------< E X P O R T E D   F U N C T I O N S >------------------------------------------

]]



return {

	cite = cite,

	make_cite_iucn = make_cite_iucn,

	}