Jump to content

Module:Wikt-lang: Difference between revisions

From Wikipedia, the free encyclopedia
Content deleted Content added
+translingual, for scientific names and such things
add shorter ways to turn off italics
Line 241: Line 241:
local word1 = frame.args[2]
local word1 = frame.args[2]
local word2 = frame.args[3]
local word2 = frame.args[3]
local italics = frame.args.italics or "auto"
local italics = frame.args.italics or frame.args.i or "auto"
italics = (italics == "n" or italics == "-") and "no" or italics
local entry, linkText
local entry, linkText
if languageCode then
if languageCode then

Revision as of 08:33, 9 October 2016

-- Loaded only for its side effects (presumably it guards against accidental
-- global variables; confirm against that module's documentation).
require("Module:No globals")

-- Export table; it is returned at the end of the module.
local p = {}

-- Unicode-aware string functions from the mw.ustring library, aliased once so
-- the rest of the module can call them by short names.
local ustring = mw.ustring
local gsub, find, match = ustring.gsub, ustring.find, ustring.match
-- U(codepoint, ...) builds a string from Unicode code points.
local U = ustring.char

--[[ Per-language data, keyed by language code.
	name:         the "canonical name" used on Wiktionary (wiktionaryLink uses it as the section anchor).
	article:      the Wikipedia article about the language (may be empty).
	scripts:      list of ISO 15924 script codes; languageSpan uses the first one as the default.
	              An empty list means "no default script", which languageSpan treats as "unknown".
	replacements: optional map from mw.ustring pattern to replacement text, applied by strip
	              to turn display text into a Wiktionary entry name. ]]

local languages = {
	["ar"] = {
		["name"] = "Arabic",
		["article"] = "Arabic language",
		["scripts"] = { "Arab" },
			--[[ Alif waṣla (U+0671) is replaced by plain alif (U+0627);
			fatḥatan, ḍammatan, kasratan, fatḥa, ḍamma, kasra, shadda, and sukūn (U+064B to U+0652),
			dagger alif (U+0670), and tatweel (U+0640) are removed. ]]
		["replacements"] = {
			[U(0x0671)] = U(0x0627),
			[U(0x064B)] = "",
			[U(0x064C)] = "",
			[U(0x064D)] = "",
			[U(0x064E)] = "",
			[U(0x064F)] = "",
			[U(0x0650)] = "",
			[U(0x0651)] = "",
			[U(0x0652)] = "",
			[U(0x0670)] = "",
			[U(0x0640)] = "",
		},
	},
	["bn"] = {
		["name"] = "Bengali",
		["article"] = "Bengali language",
		["scripts"] = { "Beng" },
	},
	["de"] = {
		["name"] = "German",
		["article"] = "German language",
		["scripts"] = { "Latn" },
		--[[
		["replacements"] = {
			["ae"]    = "ä",
			["oe"]    = "ö",
			["ue"]    = "ü",
			["A[Ee]"] = "Ä",
			["O[Ee]"] = "Ö",
			["U[Ee]"] = "Ü",
		},
		]]
	},
	["en"] = {
		["name"] = "English",
		["article"] = "English language",
		["scripts"] = { "Latn" },
	},
	["es"] = {
		["name"] = "Spanish",
		["article"] = "Spanish language",
		["scripts"] = { "Latn" },
	},
	["fr"] = {
		["name"] = "French",
		["article"] = "French language",
		["scripts"] = { "Latn" },
	},
	["frm"] = {
		["name"] = "Middle French",
		["article"] = "Middle French",
		["scripts"] = { "Latn" },
	},
	["grc"] = {
		["name"] = "Ancient Greek",
		["article"] = "Ancient Greek",
		["scripts"] = { "Grek" },
		["replacements"] = {
			-- Vowels with macrons or breves are replaced with plain letters.
			["[ᾱᾰ]"] = "α",
			["[ᾹᾸ]"] = "Α",
			["[ῑῐ]"] = "ι",
			["[ῙῘ]"] = "Ι",
			["[ῡῠ]"] = "υ",
			["[ῩῨ]"] = "Υ",
			["ϑ"]    = "θ",
			["ϱ"]    = "ρ"
		},
	},
	["hi"] = {
		["name"] = "Hindi",
		["article"] = "Hindi",
		["scripts"] = { "Deva" },
	},
	["ja"] = {
		["name"] = "Japanese",
		["article"] = "Japanese language",
		["scripts"] = { "Jpan" },
	},
	["la"] = {
		["name"] = "Latin",
		["article"] = "Latin",
		["scripts"] = { "Latn" },
		["replacements"] = {
			-- Vowels with macrons, breves, or diaereses are replaced with plain letters.
			["[ĀĂ]"]  = "A",
			["[āă]"]  = "a",
			["[ĒĔ]"]  = "E",
			["[ēĕë]"] = "e",
			["[ĪĬÏ]"] = "I",
			["[īĭï]"] = "i",
			["[ŌŎ]"]  = "O",
			["[ōŏ]"]  = "o",
			["[ŪŬÜ]"] = "U",
			["[ūŭü]"] = "u",
			["Ȳ"]     = "Y",
			["ȳ"]     = "y"
		},
	},
	["mul"] = {
		["name"] = "Translingual",
		["article"] = "",
		-- The key must be "scripts" (plural): languageSpan indexes data["scripts"][1],
		-- and a missing list makes that lookup fail. The list is left empty because
		-- Translingual has no default script, so the script resolves to "unknown".
		["scripts"] = {},
	},
	["pt"] = {
		["name"] = "Portuguese",
		["article"] = "Portuguese language",
		["scripts"] = { "Latn" },
	},
	["pa"] = {
		["name"] = "Punjabi",
		["article"] = "Punjabi language",
		["scripts"] = { "Guru", "Arab", }
	},
	["ru"] = {
		["name"] = "Russian",
		["article"] = "Russian language",
		["scripts"] = { "Cyrl" },
		-- Combining acute accent is removed.
		["replacements"] = { [U(0x0301)] = "", }
	},
	["ur"] = {
		["name"] = "Urdu",
		["article"] = "Urdu",
		["scripts"] = { "Arab" },
	},
	["zh"] = {
		["name"] = "Chinese",
		["article"] = "Chinese language",
		["scripts"] = { "Hani" },
	},
}

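--[[ Blank skeletons for new entries in the languages table (copy one and fill it in).
	Note that the key is "scripts" (plural); languageSpan reads data["scripts"][1].

	[""] = {
		["name"] = "",
		["article"] = "",
		["scripts"] = { "" },
	},

	[""] = {
		["name"] = "",
		["article"] = "",
		["scripts"] = { "" },
		["replacements"] = {
		},
	},

]]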

-- Reports whether a value was actually supplied.
-- Returns false for nil and for the empty string, and true for anything else.
local function checkForString(variable)
	if variable == nil or variable == "" then
		return false
	end
	return true
end

-- Builds a prefix naming the language, linked to its Wikipedia article,
-- for example "[[Latin|Latin]]: " for the code "la".
--   languageCode: a key of the languages table.
-- Returns the prefix string. When the entry has no article (nil or empty
-- string), the bare language name is used so no empty wikilink is produced.
-- Raises an error when the languages table has no entry for languageCode.
local function generatePrefix(languageCode)
	local data = languages[languageCode]
	if data == nil then
		error("generatePrefix: no data for language code " .. tostring(languageCode))
	end
	local name = data["name"]
	local article = data["article"]
	if article == nil or article == "" then
		-- "[[|name]]" is not a usable link, so fall back to plain text.
		return name .. ": "
	end
	return "[[" .. article .. "|" .. name .. "]]: "
end

-- Converts display text into the form used for a Wiktionary entry name.
--   word:         the text to clean; non-string values are converted with tostring.
--   languageCode: key into the languages table; unknown codes only get the
--                 bold/italic markup removed.
-- Returns the cleaned string ("" for empty input).
-- Raises an error when word is nil.
local function strip(word, languageCode)
	-- This check has to come before tostring(): tostring(nil) is the string
	-- "nil", which would otherwise be returned as if it were a real word.
	if word == nil then
		error("The function strip requires a string argument")
	end
	word = tostring(word)
	if word == "" then
		return ""
	end

	-- Remove bold and italics, so that words that contain bolding or emphasis can be linked without piping.
	-- The three-apostrophe form is removed first so no stray apostrophe is left behind.
	word = gsub(word, "'''", "")
	word = gsub(word, "''", "")

	local data = languages[languageCode]
	local replacements = data and data["replacements"]
	if replacements then
		-- Keys are patterns, not plain text. pairs() visits them in unspecified
		-- order, so the entries of one language must not depend on each other.
		for pattern, replacement in pairs(replacements) do
			word = gsub(word, pattern, replacement)
		end
	end
	return word
end

-- Wraps text in an HTML element tagged with the language code.
--   languageCode: value written to the lang and xml:lang attributes.
--   text:         content of the element.
--   script:       optional script code; defaults to the first entry of the
--                 language's "scripts" list, or "unknown" when there is none.
--   italics:      "yes", "no", or anything else (treated as automatic).
-- Returns an <i> element when the text is italicized and a <span> otherwise.
-- Latin-script text is italicized unless italics is "no"; text in an unknown
-- script is italicized only when italics is "yes"; other scripts never are.
local function languageSpan(languageCode, text, script, italics)
	local data = languages[languageCode]
	-- An entry without a "scripts" list must not raise an error here.
	local scripts = data and data["scripts"]
	local scriptCode = script or (scripts and scripts[1]) or "unknown"

	local italicize = (scriptCode == "unknown" and italics == "yes")
		or (scriptCode == "Latn" and italics ~= "no")
	local tag = italicize and "i" or "span"

	return "<" .. tag .. " lang=\"" .. languageCode .. "\" xml:lang=\"" .. languageCode .. "\">"
		.. text .. "</" .. tag .. ">"
end

-- Exported function: tags text with a language code.
--   frame.args[1]: language code.
--   frame.args[2]: text to tag.
-- Returns the element built by languageSpan, using the language's default
-- script and automatic italics.
-- Raises an error with a readable message when either argument is missing,
-- rather than failing later with "attempt to concatenate" on a nil value.
function p.lang(frame)
	local languageCode, text = frame.args[1], frame.args[2]
	if languageCode == nil then
		error("Please provide a language code in the first parameter")
	end
	if text == nil then
		error("Please provide text in the second parameter")
	end
	return languageSpan(languageCode, text)
end

-- Builds a language-tagged link to a language section of a Wiktionary entry.
--   languageCode: language code; required.
--   entry:        Wiktionary page name; defaults to linkText.
--   linkText:     displayed text; defaults to entry.
--   script, italics: passed through to languageSpan.
-- Returns the result of languageSpan wrapped around
-- "[[wikt:ENTRY#LANGUAGE NAME|LINK TEXT]]".
-- Raises an error when languageCode is missing, or when both entry and
-- linkText are missing.
local function wiktionaryLink(languageCode, entry, linkText, script, italics)
	if not languageCode then
		error("wiktionaryLink needs a language code")
	end

	-- Either value may stand in for the other, which is what the error message
	-- below has always promised ("entry or link text, or both").
	entry = entry or linkText
	linkText = linkText or entry
	if not entry then
		error("wiktionaryLink needs a Wiktionary entry or link text, or both")
	end

	local data = languages[languageCode]
	-- Prefer the name from the languages table; otherwise ask MediaWiki for the English name.
	-- On other languages' wikis, use mw.getContentLanguage():getCode(), or replace with that wiki's language code.
	local name = data and data.name or mw.language.fetchLanguageName(languageCode, 'en')

	return languageSpan(languageCode, "[[wikt:" .. entry .. "#" .. name .. "|" .. linkText .. "]]", script, italics)
end

-- Exported function: links a word to its Wiktionary entry.
--   frame.args[1]: language code, optionally followed by a script code
--                  (one uppercase and three lowercase letters at the end).
--   frame.args[2]: the word; after strip() it becomes the entry name.
--   frame.args[3]: optional link text; defaults to the second parameter.
--   frame.args.italics or frame.args.i: italics setting; "n" and "-" are
--                  shorthand for "no"; defaults to "auto".
-- Returns the link built by wiktionaryLink.
-- Raises an error when the language code or the word is missing.
function p.wikt(frame)
	local args = frame.args
	-- A missing first parameter must reach the error below instead of failing inside match().
	local firstParameter = args[1] or ""

	-- A three- or two-letter lowercase sequence at beginning of first parameter
	local languageCode = match(firstParameter, "^%l%l%l") or match(firstParameter, "^%l%l")
	if not languageCode then
		error("Please provide a language code in the first parameter")
	end
	-- One uppercase and three lowercase letters at the end of the first parameter
	local scriptCode = match(firstParameter, "%u%l%l%l$")

	local word1, word2 = args[2], args[3]
	if not checkForString(word1) then
		error("Please provide a word in the second parameter")
	end

	-- An empty "italics=" counts as not given, so that it cannot mask "i=".
	local italics = "auto"
	if checkForString(args.italics) then
		italics = args.italics
	elseif checkForString(args.i) then
		italics = args.i
	end
	if italics == "n" or italics == "-" then
		italics = "no"
	end

	local entry = strip(word1, languageCode)
	-- word2 is a non-empty string whenever the check passes, so and/or is safe here.
	local linkText = checkForString(word2) and word2 or word1

	return wiktionaryLink(languageCode, entry, linkText, scriptCode, italics)
end

return p