-- Dump a table to help develop other modules.
-- It is also possible to use mw.dumpObject() but the result from this
-- module is clearer and is close to valid Lua source.
-- The main purpose is to allow easy inspection of Wikidata items.
-- Preview the following in a sandbox to see entity Q833639 as a Lua table:
--   {{#invoke:dump|wikidata|Q833639}}
-- Preview the following to dump a built-in table:
--   {{#invoke:dump|testcase}}

local function collection()
	-- Create an empty list object. Items live at integer keys 1..n of the
	-- same table that carries the count (n) and the helper methods.
	local list = { n = 0 }

	-- Append item after the current last item.
	function list:add(item)
		self.n = self.n + 1
		self[self.n] = item
	end

	-- Concatenate the stored items (strings or numbers) using sep.
	function list:join(sep)
		return table.concat(self, sep)
	end

	-- Sort the stored items in place, optionally with comparator comp.
	function list:sort(comp)
		table.sort(self, comp)
	end

	return list
end

local function pre_block(text)
	-- Pre tags returned by a module do not act like wikitext <pre>...</pre>,
	-- so the text is passed through mw.text.nowiki before being wrapped.
	local body = mw.text.nowiki(text)
	-- Make sure the closing tag starts on its own line.
	local newline = '\n'
	if text:sub(-1) == '\n' then
		newline = ''
	end
	return '<pre>\n' .. body .. newline .. '</pre>\n'
end

local function make_tabstr(indent)
	-- Build the text used for a single level of indentation.
	-- indent is 'tab' for a tab character, or a width from 1 to 32 given as
	-- a number or numeric string; anything else gives four spaces.
	if indent == 'tab' then
		-- Tabs do not work well in a browser edit window, but can force them.
		return '\t'
	end
	local width = tonumber(indent)
	local usable = type(width) == 'number' and 1 <= width and width <= 32
	if not usable then
		width = 4
	end
	return (' '):rep(width)
end

local function _dumphtml(html, tabwidth)
	-- Return a pretty-text formatted dump of an html string, wrapped by
	-- pre_block. A non-string html gives an empty string.
	-- tabwidth selects the indent for each nesting level (see make_tabstr).
	-- This assumes clean html, for example, tag "<table>" not "< table >".
	if type(html) ~= 'string' then
		return ''
	end
	local selfClosingTags = {  -- from mw.html.lua
		area = true,
		base = true,
		br = true,
		col = true,
		command = true,
		embed = true,
		hr = true,
		img = true,
		input = true,
		keygen = true,
		link = true,
		meta = true,
		param = true,
		source = true,
		track = true,
		wbr = true,
	}
	local tabstr = make_tabstr(tabwidth)
	local function indent_pad(depth, isfirst)
		-- Return a string with a newline and an indent to match depth.
		-- At depth 0 the newline is omitted for the very first tag only.
		if depth > 0 then
			return '\n' .. string.rep(tabstr, depth)
		end
		return isfirst and '' or '\n'
	end
	local function extract(result, html, pos, len, depth, currenttag)
		-- Dump more of html into table result and return new pos.
		-- Returns just past the first closing tag found at this level, or
		-- len + 1 when the rest of html has been consumed.
		local has_child
		while pos <= len do
			local s, e = html:find('<[^<>]*>', pos)
			if s then
				if s > pos then
					-- Text preceding the tag is copied unchanged.
					table.insert(result, html:sub(pos, s-1))
				end
				if html:sub(s+1, s+1) == '/' then
					-- A closing tag.
					local tag = html:match('^([a-zA-Z0-9]+)>', s+2) or 'NOTAG'
					if tag == currenttag then
						-- Only put the closing tag on its own line if the
						-- element contained child elements.
						local indent = has_child and indent_pad(depth - 1) or ''
						table.insert(result, indent .. '</' .. tag .. '>')
					else
						-- Should never happen with balanced html.
						table.insert(result, '\n</' .. tag .. '>')
					end
					return e + 1
				end
				local tag = html:match('^[a-zA-Z0-9]+', s+1) or 'NOTAG'
				if html:sub(e-1, e-1) == '/' or selfClosingTags[tag] then
					-- A self-closing tag.
					table.insert(result, html:sub(s, e))
					pos = e + 1
				else
					-- An opening tag.
					table.insert(result, indent_pad(depth, pos == 1) .. html:sub(s, e))
					pos = extract(result, html, e+1, len, depth+1, tag)
					has_child = true
				end
			else
				-- No more tags: the remainder is plain text.
				table.insert(result, html:sub(pos))
				break
			end
		end
		return len + 1
	end
	local result = {}
	-- Drop whitespace between tags and fold remaining newlines into spaces.
	html = html:gsub('>%s+<', '><'):gsub('\n%s*', ' ')
	local len = #html
	local pos = 1
	-- extract returns early when it meets a closing tag, including an
	-- unmatched one at the top level. Resume scanning from the returned
	-- position so that any following content is not silently dropped.
	while pos <= len do
		pos = extract(result, html, pos, len, 0)
	end
	return pre_block(table.concat(result))
end

local function dumphtml(frame)
	-- Frame entry point for _dumphtml.
	-- The html text is parameter 1 and the indent is parameter "indent";
	-- each is taken from the frame's own args, falling back to the args of
	-- the parent frame.
	local args = frame.args
	-- getParent() returns nil when the frame has no parent, so fall back to
	-- an empty table rather than raising an error on indexing nil.
	local parent = frame:getParent()
	local pargs = parent and parent.args or {}
	local text = args[1] or pargs[1]
	local indent = args.indent or pargs.indent
	return _dumphtml(text, indent)
end

local function quoted(str)
	-- Return str as a double-quoted string literal on a single line.
	-- %q writes a newline as a backslash followed by a real newline, so
	-- each such pair is turned into the two characters \n.
	local literal = ('%q'):format(str)
	local oneline = literal:gsub('\\\n', '\\n')
	return oneline
end

local function iterkeys(var, control)
	-- Return an iterator over the keys of var (which should be a table).
	-- Keys visited by ipairs come first, then the remaining keys sorted
	-- with numbers ahead of other types.
	-- Each step of the iterator returns key, repr where key is the actual
	-- key and repr is how to show it: a number for the ipairs keys, or a
	-- string for every other key (including number keys outside the
	-- ipairs range).
	-- If var is not a table, the iterator returns nothing.
	if type(var) ~= 'table' then
		return function () return nil end
	end

	-- Keys reached by ipairs are represented by their own number.
	local in_sequence = {}
	local entries = collection()
	for index in ipairs(var) do
		in_sequence[index] = true
		entries:add({ index, index })
	end

	-- Everything else needs sorting to give a stable order.
	local others = collection()
	for key in pairs(var) do
		if not in_sequence[key] then
			others:add(key)
		end
	end

	local autoname = control.autoname
	local function before(a, b)
		local ta, tb = type(a), type(b)
		if ta ~= tb then
			-- Numbers sort ahead of everything; other types by type name.
			if ta == 'number' then
				return true
			end
			if tb == 'number' then
				return false
			end
			return ta < tb
		end
		if ta == 'number' or ta == 'string' then
			return a < b
		end
		if ta == 'boolean' then
			-- false sorts before true.
			return b and not a
		end
		-- Tables, functions and so on are ordered by their generated names.
		return autoname(a) < autoname(b)
	end
	others:sort(before)

	for _, key in ipairs(others) do
		local kind = type(key)
		local repr
		if kind == 'string' then
			if key:match('^[%a_][%w_]*$') then
				-- Looks like an identifier, so no brackets are needed.
				repr = key
			else
				repr = '[' .. quoted(key) .. ']'
			end
		elseif kind == 'number' then
			repr = '[' .. key .. ']'
		elseif kind == 'boolean' then
			repr = '[' .. tostring(key) .. ']'
		else
			repr = autoname(key)
			-- Record that this generated name appears in the output.
			control.needed[repr] = true
		end
		entries:add({ key, repr })
	end

	local cursor = 0
	return function ()
		if cursor >= entries.n then
			return
		end
		cursor = cursor + 1
		return unpack(entries[cursor])
	end
end

local function vardump(var, vname, depth, control, self, parents)
	-- Append to control.items the output items that describe var.
	--   vname   : label for the value; shown as "vname = " only when it is
	--             a string (number labels give no key).
	--   depth   : nesting level, used as the indent of each item.
	--   control : shared state (items, needed, autoname, limits).
	--   self    : generated name of the table currently being dumped.
	--   parents : set of generated names of tables already opened.
	local function put(value, options)
		-- Add one output item; a trailing comma is added except for an
		-- opening brace or an item at indent 0.
		local kind, note, indent
		if options then
			kind, note, indent = options.kind, options.note, options.indent
		end
		indent = indent or depth
		local suffix = ','
		if kind == 'open' or indent == 0 then
			suffix = ''
		end
		local key
		if kind ~= 'close' and type(vname) == 'string' then
			key = vname
		end
		control.items:add({
			key = key,
			value = value .. suffix,
			depth = indent,
			note = note
		})
	end

	local vtype = type(var)
	if vtype == 'nil' then
		put('nil')
		return
	end
	if vtype == 'string' then
		put(quoted(var))
		return
	end
	if vtype == 'boolean' or vtype == 'number' then
		put(tostring(var))
		return
	end
	if vtype ~= 'table' then
		-- function (or userdata or thread)
		put(control.autoname(var))
		return
	end

	local this = control.autoname(var)
	if depth >= control.limitdepth then
		-- Too deep: show only the generated name.
		put(this)
		return
	end
	if parents and parents[this] then
		-- Already opened: refer to it by name instead of dumping it again.
		control.needed[this] = true
		local note = (self == this) and 'self' or 'repeat'
		put(this, {note = note})
		control.needed[note] = true
		return
	end

	parents = parents or {}
	parents[this] = true
	put('{', {kind = 'open', note = this})
	local mt = getmetatable(var)
	if mt then
		vardump(mt, '__metatable', depth + 1, control, this, parents)
	end
	-- Limit how many items this table (with its descendants) may add.
	local maxsize = control.items.n + control.limititems
	for key, keyrep in iterkeys(var, control) do
		if control.items.n > maxsize then
			put('...more...')
			break
		end
		vardump(var[key], keyrep, depth + 1, control, this, parents)
	end
	put('}', { kind = 'close' })
end

local function dumper(var, vname, tabwidth, wantraw, limititems, limitdepth)
	-- Return a string representing var in almost-correct Lua syntax.
	--   vname      : name shown for var (default 'variable').
	--   tabwidth   : indent for each level (see make_tabstr).
	--   wantraw    : if true return the plain text, otherwise wrap the
	--                text with pre_block.
	--   limititems : item limit passed to vardump (default 10000).
	--   limitdepth : depth limit passed to vardump (default 50).
	-- There is no newline at the end of the raw result.
	local names_by_object = {}
	local count_by_type = {}
	local control = {
		autoname = function (object)
			-- Return a string that is a unique name for object, given it
			-- is not a number or string. The name is the type plus a
			-- per-type serial number, remembered for later calls.
			local name = names_by_object[object]
			if not name then
				local kind = type(object)
				local serial = (count_by_type[kind] or 0) + 1
				count_by_type[kind] = serial
				name = kind .. '_' .. serial
				names_by_object[object] = name
			end
			return name
		end,
		limititems = limititems or 10000,
		limitdepth = limitdepth or 50,
		items = collection(),
		needed = {},
	}
	vardump(var, tostring(vname or 'variable'), 0, control)

	local tabstr = make_tabstr(tabwidth)
	local lines = collection()
	for _, item in ipairs(control.items) do
		local prefix = item.key and (item.key .. ' = ') or ''
		-- A note is only shown if something flagged it as needed.
		local suffix = ''
		local note = item.note
		if note and control.needed[note] then
			suffix = '  -- ' .. note
		end
		lines:add(string.rep(tabstr, item.depth) .. prefix .. item.value .. suffix)
	end

	local raw = lines:join('\n')
	if wantraw then
		return raw
	end
	return pre_block(raw)
end

local function dump_testcase(frame)
	-- Frame entry point that dumps a built-in table.
	-- If parameter 1 is 'G' or '_G' the global table _G is dumped;
	-- otherwise a fixed test table built below is dumped.
	-- Parameter "indent" of the frame is passed on as the indent setting.
	local item = frame.args[1]
	if item == 'G' or item == '_G' then
		return dumper(_G, '_G', frame.args.indent)
	end
	-- A table used below both as a value and as a key; it has a zero key
	-- and a key that is itself an anonymous table.
	local fruit = { 'apple', 'banana', [0] = 'zero', [{'anon'}] = 'anon' }
	-- The test table mixes positional items with number keys outside the
	-- sequence, string keys that are not identifiers, boolean keys, a
	-- function key, a table key, and nested tables.
	local testcase = {
		[100] = 'one hundred',
		[99] = 'ninety nine',
		[0.5] = 'one half',
		[-1] = 'negative one',
		'one',
		'two',
		[' '] = 'space',
		['1 –◆— z'] = 'unicode',
		alpha = 'aaa',
		beta = 'bbb',
		c = 123,
		data = {
			dumper = dumper,
			[dumper] = 'dumper',
			'three',
			'four',
			T = true,
			[true] = 'T',
			alpha2 = 'aaa2',
			beta2 = 'bbb2',
			F = false,
			[false] = 'F',
			c2 = 1234,
			data2 = {
				'five',
				'six',
				alpha3 = 'aaa3',
				beta3 = 'bbb3',
				c3 = 12345,
				fruit = fruit,
				[fruit] = 'fruit',
			},
		},
		z = 'zoo',
	}
	-- Add references from tables to themselves and to their ancestors so
	-- the dump has to cope with cycles.
	testcase.testcase = testcase
	testcase.data.me = testcase.data
	testcase.data.data2.me = testcase
	testcase.data.data2.fruit.back = testcase.data
	-- Give one nested table a metatable holding two metamethods.
	setmetatable(testcase.data, {
		__index = function (self, key) return type(key) == 'string' and #key or nil end,
		__tostring = function (self) return tostring(#self) end,
	})
	return dumper(testcase, 'testcase', frame.args.indent)
end

local function wikidata(frame)
	-- Frame entry point that dumps a Wikidata entity.
	-- Parameter 1 must be P or Q followed by digits (surrounding
	-- whitespace is ignored); otherwise a message is returned instead.
	-- Parameter "indent" of the frame is passed on as the indent setting.
	local item = frame.args[1]
	local id = item and item:match('^%s*([PQ]%d+)%s*$')
	if not id then
		return 'Parameter should be a Wikidata identifier such as P2386 or Q833639'
	end
	local entity = mw.wikibase.getEntity(id)
	return dumper(entity, id, frame.args.indent)
end

-- Module interface: names starting with an underscore take plain Lua
-- arguments; the others take a frame.
local exports = {}
exports._dump = dumper
exports._dumphtml = _dumphtml
exports.dumphtml = dumphtml
exports.testcase = dump_testcase
exports.wikidata = wikidata
return exports