-- Module:Str find word
require('strict')
local p = {}
local getArgs = require('Module:Arguments').getArgs
local str = require('Module:String')
local yesno = require('Module:Yesno')
-- Separator used when no usable |sep= value is supplied.
local defaultSep = ','
-- Word limit applied by buildWordTable when reading a single word list.
local iMaxWords = 16
-- Warning text set by buildWordTable when the word limit is hit; nil while no warning is pending.
local warningIMaxWordsReached = nil
-- Number of quoted "literal" words read; only handed on to the explain report.
local xpLitWordCount = 0
local report -- to be initialised (see initReport) when an explanation is needed
-- Initialise the /report subpage: load it into the module-level `report` variable.
-- Only invoked when 'explain' is asked for, so the subpage is not loaded otherwise.
local function initReport()
report = require('Module:Str find word/report')
end
-- Decode HTML entities (e.g. "&#65;" becomes "A") as early as possible,
-- and reduce every run of whitespace to a single plain space.
-- Returns exactly one value, the cleaned string: the outer parentheses drop
-- the substitution count that gsub returns as a second result, so the count
-- can never leak into a caller's argument list.
-- The parameter is deliberately not called `str`, to avoid shadowing the
-- module-level `str` (Module:String).
local function decodeUnicode(sText)
	return (mw.ustring.gsub(mw.text.decode(sText), '%s+', ' '))
end
-- %-escape a word (character string) before feeding it into a string pattern function.
-- Thin wrapper around Module:String's _escapePattern; presumably that escapes
-- all punctuation (%p) -- confirm against Module:String.
local function escape_word(word)
return str._escapePattern(word)
end
-- Reads and parses a word list and returns a table with words (a simple array).
-- The word list can be: source, andwords-to-check, orwords-to-check.
--   step 1: when case-insensitive, turn the string into lowercase
--   step 2: read & remove literals ("..") -- currently switched off
--   step 3: read the separator-delimited words
--   step 4: when booleans is on, change boolean words into 'true'/'false'
--           (Module:Yesno rules); other words stay untouched
-- All words returned are trimmed. A nil or empty list yields an empty table.
-- At most iMaxWords segments are read from one list. When a non-blank word
-- lies beyond that limit it is ignored and warningIMaxWordsReached is set.
-- tArgs fields read: case, sep (falls back to defaultSep), explain, booleans.
local function buildWordTable(tArgs, sWordlist)
	local wordTable = {}

	-- Nothing to parse (nil happens when p._main is called without that list).
	if sWordlist == nil or sWordlist == '' then return wordTable end

	local sep = tArgs.sep or defaultSep

	-- Step 1: case-sensitive?
	if yesno(tArgs.case, true) == false then
		-- Unicode-aware lowercasing; string.lower would only fold ASCII letters.
		sWordlist = mw.ustring.lower(sWordlist)
	end

	-- Step 2: read "literals",
	-- then remove them from the string:
	-- replaced by single comma; idle & keeps word separation
	-- Deliberately switched off; the intended condition is kept for reference:
	--- if yesno(tArgs.literals, false) then
	if false then
		local _, sCount
		_, sCount = mw.ustring.gsub(sWordlist, '"', '')
		if sCount > 1 then
			local litWord = ''
			local i, j
			while sCount > 1 do -- could do here: only when even?
				i = string.find(sWordlist, '%"', 1, false)
				j = string.find(sWordlist, '%"', i+1, false)
				litWord = mw.text.trim(string.sub(sWordlist, i+1, j-1))
				if #litWord > 0 then -- not an empty string or spaces only
					xpLitWordCount = xpLitWordCount + 1
					table.insert(wordTable, litWord)
				end
				-- remove from source, and do next gsub search:
				sWordlist = string.gsub(sWordlist, '%"%s*'
								.. escape_word(litWord)
								.. '%s*%"', ',')
				_, sCount = mw.ustring.gsub(sWordlist, '"', '')
			end
		end
	end

	-- Step 3: parse separator-delimited words.
	-- The list is wrapped in separators so that the frontier pattern also
	-- catches the first and the last word.
	sWordlist = sep .. sWordlist .. sep
	local eSep = escape_word(sep)
	local patstring = '%f[^' .. eSep .. '][^' .. eSep .. ']+%f[' .. eSep .. ']'
	if yesno(tArgs.explain, false) then
		if report == nil then initReport() end -- p._main may be called without p.main
		report.xpMessage('1.eSep: ' .. eSep) -- dev
		report.xpMessage('2.pattern: ' .. patstring) -- dev
	end

	-- str._match returns its no-match value (the separator) once the list is
	-- exhausted. A real segment never contains the separator, so comparing
	-- the untrimmed result is an unambiguous end-of-list test.
	local hitCount = 0
	local bExhausted = false
	while not bExhausted and hitCount < iMaxWords do
		hitCount = hitCount + 1
		local sRaw = str._match(sWordlist, patstring, 1, hitCount, false, sep)
		if sRaw == sep then
			-- no more words found in the string
			bExhausted = true
		else
			local hitWord = mw.text.trim(sRaw)
			if hitWord ~= '' then
				table.insert(wordTable, hitWord)
			end
		end
	end

	-- Limit reached before the list ran out: warn only when a non-blank word
	-- really is being ignored (skip over blank segments while probing).
	if not bExhausted then
		local iProbe = hitCount
		local sExtra
		repeat
			iProbe = iProbe + 1
			sExtra = str._match(sWordlist, patstring, 1, iProbe, false, sep)
		until sExtra == sep or mw.text.trim(sExtra) ~= ''
		if sExtra ~= sep then
			warningIMaxWordsReached = 'Max number of words (' .. tostring(iMaxWords) .. ') reached. Extra words are ignored.'
					.. ' (' .. mw.ustring.sub(mw.text.trim(sWordlist), 1, 90) .. ' ...). '
		end
	end

	-- Step 4: when reading booleans, convert boolean words to 'true'/'false'.
	-- yesno() makes this work for both real booleans and yes/no-like strings.
	if yesno(tArgs.booleans, false) then
		for i, v in ipairs(wordTable) do
			local bValue = yesno(v)
			if bValue ~= nil then
				wordTable[i] = tostring(bValue)
			end
		end
	end

	return wordTable
end
-- Look up a single word in a word table (a simple array of words).
-- Returns the word itself when an identical entry exists, nil otherwise.
local function findWordInTable(sourceWordTable, word)
	for _, candidate in ipairs(sourceWordTable) do
		if candidate == word then
			return word
		end
	end
	return nil
end
-- AND-logic over the andWordTable words: ALL of them must occur in the source.
-- Returns two values: the verdict (true/false) and the table of hit words.
--   note 1: on a false verdict the hit table still lists the words that were found
--           (the loop is not cut short on the first miss, for that reason)
--   note 2: an empty AND-wordlist gives true (nothing falsified it)
local function checkANDwords(sourceWordTable, andWordTable)
	local bAllFound = true
	local tFound = {}
	for _, sWord in ipairs(andWordTable) do
		local sHit = findWordInTable(sourceWordTable, sWord)
		if not sHit then
			bAllFound = false -- Falsified!
		else
			tFound[#tFound + 1] = sHit
		end
	end
	return bAllFound, tFound
end
-- OR-logic over the orWordTable words: at least one of them must occur in the source.
-- Returns two values: the verdict (true/false) and the table of hit words.
--   note 1: an empty OR-wordlist gives true (nothing falsified it)
--   note 2: one hit is enough for a true verdict, yet every word is checked so
--           that the hit table lists all words found
local function checkORwords(sourceWordTable, orWordTable)
	local tFound = {}
	if #orWordTable == 0 then
		return true, tFound
	end
	for _, sWord in ipairs(orWordTable) do
		local sHit = findWordInTable(sourceWordTable, sWord)
		if sHit then
			tFound[#tFound + 1] = sHit -- Confirmed!
		end
	end
	return #tFound > 0, tFound
end
-- Pick the string to hand back to the caller.
-- sYeslist is the logical result of _main: the hit words, or '' for "false".
--   false result: return tArgs.no, or '' when that is not set
--   true result:  return tArgs.yes when it is set (even when it is ''),
--                 otherwise return sYeslist itself
local function yesnoReturnstring(tArgs, sYeslist)
	if sYeslist ~= '' then -- True
		if tArgs.yes ~= nil then
			return tArgs.yes -- some |yes= value was entered, could be ''
		end
		return sYeslist
	end
	-- False
	return tArgs.no or ''
end
-- Returns true when Module:If preview's _warning() produces a non-empty
-- string for a probe message, false when it produces ''.
local function isPreview()
	local sProbe = require('Module:If preview')._warning( {'is_preview'} )
	return sProbe ~= ''
end
-- Explain options (=report info); meant to interpret parameter explain=.
-- Currently a stub: it ignores tArgs and always returns false, so no report is shown.
-- Design notes kept from the original (NOT implemented by the code below):
-- returns true/false/'testcases'
-- explain=true => show report in Preview
-- explain=testcases => WHEN in ns: template: or user: AND subpage = '/testcases' THEN show permanently
local function checkExplain(tArgs)
return false -- never. 22Mar2023 checkExplain(newArgs)
end
-- ===== ===== ===== ===== ===== ===== ===== ===== ===== ===== ===== ===== =====
-- _main function: check for presence of words in source string
-- Checks and returns:
--   when T: the string of all hitwords (default), or the |yes=... input
--   when F: empty string '' (default), or the |no=... input
-- steps:
--   1. input word strings are prepared (parsed into an array of words)
--   2. words checks are made (applying AND-logic, OR-logic)
--   3. final conclusion drawn (T/F)
--   4. optionally, the preview report is prepared (debug, feedback)
--   5. based on T or F status, the return value (string) is established and returned
-- note 1: each return value (yes=.., no=..) can be '' (nulstring)
-- note 2: the result is F when the source has no words, or when both check
--         lists (AND, OR) are empty
-- tArgs fields read here: source, andString, orString, sep, explain, yes, no
-- (plus whatever buildWordTable and the report functions read).
function p._main(tArgs)
	-- Start every call with clean module-level state, so a warning raised by
	-- an earlier call in the same Lua state cannot leak into this result.
	warningIMaxWordsReached = nil
	xpLitWordCount = 0

	-- The report subpage is normally loaded by p.main; load it here too so a
	-- direct call of p._main with explain switched on does not index nil.
	if yesno(tArgs.explain, false) and report == nil then
		initReport()
	end

	local sourceWordTable = buildWordTable(tArgs, tArgs.source)
	local andWordTable = buildWordTable(tArgs, tArgs.andString)
	local orWordTable = buildWordTable(tArgs, tArgs.orString)

	-- logical finding:
	local bANDresult = false
	local bORresult = false
	local resultALL = false
	local tANDhits -- stay nil when there are no words to check
	local tORhits

	if (#sourceWordTable == 0) or (#andWordTable + #orWordTable == 0) then
		-- No words to check
		if yesno(tArgs.explain, false) then
			report.xpNoWords(tArgs, sourceWordTable, andWordTable, orWordTable)
		end
	else
		bANDresult, tANDhits = checkANDwords(sourceWordTable, andWordTable)
		bORresult, tORhits = checkORwords(sourceWordTable, orWordTable)
		resultALL = bANDresult and bORresult
	end

	-- sYeslist = all hit words, AND hits first, then OR hits; '' means "false"
	local sYeslist = ''
	if resultALL then
		local tAllHits = {}
		for _, sWord in ipairs(tANDhits) do
			tAllHits[#tAllHits + 1] = sWord
		end
		for _, sWord in ipairs(tORhits) do
			tAllHits[#tAllHits + 1] = sWord
		end
		sYeslist = table.concat(tAllHits, tArgs.sep)
	end

	if yesno(tArgs.explain, false) then
		if (tArgs.yes == '') and (tArgs.no == '') then
			report.xpYesNoBothBlank()
		end
		if warningIMaxWordsReached ~= nil then
			report.xpMessage(warningIMaxWordsReached)
		end
		report.xpBuildReport(tArgs, sourceWordTable,
				bANDresult, andWordTable, tANDhits,
				bORresult, orWordTable, tORhits,
				sYeslist, xpLitWordCount)
	end

	return yesnoReturnstring(tArgs, sYeslist)
end
-- Set the word separator from the |sep= input.
-- Returns a single character:
--   * defaultSep when sSep is nil, decodes to an empty string, or starts with
--     an irregular character (whitespace, letter or digit);
--   * otherwise the first character of the decoded input.
-- The first *character* is taken with mw.ustring.sub, so a multi-byte UTF-8
-- separator is kept whole instead of being cut after its first byte.
local function setSep(sSep)
	if sSep == nil then return defaultSep end
	-- todo what with {{!}}
	local newSep = mw.ustring.sub(decodeUnicode(sSep), 1, 1)
	-- Whitespace would be trimmed away from every word and alphanumerics
	-- would split the words themselves, so neither can act as a separator.
	if newSep == '' or string.match(newSep, '[%s%w]') ~= nil then
		return defaultSep
	end
	return newSep
end
-- Join the two AND word lists into one string, newSep in between.
-- A nil list is left out; with both lists nil the result is ''.
-- A nil newSep joins the lists without anything in between.
local function concatAndLists(s1, s2, newSep)
	local tParts = {}
	if s1 ~= nil then
		tParts[#tParts + 1] = s1
	end
	if s2 ~= nil then
		tParts[#tParts + 1] = s2
	end
	return table.concat(tParts, newSep)
end
-- Translate the raw template arguments into the option table used by p._main.
-- Aliases are folded together (s/source, w/word, andw/andwords, orw/orwords,
-- case/casesensitive, bool/booleans, literals/lit).
-- Returns a new table with the fields:
--   sep, source, andString, orString, case, booleans, literals, yes, no, explain
local function parseArgs(origArgs)
	local newArgs = {}

	newArgs['sep'] = setSep(origArgs['sep']) -- do first, needed below

	newArgs['source'] = decodeUnicode(origArgs['s'] or origArgs['source'] or '')
	-- Both AND lists are joined with the chosen separator so that the last
	-- word of the first list and the first word of the second stay separate.
	newArgs['andString'] = decodeUnicode(concatAndLists(
								origArgs['w'] or origArgs['word'],
								origArgs['andw'] or origArgs['andwords'],
								newArgs.sep)
								)
	newArgs['orString'] = decodeUnicode(origArgs['orw'] or origArgs['orwords'] or '')

	-- boolean options: catch both parameters, also handle nil & nonsense input values:
	newArgs['case'] = yesno(origArgs['case'] or origArgs['casesensitive'] or true, true) -- defaults to True
	newArgs['booleans'] = yesno(origArgs['bool'] or origArgs['booleans'] or false, false) -- defaults to False
	newArgs['literals'] = yesno(origArgs['literals'] or origArgs['lit'] or true, true) -- defaults to True

	newArgs['yes'] = origArgs['yes'] -- nil; default so return sYeslist; keep '' as legal input & return value
	newArgs['no'] = origArgs['no'] or ''

	newArgs['explain'] = false -- never. 22Mar2023 checkExplain(newArgs)

	return newArgs
end
-- Entry point for #invoke: reads the frame arguments, runs p._main and
-- returns its result string.
-- When a max-words warning was raised, the output of Module:If preview's
-- _warning() for that message is appended.
-- When explain is on, the report is initialised beforehand and its
-- presentation is appended to the result.
function p.main(frame)
	local origArgs = getArgs(frame)
	local tArgs = parseArgs(origArgs)

	if yesno(tArgs.explain, false) then
		initReport()
		report.xpListArguments(origArgs)
	end

	local sReturn = p._main(tArgs)

	if warningIMaxWordsReached ~= nil then
		sReturn = sReturn .. require('Module:If preview')._warning({warningIMaxWordsReached})
	end

	if not yesno(tArgs.explain, false) then
		return sReturn
	end
	return sReturn .. report.xpPresent(tArgs.explain)
end
return p