Module:Wikidata/Chemin
Aller à la navigation
Aller à la recherche
La documentation pour ce module peut être créée à Module:Wikidata/Chemin/doc
local datastructure = require "Module:Wikidata/Chemin/Path"
local parser = require "Module:Wikidata/Chemin/parser"
local results = require "Module:Wikidata/Chemin/Resultat"
local iter = require "Module:Iterateurs"
local props = require "Module:Propriétés"

local path = {}

--------------
-- TODO :
-- * Update the "between" path so that it handles epsilon paths better
-- * Test full path rendering
--------------

-- Definition of the PropertyPath class
local PropertyPath = {}
PropertyPath.__index = PropertyPath

--[[
Data structure for the paths that match a path pattern.

A path matching the pattern "subclass of*" is a chain of statement and snak nodes.

Given statements of the form (no qualifiers here, just the subject with the main statement snak) :
* <human> <subclass of> <ape>
* <ape> <subclass of> <mammal>
* <mammal> <subclass of> <animal>

a matching path like "<human> -> <ape> -> <mammal> -> <animal>" is represented by a linked list
of "ResultNode" objects. A result node is a standard mw.wikibase "statement" object augmented
with a few methods and a link that goes from the statement or snak to the previous node in the path.

{
	<mammal> <subclass of> <animal>
	"parent" = {
		<ape> <subclass of> <mammal>
		"parent" = {
			<human> <subclass of> <ape>
			"parent" = EpsilonRNode(<human>, "parent" = nil)
		}
	}
}
--]]

local ResultNode = results.ResultNode
local StatementRNode = results.StatementRNode
local QualifierRNode = results.QualifierRNode
local EpsilonRNode = results.EpsilonRNode

------------------------------------------------------------------------------------------------------

-- Builds an iterator of result nodes from a map of snaks/statements.
--   start_rnode              : result node the new nodes are attached to (passed to rnode_type:create)
--   property_filter_criteria : predicate handed to iter.pair_filter to select entries of the map
--   snak_map_iterator        : iterator over the map (mandatory)
--   rnode_type               : result node class; its "create" method wraps every selected value
-- NOTE(review): the exact contracts of pair_filter / select_vals / flatten / pair_map live in
-- Module:Iterateurs, which is not visible from this file.
local function iterate_on_snaks(start_rnode, property_filter_criteria, snak_map_iterator, rnode_type)
	assert(snak_map_iterator)
	return iter.pair_map(
		iter.flatten(
			iter.select_vals(
				iter.pair_filter(snak_map_iterator, property_filter_criteria)
			),
			iter.on_vals
		),
		function(value)
			return rnode_type:create(value, start_rnode)
		end
	)
end

-- Creates an iterator over all the statements of the item held by start_rnode
-- whose property satisfies property_filter_criteria.
local function iterate_on_statement(start_rnode, property_filter_criteria)
	local item = mw.wikibase.getEntity(start_rnode:item_value())
	-- the entity may not exist, and an entity may come without a "claims" table :
	-- iterate over an empty table instead of failing on a nil index
	local claims = (item and item.claims) or {}
	return iterate_on_snaks(
		start_rnode,
		property_filter_criteria,
		iter.on_pairs(claims),
		StatementRNode
	)
end

-- Creates an iterator over the qualifier snaks of "statement" whose property
-- satisfies qualifier_filter_criteria. The resulting nodes have "statement" as parent.
local function iterate_on_statement_qualifier(statement, qualifier_filter_criteria)
	if statement.qualifiers then
		return iterate_on_snaks(
			statement,
			qualifier_filter_criteria,
			iter.on_pairs(statement.qualifiers),
			QualifierRNode
		)
	else
		-- no qualifier table when the statement has no qualifiers
		return function() return nil end
	end
end

-- Creates an iterator over the best statements of property "pid" of the item held by
-- start_rnode; every statement is wrapped into a StatementRNode whose parent is start_rnode.
local iterate_on_statement_from_property = function(start_rnode, pid)
	local claims = mw.wikibase.getBestStatements(
		start_rnode:item_value(),
		props.normalize(pid)
	) or {}
	return iter.pair_map(
		iter.pair_filter(iter.on_pairs(claims), function(key, val) return true end),
		function(key, value)
			return StatementRNode:create(value, start_rnode)
		end
	)
end

-- Iterator for the "*" operator.
--   start_rnode : the result node from which we iterate
--   child_pnode : the path inside the star operator (for example P31/P31 if the node is (P31/P31)* )
--   depth       : current recursion depth, defaults to 1
--   iterated    : store of the items already used as starting points, to avoid infinite loops ;
--                 defaults to a fresh table
--   max         : the maximum iteration depth, nil for no limit
-- NOTE(review): recursion stops as soon as depth >= max, so with max = M the results seem to be
-- at most M-1 child steps away from start_rnode — confirm this is what callers expect.
local function iterate_on_star(start_rnode, child_pnode, depth, iterated, max)
	iterated = iterated or {}
	depth = depth or 1

	--[[
	In pseudo code using a « yield » operator, the algorithm would be

	algo star(startnode)
		for each value v which matches child_pnode from startnode
			yield v
			for each value vchild in star(v)
				yield vchild
			end for
		end
	end

	But we can't use a yield operator if the « coroutine » module of lua is not activated,
	so this must be translated into something more complicated. Luckily, writing iterators
	in terms of composition pays off : code structurally similar to the algorithm can be
	written thanks to the « flatten » iterator and a recursive closure that creates the
	iterators needed by the recursive nature of the « star » operator.
	--]]

	-- the depth test does not depend on the child node, compute it once
	local depth_overflow = max and depth >= max

	-- kept local : the original declared a global function here
	local function star_from_child(child_rnode)
		local item = child_rnode:item_value()
		if not iterated[item] and not depth_overflow then
			iterated[item] = true
			return iterate_on_star(child_rnode, child_pnode, depth + 1, iterated, max)
		end
		-- already visited or too deep : nothing to iterate on
		return function() end
	end

	return iter.chain(
		iter.singleton(start_rnode),
		iter.flatten(child_pnode:iterate(start_rnode), star_from_child)
	)
end

-- Iterator for the "+" operator : at least one step of child_pnode, then a star from each result.
--   max_depth : forwarded as the "max" of iterate_on_star, nil for no limit
local iterate_on_plus = function(start_rnode, child_pnode, max_depth)
	-- one visited set per call ; it used to leak into a global shared by every call
	local iterated = {}
	return iter.flatten(
		child_pnode:iterate(start_rnode),
		function(rnode)
			return iterate_on_star(rnode, child_pnode, 1, iterated, max_depth)
		end
	)
end

--[[
Test :
p.test("Q5", "subclass of+") -- at the time of writing, "Q215627" is the only direct superclass of human.
It does not show up, but there are numerous superclasses in the result
--]]

--[[
An iterator to handle "/" operator sequences, for example « P31/P279* ».

"creators" is a table of path nodes, each of which can create an iterator over its child
result nodes through its "iterate" method.

In our example, the first element creates an iterator over the P31-statements of an item,
the second one creates an iterator over the path « P279* », and so on.

The resulting iterator yields, for each element of the first iterator, every element of
the iterator the next node creates from that element.

"i" is the index in "creators" to start from (defaults to 1).
--]]
local function iterate_on_iterator_creators(start_rnode, creators, i)
	-- anything that is not a number (or a numeric string) means "start from the beginning"
	i = tonumber(i) or 1

	-- main iterator : the iterator over the values of this node of the path
	local main_iterator = creators[i]:iterate(start_rnode)

	if i < #creators then
		-- pulls the next value of this node from which the path can continue :
		-- the path can only go on from values that hold an item
		local function next_item_rnode()
			local rnode = main_iterator()
			while rnode and not rnode:has_an_item() do
				rnode = main_iterator()
			end
			return rnode
		end

		-- trying to initialize the iterator for the next node with a value of the current one
		local rnode = next_item_rnode()

		-- could not initialize the next iterator with a proper item : return the empty iterator
		if not rnode then
			return function() return end
		end

		-- final iterator : iterates over the elements produced by the rest of the path,
		-- starting from the current value of the main iterator
		local final_iterator = iterate_on_iterator_creators(rnode, creators, i + 1)

		return function()
			while final_iterator ~= nil do
				-- pulling the element from the next node iterator in the sequence
				local final_elem = final_iterator()
				if final_elem then
					return final_elem
				end

				-- we pulled the last element for this value : get a new value for this node
				-- and regenerate the next node iterator to pull new final values
				local rnode_value = next_item_rnode()
				if not rnode_value then
					-- we're over, no next value for this node to continue the path ;
					-- dropping the iterator makes every later call return nothing
					final_iterator = nil
					return
				end
				final_iterator = iterate_on_iterator_creators(rnode_value, creators, i + 1)
			end
		end
	elseif i == #creators then
		return main_iterator
	end
end

--[[
JSBach : Q1339 ; testing with : test("Q1339", "child/child")
wikidata query equivalent query : select ?grandchild where { wd:Q1339 wdt:P40/wdt:P40 ?grandchild }

Adam : wd:Q70899
test("Q70899", "child/child/child")
wikidata query equivalent query : select ?grandgrandchild where { wd:Q70899 wdt:P40/wdt:P40/wdt:P40 ?grandgrandchild }
--]]

-- Iterator for the "|" operator : yields the results of every node of "pnodes" in turn,
-- each of them iterated from start_rnode.
local iterate_on_alternatives = function(start_rnode, pnodes)
	local i = 1
	local current_iter = pnodes[i]:iterate(start_rnode)
	return function()
		-- loop to go to the next iterator if there is an empty one in the list
		while true do
			local res = current_iter()
			if res then
				return res
			end
			i = i + 1
			if i > #pnodes then
				-- no iterator left : ending
				return nil
			end
			-- moving on to the next iterator and resuming the loop
			current_iter = pnodes[i]:iterate(start_rnode)
		end
	end
end

--[[
Adam's father or mother : no value of course
p.test('Q70899', "P22|P25")

JS Bach's
p.test("Q1339", "P22|P25")
--]]

-- Iterator for the "{min,max}" operator : "min" mandatory steps of pnode followed, when "max"
-- is given, by a star limited with max - min.
local function iterate_on_nodes_beetween(start_rnode, pnode, min, max)
	-- the mandatory part : pnode repeated min times
	local seq = {}
	for k = 1, min do
		seq[k] = pnode
	end

	local sequence_obj = {}
	function sequence_obj:iterate(next_rnode)
		if #seq == 0 then
			-- zero mandatory step : the only result is the node itself
			-- NOTE(review): when followed by the star part, this node must answer
			-- has_an_item() for the path to continue — confirm for EpsilonRNode
			return iter.singleton(next_rnode)
		end
		-- walk the whole sequence from its first element
		-- (passing "min" as start index skipped all the steps but the last one)
		return iterate_on_iterator_creators(next_rnode, seq)
	end

	if max then
		local star_obj = {}
		function star_obj:iterate(next_rnode)
			-- nil : let iterate_on_star create a fresh visited set for this starting point
			return iterate_on_star(next_rnode, pnode, 1, nil, max - min)
		end
		return iterate_on_iterator_creators(
			start_rnode,
			{ sequence_obj, star_obj }
		)
	else
		return sequence_obj:iterate(start_rnode)
	end
end

-- Iterator for the "?" operator : yields every result of pnode from start_rnode, then
-- start_rnode itself once, then nothing.
local function iterate_maybe(start_rnode, pnode)
	local iterator = pnode:iterate(start_rnode)
	local self_done = false
	return function()
		if not self_done then
			local val = iterator()
			if val then
				return val
			else
				self_done = true
				return start_rnode
			end
		end
	end
end

-- Compiles the path pattern "str" into a PropertyPath object.
-- The original string is kept in the "path" field and the parsed tree in the "node" field.
-- Raises an error if the parser does not return a node.
function PropertyPath:new(str)
	local obj = {["path"]=str}
	setmetatable(obj, self)
	local ast = parser.parse_path(str)
	assert(ast, "parser did not return a node")
	obj.node = ast
	return obj
end

-- Returns the id of an entity given either as an id string or as a table with an "id" field.
local function entityId(entity)
	if type(entity) == 'string' then
		return entity
	end
	return entity.id
end

-- Turns the supported kinds of starting points into a result node :
-- * a string is wrapped into an EpsilonRNode
-- * a table with a "claims" field is assumed to be an entity ; its id is wrapped into an EpsilonRNode
-- * a table flagged "is_RNode" is returned as is
-- * a table with "qualifiers" or "mainsnak" is treated as a statement and wrapped into a
--   StatementRNode whose parent is an EpsilonRNode for the id found before the "$" of its id
-- Anything else is logged and raises an error.
local function norm_start_point(start_point)
	if type(start_point) == "string" then
		return EpsilonRNode:create(start_point)
	elseif type(start_point) == "table" then
		if start_point["claims"] ~= nil then
			-- assume this is an item or entity object
			return EpsilonRNode:create(start_point.id)
		elseif start_point["is_RNode"] then
			return start_point
		elseif (start_point["qualifiers"] or start_point["mainsnak"])
			and type(start_point.id) == "string" then
			-- extract the item id from the starting statement : everything before the "$".
			-- string.match is used because "^" is not an anchor in string.gmatch, and the
			-- previous greedy pattern would not have stopped at the "$" anyway
			local itemid = string.match(start_point.id, "^([^$]+)")
			return StatementRNode:create(start_point, EpsilonRNode:create(itemid))
		end
	end

	mw.logObject(start_point)
	-- the second argument of error() is a numeric level, not extra text : the offending
	-- value has to be part of the message itself
	error(
		"from function norm_start_point of module PropertyPath : wrong type for start_point : "
			.. tostring(start_point)
	) -- TODO : Log a better error
end

-- Returns an iterator over the result nodes matching this path from start_point
-- (see norm_start_point for the accepted kinds of starting points).
function PropertyPath:iterate(start_point)
	start_point = norm_start_point(start_point)
	return self.node:iterate(start_point)
end

local PropertyNode = datastructure.PropertyNode
local AlternativeNode = datastructure.AlternativeNode
local SequenceNode = datastructure.SequenceNode
local QualifiedStatementNode = datastructure.QualifiedStatementNode
local NegatedPropertySetNode = datastructure.NegatedPropertySetNode
local PlusNode = datastructure.PlusNode
local StarNode = datastructure.StarNode
local BetweenNode = datastructure.BetweenNode
local MaybeNode = datastructure.MaybeNode
local QualifierSnakNode = datastructure.QualifierSnakNode

-- The "iterate" methods below give every kind of path node its way to create an iterator
-- over the result nodes reachable from the result node "rnode".

function PropertyNode:iterate(rnode)
	return iterate_on_statement_from_property(rnode, self.property)
end

--[[
test("Q5", "subclass of")
--]]

function AlternativeNode:iterate(rnode)
	return iterate_on_alternatives(rnode, self.nodes)
end

function NegatedPropertySetNode:iterate(rnode)
	return iterate_on_statement(
		rnode,
		function (property, val)
			return self:matches(property)
		end
	)
end

--[[
test("Q90", "!(P150)")
--]]

function SequenceNode:iterate(rnode)
	return iterate_on_iterator_creators(rnode, self.nodes)
end

-- Iterates over the statements whose property matches self.property, then over the
-- qualifiers of each of them whose property matches self.qualifier.
function QualifiedStatementNode:iterate(rnode)
	local statement_iterator = iterate_on_statement(
		rnode,
		function (key, value)
			return self.property:matches(key)
		end
	)

	local qualifier_iterator_creator = function(statement)
		return iterate_on_statement_qualifier(
			statement,
			function (key, value)
				return self.qualifier:matches(key)
			end
		)
	end

	return iter.flatten(statement_iterator, qualifier_iterator_creator)
end

--[[
to test with :
p.test("Q79529", "union of>of")
p.test("Q105019", 'P22{1,6}')
--]]

-- Here the starting node is a statement node : iterates over its matching qualifiers.
function QualifierSnakNode:iterate(statementnode)
	return iterate_on_statement_qualifier(
		statementnode,
		function (key, value)
			return self:matches(key)
		end
	)
end

--[[
to test with :
for x in p.iterate("Q79529", "union of") do p.test(x, ">of") end
--]]

function StarNode:iterate(rnode)
	return iterate_on_star(rnode, self.node)
end

function PlusNode:iterate(rnode)
	return iterate_on_plus(rnode, self.node)
end

function BetweenNode:iterate(rnode)
	return iterate_on_nodes_beetween(rnode, self.node, self.min, self.max)
end

function MaybeNode:iterate(rnode)
	return iterate_maybe(rnode, self.node)
end

-- Returns an iterator on the result set of a path from a specific node.
-- ppath can either be a string representing a path or a compiled path.
-- Raises an error when start_node is nil.
function path.iterate(start_node, ppath)
	if start_node == nil then
		error("the start node is mandatory to get result on a path, it is nil")
	end
	if type(ppath) == "table" then
		return ppath:iterate(start_node)
	else
		return path.PropertyPath:new(ppath):iterate(start_node)
	end
end

-- Returns a boolean :
-- true if there is a path matching ppath from start_node that ends with the value "value"
-- (currently only works if "value" is a Qid string).
function path.matches(start_node, ppath, value)
	for val in path.iterate(start_node, ppath) do
		if val:item_value() == value then
			return true
		end
	end
	return false
end

----------------------------
--[[
p.test("Q5", "P279")
p.test(mw.wikibase.getEntity("Q5"), "P279")

for x in p.iterate(mw.wikibase.getEntity("Q5"), "P279") do p.test(x, "P279") end -- test if we can continue iteration of an RNode object

Complex test :
p.test("Q27929033","P1552>!()/P31") => OK
p.test("Q27929033","subclass of/P1552>!()/P31") => NOK
--]]

-- Debugging helper : logs a marker line, then the item value, for every result of ppath
-- from start_point.
function path.test(start_point, ppath)
	for x in path.iterate(start_point, ppath) do
		mw.log("woot")
		if x then
			mw.log(x:item_value())
		end
	end
end

-----------------
-- Functions returning a statement or a qualifier snak according to a property path.
-- Used for sorting : they return a sort key for a statement chosen by a path or a set of paths.

-- Takes a property path and creates a function that returns the first result of this path
-- starting from the claim it is given.
function path.snak_key_by_path(path)
	-- the "path" parameter shadows the module table of the same name, so the class is
	-- reached through the PropertyPath upvalue rather than through path.PropertyPath
	local path_to_key = PropertyPath:new(path)
	return function(claim)
		-- the parentheses keep only the first value returned by the iterator
		return (path_to_key:iterate(claim)())
	end
end

-- Takes several property paths and creates a function that returns
-- the first value with a match.
-- example : local get_key = wd.snak_key_by_paths{">P80","P800|P801"}
-- get_key(claim)
-- returns the qualifier value of P80 of the claim if it exists, if not returns the main statement value
-- of P800 of the main value of the « claim » statement, if not the P801 one
-- (used in Module:Infobox/fonctions/personne)
-- Note on the example : TODO : would be equivalent to a single path ">P80|P800|P801" but it's not possible yet
function path.snak_key_by_paths(paths)
	-- compile every path once, when the key function is built
	local paths_to_key = {}
	for k, pat in ipairs(paths) do
		paths_to_key[#paths_to_key + 1] = path.PropertyPath:new(pat)
	end

	return function(claim)
		-- returns the first value of the first matching path starting from « claim »
		for k, path_to_key in ipairs(paths_to_key) do
			local res = path_to_key:iterate(claim)()
			if res then
				return res
			end
		end
	end
end

----------------------------

path.PropertyPath = PropertyPath

return path