Skip to content
This repository has been archived by the owner on Jan 28, 2025. It is now read-only.

Probabilistic SHACL implementation #171

Open
wants to merge 1 commit into
base: development
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ static void define(String pref, String ns) {
/**
 * Re-creates the static prefix table and declares the prefixes used by the
 * datashape code: "sh", "psh" (probabilistic SHACL) and "msh".
 * Each declaration goes through define(prefix, namespace); presumably that
 * stores the pair in the table (its body is not shown here, confirm).
 */
static void defNamespace() {
// start from an empty table: any earlier content of prefix is discarded
prefix = new HashMap<>();
define("sh", NSManager.SHAPE);
// Probabilistic SHACL prefix
define("psh", NSManager.PROBSHACL);
define("msh", MSH);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,18 @@ int fib(int n) {
}
}


/**
 * Natural exponential of a numeric datatype.
 *
 * @param dt number whose value is read with {@code doubleValue()}
 * @return a new datatype holding {@code Math.exp(dt.doubleValue())}
 */
public IDatatype exponential(IDatatype dt) {
    final double exponent = dt.doubleValue();
    final double value = Math.exp(exponent);
    return DatatypeMap.newInstance(value);
}

/**
 * The constant pi as a datatype.
 *
 * @return a new datatype holding {@code Math.PI}
 */
public IDatatype pi() {
    final double value = Math.PI;
    return DatatypeMap.newInstance(value);
}

}
35 changes: 35 additions & 0 deletions corese-core/src/main/java/fr/inria/corese/core/shacl/Shacl.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ public class Shacl {
static final String PARSE = SH + "funparse";

static final String SHACL = SH + "shacl";
// Probabilistic SHACL: name of the extended SHACL evaluation function
static final String EXTENDED_SHACL = SH + "extendedshacl";
// Evaluation modes passed as the "mode" argument of the extended eval(...) methods
public static final int PROBABILISTIC_MODE = 1;
public static final int POSSIBILISTIC_MODE = 2; // not available

static final String SHEX = SH + "shex";
static final String SHAPE = SH + "shaclshape";
static final String NODE = SH + "shaclnode";
Expand Down Expand Up @@ -209,6 +215,35 @@ public Graph eval(Graph shacl) throws EngineException {
return eval(SHACL, shacl);
}

/**
 * Extended (probabilistic) SHACL evaluation on the shape graph already held
 * by this object (getShacl()), using a fixed p-value of 0.1.
 * Intended for the Corese GUI.
 *
 * @param mode evaluation mode, e.g. {@link #PROBABILISTIC_MODE}
 * @return the graph produced by the extended SHACL evaluation
 * @throws EngineException if the underlying evaluation fails
 */
public Graph eval(int mode) throws EngineException {
    final IDatatype defaultP = DatatypeMap.createLiteral(
            String.valueOf(0.1),
            fr.inria.corese.sparql.datatype.RDF.xsddouble);
    return eval(EXTENDED_SHACL, getShacl(), mode, defaultP);
}

/**
 * Extended (probabilistic) SHACL evaluation with an explicit p-value.
 * Intended for the Corese server. The given shape graph is stored on this
 * object before the evaluation starts.
 *
 * @param shacl shape graph to evaluate
 * @param mode  evaluation mode, e.g. {@link #PROBABILISTIC_MODE}
 * @param p     p-value handed to the extended SHACL function
 * @return the graph produced by the extended SHACL evaluation
 * @throws EngineException if the underlying evaluation fails
 */
public Graph eval(Graph shacl, int mode, IDatatype p) throws EngineException {
    setShacl(shacl);
    final Graph outcome = eval(EXTENDED_SHACL, shacl, mode, p);
    return outcome;
}

/**
 * Extended (probabilistic) SHACL evaluation with an explicit p-value and an
 * explicit number of considered triples. Intended for the Corese server.
 * The given shape graph is stored on this object before the evaluation starts.
 *
 * @param shacl    shape graph to evaluate
 * @param mode     evaluation mode, e.g. {@link #PROBABILISTIC_MODE}
 * @param p        p-value handed to the extended SHACL function
 * @param nTriples number of triples to consider, handed to the extended SHACL function
 * @return the graph produced by the extended SHACL evaluation
 * @throws EngineException if the underlying evaluation fails
 */
public Graph eval(Graph shacl, int mode, IDatatype p, IDatatype nTriples) throws EngineException {
    setShacl(shacl);
    final Graph outcome = eval(EXTENDED_SHACL, shacl, mode, p, nTriples);
    return outcome;
}

/**
* Evaluate shape/node
*/
Expand Down
50 changes: 38 additions & 12 deletions corese-core/src/main/resources/function/datashape/core.rq
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ function sh:core(report, sh, vis, nodeList, present) {
sh:pop()
}
} ;
sh:log("core.rq", "sh:core", concat(sh, " is conform ? ", res));
return (res)
}
}
Expand Down Expand Up @@ -323,18 +324,43 @@ function sh:coreboolean(shape, cst) {
}
}

# Probabilistic SHACL
# Binomial formulas
# Density of the normal distribution N(mu, sigma^2) at x:
#   1 / (sigma * sqrt(2 * pi)) * exp(-0.5 * ((x - mu) / sigma)^2)
# x, mu: numbers; sigma: standard deviation, must not be 0 (it is used as a divisor).
# Intermediate values are bound with let so they stay local to the call
# and are not written to global variables by set().
function sh:normDensity(x, mu, sigma) {
    let (exponent = -0.5 * power(((x - mu) / sigma), 2)) {
        # power(v, 0.5) is the square root of v
        let (coefficient = 1 / (sigma * power(2 * fun:pi(), 0.5))) {
            return (coefficient * fun:exponential(exponent))
        }
    }
}

# Binomial coefficient C(n, k) ("n choose k"), computed incrementally so that
# no factorial is ever materialised.
# n, k: integers. Returns 0 when k is outside 0..n, and 1 when k = 0 or k = n.
function sh:comb(n, k) {
    # there is no way to choose a negative number of items, or more than n
    if (k < 0 || k > n) {
        return (0)
    } ;
    # symmetry C(n, k) = C(n, n - k): loop over the smaller of k and n - k.
    # The smaller value is bound to a new local; writing "k = n - k" would only
    # be an equality test and would leave k unchanged.
    let (m = if (k > n - k, n - k, k)) {
        if (m = 0) {
            return (1)
        } ;
        # coef is local to this call (let), not a global variable
        let (coef = 1) {
            for (i in xt:iota(0, m - 1)) {
                # after this step coef = C(n, i + 1), which is an integer;
                # floor() only normalises the result of the division
                set(coef = floor(coef * (n - i) / (i + 1)))
            } ;
            return (coef)
        }
    }
}

# P(X = k) for X following a binomial distribution with n trials and success
# probability p.
# When n >= 30, n*p > 5 and n*(1-p) > 5 the value is approximated by the normal
# density of mean n*p and standard deviation sqrt(n*p*(1-p)); otherwise the
# exact formula C(n, k) * p^k * (1-p)^(n-k) is used.
function sh:binomial(n, p, k) {
    let (mean = n * p) {
        if (n >= 30 && mean > 5 && n * (1 - p) > 5) {
            return (sh:normDensity(k, mean, power(mean * (1 - p), 0.5)))
        }
        else {
            return (sh:comb(n, k) * power(p, k) * power(1 - p, n - k))
        }
    }
}













# P(X >= k) for X following a binomial distribution with n trials and success
# probability p: the sum of sh:binomial(n, p, i) for i = k .. n.
# Returns 0 when k > n (empty sum). The sum is computed with a loop rather than
# by recursion, so it terminates for every k and its depth does not grow with n - k.
function sh:binomialEqualOrGreater(n, p, k) {
    if (k > n) {
        return (0)
    } ;
    let (total = 0) {
        # xt:iota(k, n) enumerates k, k + 1, ..., n
        for (i in xt:iota(k, n)) {
            set(total = total + sh:binomial(n, p, i))
        } ;
        return (total)
    }
}
92 changes: 89 additions & 3 deletions corese-core/src/main/resources/function/datashape/main.rq
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
prefix sh: <http://www.w3.org/ns/shacl#>
prefix xsh: <http://www.w3.org/ns/shacl#>
prefix shex: <http://ns.inria.fr/shex/shacl#>
prefix dct: <http://purl.org/dc/terms/>
prefix fun: <function://fr.inria.corese.core.extension.Extension>
prefix psh: <http://ns.inria.fr/probabilistic-shacl/>

@import <api/define.rq> <focus.rq> <ppathparser.rq> <ppath.rq> <operator.rq> <operatorext.rq> <core.rq> <path.rq> <constraint.rq> <target.rq> <report.rq>

Expand All @@ -18,12 +20,14 @@ prefix fun: <function://fr.inria.corese.core.extension.Extension>
#
# Standard SHACL evaluation where the current graph (xt:graph()) is used both
# as shape graph and as the graph from which the focus is computed.
function dt:graph sh:shacl() {
    if (sh:trace()) {
        xt:print("shacl:")
    } ;
    # standard (non extended) evaluation
    set(extended = false);
    sh:shacl(xt:graph(), sh:focus(xt:graph()))
}

# shape is shacl graph
# Standard SHACL evaluation of the given shape graph; the focus is computed
# from that same graph.
function dt:graph sh:shacl(dt:graph shape) {
    if (sh:trace()) {
        xt:print("shacl:")
    } ;
    # standard (non extended) evaluation
    set(extended = false);
    sh:shacl(shape, sh:focus(shape))
}

Expand All @@ -37,6 +41,7 @@ function dt:graph sh:shaclshape(sh) {
# Current graph is RDF and shacl graph
# Standard SHACL evaluation restricted to shape sh of the shacl graph.
function dt:graph sh:shaclShapeGraph(shacl, sh) {
    if (sh:trace()) {
        xt:print("shaclshape:", sh)
    } ;
    # standard (non extended) evaluation
    set(extended = false);
    sh:shacl(shacl, sh:focus(shacl, sh))
}

Expand All @@ -50,6 +55,7 @@ function dt:graph sh:shaclshape(sh, node) {

# Standard SHACL evaluation of shape sh against the single given node.
function dt:graph sh:shaclShapeGraph(shacl, sh, node) {
    if (sh:trace()) {
        xt:print("shaclshape:", sh, node)
    } ;
    # standard (non extended) evaluation
    set(extended = false);
    # focus is built by hand: one entry (sh, list(node))
    let (focus = xt:list(xt:list(sh, xt:list(node)))) {
        sh:shacl(shacl, focus)
    }
}

Expand All @@ -62,9 +68,52 @@ function dt:graph sh:shaclnode(node) {

# Standard SHACL evaluation where the focus is computed for the given node
# with sh:focusnode.
function dt:graph sh:shaclNodeGraph(shacl, node) {
    if (sh:trace()) {
        xt:print("shaclnode:", node)
    } ;
    # standard (non extended) evaluation
    set(extended = false);
    sh:shacl(shacl, sh:focusnode(shacl, node))
}

#
# Extended SHACL evaluation (probabilistic or possibilistic mode).
# shape: shape graph; m: mode id (1 = probabilistic, 2 = possibilistic);
# p: p-value.
# The settings are stored in the global variables extended, mode, prob and
# numInstances. numInstances is set to 0, which makes sh:shacleval compute it
# with sh:getNumInstances().
#
function dt:graph sh:extendedshacl(dt:graph shape, m, p) {
    if (sh:trace()) {
        xt:print("shacl:")
    } ;
    set(extended = true);
    # evaluation mode
    set(mode = xsd:integer(m));
    # p-value
    set(prob = p);
    # 0 = not provided by the caller
    set(numInstances = 0);
    # trace the selected mode
    if (mode == 1) {
        sh:log("main.rq", "sh:extendedshacl", concat("Probabilistic mode used with p=", prob))
    }
    else {
        if (mode == 2) {
            sh:log("main.rq", "sh:extendedshacl", "Possibilistic mode")
        }
        else {
            sh:log("main.rq", "sh:extendedshacl", "ERROR this ID is not found !")
        }
    } ;
    sh:shacl(shape, sh:focus(shape));
}

# Extended SHACL evaluation (probabilistic or possibilistic mode) with a
# caller-provided number of triples.
# shape: shape graph; m: mode id (1 = probabilistic, 2 = possibilistic);
# p: p-value; nTriples: value stored in the global variable numInstances.
# The other settings are stored in the global variables extended, mode and prob.
function dt:graph sh:extendedshacl(dt:graph shape, m, p, nTriples) {
    if (sh:trace()) {
        xt:print("shacl:")
    } ;
    set(extended = true);
    # evaluation mode
    set(mode = xsd:integer(m));
    # p-value
    set(prob = p);
    # number of instances provided by the user
    set(numInstances = nTriples);
    # trace the selected mode
    if (mode == 1) {
        sh:log("main.rq", "sh:extendedshacl", concat("Probabilistic mode used with p= ", prob, " and n-triples= ", numInstances))
    }
    else {
        if (mode == 2) {
            sh:log("main.rq", "sh:extendedshacl", "Possibilistic mode")
        }
        else {
            sh:log("main.rq", "sh:extendedshacl", "ERROR this ID is not found !")
        }
    } ;
    sh:shacl(shape, sh:focus(shape));
}


function dt:graph sh:shex() {
Expand Down Expand Up @@ -115,20 +164,54 @@ function dt:graph sh:shacl(dt:graph shape, focus) {
# focus = mappings or list(sh=nodeShape; list=targetNodeList)
#
# Evaluate every (shape, target node list) entry of focus with sh:core.
# Returns true when every evaluated shape returned true, false otherwise.
# In extended mode (global variable extended) it also prepares the per-shape
# global variables (referenceCardinality, shapeName, exceptions) and calls
# sh:addExtendedShaclReport once sh:core has run.
function sh:shacleval(shape, focus) {
# compute the total number of instances in the graph if numInstances == 0
if (extended && numInstances == 0) {
set(numInstances = sh:getNumInstances());
} ;
# suc stays true until one shape evaluation returns false
let (suc = true) {
for ((sh list) in focus) {
if (sh:trace(),
xt:print("target:", coalesce(sh, "undef"), xt:size(list), list), true);
# set the final report as global variable
set(rep = sh:validationReport());
set(abnode = sh:bnodeid());
# only shapes that are bound and have at least one target node are evaluated
if (bound(sh) && xt:size(list) > 0) {
# NOTE(review): this let and the one further down both call sh:core;
# this line looks like the pre-change version shown by the diff view - confirm.
let (res = sh:core(sh:validationReport(), sh, true, list)) {
sh:log("main.rq", "sh:shacleval", concat("xt:size(list)= ", xt:size(list)));
if(extended) {
# set default value for reference cardinality of current shape
set(referenceCardinality = 0);
# set the name of the given shape
set(shapeName = sh);
# For each shape, we will create a list of exceptions
set(exceptions = xt:list());
} ;
# start core function
let (res = sh:core(rep, sh, true, list)) {
# keep the outcome of the current shape in the global variable result
set(result = res);
if (res, true, set(suc = false))
}
} ;
# write the additional component (Extended SHACL)
if(extended) {
sh:addExtendedShaclReport(rep, exceptions);
} ;
}
else {
sh:log("main.rq", "sh:shacleval", "The current shape is not considered: no nodes to be tested");
} ;
} ;
return (suc)
}
}

# Count the triples matched by the pattern ?s ?p ?o: the nested SELECT binds
# count(*) to n, which is returned.
# Despite the function name this is a number of triples, not a number of
# distinct instances.
# NOTE(review): presumably evaluated on the current RDF data graph - confirm.
function sh:getNumInstances() {
let(select (count(*) as ?n) where {
?s ?p ?o .
}) {
return (n)
}
}

# Focus list computed by sh:focuslist on the current graph (xt:graph()).
function sh:focus() {
    return (sh:focuslist(xt:graph()))
}
Expand Down Expand Up @@ -731,4 +814,7 @@ function sh:checkinit() {
if (bound(mapmap), true, sh:start(xt:graph()))
}


# Print one trace line of the form: <now> - [<file>] - <m> - <msg>
# file: name of the source file; m: name of the calling function; msg: message.
# The line is printed on every call; it does not depend on sh:trace().
function sh:log(file, m, msg) {
    xt:print(
        now(),
        " - [", file, "] - ",
        m,
        " - ", msg
    );
}
Loading