Skip to content

Commit

Permalink
dot export: fix file format (#289)
Browse files Browse the repository at this point in the history
as discussed in joernio/joern#5158
I didn't get any feedback if this really solves their issue,
but this is certainly better than before
  • Loading branch information
mpollmeier authored Jan 10, 2025
1 parent de83358 commit c6b0cbc
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 53 deletions.
8 changes: 5 additions & 3 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@ name := "flatgraph"
ThisBuild / organization := "io.joern"
ThisBuild / scalaVersion := scala3

val slf4jVersion = "2.0.16"
val scala3 = "3.5.2"
val scala2_12 = "2.12.20"
val slf4jVersion = "2.0.16"
val commonsTextVersion = "1.13.0"

/** Only the below listed projects are included in things like `sbt compile`.
* We explicitly want to exclude `benchmarks` which requires qwiet.ai / shiftleft
Expand Down Expand Up @@ -51,6 +52,7 @@ lazy val formats = project
name := "flatgraph-formats",
libraryDependencies ++= Seq(
"com.github.tototoshi" %% "scala-csv" % "2.0.0",
"org.apache.commons" % "commons-text" % commonsTextVersion,
"org.scala-lang.modules" %% "scala-xml" % "2.3.0",
"io.spray" %% "spray-json" % "1.3.6",
"com.github.scopt" %% "scopt" % "4.1.0",
Expand Down Expand Up @@ -81,7 +83,7 @@ lazy val domainClassesGenerator_3 = project
libraryDependencies ++= Seq(
"org.slf4j" % "slf4j-simple" % slf4jVersion % Optional,
"com.lihaoyi" %% "os-lib" % "0.9.1",
"org.apache.commons" % "commons-text" % "1.10.0",
"org.apache.commons" % "commons-text" % commonsTextVersion,
"com.github.scopt" %% "scopt" % "4.1.0",
("org.scalameta" %% "scalafmt-dynamic" % "3.7.17").cross(CrossVersion.for3Use2_13),
),
Expand All @@ -97,7 +99,7 @@ lazy val domainClassesGenerator_2_12 = project
libraryDependencies ++= Seq(
"org.slf4j"% "slf4j-simple" % slf4jVersion % Optional,
"com.lihaoyi" %% "os-lib" % "0.9.1",
"org.apache.commons" % "commons-text" % "1.12.0",
"org.apache.commons" % "commons-text" % commonsTextVersion,
"com.github.scopt" %% "scopt" % "4.1.0",
"org.scalameta" %% "scalafmt-dynamic" % "3.7.17",
),
Expand Down
48 changes: 30 additions & 18 deletions formats/src/main/scala/flatgraph/formats/dot/DotExporter.scala
Original file line number Diff line number Diff line change
@@ -1,22 +1,40 @@
package flatgraph.formats.dot

import flatgraph.formats.{ExportResult, Exporter, iterableForList, resolveOutputFileSingle}
import flatgraph.{Accessors, Edge, GNode, Graph, Schema}
import flatgraph.{Accessors, Edge, GNode, Schema}

import java.nio.file.{Files, Path}
import scala.jdk.CollectionConverters.MapHasAsScala
import org.apache.commons.text.StringEscapeUtils
import org.apache.commons.text.translate.LookupTranslator

import java.util.Collections
import scala.jdk.CollectionConverters.MapHasAsJava
import scala.util.Using

/** Exports flatgraph to graphviz dot/gv file
*
* Note: GraphML doesn't natively support list property types, so we fake it by encoding it as a `;` delimited string. If you import this
* into a different database, you'll need to parse that separately.
*
* Export rules for dot format as per https: //github.com/joernio/joern/issues/5158 1) If the attribute value contains special characters
* such as spaces,<,>,=, etc., it must be enclosed in double quotation marks. Otherwise, it will cause syntax errors. 2) Graphviz requires
* that the node ID must be a valid identifier. If the node ID is a pure number (such as 120259084301), it needs to be enclosed in double
* quotation marks, otherwise it will be mistaken for an integer constant. 3) The attribute value contains special characters such as(such
* as CODE=""), which need to be enclosed in quotation marks or escaped in some cases. 4) In Graphviz's. dot file, it is best to use
* semicolons for each node definition, edge definition, and attribute definition; ending. Your file is missing semicolons.
*
* https://en.wikipedia.org/wiki/DOT_(graph_description_language) https://www.graphviz.org/doc/info/lang.html
* http://magjac.com/graphviz-visual-editor/ https://www.slideshare.net/albazo/graphiz-using-the-dot-language
*/
object DotExporter extends Exporter {
override def defaultFileExtension = "dot"
val EndOfLine = ';'

private val lookupMap = Map(
"""\""" -> """\\""", // \ -> \\
"\"" -> """\"""" // " -> \"
)
val translator = new LookupTranslator(Collections.unmodifiableMap(lookupMap.asJava))

override def runExport(schema: Schema, nodes: IterableOnce[GNode], edges: IterableOnce[Edge], outputFile: Path) = {
val outFile = resolveOutputFileSingle(outputFile, s"export.$defaultFileExtension")
Expand All @@ -34,31 +52,29 @@ object DotExporter extends Exporter {
nodeCount += 1
val line = new StringBuffer()
.append(" ")
.append(node.id)
.append(s"[label=${node.label} ")
.append(s""""${node.id}" """)
.append(s"""[label="${node.label}" """)
.append(
Accessors
.getNodeProperties(node)
.iterator
.map { case (key, value) =>
s"$key=${encodePropertyValue(value)}"
s"""$key="${encodePropertyValue(value)}""""
}
.mkString(" ")
)
.append("]")
.append(EndOfLine)
writeLine(line.toString)
}

edges.iterator.foreach { edge =>
edgeCount += 1
val propertyMaybe = Option(edge.property).map(property => s"""property="${encodePropertyValue(property)}"""").getOrElse("")
val line = new StringBuffer()
.append(s" ${edge.src.id()} -> ${edge.dst.id()} ")
.append(s"[label=${edge.label} ")

if (edge.property != null)
line.append(s"property=${encodePropertyValue(edge.property)}")

line.append("]")
.append(s""" "${edge.src.id()}" -> "${edge.dst.id()}"""")
.append(s""" [label="${edge.label}" $propertyMaybe]""")
.append(EndOfLine)
writeLine(line.toString)
}

Expand All @@ -71,13 +87,9 @@ object DotExporter extends Exporter {
private def encodePropertyValue(value: Any): String = {
value match {
case value: String =>
val escaped = value
.replace("""\""", """\\""") // escape escape chars - this should come first
.replace("\"", "\\\"") // escape double quotes, because we use them to enclose strings
s"\"$escaped\""
StringEscapeUtils.builder(translator).escape(value).toString
case list if iterableForList.isDefinedAt(list) =>
val values = iterableForList(list).mkString(";")
s"\"$values\""
iterableForList(list).map(encodePropertyValue).mkString(";")
case value => value.toString
}
}
Expand Down
56 changes: 56 additions & 0 deletions tests/src/test/scala/flatgraph/formats/dot/DotExporterTests.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package flatgraph.formats.dot

import better.files.*
import flatgraph.*
import org.scalatest.matchers.should.Matchers.*
import org.scalatest.wordspec.AnyWordSpec
import testdomains.generic.GenericDomain
import testdomains.generic.edges.ConnectedTo
import testdomains.generic.nodes.NewNodeA

class DotExporterTests extends AnyWordSpec {

"Exporter should export valid dot" in {
val graph = GenericDomain.empty.graph
val node1 = NewNodeA()
.stringMandatory("regular string")
.stringOptional("""<escapeMe1> [escapeMe2] escape=Me3 escape"Me4 escape\Me5 """)
.stringList(Seq("one", "two"))
val node2 = NewNodeA().intMandatory(1).intOptional(2).intList(Seq(10, 11))

DiffGraphApplier.applyDiff(
graph,
GenericDomain.newDiffGraphBuilder
.addEdge(node1, node2, ConnectedTo.Label, "edge property")
)

File.usingTemporaryDirectory(getClass.getName) { exportRootDirectory =>
val exportResult = DotExporter.runExport(graph, exportRootDirectory.pathAsString)
exportResult.nodeCount shouldBe 2
exportResult.edgeCount shouldBe 1
val Seq(exportedFile) = exportResult.files

val result = better.files.File(exportedFile).contentAsString.trim

/* Export rules for dot format as per https: //github.com/joernio/joern/issues/5158 :
* 1) If the attribute value contains special characters such as spaces,<,>,=, etc., it must be enclosed in double quotation marks.
* Otherwise, it will cause syntax errors.
* 2) Graphviz requires that the node ID must be a valid identifier. If the node ID is a pure number (such as 120259084301),
* it needs to be enclosed in double quotation marks, otherwise it will be mistaken for an integer constant.
* 3) The attribute value contains special characters such as(such as CODE=""), which need to be enclosed in quotation marks or escaped in some cases.
* 4) In Graphviz's. dot file, it is best to use semicolons for each node definition, edge definition, and attribute definition; ending. Your file is missing semicolons.
*/

withClue(s"actual result was: `$result`") {
result.trim shouldBe
"""digraph {
| "0" [label="node_a" int_mandatory="42" string_list="one;two" string_mandatory="regular string" string_optional="<escapeMe1> [escapeMe2] escape=Me3 escape\"Me4 escape\\Me5 "];
| "1" [label="node_a" int_list="10;11" int_mandatory="1" int_optional="2" string_mandatory="<empty>"];
| "0" -> "1" [label="connected_to" property="edge property"];
|}
|""".stripMargin.trim
}
}
}

}
32 changes: 0 additions & 32 deletions tests/src/test/scala/flatgraph/formats/dot/DotTests.scala

This file was deleted.

0 comments on commit c6b0cbc

Please sign in to comment.