Add importbin to sjsonnet (#251)
Per the spec:
https://jsonnet.org/ref/spec.html#:~:text=UTF%2D8.%20For-,importbin,-%2C%20the%20file%20is

importbin imports the file as an array of integers in the range 0-255.
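For a concrete picture of that conversion, here is a small, self-contained Scala sketch (not part of this commit) of what importbin does with each byte: a signed JVM byte is masked with `& 0xff` to get its unsigned 0-255 value, exactly as the new `Evaluator.visitImportBin` does further down in this diff. The sample bytes mirror the new `lib/nonutf8.bin` test fixture.

```scala
object ImportBinSemantics extends App {
  // Assumed sample input: the three bytes of the new test fixture lib/nonutf8.bin.
  val fileBytes: Array[Byte] = Array(0xff.toByte, 0x00.toByte, 0xfe.toByte)

  // A JVM Byte is signed (-128..127); masking with 0xff yields the unsigned
  // 0-255 values that importbin exposes as a Jsonnet array of numbers.
  val asImportbin: Array[Int] = fileBytes.map(b => b & 0xff)

  println(asImportbin.mkString("[", ", ", "]")) // prints [255, 0, 254]
}
```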
stephenamar-db authored Jan 2, 2025
1 parent 9d4dde5 commit bbbc702
Showing 13 changed files with 68 additions and 41 deletions.
2 changes: 1 addition & 1 deletion bench/src/main/scala/sjsonnet/MainBenchmark.scala
@@ -31,7 +31,7 @@ object MainBenchmark {
parseCache = parseCache
)
val renderer = new Renderer(new StringWriter, indent = 3)
interp.interpret0(interp.resolver.read(path).get.readString(), path, renderer).getOrElse(???)
interp.interpret0(interp.resolver.read(path, binaryData = false).get.readString(), path, renderer).getOrElse(???)
(parseCache.keySet.toIndexedSeq, interp.evaluator)
}

2 changes: 1 addition & 1 deletion bench/src/main/scala/sjsonnet/RunProfiler.scala
@@ -24,7 +24,7 @@ object RunProfiler extends App {

def run(): Long = {
val renderer = new Renderer(new StringWriter, indent = 3)
val start = interp.resolver.read(path).get.readString()
val start = interp.resolver.read(path, binaryData = false).get.readString()
val t0 = System.nanoTime()
interp.interpret0(start, path, renderer).getOrElse(???)
System.nanoTime() - t0
2 changes: 1 addition & 1 deletion sjsonnet/src-js/sjsonnet/SjsonnetMain.scala
@@ -24,7 +24,7 @@ object SjsonnetMain {
case null => None
case s => Some(JsVirtualPath(s))
}
def read(path: Path): Option[ResolvedFile] =
def read(path: Path, binaryData: Boolean): Option[ResolvedFile] =
Option(StaticResolvedFile(importLoader(path.asInstanceOf[JsVirtualPath].path)))
},
parseCache = new DefaultParseCache,
28 changes: 21 additions & 7 deletions sjsonnet/src-jvm-native/sjsonnet/CachedResolvedFile.scala
@@ -1,11 +1,11 @@
package sjsonnet

import java.io.{BufferedInputStream, File, FileInputStream}
import fastparse.ParserInput

import java.io.File
import java.nio.charset.StandardCharsets
import java.nio.file.Files

import fastparse.ParserInput

/**
* A class that encapsulates a resolved import. This is used to cache the result of
* resolving an import. If the import is deemed too large (IE it's a large file), then we will avoid keeping it in
@@ -18,16 +18,17 @@ import fastparse.ParserInput
* @param cacheThresholdBytes The maximum size of a file that we will cache in memory. If the file
* is larger than this, then we will serve it from disk
*/
class CachedResolvedFile(val resolvedImportPath: OsPath, memoryLimitBytes: Long, cacheThresholdBytes: Long = 1024 * 1024) extends ResolvedFile {
class CachedResolvedFile(val resolvedImportPath: OsPath, memoryLimitBytes: Long, cacheThresholdBytes: Long = 1024 * 1024, binaryData: Boolean = false) extends ResolvedFile {

private val jFile: File = resolvedImportPath.p.toIO

assert(jFile.exists(), s"Resolved import path ${resolvedImportPath} does not exist")
assert(jFile.exists(), s"Resolved import path $resolvedImportPath does not exist")
// Assert that the file is less than limit
assert(jFile.length() <= memoryLimitBytes, s"Resolved import path ${resolvedImportPath} is too large: ${jFile.length()} bytes > ${memoryLimitBytes} bytes")
assert(jFile.length() <= memoryLimitBytes, s"Resolved import path $resolvedImportPath is too large: ${jFile.length()} bytes > ${memoryLimitBytes} bytes")

private[this] val resolvedImportContent: StaticResolvedFile = {
if (jFile.length() > cacheThresholdBytes) {
// TODO: Support caching binary data
if (jFile.length() > cacheThresholdBytes || binaryData) {
// If the file is too large, then we will just read it from disk
null
} else {
@@ -39,6 +40,8 @@ class CachedResolvedFile(val resolvedImportPath: OsPath, memoryLimitBytes: Long,
new String(Files.readAllBytes(jFile.toPath), StandardCharsets.UTF_8);
}

private[this] def readRawBytes(jFile: File): Array[Byte] = Files.readAllBytes(jFile.toPath)

/**
* A method that will return a reader for the resolved import. If the import is too large, then this will return
* a reader that will read the file from disk. Otherwise, it will return a reader that reads from memory.
@@ -61,6 +64,7 @@ class CachedResolvedFile(val resolvedImportPath: OsPath, memoryLimitBytes: Long,
}
}


override lazy val contentHash: String = {
if (resolvedImportContent == null) {
// If the file is too large, then we will just read it from disk
@@ -69,5 +73,15 @@ class CachedResolvedFile(val resolvedImportPath: OsPath, memoryLimitBytes: Long,
resolvedImportContent.contentHash
}
}

override def readRawBytes(): Array[Byte] = {
if (resolvedImportContent == null) {
// If the file is too large, then we will just read it from disk
readRawBytes(jFile)
} else {
// Otherwise, we will read it from memory
resolvedImportContent.readRawBytes()
}
}
}
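The hunks above also change the in-memory caching decision: binary imports always fall through to the disk path until the TODO is addressed. Below is a hedged restatement of that predicate as a standalone helper; the name and shape are illustrative, not part of the commit.

```scala
// Sketch only: mirrors `jFile.length() > cacheThresholdBytes || binaryData`
// from CachedResolvedFile above, inverted to answer "should we cache?".
def shouldCacheInMemory(fileLengthBytes: Long,
                        cacheThresholdBytes: Long,
                        binaryData: Boolean): Boolean =
  // Binary data is never cached for now because the in-memory representation
  // (StaticResolvedFile) is a decoded UTF-8 String.
  fileLengthBytes <= cacheThresholdBytes && !binaryData
```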

12 changes: 6 additions & 6 deletions sjsonnet/src-jvm-native/sjsonnet/SjsonnetMain.scala
@@ -22,8 +22,8 @@ object SjsonnetMain {
.find(os.exists)
.flatMap(p => try Some(OsPath(p)) catch{case NonFatal(_) => None})

def read(path: Path): Option[ResolvedFile] = {
readPath(path)
def read(path: Path, binaryData: Boolean = false): Option[ResolvedFile] = {
readPath(path, binaryData)
}
}

@@ -200,8 +200,8 @@ object SjsonnetMain {
case Some(i) => new Importer {
def resolve(docBase: Path, importName: String): Option[Path] =
i(docBase, importName).map(OsPath)
def read(path: Path): Option[ResolvedFile] = {
readPath(path)
def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = {
readPath(path, binaryData)
}
}
case None => resolveImport(config.jpaths.map(os.Path(_, wd)).map(OsPath), allowedInputs)
@@ -295,10 +295,10 @@ object SjsonnetMain {
* of caching on top of the underlying file system. Small files are read into memory, while large
* files are read from disk.
*/
private[this] def readPath(path: Path): Option[ResolvedFile] = {
private[this] def readPath(path: Path, binaryData: Boolean): Option[ResolvedFile] = {
val osPath = path.asInstanceOf[OsPath].p
if (os.exists(osPath) && os.isFile(osPath)) {
Some(new CachedResolvedFile(path.asInstanceOf[OsPath], memoryLimitBytes = Int.MaxValue.toLong))
Some(new CachedResolvedFile(path.asInstanceOf[OsPath], memoryLimitBytes = Int.MaxValue.toLong, binaryData = binaryData))
} else {
None
}
2 changes: 1 addition & 1 deletion sjsonnet/src/sjsonnet/Error.scala
@@ -101,7 +101,7 @@ trait EvalErrorScope {
def wd: Path

def prettyIndex(pos: Position): Option[(Int, Int)] = {
importer.read(pos.currentFile).map { s =>
importer.read(pos.currentFile, binaryData = false).map { s =>
val Array(line, col) =
s.getParserInput().prettyIndex(pos.offset).split(':')
(line.toInt, col.toInt)
9 changes: 7 additions & 2 deletions sjsonnet/src/sjsonnet/Evaluator.scala
@@ -61,6 +61,7 @@ class Evaluator(resolver: CachedResolver,
case e: Import => visitImport(e)
case e: Apply0 => visitApply0(e)
case e: ImportStr => visitImportStr(e)
case e: ImportBin => visitImportBin(e)
case e: Expr.Error => visitError(e)
case e => visitInvalid(e)
}
@@ -301,10 +302,14 @@
}

def visitImportStr(e: ImportStr)(implicit scope: ValScope): Val.Str =
Val.Str(e.pos, importer.resolveAndReadOrFail(e.value, e.pos)._2.readString())
Val.Str(e.pos, importer.resolveAndReadOrFail(e.value, e.pos, binaryData = false)._2.readString())

def visitImportBin(e: ImportBin): Val.Arr =
new Val.Arr(e.pos, importer.resolveAndReadOrFail(e.value, e.pos, binaryData = true)._2.readRawBytes().map(
x => Val.Num(e.pos, (x & 0xff).doubleValue)))

def visitImport(e: Import)(implicit scope: ValScope): Val = {
val (p, str) = importer.resolveAndReadOrFail(e.value, e.pos)
val (p, str) = importer.resolveAndReadOrFail(e.value, e.pos, binaryData = false)
cachedImports.getOrElseUpdate(
p,
{
2 changes: 1 addition & 1 deletion sjsonnet/src/sjsonnet/Expr.scala
@@ -126,6 +126,7 @@ object Expr{
case class Bind(pos: Position, name: String, args: Params, rhs: Expr) extends Member
case class Import(pos: Position, value: String) extends Expr
case class ImportStr(pos: Position, value: String) extends Expr
case class ImportBin(pos: Position, value: String) extends Expr
case class Error(pos: Position, value: Expr) extends Expr
case class Apply(pos: Position, value: Expr, args: Array[Expr], namedNames: Array[String]) extends Expr
case class Apply0(pos: Position, value: Expr) extends Expr
@@ -181,5 +182,4 @@ object Expr{
override def toString = s"ObjComp($pos, ${arrStr(preLocals)}, $key, $value, ${arrStr(postLocals)}, $first, $rest)"
}
}

}
33 changes: 18 additions & 15 deletions sjsonnet/src/sjsonnet/Importer.scala
@@ -1,33 +1,31 @@
package sjsonnet

import java.io.{BufferedInputStream, BufferedReader, ByteArrayInputStream, File, FileInputStream, FileReader, InputStream, RandomAccessFile, Reader, StringReader}
import java.nio.file.Files
import java.security.MessageDigest
import scala.collection.mutable
import fastparse.{IndexedParserInput, Parsed, ParserInput}

import java.io.{BufferedInputStream, File, FileInputStream, RandomAccessFile}
import java.nio.charset.StandardCharsets
import scala.collection.mutable


/** Resolve and read imported files */
abstract class Importer {
def resolve(docBase: Path, importName: String): Option[Path]
def read(path: Path): Option[ResolvedFile]
def read(path: Path, binaryData: Boolean): Option[ResolvedFile]

def resolveAndRead(docBase: Path, importName: String): Option[(Path, ResolvedFile)] = for {
private def resolveAndRead(docBase: Path, importName: String, binaryData: Boolean): Option[(Path, ResolvedFile)] = for {
path <- resolve(docBase, importName)
txt <- read(path)
txt <- read(path, binaryData)
} yield (path, txt)

def resolveAndReadOrFail(value: String, pos: Position)(implicit ev: EvalErrorScope): (Path, ResolvedFile) =
resolveAndRead(pos.fileScope.currentFile.parent(), value)
def resolveAndReadOrFail(value: String, pos: Position, binaryData: Boolean)(implicit ev: EvalErrorScope): (Path, ResolvedFile) =
resolveAndRead(pos.fileScope.currentFile.parent(), value, binaryData = binaryData)
.getOrElse(Error.fail("Couldn't import file: " + pprint.Util.literalize(value), pos))
}

object Importer {
val empty: Importer = new Importer {
def resolve(docBase: Path, importName: String): Option[Path] = None
def read(path: Path): Option[ResolvedFile] = None
def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = None
}
}

@@ -97,7 +95,7 @@ class BufferedRandomAccessFile(fileName: String, bufferSize: Int) {
private val fileLength: Long = file.length()

private def fillBuffer(position: Long): Unit = {
if (file.getFilePointer() != position) {
if (file.getFilePointer != position) {
file.seek(position)
}
val bytesRead = file.read(buffer, 0, bufferSize)
@@ -150,6 +148,9 @@ trait ResolvedFile {

// Get a content hash of the file suitable for detecting changes in a given file.
def contentHash(): String

// Used by importbin
def readRawBytes(): Array[Byte]
}

case class StaticResolvedFile(content: String) extends ResolvedFile {
@@ -159,19 +160,21 @@ case class StaticResolvedFile(content: String) extends ResolvedFile {

// We just cheat, the content hash can be the content itself for static imports
lazy val contentHash: String = content

override def readRawBytes(): Array[Byte] = content.getBytes(StandardCharsets.UTF_8)
}

class CachedImporter(parent: Importer) extends Importer {
val cache = mutable.HashMap.empty[Path, ResolvedFile]

def resolve(docBase: Path, importName: String): Option[Path] = parent.resolve(docBase, importName)

def read(path: Path): Option[ResolvedFile] = cache.get(path) match {
def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = cache.get(path) match {
case s @ Some(x) =>
if(x == null) None else s
case None =>
val x = parent.read(path)
cache.put(path, x.getOrElse(null))
val x = parent.read(path, binaryData)
cache.put(path, x.orNull)
x
}
}
@@ -184,7 +187,7 @@ class CachedResolver(
internedStaticFieldSets: mutable.HashMap[Val.StaticObjectFieldSet, java.util.LinkedHashMap[String, java.lang.Boolean]]) extends CachedImporter(parentImporter) {

def parse(path: Path, content: ResolvedFile)(implicit ev: EvalErrorScope): Either[Error, (Expr, FileScope)] = {
parseCache.getOrElseUpdate((path, content.contentHash.toString), {
parseCache.getOrElseUpdate((path, content.contentHash()), {
val parsed = fastparse.parse(content.getParserInput(), new Parser(path, strictImportSyntax, internedStrings, internedStaticFieldSets).document(_)) match {
case f @ Parsed.Failure(_, _, _) =>
val traced = f.trace()
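Because `read` now takes a `binaryData` flag and `ResolvedFile` gains `readRawBytes`, embedders with custom importers have to update their implementations. Here is a hedged sketch against the new signatures, using `DummyPath` and `StaticResolvedFile` the same way `Std0150FunctionsTests` does later in this diff; the in-memory file table and its contents are illustrative only.

```scala
import sjsonnet._

// Sketch only: an in-memory Importer compiled against the new read signature.
val inMemoryImporter: Importer = new Importer {
  private val files = Map(
    "config.libsonnet" -> "{ answer: 42 }",
    // Note: StaticResolvedFile stores a String, so readRawBytes() returns its
    // UTF-8 bytes; genuinely binary content would need a different ResolvedFile.
    "blob.bin" -> "AB"
  )
  def resolve(docBase: Path, importName: String): Option[Path] =
    files.get(importName).map(_ => DummyPath(importName))
  // binaryData is true for importbin and false for import/importstr; a real
  // implementation could skip text decoding when it is set.
  def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = path match {
    case DummyPath(name) => files.get(name).map(StaticResolvedFile(_))
    case _               => None
  }
}
```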
4 changes: 3 additions & 1 deletion sjsonnet/src/sjsonnet/Parser.scala
@@ -36,7 +36,7 @@ object Parser {

val keywords = Set(
"assert", "else", "error", "false", "for", "function", "if", "import", "importstr",
"in", "local", "null", "tailstrict", "then", "self", "super", "true"
"in", "local", "null", "tailstrict", "then", "self", "super", "true", "importbin"
)

def idStartChar(c: Char) = c == '_' || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
@@ -239,6 +239,7 @@ class Parser(val currentFile: Path,

def local[_: P] = P( localExpr )
def importStr[_: P](pos: Position) = P( importExpr.map(Expr.ImportStr(pos, _)) )
def importBin[_: P](pos: Position) = P( importExpr.map(Expr.ImportBin(pos, _)) )
def `import`[_: P](pos: Position) = P( importExpr.map(Expr.Import(pos, _)) )
def error[_: P](pos: Position) = P(expr.map(Expr.Error(pos, _)) )

@@ -298,6 +299,7 @@
case "if" => Pass ~ ifElse(pos)
case "function" => Pass ~ function(pos)
case "importstr" => Pass ~ importStr(pos)
case "importbin" => Pass ~ importBin(pos)
case "import" => Pass ~ `import`(pos)
case "error" => Pass ~ error(pos)
case "assert" => Pass ~ assertExpr(pos)
7 changes: 5 additions & 2 deletions sjsonnet/test/resources/test_suite/import.jsonnet
@@ -24,14 +24,17 @@ std.assertEqual((import 'lib/A_20_func.libsonnet')(), 20) &&
std.assertEqual((import 'lib/A_20_func.libsonnet')(), 20) &&
// The block string is hard to test because the filename would include a terminating \n

// Each import has its own environment, can't be overidden.
// Each import has its own environment, can't be overridden.
std.assertEqual(local A = 7; local lib = import 'lib/A_20.libsonnet'; lib, 20) &&
std.assertEqual(local A = 7, lib = import 'lib/A_20.libsonnet'; lib, 20) &&

std.assertEqual(importstr 'lib/some_file.txt', 'Hello World!\n') &&
std.assertEqual(importstr 'lib/some_file.txt', 'Hello World!\n') &&

std.assertEqual(importbin 'lib/nonutf8.bin', [255, 0, 254]) &&
std.assertEqual(importbin 'lib/nonutf8.bin', [255, 0, 254]) &&

std.assertEqual(import 'lib/rel_path.libsonnet', 'rel_path') &&
std.assertEqual(import 'lib/rel_path4.libsonnet', 'rel_path') &&

true
true
Binary file sjsonnet/test/resources/test_suite/lib/nonutf8.bin not shown.
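The fixture itself is not rendered above, but the assertions in import.jsonnet pin down its contents: it must be exactly the bytes 0xFF 0x00 0xFE, a sequence that is deliberately not valid UTF-8. A hedged sketch of how such a fixture could be generated (the path is inferred from the relative import in the test):

```scala
import java.nio.file.{Files, Paths}

// Sketch only: writes the three bytes the test expects ([255, 0, 254]).
object WriteNonUtf8Fixture extends App {
  Files.write(
    Paths.get("sjsonnet/test/resources/test_suite/lib/nonutf8.bin"),
    Array(0xff, 0x00, 0xfe).map(_.toByte))
}
```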
6 changes: 3 additions & 3 deletions sjsonnet/test/src/sjsonnet/Std0150FunctionsTests.scala
@@ -89,7 +89,7 @@ object Std0150FunctionsTests extends TestSuite {
parseCache = new DefaultParseCache,
)

def check(s: String, expected: ujson.Value) =
def check(s: String, expected: ujson.Value): Unit =
interpreter.interpret(s, DummyPath("(memory)")) ==> Right(expected)

check("""std.extVar("num")""", 1)
@@ -121,14 +121,14 @@ object Std0150FunctionsTests extends TestSuite {
override def resolve(docBase: Path, importName: String): Option[Path] = importName match{
case "bar.json" => Some(DummyPath("bar"))
}
override def read(path: Path): Option[ResolvedFile] = path match{
override def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = path match{
case DummyPath("bar") => Some(StaticResolvedFile("""{"x": "y"}"""))
}
},
parseCache = new DefaultParseCache,
)

def check(s: String, expected: ujson.Value) =
def check(s: String, expected: ujson.Value): Unit =
interpreter.interpret(s, DummyPath("(memory)")) ==> Right(expected)

check("""function(num) num""", 1)
