From bbbc70273cafee814d66b34aa1f4de420fc497a9 Mon Sep 17 00:00:00 2001 From: Stephen Amar Date: Thu, 2 Jan 2025 12:49:07 -0800 Subject: [PATCH] Add importbin to sjsonnet (#251) Per spec https://jsonnet.org/ref/spec.html#:~:text=UTF%2D8.%20For-,importbin,-%2C%20the%20file%20is importbin imports the file as an array of ints from 0-255 --- .../main/scala/sjsonnet/MainBenchmark.scala | 2 +- .../src/main/scala/sjsonnet/RunProfiler.scala | 2 +- sjsonnet/src-js/sjsonnet/SjsonnetMain.scala | 2 +- .../sjsonnet/CachedResolvedFile.scala | 28 +++++++++++---- .../sjsonnet/SjsonnetMain.scala | 12 +++---- sjsonnet/src/sjsonnet/Error.scala | 2 +- sjsonnet/src/sjsonnet/Evaluator.scala | 9 +++-- sjsonnet/src/sjsonnet/Expr.scala | 2 +- sjsonnet/src/sjsonnet/Importer.scala | 33 ++++++++++-------- sjsonnet/src/sjsonnet/Parser.scala | 4 ++- .../test/resources/test_suite/import.jsonnet | 7 ++-- .../test/resources/test_suite/lib/nonutf8.bin | Bin 0 -> 3 bytes .../src/sjsonnet/Std0150FunctionsTests.scala | 6 ++-- 13 files changed, 68 insertions(+), 41 deletions(-) create mode 100644 sjsonnet/test/resources/test_suite/lib/nonutf8.bin diff --git a/bench/src/main/scala/sjsonnet/MainBenchmark.scala b/bench/src/main/scala/sjsonnet/MainBenchmark.scala index 8c1c37ec..ef8fa34a 100644 --- a/bench/src/main/scala/sjsonnet/MainBenchmark.scala +++ b/bench/src/main/scala/sjsonnet/MainBenchmark.scala @@ -31,7 +31,7 @@ object MainBenchmark { parseCache = parseCache ) val renderer = new Renderer(new StringWriter, indent = 3) - interp.interpret0(interp.resolver.read(path).get.readString(), path, renderer).getOrElse(???) + interp.interpret0(interp.resolver.read(path, binaryData = false).get.readString(), path, renderer).getOrElse(???) (parseCache.keySet.toIndexedSeq, interp.evaluator) } diff --git a/bench/src/main/scala/sjsonnet/RunProfiler.scala b/bench/src/main/scala/sjsonnet/RunProfiler.scala index 536e5d3f..27bfdda2 100644 --- a/bench/src/main/scala/sjsonnet/RunProfiler.scala +++ b/bench/src/main/scala/sjsonnet/RunProfiler.scala @@ -24,7 +24,7 @@ object RunProfiler extends App { def run(): Long = { val renderer = new Renderer(new StringWriter, indent = 3) - val start = interp.resolver.read(path).get.readString() + val start = interp.resolver.read(path, binaryData = false).get.readString() val t0 = System.nanoTime() interp.interpret0(start, path, renderer).getOrElse(???) System.nanoTime() - t0 diff --git a/sjsonnet/src-js/sjsonnet/SjsonnetMain.scala b/sjsonnet/src-js/sjsonnet/SjsonnetMain.scala index 6170676d..51e5047a 100644 --- a/sjsonnet/src-js/sjsonnet/SjsonnetMain.scala +++ b/sjsonnet/src-js/sjsonnet/SjsonnetMain.scala @@ -24,7 +24,7 @@ object SjsonnetMain { case null => None case s => Some(JsVirtualPath(s)) } - def read(path: Path): Option[ResolvedFile] = + def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = Option(StaticResolvedFile(importLoader(path.asInstanceOf[JsVirtualPath].path))) }, parseCache = new DefaultParseCache, diff --git a/sjsonnet/src-jvm-native/sjsonnet/CachedResolvedFile.scala b/sjsonnet/src-jvm-native/sjsonnet/CachedResolvedFile.scala index a5c93946..d8e2df2f 100644 --- a/sjsonnet/src-jvm-native/sjsonnet/CachedResolvedFile.scala +++ b/sjsonnet/src-jvm-native/sjsonnet/CachedResolvedFile.scala @@ -1,11 +1,11 @@ package sjsonnet -import java.io.{BufferedInputStream, File, FileInputStream} +import fastparse.ParserInput + +import java.io.File import java.nio.charset.StandardCharsets import java.nio.file.Files -import fastparse.ParserInput - /** * A class that encapsulates a resolved import. This is used to cache the result of * resolving an import. If the import is deemed too large (IE it's a large file), then we will avoid keeping it in @@ -18,16 +18,17 @@ import fastparse.ParserInput * @param cacheThresholdBytes The maximum size of a file that we will cache in memory. If the file * is larger than this, then we will serve it from disk */ -class CachedResolvedFile(val resolvedImportPath: OsPath, memoryLimitBytes: Long, cacheThresholdBytes: Long = 1024 * 1024) extends ResolvedFile { +class CachedResolvedFile(val resolvedImportPath: OsPath, memoryLimitBytes: Long, cacheThresholdBytes: Long = 1024 * 1024, binaryData: Boolean = false) extends ResolvedFile { private val jFile: File = resolvedImportPath.p.toIO - assert(jFile.exists(), s"Resolved import path ${resolvedImportPath} does not exist") + assert(jFile.exists(), s"Resolved import path $resolvedImportPath does not exist") // Assert that the file is less than limit - assert(jFile.length() <= memoryLimitBytes, s"Resolved import path ${resolvedImportPath} is too large: ${jFile.length()} bytes > ${memoryLimitBytes} bytes") + assert(jFile.length() <= memoryLimitBytes, s"Resolved import path $resolvedImportPath is too large: ${jFile.length()} bytes > ${memoryLimitBytes} bytes") private[this] val resolvedImportContent: StaticResolvedFile = { - if (jFile.length() > cacheThresholdBytes) { + // TODO: Support caching binary data + if (jFile.length() > cacheThresholdBytes || binaryData) { // If the file is too large, then we will just read it from disk null } else { @@ -39,6 +40,8 @@ class CachedResolvedFile(val resolvedImportPath: OsPath, memoryLimitBytes: Long, new String(Files.readAllBytes(jFile.toPath), StandardCharsets.UTF_8); } + private[this] def readRawBytes(jFile: File): Array[Byte] = Files.readAllBytes(jFile.toPath) + /** * A method that will return a reader for the resolved import. If the import is too large, then this will return * a reader that will read the file from disk. Otherwise, it will return a reader that reads from memory. @@ -61,6 +64,7 @@ class CachedResolvedFile(val resolvedImportPath: OsPath, memoryLimitBytes: Long, } } + override lazy val contentHash: String = { if (resolvedImportContent == null) { // If the file is too large, then we will just read it from disk @@ -69,5 +73,15 @@ class CachedResolvedFile(val resolvedImportPath: OsPath, memoryLimitBytes: Long, resolvedImportContent.contentHash } } + + override def readRawBytes(): Array[Byte] = { + if (resolvedImportContent == null) { + // If the file is too large, then we will just read it from disk + readRawBytes(jFile) + } else { + // Otherwise, we will read it from memory + resolvedImportContent.readRawBytes() + } + } } diff --git a/sjsonnet/src-jvm-native/sjsonnet/SjsonnetMain.scala b/sjsonnet/src-jvm-native/sjsonnet/SjsonnetMain.scala index e4764c34..04d1f8c7 100644 --- a/sjsonnet/src-jvm-native/sjsonnet/SjsonnetMain.scala +++ b/sjsonnet/src-jvm-native/sjsonnet/SjsonnetMain.scala @@ -22,8 +22,8 @@ object SjsonnetMain { .find(os.exists) .flatMap(p => try Some(OsPath(p)) catch{case NonFatal(_) => None}) - def read(path: Path): Option[ResolvedFile] = { - readPath(path) + def read(path: Path, binaryData: Boolean = false): Option[ResolvedFile] = { + readPath(path, binaryData) } } @@ -200,8 +200,8 @@ object SjsonnetMain { case Some(i) => new Importer { def resolve(docBase: Path, importName: String): Option[Path] = i(docBase, importName).map(OsPath) - def read(path: Path): Option[ResolvedFile] = { - readPath(path) + def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = { + readPath(path, binaryData) } } case None => resolveImport(config.jpaths.map(os.Path(_, wd)).map(OsPath), allowedInputs) @@ -295,10 +295,10 @@ object SjsonnetMain { * of caching on top of the underlying file system. Small files are read into memory, while large * files are read from disk. */ - private[this] def readPath(path: Path): Option[ResolvedFile] = { + private[this] def readPath(path: Path, binaryData: Boolean): Option[ResolvedFile] = { val osPath = path.asInstanceOf[OsPath].p if (os.exists(osPath) && os.isFile(osPath)) { - Some(new CachedResolvedFile(path.asInstanceOf[OsPath], memoryLimitBytes = Int.MaxValue.toLong)) + Some(new CachedResolvedFile(path.asInstanceOf[OsPath], memoryLimitBytes = Int.MaxValue.toLong, binaryData = binaryData)) } else { None } diff --git a/sjsonnet/src/sjsonnet/Error.scala b/sjsonnet/src/sjsonnet/Error.scala index edbe18d2..49e30f32 100644 --- a/sjsonnet/src/sjsonnet/Error.scala +++ b/sjsonnet/src/sjsonnet/Error.scala @@ -101,7 +101,7 @@ trait EvalErrorScope { def wd: Path def prettyIndex(pos: Position): Option[(Int, Int)] = { - importer.read(pos.currentFile).map { s => + importer.read(pos.currentFile, binaryData = false).map { s => val Array(line, col) = s.getParserInput().prettyIndex(pos.offset).split(':') (line.toInt, col.toInt) diff --git a/sjsonnet/src/sjsonnet/Evaluator.scala b/sjsonnet/src/sjsonnet/Evaluator.scala index 42ee3d70..48213291 100644 --- a/sjsonnet/src/sjsonnet/Evaluator.scala +++ b/sjsonnet/src/sjsonnet/Evaluator.scala @@ -61,6 +61,7 @@ class Evaluator(resolver: CachedResolver, case e: Import => visitImport(e) case e: Apply0 => visitApply0(e) case e: ImportStr => visitImportStr(e) + case e: ImportBin => visitImportBin(e) case e: Expr.Error => visitError(e) case e => visitInvalid(e) } @@ -301,10 +302,14 @@ class Evaluator(resolver: CachedResolver, } def visitImportStr(e: ImportStr)(implicit scope: ValScope): Val.Str = - Val.Str(e.pos, importer.resolveAndReadOrFail(e.value, e.pos)._2.readString()) + Val.Str(e.pos, importer.resolveAndReadOrFail(e.value, e.pos, binaryData = false)._2.readString()) + + def visitImportBin(e: ImportBin): Val.Arr = + new Val.Arr(e.pos, importer.resolveAndReadOrFail(e.value, e.pos, binaryData = true)._2.readRawBytes().map( + x => Val.Num(e.pos, (x & 0xff).doubleValue))) def visitImport(e: Import)(implicit scope: ValScope): Val = { - val (p, str) = importer.resolveAndReadOrFail(e.value, e.pos) + val (p, str) = importer.resolveAndReadOrFail(e.value, e.pos, binaryData = false) cachedImports.getOrElseUpdate( p, { diff --git a/sjsonnet/src/sjsonnet/Expr.scala b/sjsonnet/src/sjsonnet/Expr.scala index 1ee29c97..8216142c 100644 --- a/sjsonnet/src/sjsonnet/Expr.scala +++ b/sjsonnet/src/sjsonnet/Expr.scala @@ -126,6 +126,7 @@ object Expr{ case class Bind(pos: Position, name: String, args: Params, rhs: Expr) extends Member case class Import(pos: Position, value: String) extends Expr case class ImportStr(pos: Position, value: String) extends Expr + case class ImportBin(pos: Position, value: String) extends Expr case class Error(pos: Position, value: Expr) extends Expr case class Apply(pos: Position, value: Expr, args: Array[Expr], namedNames: Array[String]) extends Expr case class Apply0(pos: Position, value: Expr) extends Expr @@ -181,5 +182,4 @@ object Expr{ override def toString = s"ObjComp($pos, ${arrStr(preLocals)}, $key, $value, ${arrStr(postLocals)}, $first, $rest)" } } - } diff --git a/sjsonnet/src/sjsonnet/Importer.scala b/sjsonnet/src/sjsonnet/Importer.scala index c4e616a2..7974b761 100644 --- a/sjsonnet/src/sjsonnet/Importer.scala +++ b/sjsonnet/src/sjsonnet/Importer.scala @@ -1,33 +1,31 @@ package sjsonnet -import java.io.{BufferedInputStream, BufferedReader, ByteArrayInputStream, File, FileInputStream, FileReader, InputStream, RandomAccessFile, Reader, StringReader} -import java.nio.file.Files -import java.security.MessageDigest -import scala.collection.mutable import fastparse.{IndexedParserInput, Parsed, ParserInput} +import java.io.{BufferedInputStream, File, FileInputStream, RandomAccessFile} import java.nio.charset.StandardCharsets +import scala.collection.mutable /** Resolve and read imported files */ abstract class Importer { def resolve(docBase: Path, importName: String): Option[Path] - def read(path: Path): Option[ResolvedFile] + def read(path: Path, binaryData: Boolean): Option[ResolvedFile] - def resolveAndRead(docBase: Path, importName: String): Option[(Path, ResolvedFile)] = for { + private def resolveAndRead(docBase: Path, importName: String, binaryData: Boolean): Option[(Path, ResolvedFile)] = for { path <- resolve(docBase, importName) - txt <- read(path) + txt <- read(path, binaryData) } yield (path, txt) - def resolveAndReadOrFail(value: String, pos: Position)(implicit ev: EvalErrorScope): (Path, ResolvedFile) = - resolveAndRead(pos.fileScope.currentFile.parent(), value) + def resolveAndReadOrFail(value: String, pos: Position, binaryData: Boolean)(implicit ev: EvalErrorScope): (Path, ResolvedFile) = + resolveAndRead(pos.fileScope.currentFile.parent(), value, binaryData = binaryData) .getOrElse(Error.fail("Couldn't import file: " + pprint.Util.literalize(value), pos)) } object Importer { val empty: Importer = new Importer { def resolve(docBase: Path, importName: String): Option[Path] = None - def read(path: Path): Option[ResolvedFile] = None + def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = None } } @@ -97,7 +95,7 @@ class BufferedRandomAccessFile(fileName: String, bufferSize: Int) { private val fileLength: Long = file.length() private def fillBuffer(position: Long): Unit = { - if (file.getFilePointer() != position) { + if (file.getFilePointer != position) { file.seek(position) } val bytesRead = file.read(buffer, 0, bufferSize) @@ -150,6 +148,9 @@ trait ResolvedFile { // Get a content hash of the file suitable for detecting changes in a given file. def contentHash(): String + + // Used by importbin + def readRawBytes(): Array[Byte] } case class StaticResolvedFile(content: String) extends ResolvedFile { @@ -159,6 +160,8 @@ case class StaticResolvedFile(content: String) extends ResolvedFile { // We just cheat, the content hash can be the content itself for static imports lazy val contentHash: String = content + + override def readRawBytes(): Array[Byte] = content.getBytes(StandardCharsets.UTF_8) } class CachedImporter(parent: Importer) extends Importer { @@ -166,12 +169,12 @@ class CachedImporter(parent: Importer) extends Importer { def resolve(docBase: Path, importName: String): Option[Path] = parent.resolve(docBase, importName) - def read(path: Path): Option[ResolvedFile] = cache.get(path) match { + def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = cache.get(path) match { case s @ Some(x) => if(x == null) None else s case None => - val x = parent.read(path) - cache.put(path, x.getOrElse(null)) + val x = parent.read(path, binaryData) + cache.put(path, x.orNull) x } } @@ -184,7 +187,7 @@ class CachedResolver( internedStaticFieldSets: mutable.HashMap[Val.StaticObjectFieldSet, java.util.LinkedHashMap[String, java.lang.Boolean]]) extends CachedImporter(parentImporter) { def parse(path: Path, content: ResolvedFile)(implicit ev: EvalErrorScope): Either[Error, (Expr, FileScope)] = { - parseCache.getOrElseUpdate((path, content.contentHash.toString), { + parseCache.getOrElseUpdate((path, content.contentHash()), { val parsed = fastparse.parse(content.getParserInput(), new Parser(path, strictImportSyntax, internedStrings, internedStaticFieldSets).document(_)) match { case f @ Parsed.Failure(_, _, _) => val traced = f.trace() diff --git a/sjsonnet/src/sjsonnet/Parser.scala b/sjsonnet/src/sjsonnet/Parser.scala index e1555b27..1c06df6a 100644 --- a/sjsonnet/src/sjsonnet/Parser.scala +++ b/sjsonnet/src/sjsonnet/Parser.scala @@ -36,7 +36,7 @@ object Parser { val keywords = Set( "assert", "else", "error", "false", "for", "function", "if", "import", "importstr", - "in", "local", "null", "tailstrict", "then", "self", "super", "true" + "in", "local", "null", "tailstrict", "then", "self", "super", "true", "importbin" ) def idStartChar(c: Char) = c == '_' || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') @@ -239,6 +239,7 @@ class Parser(val currentFile: Path, def local[_: P] = P( localExpr ) def importStr[_: P](pos: Position) = P( importExpr.map(Expr.ImportStr(pos, _)) ) + def importBin[_: P](pos: Position) = P( importExpr.map(Expr.ImportBin(pos, _)) ) def `import`[_: P](pos: Position) = P( importExpr.map(Expr.Import(pos, _)) ) def error[_: P](pos: Position) = P(expr.map(Expr.Error(pos, _)) ) @@ -298,6 +299,7 @@ class Parser(val currentFile: Path, case "if" => Pass ~ ifElse(pos) case "function" => Pass ~ function(pos) case "importstr" => Pass ~ importStr(pos) + case "importbin" => Pass ~ importBin(pos) case "import" => Pass ~ `import`(pos) case "error" => Pass ~ error(pos) case "assert" => Pass ~ assertExpr(pos) diff --git a/sjsonnet/test/resources/test_suite/import.jsonnet b/sjsonnet/test/resources/test_suite/import.jsonnet index 1236e737..be834b37 100644 --- a/sjsonnet/test/resources/test_suite/import.jsonnet +++ b/sjsonnet/test/resources/test_suite/import.jsonnet @@ -24,14 +24,17 @@ std.assertEqual((import 'lib/A_20_func.libsonnet')(), 20) && std.assertEqual((import 'lib/A_20_func.libsonnet')(), 20) && // The block string is hard to test because the filename would include a terminating \n -// Each import has its own environment, can't be overidden. +// Each import has its own environment, can't be overridden. std.assertEqual(local A = 7; local lib = import 'lib/A_20.libsonnet'; lib, 20) && std.assertEqual(local A = 7, lib = import 'lib/A_20.libsonnet'; lib, 20) && std.assertEqual(importstr 'lib/some_file.txt', 'Hello World!\n') && std.assertEqual(importstr 'lib/some_file.txt', 'Hello World!\n') && +std.assertEqual(importbin 'lib/nonutf8.bin', [255, 0, 254]) && +std.assertEqual(importbin 'lib/nonutf8.bin', [255, 0, 254]) && + std.assertEqual(import 'lib/rel_path.libsonnet', 'rel_path') && std.assertEqual(import 'lib/rel_path4.libsonnet', 'rel_path') && -true +true \ No newline at end of file diff --git a/sjsonnet/test/resources/test_suite/lib/nonutf8.bin b/sjsonnet/test/resources/test_suite/lib/nonutf8.bin new file mode 100644 index 0000000000000000000000000000000000000000..90db00e1d6cf116716ba949a1cca330ac8c63634 GIT binary patch literal 3 Kcmey*@DBh3{sH~~ literal 0 HcmV?d00001 diff --git a/sjsonnet/test/src/sjsonnet/Std0150FunctionsTests.scala b/sjsonnet/test/src/sjsonnet/Std0150FunctionsTests.scala index 85691505..a7d4540e 100644 --- a/sjsonnet/test/src/sjsonnet/Std0150FunctionsTests.scala +++ b/sjsonnet/test/src/sjsonnet/Std0150FunctionsTests.scala @@ -89,7 +89,7 @@ object Std0150FunctionsTests extends TestSuite { parseCache = new DefaultParseCache, ) - def check(s: String, expected: ujson.Value) = + def check(s: String, expected: ujson.Value): Unit = interpreter.interpret(s, DummyPath("(memory)")) ==> Right(expected) check("""std.extVar("num")""", 1) @@ -121,14 +121,14 @@ object Std0150FunctionsTests extends TestSuite { override def resolve(docBase: Path, importName: String): Option[Path] = importName match{ case "bar.json" => Some(DummyPath("bar")) } - override def read(path: Path): Option[ResolvedFile] = path match{ + override def read(path: Path, binaryData: Boolean): Option[ResolvedFile] = path match{ case DummyPath("bar") => Some(StaticResolvedFile("""{"x": "y"}""")) } }, parseCache = new DefaultParseCache, ) - def check(s: String, expected: ujson.Value) = + def check(s: String, expected: ujson.Value): Unit = interpreter.interpret(s, DummyPath("(memory)")) ==> Right(expected) check("""function(num) num""", 1)