Skip to content

Commit

Permalink
update licenses.json and parsing without xml conversion (#94)
Browse files Browse the repository at this point in the history
* get rid of license to xml conversion step to make updates to licenses easier

* updated licenses.json

* make license search unambiguous and normalize ids

* normalize www.

* only normalize x.0 versions with end anchor

* fix imports
  • Loading branch information
lhns authored Dec 9, 2024
1 parent 09961a9 commit 7b63539
Show file tree
Hide file tree
Showing 9 changed files with 6,074 additions and 8,211 deletions.
2 changes: 2 additions & 0 deletions project/Dependencies.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ import sbt._

object Dependencies {
lazy val library = Seq(
"io.circe" %% "circe-generic" % "0.14.10",
"io.circe" %% "circe-parser" % "0.14.10",
"org.cyclonedx" % "cyclonedx-core-java" % "9.1.0",
"org.scalatest" %% "scalatest" % "3.2.19" % Test,
"org.scalamock" %% "scalamock" % "6.0.0" % Test
Expand Down
9,172 changes: 5,932 additions & 3,240 deletions src/main/resources/licenses.json

Large diffs are not rendered by default.

5 changes: 1 addition & 4 deletions src/main/resources/licenses.readme.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,2 @@
An updated version of the licenses.json file can be found here:
https://github.com/spdx/license-list-data

licenses.xml is generated from original licenses.json using online xml converter:
http://convertjson.com/json-to-xml.htm
https://github.com/spdx/license-list-data/blob/main/json/licenses.json
4,887 changes: 0 additions & 4,887 deletions src/main/resources/licenses.xml

This file was deleted.

2 changes: 1 addition & 1 deletion src/main/scala/com/github/sbt/sbom/licenses/License.scala
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
package com.github.sbt.sbom.licenses

final case class License(id: Option[String] = None, name: Option[String] = None, references: Seq[String] = Seq())
/**
 * A single license entry held by the licenses archive.
 *
 * @param id         license identifier, e.g. "GPL-2.0"
 * @param name       license name, e.g. "GNU General Public License v2.0 only"
 * @param references reference strings associated with the license
 *                   (presumably URLs, since lookups by URL are matched against them — confirm)
 */
final case class License(id: String, name: String, references: Seq[String])
52 changes: 41 additions & 11 deletions src/main/scala/com/github/sbt/sbom/licenses/LicensesArchive.scala
Original file line number Diff line number Diff line change
@@ -1,23 +1,53 @@
package com.github.sbt.sbom.licenses

import com.github.sbt.sbom.licenses.LicensesArchive.{ normalizeId, normalizeUrl }

import scala.io.Source

/**
 * Lookup structure over a list of licenses, searchable by reference URL and by license id.
 *
 * NOTE(review): this span looks like a rendered diff without +/- markers, so members from
 * before and after the change appear side by side (two `findByUrl`, two `findById`).
 * Confirm which ones are live before relying on the comments below.
 *
 * @param licenses the licenses to index
 */
class LicensesArchive(licenses: Seq[License]) {
// Index keyed by the reference string exactly as given. When several licenses share a
// reference, the one that comes later in `licenses` overwrites the earlier entry.
private val licensesByUrl: Map[String, License] = licenses.foldLeft(Map[String, License]()) { (map, license) =>
map ++ license.references.foldLeft(Map[String, License]()) { (map, ref) =>
map + (ref -> license)
}
}
// Index keyed by normalizeUrl(reference). Every license whose references normalize to the
// same key is kept, so one URL can resolve to several licenses.
// NOTE(review): on Scala 2.12 `mapValues` returns a lazy view that re-applies the function
// on each access; a strict `.map { case (k, v) => ... }` would avoid that — confirm the
// Scala version in use.
private val licensesByNormalizedUrl: Map[String, Seq[License]] =
licenses.iterator
.flatMap { license =>
license.references.map { reference =>
(normalizeUrl(reference), license)
}
}
.toList
.groupBy(_._1)
.mapValues(_.map(_._2))

// Exact-match lookup against the un-normalized index.
def findByUrl(url: String): Option[License] = licensesByUrl.get(url)
// Index keyed by normalizeId(license.id). Two licenses that normalize to the same id are
// treated as an error and raise a RuntimeException.
// NOTE(review): if `mapValues` is lazy here (Scala 2.12), that exception is raised when a
// conflicting key is looked up rather than when the archive is constructed — confirm.
private val licenseByNormalizedId: Map[String, License] =
licenses
.groupBy(license => normalizeId(license.id))
.mapValues {
case Seq(license) => license
case licenses => throw new RuntimeException(s"conflicting licenses: $licenses")
}

// Linear scan; the meaning of `contains` depends on the type of `License.id`
// (TODO confirm which `License` definition this line belongs to).
def findById(id: String): Option[License] = licenses.find(_.id.contains(id))
/** Returns every license with a reference that normalizes to the same URL as `url`; empty if none. */
def findByUrl(url: String): Seq[License] = licensesByNormalizedUrl.getOrElse(normalizeUrl(url), Seq.empty)

/** Returns the license whose normalized id equals the normalized `id`, if any. */
def findById(id: String): Option[License] = licenseByNormalizedId.get(normalizeId(id))
}

/**
 * Companion holding the normalization helpers and the factories for [[LicensesArchive]].
 *
 * NOTE(review): this span looks like a rendered diff without +/- markers; the members that
 * read "/licenses.xml" and the `Option`-returning `findByUrl` appear to be the pre-change
 * versions — confirm.
 */
object LicensesArchive {
private lazy val fileStream = getClass.getResourceAsStream("/licenses.xml")
private lazy val archiveText = Source.fromInputStream(fileStream).mkString
private lazy val archive = new LicensesArchive(new LicensesArchiveParser(archiveText).licenses)
// Canonical form of a URL used as a lookup key: lower-cased, scheme (http or https) plus an
// optional leading "www." rewritten to "https://", one trailing "/" dropped, then a trailing
// ".html" or ".txt" dropped.
// e.g. http://www.opensource.org/licenses/GPL-2.0 becomes https://opensource.org/licenses/gpl-2.0
private def normalizeUrl(url: String): String = url.toLowerCase
.replaceFirst("^https?://(www\\.)?", "https://")
.replaceFirst("/$", "")
.replaceFirst("\\.(html|txt)$", "")

// Canonical form of a license id used as a lookup key: lower-cased, every "-" replaced by a
// space, and a ".0" at the very end removed when it directly follows a digit.
// Apache-2.0 will be normalized to apache 2, BSD 3-Clause will be normalized to bsd 3 clause
private def normalizeId(id: String): String = id.toLowerCase
.replace("-", " ")
.replaceFirst("(?<=\\d)\\.0$", "")

// Reads the whole resource into a string.
// NOTE(review): the stream is never closed here, and getResourceAsStream returns null for a
// missing resource — consider closing it and guarding against null.
private def loadResourceAsString(resource: String): String = {
val fileStream = getClass.getResourceAsStream(resource)
Source.fromInputStream(fileStream).mkString
}

/** Builds an archive from the text of a licenses JSON document, as parsed by LicensesArchiveJsonParser. */
def fromJsonString(json: String): LicensesArchive =
new LicensesArchive(LicensesArchiveJsonParser.parseString(json))

def findByUrl(url: String): Option[License] = archive.findByUrl(url)
/** Archive built from the "/licenses.json" resource; created on first access. */
lazy val bundled: LicensesArchive =
fromJsonString(loadResourceAsString("/licenses.json"))
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package com.github.sbt.sbom.licenses

import io.circe.Decoder
import io.circe.generic.semiauto.deriveDecoder
import io.circe.parser._

import scala.util.control.NonFatal

/** Turns the text of a licenses JSON document into a sequence of [[License]] values. */
private[licenses] object LicensesArchiveJsonParser {

  /** One element of the `licenses` array; only the fields that end up in [[License]] are modelled. */
  private case class LicenseJson(licenseId: String, name: String, seeAlso: Seq[String])

  private object LicenseJson {
    implicit val decoder: Decoder[LicenseJson] = deriveDecoder[LicenseJson]
  }

  /** The document root; only its `licenses` array is read. */
  private case class LicensesArchiveJson(licenses: Seq[LicenseJson])

  private object LicensesArchiveJson {
    implicit val decoder: Decoder[LicensesArchiveJson] = deriveDecoder[LicensesArchiveJson]
  }

  /**
   * Parses and decodes `string`, then maps every entry to a [[License]].
   *
   * @param string the JSON document text
   * @return one [[License]] per entry of the `licenses` array, in document order
   * @throws RuntimeException wrapping the underlying failure when the text cannot be
   *                          parsed or decoded
   */
  def parseString(string: String): Seq[License] = {
    val root: LicensesArchiveJson =
      try {
        decode[LicensesArchiveJson](string) match {
          case Right(decoded) => decoded
          // Rethrow inside the try so the failure is wrapped by the handler below.
          case Left(failure) => throw failure
        }
      } catch {
        case NonFatal(cause) =>
          throw new RuntimeException("failed to parse licenses archive json", cause)
      }

    root.licenses.map { entry =>
      License(id = entry.licenseId, name = entry.name, references = entry.seeAlso)
    }
  }
}

This file was deleted.

98 changes: 53 additions & 45 deletions src/test/scala/com/github/sbt/sbom/LicensesArchiveSpec.scala
Original file line number Diff line number Diff line change
@@ -1,83 +1,91 @@
package com.github.sbt.sbom

import com.github.sbt.sbom.licenses.{ LicensesArchive, LicensesArchiveParser }
import com.github.sbt.sbom.licenses.LicensesArchive
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

/**
 * Behavioural tests for parsing a licenses archive and looking licenses up by URL and by id.
 *
 * NOTE(review): this span looks like a rendered diff without +/- markers, so assertions
 * against the XML-based parser and against the JSON-based archive are interleaved inside
 * the same test bodies — confirm which lines are current.
 */
class LicensesArchiveSpec extends AnyWordSpec with Matchers {
"LicensesArchiveParser" should {
// An empty document is not a valid archive.
"fail parsing a not valid archive" in {
new LicensesArchiveParser("").isValid shouldBe false
assertThrows[RuntimeException] {
LicensesArchive.fromJsonString("")
}
}

"parse a valid archive" in {
new LicensesArchiveParser(xml).isValid shouldBe true
LicensesArchive.fromJsonString(json)
}
}

"LicenseRegister" should {
"find no license by ref" in {
val register = new LicensesArchive(new LicensesArchiveParser(xml).licenses)
register.findByUrl("http://www.domain.com/missingLicense") shouldBe None
val register = LicensesArchive.fromJsonString(json)
register.findByUrl("http://www.domain.com/missingLicense") shouldBe Seq.empty
}

"find licenses by ref" in {
val register = new LicensesArchive(new LicensesArchiveParser(xml).licenses)
val gps2 = register.findByUrl("http://www.opensource.org/licenses/GPL-2.0")
val register = LicensesArchive.fromJsonString(json)
val gpl2 = register.findByUrl("https://opensource.org/licenses/GPL-2.0")
val zeroBsd = register.findByUrl("http://landley.net/toybox/license.html")

gps2.isDefined shouldBe true
gps2.get.id shouldBe Some("GPL-2.0")
zeroBsd.isDefined shouldBe true
zeroBsd.get.id shouldBe Some("0BSD")
gpl2.size shouldBe 1
gpl2.head.id shouldBe "GPL-2.0"
zeroBsd.size shouldBe 1
zeroBsd.head.id shouldBe "0BSD"
}

"find no licenses by id" in {
val register = new LicensesArchive(new LicensesArchiveParser(xml).licenses)
val register = LicensesArchive.fromJsonString(json)
register.findById("an invalid id") shouldBe None
}

// Exercises the archive loaded from the bundled resource rather than the inline fixture.
"shoud read licenses from resource file" in {
val gpl2OrLater = LicensesArchive.findByUrl("https://opensource.org/licenses/GPL-2.0")
gpl2OrLater.isDefined shouldBe true
gpl2OrLater.get.id shouldBe Some("GPL-2.0")
val gpl2 = LicensesArchive.bundled.findByUrl("https://opensource.org/licenses/GPL-2.0")
gpl2.nonEmpty shouldBe true
gpl2.exists(_.id == "GPL-2.0") shouldBe true
}

"find licenses by id" in {
val register = new LicensesArchive(new LicensesArchiveParser(xml).licenses)
val register = LicensesArchive.fromJsonString(json)
val gpl2 = register.findById("GPL-2.0")
gpl2.isDefined shouldBe true
gpl2.get.id shouldBe Some("GPL-2.0")
gpl2.get.id shouldBe "GPL-2.0"
}
}

// Inline fixtures: a two-license archive (0BSD and GPL-2.0) in XML form and in JSON form.
val xml: String =
"""
|<root>
| <licenseListVersion>v3.4-5-gb3d735f</licenseListVersion>
| <licenses>
| <reference>./0BSD.html</reference>
| <isDeprecatedLicenseId>false</isDeprecatedLicenseId>
| <detailsUrl>http://spdx.org/licenses/0BSD.json</detailsUrl>
| <referenceNumber>310</referenceNumber>
| <name>BSD Zero Clause License</name>
| <licenseId>0BSD</licenseId>
| <seeAlso>http://landley.net/toybox/license.html</seeAlso>
| <isOsiApproved>true</isOsiApproved>
| </licenses>
| <licenses>
| <reference>./GPL-2.0.html</reference>
| <isDeprecatedLicenseId>true</isDeprecatedLicenseId>
| <isFsfLibre>true</isFsfLibre>
| <detailsUrl>http://spdx.org/licenses/GPL-2.0.json</detailsUrl>
| <referenceNumber>140</referenceNumber>
| <name>GNU General Public License v2.0 only</name>
| <licenseId>GPL-2.0</licenseId>
| <seeAlso>http://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html</seeAlso>
| <seeAlso>http://www.opensource.org/licenses/GPL-2.0</seeAlso>
| <isOsiApproved>true</isOsiApproved>
| </licenses>
| <releaseDate>2019-01-16</releaseDate>
|</root>
lazy val json: String =
"""{
| "licenseListVersion": "b5a3b2e",
| "licenses": [
| {
| "reference": "https://spdx.org/licenses/0BSD.html",
| "isDeprecatedLicenseId": false,
| "detailsUrl": "https://spdx.org/licenses/0BSD.json",
| "referenceNumber": 430,
| "name": "BSD Zero Clause License",
| "licenseId": "0BSD",
| "seeAlso": [
| "http://landley.net/toybox/license.html",
| "https://opensource.org/licenses/0BSD"
| ],
| "isOsiApproved": true
| },
| {
| "reference": "https://spdx.org/licenses/GPL-2.0.html",
| "isDeprecatedLicenseId": true,
| "detailsUrl": "https://spdx.org/licenses/GPL-2.0.json",
| "referenceNumber": 47,
| "name": "GNU General Public License v2.0 only",
| "licenseId": "GPL-2.0",
| "seeAlso": [
| "https://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html",
| "https://opensource.org/licenses/GPL-2.0"
| ],
| "isOsiApproved": true,
| "isFsfLibre": true
| }
| ],
| "releaseDate": "2024-06-28"
|}
""".stripMargin
}

0 comments on commit 7b63539

Please sign in to comment.