-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
update licenses.json and parsing without xml conversion (#94)
* get rid of license to xml conversion step to make updates to licenses easier * updated licenses.json * make license search unambiguous and normalize ids * normalize www. * only normalize x.0 versions with end anchor * fix imports
- Loading branch information
Showing
9 changed files
with
6,074 additions
and
8,211 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,2 @@ | ||
Updated version of licenses.json file may be found here: | ||
https://github.com/spdx/license-list-data | ||
|
||
licenses.xml is generated from original licenses.json using online xml converter: | ||
http://convertjson.com/json-to-xml.htm | ||
https://github.com/spdx/license-list-data/blob/main/json/licenses.json |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
package com.github.sbt.sbom.licenses | ||
|
||
final case class License(id: Option[String] = None, name: Option[String] = None, references: Seq[String] = Seq()) | ||
/**
 * A single license entry of the licenses archive.
 *
 * @param id         license identifier (filled from the `licenseId` field of the archive JSON)
 * @param name       human-readable license name (the `name` field of the archive JSON)
 * @param references URLs associated with the license (the `seeAlso` field of the archive JSON)
 */
final case class License(id: String, name: String, references: Seq[String]) |
52 changes: 41 additions & 11 deletions
52
src/main/scala/com/github/sbt/sbom/licenses/LicensesArchive.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,23 +1,53 @@ | ||
package com.github.sbt.sbom.licenses | ||
|
||
import com.github.sbt.sbom.licenses.LicensesArchive.{ normalizeId, normalizeUrl } | ||
|
||
import scala.io.Source | ||
|
||
/**
 * In-memory index over a list of licenses, searchable by reference URL or by license id.
 *
 * Both indexes are keyed by normalized values (`normalizeUrl` / `normalizeId` from the companion
 * object), so a lookup succeeds even when the query differs from the archive entry in the
 * details those functions strip (letter case, URL scheme, leading `www.`, ...).
 *
 * Construction fails with a `RuntimeException` if two licenses share the same normalized id.
 *
 * @param licenses the licenses to index
 */
class LicensesArchive(licenses: Seq[License]) {
  // Both indexes are built with a strict `map` instead of `mapValues`: `mapValues` returns a lazy
  // view that re-applies its function on every lookup, which would also postpone the id-conflict
  // check below until the conflicting id is actually queried.
  private val licensesByNormalizedUrl: Map[String, Seq[License]] =
    licenses
      .flatMap(license => license.references.map(reference => normalizeUrl(reference) -> license))
      .groupBy { case (url, _) => url }
      .map { case (url, entries) =>
        // `distinct`: several references of one license may normalize to the same URL.
        url -> entries.map { case (_, license) => license }.distinct
      }

  private val licenseByNormalizedId: Map[String, License] =
    licenses
      .groupBy(license => normalizeId(license.id))
      .map {
        case (id, Seq(license)) => id -> license
        case (_, conflicting)   => throw new RuntimeException(s"conflicting licenses: $conflicting")
      }

  /**
   * Finds every license that lists `url` (after normalization) among its references.
   *
   * @param url the license URL to look up
   * @return the matching licenses, or an empty sequence if the URL is unknown
   */
  def findByUrl(url: String): Seq[License] =
    licensesByNormalizedUrl.getOrElse(normalizeUrl(url), Seq.empty)

  /**
   * Finds the license whose normalized id equals the normalized `id`.
   *
   * @param id the license id to look up
   * @return the matching license, or `None` if the id is unknown
   */
  def findById(id: String): Option[License] =
    licenseByNormalizedId.get(normalizeId(id))
}
|
||
/** Normalization helpers and factory methods for [[LicensesArchive]]. */
object LicensesArchive {

  /**
   * Canonical form of a license URL, used as index key.
   *
   * The URL is lower-cased, its scheme is forced to `https://`, a leading `www.` is dropped, and
   * then a trailing `/` followed by a trailing `.html` / `.txt` extension is removed.
   */
  private def normalizeUrl(url: String): String = url
    // Locale.ROOT keeps the result independent of the JVM default locale (e.g. Turkish dotless i).
    .toLowerCase(java.util.Locale.ROOT)
    .replaceFirst("^https?://(www\\.)?", "https://")
    .replaceFirst("/$", "")
    .replaceFirst("\\.(html|txt)$", "")

  /**
   * Canonical form of a license id, used as index key.
   *
   * Apache-2.0 will be normalized to "apache 2", BSD 3-Clause will be normalized to "bsd 3 clause".
   * A `.0` suffix is only removed when it ends the id and directly follows a digit.
   */
  private def normalizeId(id: String): String = id
    .toLowerCase(java.util.Locale.ROOT)
    .replace("-", " ")
    .replaceFirst("(?<=\\d)\\.0$", "")

  /**
   * Reads a classpath resource completely into a string.
   *
   * @param resource resource path as understood by `Class.getResourceAsStream`
   * @return the resource content decoded as UTF-8
   * @throws RuntimeException if the resource does not exist
   */
  private def loadResourceAsString(resource: String): String = {
    // getResourceAsStream returns null for a missing resource; fail with a readable message
    // instead of a NullPointerException further down.
    val stream = Option(getClass.getResourceAsStream(resource))
      .getOrElse(throw new RuntimeException(s"resource not found on classpath: $resource"))
    // Decode explicitly as UTF-8 rather than with the platform default charset.
    val source = Source.fromInputStream(stream, "UTF-8")
    try source.mkString
    finally source.close() // also closes the underlying stream
  }

  /**
   * Builds an archive from the content of a licenses JSON document.
   *
   * @param json the JSON text
   * @return an archive indexing every license found in `json`
   */
  def fromJsonString(json: String): LicensesArchive =
    new LicensesArchive(LicensesArchiveJsonParser.parseString(json))

  /** The archive built from the `/licenses.json` classpath resource, loaded on first access. */
  lazy val bundled: LicensesArchive =
    fromJsonString(loadResourceAsString("/licenses.json"))
}
44 changes: 44 additions & 0 deletions
44
src/main/scala/com/github/sbt/sbom/licenses/LicensesArchiveJsonParser.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
package com.github.sbt.sbom.licenses | ||
|
||
import io.circe.Decoder | ||
import io.circe.generic.semiauto.deriveDecoder | ||
import io.circe.parser._ | ||
|
||
import scala.util.control.NonFatal | ||
|
||
/** Decodes a licenses JSON document into [[License]] values using circe. */
private[licenses] object LicensesArchiveJsonParser {

  /** JSON shape of one entry of the `licenses` array; only the fields that are used. */
  private case class LicenseJson(
    licenseId: String,
    name: String,
    seeAlso: Seq[String]
  )

  private object LicenseJson {
    implicit val decoder: Decoder[LicenseJson] = deriveDecoder
  }

  /** JSON shape of the whole document; only the `licenses` array is read. */
  private case class LicensesArchiveJson(
    licenses: Seq[LicenseJson]
  )

  private object LicensesArchiveJson {
    implicit val decoder: Decoder[LicensesArchiveJson] = deriveDecoder
  }

  /** Converts one decoded JSON entry into the domain model. */
  private def toLicense(entry: LicenseJson): License =
    License(id = entry.licenseId, name = entry.name, references = entry.seeAlso)

  /**
   * Parses the given JSON text.
   *
   * @param string the JSON document
   * @return one [[License]] per entry of the document's `licenses` array
   * @throws RuntimeException wrapping the underlying error when the text cannot be decoded
   */
  def parseString(string: String): Seq[License] = {
    val decodedArchive =
      try {
        // Re-throw a decoding failure so that it is wrapped by the handler below, exactly like
        // any other non-fatal error raised while decoding.
        decode[LicensesArchiveJson](string).fold(error => throw error, archive => archive)
      } catch {
        case NonFatal(cause) => throw new RuntimeException("failed to parse licenses archive json", cause)
      }

    decodedArchive.licenses.map(toLicense)
  }
}
23 changes: 0 additions & 23 deletions
23
src/main/scala/com/github/sbt/sbom/licenses/LicensesArchiveParser.scala
This file was deleted.
Oops, something went wrong.
98 changes: 53 additions & 45 deletions
98
src/test/scala/com/github/sbt/sbom/LicensesArchiveSpec.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,83 +1,91 @@ | ||
package com.github.sbt.sbom | ||
|
||
import com.github.sbt.sbom.licenses.{ LicensesArchive, LicensesArchiveParser } | ||
import com.github.sbt.sbom.licenses.LicensesArchive | ||
import org.scalatest.matchers.should.Matchers | ||
import org.scalatest.wordspec.AnyWordSpec | ||
|
||
/**
 * Tests for building a [[LicensesArchive]] from JSON and for looking licenses up by reference
 * URL and by id, both on a small inline fixture and on the bundled archive.
 */
class LicensesArchiveSpec extends AnyWordSpec with Matchers {
  "LicensesArchiveParser" should {
    "fail parsing a not valid archive" in {
      assertThrows[RuntimeException] {
        LicensesArchive.fromJsonString("")
      }
    }

    "parse a valid archive" in {
      // Assert explicitly so the test states what it checks instead of passing by not throwing.
      noException should be thrownBy LicensesArchive.fromJsonString(json)
    }
  }

  "LicenseRegister" should {
    "find no license by ref" in {
      val register = LicensesArchive.fromJsonString(json)
      register.findByUrl("http://www.domain.com/missingLicense") shouldBe Seq.empty
    }

    "find licenses by ref" in {
      val register = LicensesArchive.fromJsonString(json)
      val gpl2 = register.findByUrl("https://opensource.org/licenses/GPL-2.0")
      val zeroBsd = register.findByUrl("http://landley.net/toybox/license.html")

      // Comparing the whole result checks size and content at once and avoids `.head`.
      gpl2.map(_.id) shouldBe Seq("GPL-2.0")
      zeroBsd.map(_.id) shouldBe Seq("0BSD")
    }

    "find licenses by ref regardless of scheme, www prefix and case" in {
      val register = LicensesArchive.fromJsonString(json)
      val gpl2 = register.findByUrl("http://www.opensource.org/licenses/gpl-2.0/")

      gpl2.map(_.id) shouldBe Seq("GPL-2.0")
    }

    "find no licenses by id" in {
      val register = LicensesArchive.fromJsonString(json)
      register.findById("an invalid id") shouldBe None
    }

    "read licenses from resource file" in {
      val gpl2 = LicensesArchive.bundled.findByUrl("https://opensource.org/licenses/GPL-2.0")
      gpl2.map(_.id) should contain("GPL-2.0")
    }

    "find licenses by id" in {
      val register = LicensesArchive.fromJsonString(json)
      register.findById("GPL-2.0").map(_.id) shouldBe Some("GPL-2.0")
    }

    "find licenses by id regardless of case" in {
      val register = LicensesArchive.fromJsonString(json)
      register.findById("gpl-2.0").map(_.id) shouldBe Some("GPL-2.0")
    }
  }

  /** Minimal archive fixture with two licenses, in the layout of the bundled licenses.json. */
  lazy val json: String =
    """{
      |  "licenseListVersion": "b5a3b2e",
      |  "licenses": [
      |    {
      |      "reference": "https://spdx.org/licenses/0BSD.html",
      |      "isDeprecatedLicenseId": false,
      |      "detailsUrl": "https://spdx.org/licenses/0BSD.json",
      |      "referenceNumber": 430,
      |      "name": "BSD Zero Clause License",
      |      "licenseId": "0BSD",
      |      "seeAlso": [
      |        "http://landley.net/toybox/license.html",
      |        "https://opensource.org/licenses/0BSD"
      |      ],
      |      "isOsiApproved": true
      |    },
      |    {
      |      "reference": "https://spdx.org/licenses/GPL-2.0.html",
      |      "isDeprecatedLicenseId": true,
      |      "detailsUrl": "https://spdx.org/licenses/GPL-2.0.json",
      |      "referenceNumber": 47,
      |      "name": "GNU General Public License v2.0 only",
      |      "licenseId": "GPL-2.0",
      |      "seeAlso": [
      |        "https://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html",
      |        "https://opensource.org/licenses/GPL-2.0"
      |      ],
      |      "isOsiApproved": true,
      |      "isFsfLibre": true
      |    }
      |  ],
      |  "releaseDate": "2024-06-28"
      |}
    """.stripMargin
}