Rollup merge of #154823 - jakubadamw:spdx-rs-replacement, r=Mark-Simulacrum

Replace the spdx-rs dependency with a minimal in-tree SPDX tag-value parser

The spdx-rs crate [is no longer maintained](https://github.com/doubleopen-project/spdx-rs/pulls) and is behind on its own dependency updates. It is currently used in [the collect-license-metadata tool](https://github.com/rust-lang/rust/tree/main/src/tools/collect-license-metadata), which calls a single function from it: `spdx_rs::parsers::spdx_from_tag_value`. That function parses the output of the `reuse` tool to extract file names, licences and copyright text.

This PR replaces the use of that function with a minimal in-tree parser that handles only the subset of the SPDX tag-value format that the tool needs: `Tag: Value` line pairs and multi-line `<text>...</text>` blocks.

Coincidentally, this gets rid of the last transitive dependency on syn v1.
This commit is contained in:
Jonathan Brouwer
2026-04-19 16:04:29 +02:00
committed by GitHub
6 changed files with 283 additions and 117 deletions
+41 -108
View File
@@ -199,7 +199,7 @@ dependencies = [
"rustc-hash 2.1.1",
"serde",
"serde_derive",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -396,7 +396,7 @@ checksum = "89385e82b5d1821d2219e0b095efa2cc1f246cbf99080f3be46a1a85c0d392d9"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -573,7 +573,6 @@ checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2"
dependencies = [
"iana-time-zone",
"num-traits",
"serde",
"windows-link 0.2.1",
]
@@ -635,10 +634,10 @@ version = "4.5.49"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671"
dependencies = [
"heck 0.5.0",
"heck",
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -768,7 +767,6 @@ dependencies = [
"serde",
"serde_json",
"similar",
"spdx-rs",
]
[[package]]
@@ -804,7 +802,7 @@ dependencies = [
"nom",
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -1047,7 +1045,7 @@ dependencies = [
"proc-macro2",
"quote",
"scratch",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -1061,7 +1059,7 @@ dependencies = [
"indexmap",
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -1079,7 +1077,7 @@ dependencies = [
"indexmap",
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -1103,7 +1101,7 @@ dependencies = [
"proc-macro2",
"quote",
"strsim",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -1114,7 +1112,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
dependencies = [
"darling_core",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -1146,7 +1144,7 @@ checksum = "d08b3a0bcc0d079199cd476b2cae8435016ec11d1c0986c6901c5ac223041534"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -1167,7 +1165,7 @@ dependencies = [
"darling",
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -1177,7 +1175,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
dependencies = [
"derive_builder_core",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -1189,7 +1187,7 @@ dependencies = [
"darling",
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -1264,7 +1262,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -1686,12 +1684,6 @@ dependencies = [
"foldhash 0.2.0",
]
[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "heck"
version = "0.5.0"
@@ -2092,7 +2084,7 @@ checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -2389,7 +2381,7 @@ checksum = "88a9689d8d44bf9964484516275f5cd4c9b59457a6940c1d5d0ecbb94510a36b"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -2534,7 +2526,7 @@ checksum = "4568f25ccbd45ab5d5603dc34318c1ec56b117531781260002151b8530a9f931"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -2915,7 +2907,7 @@ dependencies = [
"pest_meta",
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -3125,7 +3117,7 @@ checksum = "7347867d0a7e1208d93b46767be83e2b8f978c3dad35f775ac8d8847551d6fe1"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -3316,7 +3308,7 @@ checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -3409,7 +3401,7 @@ checksum = "8100bb34c0a1d0f907143db3149e6b4eea3c33b9ee8b189720168e818303986f"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -4125,7 +4117,7 @@ version = "0.0.0"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -4271,7 +4263,7 @@ dependencies = [
"fluent-syntax",
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
"synstructure",
]
@@ -4852,7 +4844,7 @@ version = "0.0.0"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
"synstructure",
]
@@ -4952,7 +4944,7 @@ dependencies = [
"proc-macro2",
"quote",
"serde",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -5069,7 +5061,7 @@ dependencies = [
"proc-macro2",
"quote",
"serde_derive_internals",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -5155,7 +5147,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -5166,7 +5158,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -5299,35 +5291,6 @@ dependencies = [
"color-eyre",
]
[[package]]
name = "spdx-expression"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d7ac03c67c572d85049d6db815e20a4a19b41b3d5cca732ac582342021ad77"
dependencies = [
"nom",
"serde",
"thiserror 1.0.69",
"tracing",
]
[[package]]
name = "spdx-rs"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "990870190ec8d8c64ba66e4a6746243d6e57d99353991e0e6092334833f429b1"
dependencies = [
"chrono",
"log",
"nom",
"serde",
"spdx-expression",
"strum",
"strum_macros",
"thiserror 1.0.69",
"uuid",
]
[[package]]
name = "stable_deref_trait"
version = "1.2.1"
@@ -5393,36 +5356,6 @@ version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "strum"
version = "0.24.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
[[package]]
name = "strum_macros"
version = "0.24.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
dependencies = [
"heck 0.4.1",
"proc-macro2",
"quote",
"rustversion",
"syn 1.0.109",
]
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "syn"
version = "2.0.110"
@@ -5442,7 +5375,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -5579,7 +5512,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -5590,7 +5523,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -5832,7 +5765,7 @@ checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -6028,7 +5961,7 @@ checksum = "a1249a628de3ad34b821ecb1001355bca3940bcb2f88558f1a8bd82e977f75b5"
dependencies = [
"proc-macro-hack",
"quote",
"syn 2.0.110",
"syn",
"unic-langid-impl",
]
@@ -6266,7 +6199,7 @@ dependencies = [
"bumpalo",
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
"wasm-bindgen-shared",
]
@@ -6523,7 +6456,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -6534,7 +6467,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -6904,7 +6837,7 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
"synstructure",
]
@@ -6925,7 +6858,7 @@ checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -6945,7 +6878,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
"synstructure",
]
@@ -6980,7 +6913,7 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.110",
"syn",
]
[[package]]
@@ -10,4 +10,3 @@ anyhow = "1.0.65"
serde = { version = "1.0.147", features = ["derive"] }
serde_json = "1.0.85"
similar = "2.7.0"
spdx-rs = "0.5.1"
@@ -1,6 +1,7 @@
mod licenses;
mod path_tree;
mod reuse;
mod spdx;
use std::path::PathBuf;
@@ -15,18 +15,15 @@ pub(crate) fn collect(
let raw = &obtain_spdx_document(reuse_exe)?;
println!("finished gathering the license information from REUSE in {:.2?}", start.elapsed());
let document = spdx_rs::parsers::spdx_from_tag_value(&raw)?;
let files = crate::spdx::parse_tag_value(raw)?;
let mut result = Vec::new();
for file in document.file_information {
let concluded_license = file.concluded_license.expect("File should have licence info");
let copyright_text = file.copyright_text.expect("File should have copyright text");
for file in files {
let license = interner.intern(License {
spdx: concluded_license.to_string(),
copyright: copyright_text.split('\n').map(|s| s.into()).collect(),
spdx: file.concluded_license,
copyright: file.copyright_text.split('\n').map(|s| s.into()).collect(),
});
result.push((file.file_name.into(), license));
result.push((file.name.into(), license));
}
Ok(result)
@@ -0,0 +1,102 @@
use anyhow::Error;
/// A single file entry extracted from an SPDX tag-value document.
///
/// Every field holds the tag's value as an owned string; values that were
/// wrapped in `<text>…</text>` are stored without the wrapper.
///
/// The standard comparison and debugging traits are derived so that entries
/// can be compared with `assert_eq!` and so that `Result::unwrap_err` can be
/// used on results carrying entries.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct SpdxFileEntry {
    /// Value of the `FileName` tag that opened this file section.
    pub(crate) name: String,
    /// Value of the `LicenseConcluded` tag of this file section.
    pub(crate) concluded_license: String,
    /// Value of the `FileCopyrightText` tag of this file section.
    pub(crate) copyright_text: String,
}
/// Extracts the per-file records from an SPDX tag-value document.
///
/// Only `FileName`, `LicenseConcluded` and `FileCopyrightText` are looked at;
/// every other tag is read and discarded, and lines that do not have the
/// `Tag: Value` shape are skipped. A `FileName` tag opens a new file section
/// and closes the previous one. Values wrapped in `<text>…</text>` may span
/// several lines.
///
/// # Errors
///
/// Fails if a file section lacks `LicenseConcluded` or `FileCopyrightText`,
/// or if a `<text>` block is still open when the input ends.
pub(crate) fn parse_tag_value(input: &str) -> Result<Vec<SpdxFileEntry>, Error> {
    let mut entries = Vec::new();

    // Fields of the file section that is currently being read, if any.
    let mut pending_name: Option<String> = None;
    let mut pending_license: Option<String> = None;
    let mut pending_copyright: Option<String> = None;

    // The iterator is shared with `resolve_multiline_value`, which advances it
    // past the body of a multi-line block, hence the manual `next()` calls.
    let mut remaining = input.lines();
    loop {
        let Some(line) = remaining.next() else { break };

        if let Some((tag, raw)) = line.split_once(": ") {
            // Resolved for every tag, not just the interesting ones, so that the
            // body of an unrelated multi-line value is consumed here as well.
            let value = resolve_multiline_value(raw, &mut remaining)?;

            if tag == "FileName" {
                // Whatever was collected so far belongs to the previous section
                // (or to no section at all, in which case it is dropped).
                let finished_license = pending_license.take();
                let finished_copyright = pending_copyright.take();
                if let Some(finished_name) = pending_name.replace(value) {
                    entries.push(build_file_entry(
                        finished_name,
                        finished_license,
                        finished_copyright,
                    )?);
                }
            } else if tag == "LicenseConcluded" {
                pending_license = Some(value);
            } else if tag == "FileCopyrightText" {
                pending_copyright = Some(value);
            }
        }
    }

    // The final section has no following `FileName` to close it.
    let last = pending_name
        .map(|name| build_file_entry(name, pending_license, pending_copyright))
        .transpose()?;
    entries.extend(last);

    Ok(entries)
}
/// Returns the full value of a tag, unwrapping `<text>…</text>` if present.
///
/// A value that does not start with `<text>` is returned unchanged. Otherwise
/// the wrapper is stripped; if the closing tag is not at the end of `value`,
/// lines are pulled from `further_lines` and joined with `\n` until one ends
/// with `</text>`.
///
/// # Errors
///
/// Fails if `further_lines` runs out before a line ending in `</text>` is seen.
fn resolve_multiline_value<'a>(
    value: &str,
    further_lines: &mut impl Iterator<Item = &'a str>,
) -> Result<String, Error> {
    const OPEN: &str = "<text>";
    const CLOSE: &str = "</text>";

    let first = match value.strip_prefix(OPEN) {
        Some(rest) => rest,
        None => return Ok(value.to_string()),
    };

    // Opening and closing tag on one line: nothing more to read.
    if let Some(inline) = first.strip_suffix(CLOSE) {
        return Ok(inline.to_string());
    }

    let mut collected = String::from(first);
    for line in further_lines {
        collected.push('\n');
        match line.strip_suffix(CLOSE) {
            Some(last) => {
                collected.push_str(last);
                return Ok(collected);
            }
            None => collected.push_str(line),
        }
    }

    Err(anyhow::anyhow!("unexpected end of input inside <text> block"))
}
fn build_file_entry(
name: String,
concluded_license: Option<String>,
copyright_text: Option<String>,
) -> Result<SpdxFileEntry, Error> {
Ok(SpdxFileEntry {
name,
concluded_license: concluded_license
.ok_or_else(|| anyhow::anyhow!("file missing LicenseConcluded"))?,
copyright_text: copyright_text
.ok_or_else(|| anyhow::anyhow!("file missing FileCopyrightText"))?,
})
}
#[cfg(test)]
mod tests;
@@ -0,0 +1,134 @@
use super::*;
// Per clause 8.1 ("File name field"), a file section starts with a `FileName`
// tag holding a relative path prefixed with "./". The matching per-file fields
// come from clause 8.5 ("Concluded license") and clause 8.8 ("Copyright text").
// <https://spdx.github.io/spdx-spec/v2.3/file-information/>
#[test]
fn single_file_entry() {
    let document = "\
FileName: ./package/foo.c
LicenseConcluded: LGPL-2.0-only
FileCopyrightText: Copyright 2008-2010 John Smith";

    let entries = parse_tag_value(document).unwrap();
    assert_eq!(entries.len(), 1);

    let entry = &entries[0];
    assert_eq!(entry.name, "./package/foo.c");
    assert_eq!(entry.concluded_license, "LGPL-2.0-only");
    assert_eq!(entry.copyright_text, "Copyright 2008-2010 John Smith");
}
// Clause 8.5 lists compound SPDX licence expressions, such as
// "(LGPL-2.0-only OR LicenseRef-2)", as valid `LicenseConcluded` values.
// The expression must be passed through verbatim.
// <https://spdx.github.io/spdx-spec/v2.3/file-information/>
#[test]
fn compound_license_expression() {
    let document = "\
FileName: ./src/lib.rs
LicenseConcluded: (LGPL-2.0-only OR LicenseRef-2)
FileCopyrightText: Copyright Example Company";

    let entries = parse_tag_value(document).unwrap();
    assert_eq!(entries.len(), 1);

    let entry = &entries[0];
    assert_eq!(entry.concluded_license, "(LGPL-2.0-only OR LicenseRef-2)");
}
// Clause 8.8 gives an example where the copyright text sits in a
// <text>...</text> block that opens and closes on the same line:
// `FileCopyrightText: <text>Copyright 2008-2010 John Smith</text>`
// The wrapper must not appear in the parsed value.
// <https://spdx.github.io/spdx-spec/v2.3/file-information/>
#[test]
fn single_line_text_block() {
    let document = "\
FileName: ./package/foo.c
LicenseConcluded: LGPL-2.0-only
FileCopyrightText: <text>Copyright 2008-2010 John Smith</text>";

    let entries = parse_tag_value(document).unwrap();
    assert_eq!(entries.len(), 1);

    let entry = &entries[0];
    assert_eq!(entry.copyright_text, "Copyright 2008-2010 John Smith");
}
// A <text>...</text> block may span several lines, as demonstrated by
// clause 6.10 ("Creator comment"). The lines are joined with `\n`.
// <https://spdx.github.io/spdx-spec/v2.3/document-creation-information/>
#[test]
fn multi_line_text_block() {
    let document = "\
FileName: ./package/foo.c
LicenseConcluded: MIT
FileCopyrightText: <text>Copyright 2008-2010 John Smith
Copyright 2019 Jane Doe</text>";

    let entries = parse_tag_value(document).unwrap();
    assert_eq!(entries.len(), 1);

    let entry = &entries[0];
    assert_eq!(entry.copyright_text, "Copyright 2008-2010 John Smith\nCopyright 2019 Jane Doe");
}
// According to clause 5 ("Composition of an SPDX document"), a document may
// hold zero or many File Information sections. A new section starts at each
// `FileName` tag, so back-to-back file blocks must not bleed into each other.
// <https://spdx.github.io/spdx-spec/v2.3/composition-of-an-SPDX-document/>
#[test]
fn multiple_file_entries() {
    let document = "\
FileName: ./package/foo.c
LicenseConcluded: LGPL-2.0-only
FileCopyrightText: Copyright 2008-2010 John Smith
FileName: ./package/bar.c
LicenseConcluded: MIT
FileCopyrightText: Copyright Example Company";

    // (name, concluded license, copyright text), in document order.
    let expected = [
        ("./package/foo.c", "LGPL-2.0-only", "Copyright 2008-2010 John Smith"),
        ("./package/bar.c", "MIT", "Copyright Example Company"),
    ];

    let entries = parse_tag_value(document).unwrap();
    assert_eq!(entries.len(), 2);

    for (entry, (name, license, copyright)) in entries.iter().zip(expected) {
        assert_eq!(entry.name, name);
        assert_eq!(entry.concluded_license, license);
        assert_eq!(entry.copyright_text, copyright);
    }
}
// Malformed input: the file section never provides `LicenseConcluded`.
#[test]
fn missing_license_is_an_error() {
    let document = "\
FileName: ./package/foo.c
FileCopyrightText: Copyright 2008-2010 John Smith";

    let outcome = parse_tag_value(document);
    assert!(outcome.is_err());
}
// Malformed input: the file section never provides `FileCopyrightText`.
#[test]
fn missing_copyright_is_an_error() {
    let document = "\
FileName: ./package/foo.c
LicenseConcluded: MIT";

    let outcome = parse_tag_value(document);
    assert!(outcome.is_err());
}
// Malformed input: a <text> block is opened but the input ends before the
// closing </text> is found.
#[test]
fn unterminated_text_block_is_an_error() {
    let document = "\
FileName: ./package/foo.c
LicenseConcluded: MIT
FileCopyrightText: <text>Copyright 2008-2010 John Smith";

    let outcome = parse_tag_value(document);
    assert!(outcome.is_err());
}
// When the document contains no `FileName` tag, parsing succeeds and yields
// no entries.
#[test]
fn empty_document_returns_no_entries() {
    let document = "\
SPDXVersion: SPDX-2.3
DataLicense: CC0-1.0";

    let entries = parse_tag_value(document).unwrap();
    assert_eq!(entries.len(), 0);
}