diff --git a/Cargo.lock b/Cargo.lock
index 3b56dd6bd9ce..5a9764bfe084 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -199,7 +199,7 @@ dependencies = [
"rustc-hash 2.1.1",
"serde",
"serde_derive",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -396,7 +396,7 @@ checksum = "89385e82b5d1821d2219e0b095efa2cc1f246cbf99080f3be46a1a85c0d392d9"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -573,7 +573,6 @@ checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2"
dependencies = [
"iana-time-zone",
"num-traits",
- "serde",
"windows-link 0.2.1",
]
@@ -635,10 +634,10 @@ version = "4.5.49"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671"
dependencies = [
- "heck 0.5.0",
+ "heck",
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -768,7 +767,6 @@ dependencies = [
"serde",
"serde_json",
"similar",
- "spdx-rs",
]
[[package]]
@@ -804,7 +802,7 @@ dependencies = [
"nom",
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -1047,7 +1045,7 @@ dependencies = [
"proc-macro2",
"quote",
"scratch",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -1061,7 +1059,7 @@ dependencies = [
"indexmap",
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -1079,7 +1077,7 @@ dependencies = [
"indexmap",
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -1103,7 +1101,7 @@ dependencies = [
"proc-macro2",
"quote",
"strsim",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -1114,7 +1112,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
dependencies = [
"darling_core",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -1146,7 +1144,7 @@ checksum = "d08b3a0bcc0d079199cd476b2cae8435016ec11d1c0986c6901c5ac223041534"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -1167,7 +1165,7 @@ dependencies = [
"darling",
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -1177,7 +1175,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
dependencies = [
"derive_builder_core",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -1189,7 +1187,7 @@ dependencies = [
"darling",
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -1264,7 +1262,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -1686,12 +1684,6 @@ dependencies = [
"foldhash 0.2.0",
]
-[[package]]
-name = "heck"
-version = "0.4.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
-
[[package]]
name = "heck"
version = "0.5.0"
@@ -2092,7 +2084,7 @@ checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -2389,7 +2381,7 @@ checksum = "88a9689d8d44bf9964484516275f5cd4c9b59457a6940c1d5d0ecbb94510a36b"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -2534,7 +2526,7 @@ checksum = "4568f25ccbd45ab5d5603dc34318c1ec56b117531781260002151b8530a9f931"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -2915,7 +2907,7 @@ dependencies = [
"pest_meta",
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -3125,7 +3117,7 @@ checksum = "7347867d0a7e1208d93b46767be83e2b8f978c3dad35f775ac8d8847551d6fe1"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -3316,7 +3308,7 @@ checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -3409,7 +3401,7 @@ checksum = "8100bb34c0a1d0f907143db3149e6b4eea3c33b9ee8b189720168e818303986f"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -4125,7 +4117,7 @@ version = "0.0.0"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -4271,7 +4263,7 @@ dependencies = [
"fluent-syntax",
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
"synstructure",
]
@@ -4852,7 +4844,7 @@ version = "0.0.0"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
"synstructure",
]
@@ -4952,7 +4944,7 @@ dependencies = [
"proc-macro2",
"quote",
"serde",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -5069,7 +5061,7 @@ dependencies = [
"proc-macro2",
"quote",
"serde_derive_internals",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -5155,7 +5147,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -5166,7 +5158,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -5299,35 +5291,6 @@ dependencies = [
"color-eyre",
]
-[[package]]
-name = "spdx-expression"
-version = "0.5.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "53d7ac03c67c572d85049d6db815e20a4a19b41b3d5cca732ac582342021ad77"
-dependencies = [
- "nom",
- "serde",
- "thiserror 1.0.69",
- "tracing",
-]
-
-[[package]]
-name = "spdx-rs"
-version = "0.5.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "990870190ec8d8c64ba66e4a6746243d6e57d99353991e0e6092334833f429b1"
-dependencies = [
- "chrono",
- "log",
- "nom",
- "serde",
- "spdx-expression",
- "strum",
- "strum_macros",
- "thiserror 1.0.69",
- "uuid",
-]
-
[[package]]
name = "stable_deref_trait"
version = "1.2.1"
@@ -5393,36 +5356,6 @@ version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
-[[package]]
-name = "strum"
-version = "0.24.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
-
-[[package]]
-name = "strum_macros"
-version = "0.24.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
-dependencies = [
- "heck 0.4.1",
- "proc-macro2",
- "quote",
- "rustversion",
- "syn 1.0.109",
-]
-
-[[package]]
-name = "syn"
-version = "1.0.109"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
-dependencies = [
- "proc-macro2",
- "quote",
- "unicode-ident",
-]
-
[[package]]
name = "syn"
version = "2.0.110"
@@ -5442,7 +5375,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -5579,7 +5512,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -5590,7 +5523,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -5832,7 +5765,7 @@ checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -6028,7 +5961,7 @@ checksum = "a1249a628de3ad34b821ecb1001355bca3940bcb2f88558f1a8bd82e977f75b5"
dependencies = [
"proc-macro-hack",
"quote",
- "syn 2.0.110",
+ "syn",
"unic-langid-impl",
]
@@ -6266,7 +6199,7 @@ dependencies = [
"bumpalo",
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
"wasm-bindgen-shared",
]
@@ -6523,7 +6456,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -6534,7 +6467,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -6904,7 +6837,7 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
"synstructure",
]
@@ -6925,7 +6858,7 @@ checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
@@ -6945,7 +6878,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
"synstructure",
]
@@ -6980,7 +6913,7 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.110",
+ "syn",
]
[[package]]
diff --git a/src/tools/collect-license-metadata/Cargo.toml b/src/tools/collect-license-metadata/Cargo.toml
index f84da2442815..ae41d2fbb009 100644
--- a/src/tools/collect-license-metadata/Cargo.toml
+++ b/src/tools/collect-license-metadata/Cargo.toml
@@ -10,4 +10,3 @@ anyhow = "1.0.65"
serde = { version = "1.0.147", features = ["derive"] }
serde_json = "1.0.85"
similar = "2.7.0"
-spdx-rs = "0.5.1"
diff --git a/src/tools/collect-license-metadata/src/main.rs b/src/tools/collect-license-metadata/src/main.rs
index 4e218ea59fda..156871b1b3a8 100644
--- a/src/tools/collect-license-metadata/src/main.rs
+++ b/src/tools/collect-license-metadata/src/main.rs
@@ -1,6 +1,7 @@
mod licenses;
mod path_tree;
mod reuse;
+mod spdx;
use std::path::PathBuf;
diff --git a/src/tools/collect-license-metadata/src/reuse.rs b/src/tools/collect-license-metadata/src/reuse.rs
index dbe46781b7c5..6bc41453a53f 100644
--- a/src/tools/collect-license-metadata/src/reuse.rs
+++ b/src/tools/collect-license-metadata/src/reuse.rs
@@ -15,18 +15,15 @@ pub(crate) fn collect(
let raw = &obtain_spdx_document(reuse_exe)?;
println!("finished gathering the license information from REUSE in {:.2?}", start.elapsed());
- let document = spdx_rs::parsers::spdx_from_tag_value(&raw)?;
+ let files = crate::spdx::parse_tag_value(raw)?;
let mut result = Vec::new();
- for file in document.file_information {
- let concluded_license = file.concluded_license.expect("File should have licence info");
- let copyright_text = file.copyright_text.expect("File should have copyright text");
+ for file in files {
let license = interner.intern(License {
- spdx: concluded_license.to_string(),
- copyright: copyright_text.split('\n').map(|s| s.into()).collect(),
+ spdx: file.concluded_license,
+ copyright: file.copyright_text.split('\n').map(|s| s.into()).collect(),
});
-
- result.push((file.file_name.into(), license));
+ result.push((file.name.into(), license));
}
Ok(result)
diff --git a/src/tools/collect-license-metadata/src/spdx/mod.rs b/src/tools/collect-license-metadata/src/spdx/mod.rs
new file mode 100644
index 000000000000..a94f2bcf51ec
--- /dev/null
+++ b/src/tools/collect-license-metadata/src/spdx/mod.rs
@@ -0,0 +1,102 @@
+use anyhow::Error;
+
+/// A single file entry extracted from an SPDX tag-value document.
+pub(crate) struct SpdxFileEntry {
+ pub(crate) name: String, // value of the section's `FileName` tag
+ pub(crate) concluded_license: String, // value of its `LicenseConcluded` tag
+ pub(crate) copyright_text: String, // value of its `FileCopyrightText` tag
+}
+
+/// Parses an SPDX tag-value document and extracts file information.
+///
+/// This is a minimal parser that only extracts the fields we need
+/// (FileName, LicenseConcluded, FileCopyrightText) rather than the full model.
+/// The format is specified by the SPDX specification: each line is a
+/// `Tag: Value` pair, and multi-line values are wrapped in `<text>…</text>`.
+/// Errors if a file lacks either field or a `<text>` block is never closed.
+pub(crate) fn parse_tag_value(input: &str) -> Result<Vec<SpdxFileEntry>, Error> {
+    let mut files = Vec::new();
+    let mut current_name: Option<String> = None;
+    let mut current_license: Option<String> = None;
+    let mut current_copyright: Option<String> = None;
+
+    let mut lines = input.lines();
+    while let Some(line) = lines.next() {
+        let Some((tag, value)) = line.split_once(": ") else {
+            continue;
+        };
+        // A `<text>…</text>` value may consume further lines from `lines`.
+        let value = resolve_multiline_value(value, &mut lines)?;
+        match tag {
+            "FileName" => {
+                // A new file section begins. Flush the previous one if present.
+                if let Some(name) = current_name.take() {
+                    files.push(build_file_entry(
+                        name,
+                        current_license.take(),
+                        current_copyright.take(),
+                    )?);
+                }
+                current_name = Some(value);
+                // Also drops any values that appeared before the first `FileName`.
+                current_license = None;
+                current_copyright = None;
+            }
+            "LicenseConcluded" => current_license = Some(value),
+            "FileCopyrightText" => current_copyright = Some(value),
+            _ => {}
+        }
+    }
+
+    // Flush the last file section.
+    if let Some(name) = current_name {
+        files.push(build_file_entry(name, current_license, current_copyright)?);
+    }
+
+    Ok(files)
+}
+
+/// Resolves a tag value that might span multiple lines using `<text>…</text>`.
+///
+/// A `<text>` block is read from `further_lines`, joining lines with `\n`, until
+/// one ends in `</text>`; running out of input before that is an error.
+fn resolve_multiline_value<'a>(
+    value: &str,
+    further_lines: &mut impl Iterator<Item = &'a str>,
+) -> Result<String, Error> {
+    let Some(start) = value.strip_prefix("<text>") else {
+        return Ok(value.to_string());
+    };
+    // The closing tag might be on the same line.
+    if let Some(content) = start.strip_suffix("</text>") {
+        return Ok(content.to_string());
+    }
+
+    let mut text = start.to_string();
+    for line in further_lines.by_ref() {
+        text.push('\n');
+        if let Some(rest) = line.strip_suffix("</text>") {
+            text.push_str(rest);
+            return Ok(text);
+        }
+        text.push_str(line);
+    }
+    anyhow::bail!("unexpected end of input inside <text> block")
+}
+
+/// Assembles a file entry, failing if the license or copyright text is absent.
+fn build_file_entry(
+    name: String,
+    concluded_license: Option<String>,
+    copyright_text: Option<String>,
+) -> Result<SpdxFileEntry, Error> {
+    let missing = |tag: &str| anyhow::anyhow!("file missing {tag}");
+    Ok(SpdxFileEntry {
+        name,
+        concluded_license: concluded_license.ok_or_else(|| missing("LicenseConcluded"))?,
+        copyright_text: copyright_text.ok_or_else(|| missing("FileCopyrightText"))?,
+    })
+}
+
+#[cfg(test)]
+mod tests;
diff --git a/src/tools/collect-license-metadata/src/spdx/tests.rs b/src/tools/collect-license-metadata/src/spdx/tests.rs
new file mode 100644
index 000000000000..5b7cb411931d
--- /dev/null
+++ b/src/tools/collect-license-metadata/src/spdx/tests.rs
@@ -0,0 +1,134 @@
+use super::*;
+
+// A file section, per clause 8.1 ("File name field"), opens with a `FileName`
+// tag holding a "./"-prefixed relative path. The per-file licence and
+// copyright come from clause 8.5 ("Concluded license") and clause 8.8
+// ("Copyright text") respectively.
+#[test]
+fn single_file_entry() {
+    let input = "\
+FileName: ./package/foo.c
+LicenseConcluded: LGPL-2.0-only
+FileCopyrightText: Copyright 2008-2010 John Smith";
+
+    let parsed = parse_tag_value(input).unwrap();
+    assert_eq!(parsed.len(), 1);
+    let entry = &parsed[0];
+    assert_eq!(entry.name, "./package/foo.c");
+    assert_eq!(entry.concluded_license, "LGPL-2.0-only");
+    assert_eq!(entry.copyright_text, "Copyright 2008-2010 John Smith");
+}
+
+// Compound SPDX licence expressions such as "(LGPL-2.0-only OR LicenseRef-2)"
+// appear in clause 8.5 as valid `LicenseConcluded` values; the parser must
+// hand them back verbatim.
+#[test]
+fn compound_license_expression() {
+    let input = "\
+FileName: ./src/lib.rs
+LicenseConcluded: (LGPL-2.0-only OR LicenseRef-2)
+FileCopyrightText: Copyright Example Company";
+
+    let parsed = parse_tag_value(input).unwrap();
+    assert_eq!(parsed.len(), 1);
+    assert_eq!(parsed[0].concluded_license, "(LGPL-2.0-only OR LicenseRef-2)");
+}
+
+// Clause 8.8 shows the copyright text wrapped in a single-line
+// <text>...</text> block: e.g.
+// `FileCopyrightText: <text>Copyright 2008-2010 John Smith</text>`
+// The delimiters must not appear in the extracted value.
+#[test]
+fn single_line_text_block() {
+    let input = "\
+FileName: ./package/foo.c
+LicenseConcluded: LGPL-2.0-only
+FileCopyrightText: <text>Copyright 2008-2010 John Smith</text>";
+
+    let files = parse_tag_value(input).unwrap();
+    assert_eq!(files.len(), 1);
+    assert_eq!(files[0].copyright_text, "Copyright 2008-2010 John Smith");
+}
+
+// Clause 6.10 ("Creator comment") demonstrates a multi-line <text>...</text>
+// block; the lines between the delimiters are joined with '\n'.
+#[test]
+fn multi_line_text_block() {
+    let input = "\
+FileName: ./package/foo.c
+LicenseConcluded: MIT
+FileCopyrightText: <text>Copyright 2008-2010 John Smith
+Copyright 2019 Jane Doe</text>";
+
+    let files = parse_tag_value(input).unwrap();
+    assert_eq!(files.len(), 1);
+    assert_eq!(files[0].copyright_text, "Copyright 2008-2010 John Smith\nCopyright 2019 Jane Doe");
+}
+
+// Per clause 5 ("Composition of an SPDX document") a document may hold zero
+// or many File Information sections. Every `FileName` tag opens a new one,
+// so back-to-back file blocks have to be parsed independently of each other.
+#[test]
+fn multiple_file_entries() {
+    let input = "\
+FileName: ./package/foo.c
+LicenseConcluded: LGPL-2.0-only
+FileCopyrightText: Copyright 2008-2010 John Smith
+FileName: ./package/bar.c
+LicenseConcluded: MIT
+FileCopyrightText: Copyright Example Company";
+
+    let parsed = parse_tag_value(input).unwrap();
+    let expected = [
+        ("./package/foo.c", "LGPL-2.0-only", "Copyright 2008-2010 John Smith"),
+        ("./package/bar.c", "MIT", "Copyright Example Company"),
+    ];
+    assert_eq!(parsed.len(), expected.len());
+    for (file, (name, license, copyright)) in parsed.iter().zip(expected) {
+        assert_eq!(file.name, name);
+        assert_eq!(file.concluded_license, license);
+        assert_eq!(file.copyright_text, copyright);
+    }
+}
+
+// Omitting `LicenseConcluded` from a file section must be rejected.
+#[test]
+fn missing_license_is_an_error() {
+    let without_license = "\
+FileName: ./package/foo.c
+FileCopyrightText: Copyright 2008-2010 John Smith";
+    let outcome = parse_tag_value(without_license);
+    assert!(outcome.is_err());
+}
+
+// Omitting `FileCopyrightText` from a file section must be rejected.
+#[test]
+fn missing_copyright_is_an_error() {
+    let without_copyright = "\
+FileName: ./package/foo.c
+LicenseConcluded: MIT";
+    let outcome = parse_tag_value(without_copyright);
+    assert!(outcome.is_err());
+}
+
+// A section with an unterminated <text> block (no closing </text>) is malformed.
+#[test]
+fn unterminated_text_block_is_an_error() {
+    let input = "\
+FileName: ./package/foo.c
+LicenseConcluded: MIT
+FileCopyrightText: <text>Copyright 2008-2010 John Smith";
+
+    assert!(parse_tag_value(input).is_err());
+}
+
+// Header-only documents contain no `FileName` tag, so parsing succeeds and
+// yields no entries.
+#[test]
+fn empty_document_returns_no_entries() {
+    let header_only = "\
+SPDXVersion: SPDX-2.3
+DataLicense: CC0-1.0";
+
+    assert!(parse_tag_value(header_only).unwrap().is_empty());
+}