[pve-devel] [PATCH proxmox-apt 2/2] support quote-word parsing for one-line format
Fabian Ebner
f.ebner at proxmox.com
Thu Jul 1 15:46:34 CEST 2021
so that parsing CD ROM repositories with spaces in the name works too.
But it's not limited to that, and should make one-line parsing rather
similar to what APT does (stanza parsing in APT doesn't use
ParseQuoteWord at all AFAICS).
Signed-off-by: Fabian Ebner <f.ebner at proxmox.com>
---
src/repositories/file/list_parser.rs | 143 +++++++++++++++++-----
src/repositories/repository.rs | 58 ++++++++-
tests/sources.list.d.expected/cdroms.list | 10 ++
tests/sources.list.d.expected/files.list | 4 +
tests/sources.list.d/cdroms.list | 7 ++
tests/sources.list.d/files.list | 2 +
6 files changed, 186 insertions(+), 38 deletions(-)
create mode 100644 tests/sources.list.d.expected/cdroms.list
create mode 100644 tests/sources.list.d.expected/files.list
create mode 100644 tests/sources.list.d/cdroms.list
create mode 100644 tests/sources.list.d/files.list
diff --git a/src/repositories/file/list_parser.rs b/src/repositories/file/list_parser.rs
index 86c9955..04c1729 100644
--- a/src/repositories/file/list_parser.rs
+++ b/src/repositories/file/list_parser.rs
@@ -1,7 +1,6 @@
use std::convert::TryInto;
use std::io::BufRead;
-use std::iter::{Iterator, Peekable};
-use std::str::SplitAsciiWhitespace;
+use std::iter::Iterator;
use anyhow::{bail, format_err, Error};
@@ -9,6 +8,78 @@ use crate::repositories::{APTRepository, APTRepositoryFileType, APTRepositoryOpt
use super::APTRepositoryParser;
+// TODO convert %-escape characters. Also adapt printing back accordingly,
+// because at least '%' needs to be re-escaped when printing.
+/// See APT's ParseQuoteWord in contrib/strutl.cc
+///
+/// Doesn't split on whitespace when between `[]` or `""` and strips `"` from the word.
+///
+/// Currently, %-escaped characters are not interpreted, but passed along as is.
+struct SplitQuoteWord {
+ rest: String,
+ position: usize,
+}
+
+impl SplitQuoteWord {
+ pub fn new(string: String) -> Self {
+ Self {
+ rest: string,
+ position: 0,
+ }
+ }
+}
+
+impl Iterator for SplitQuoteWord {
+ type Item = Result<String, Error>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ let rest = &self.rest[self.position..];
+
+ let mut start = None;
+ let mut wait_for = None;
+
+ for (n, c) in rest.chars().enumerate() {
+ self.position += 1;
+
+ if let Some(wait_for_char) = wait_for {
+ if wait_for_char == c {
+ wait_for = None;
+ }
+ continue;
+ }
+
+ if char::is_ascii_whitespace(&c) {
+ if let Some(start) = start {
+ return Some(Ok(rest[start..n].replace('"', "")));
+ }
+ continue;
+ }
+
+ if start == None {
+ start = Some(n);
+ }
+
+ if c == '"' {
+ wait_for = Some('"');
+ }
+
+ if c == '[' {
+ wait_for = Some(']');
+ }
+ }
+
+ if let Some(wait_for) = wait_for {
+ return Some(Err(format_err!("missing terminating '{}'", wait_for)));
+ }
+
+ if let Some(start) = start {
+ return Some(Ok(rest[start..].replace('"', "")));
+ }
+
+ None
+ }
+}
+
pub struct APTListFileParser<R: BufRead> {
input: R,
line_nr: usize,
@@ -31,24 +102,18 @@ impl<R: BufRead> APTListFileParser<R> {
/// Errors when options are invalid or not closed by `']'`.
fn parse_options(
options: &mut Vec<APTRepositoryOption>,
- tokens: &mut Peekable<SplitAsciiWhitespace>,
+ tokens: &mut SplitQuoteWord,
) -> Result<(), Error> {
- let mut option = match tokens.peek() {
- Some(token) => {
- match token.strip_prefix('[') {
- Some(option) => option,
- None => return Ok(()), // doesn't look like options
- }
- }
- None => return Ok(()),
- };
-
- tokens.next(); // avoid reading the beginning twice
-
let mut finished = false;
+
loop {
+ let mut option = match tokens.next() {
+ Some(token) => token?,
+ None => bail!("options not closed by ']'"),
+ };
+
if let Some(stripped) = option.strip_suffix(']') {
- option = stripped;
+ option = stripped.to_string();
if option.is_empty() {
break;
}
@@ -83,11 +148,6 @@ impl<R: BufRead> APTListFileParser<R> {
if finished {
break;
}
-
- option = match tokens.next() {
- Some(option) => option,
- None => bail!("options not closed by ']'"),
- }
}
Ok(())
@@ -122,24 +182,43 @@ impl<R: BufRead> APTListFileParser<R> {
line = line_start;
}
- let mut tokens = line.split_ascii_whitespace().peekable();
-
- match tokens.next() {
- Some(package_type) => {
+ // e.g. quoted "deb" is not accepted by APT, so no need for quote word parsing here
+ line = match line.split_once(|c| char::is_ascii_whitespace(&c)) {
+ Some((package_type, rest)) => {
repo.types.push(package_type.try_into()?);
+ rest
}
None => return Ok(None), // empty line
- }
+ };
- Self::parse_options(&mut repo.options, &mut tokens)?;
+ line = line.trim_start_matches(|c| char::is_ascii_whitespace(&c));
+
+ let has_options = match line.strip_prefix('[') {
+ Some(rest) => {
+ // avoid the start of the options to be interpreted as the start of a quote word
+ line = rest;
+ true
+ }
+ None => false,
+ };
+
+ let mut tokens = SplitQuoteWord::new(line.to_string());
+
+ if has_options {
+ Self::parse_options(&mut repo.options, &mut tokens)?;
+ }
// the rest of the line is just '<uri> <suite> [<components>...]'
- let mut tokens = tokens.map(str::to_string);
repo.uris
- .push(tokens.next().ok_or_else(|| format_err!("missing URI"))?);
- repo.suites
- .push(tokens.next().ok_or_else(|| format_err!("missing suite"))?);
- repo.components.extend(tokens);
+ .push(tokens.next().ok_or_else(|| format_err!("missing URI"))??);
+ repo.suites.push(
+ tokens
+ .next()
+ .ok_or_else(|| format_err!("missing suite"))??,
+ );
+ for token in tokens {
+ repo.components.push(token?);
+ }
repo.comment = std::mem::take(&mut self.comment);
diff --git a/src/repositories/repository.rs b/src/repositories/repository.rs
index cf17380..4e1ea6e 100644
--- a/src/repositories/repository.rs
+++ b/src/repositories/repository.rs
@@ -433,6 +433,41 @@ fn suite_variant(suite: &str) -> (&str, &str) {
(suite, "")
}
+/// Strips existing double quotes from the string first, and then adds double quotes at
+/// the beginning and end if there is an ASCII whitespace in the `string`, which is not
+/// escaped by `[]`.
+fn quote_for_one_line(string: &str) -> String {
+ let mut add_quotes = false;
+ let mut wait_for_bracket = false;
+
+ // easier to just quote the whole string, so ignore pre-existing quotes
+ // currently, parsing removes them anyways, but being on the safe side is rather cheap
+ let string = string.replace('"', "");
+
+ for c in string.chars() {
+ if wait_for_bracket {
+ if c == ']' {
+ wait_for_bracket = false;
+ }
+ continue;
+ }
+
+ if char::is_ascii_whitespace(&c) {
+ add_quotes = true;
+ break;
+ }
+
+ if c == '[' {
+ wait_for_bracket = true;
+ }
+ }
+
+ match add_quotes {
+ true => format!("\"{}\"", string),
+ false => string,
+ }
+}
+
/// Writes a repository in one-line format followed by a blank line.
///
/// Expects that `repo.file_type == APTRepositoryFileType::List`.
@@ -457,15 +492,26 @@ fn write_one_line(repo: &APTRepository, w: &mut dyn Write) -> Result<(), Error>
if !repo.options.is_empty() {
write!(w, "[ ")?;
- repo.options
- .iter()
- .try_for_each(|option| write!(w, "{}={} ", option.key, option.values.join(",")))?;
+
+ for option in repo.options.iter() {
+ let option = quote_for_one_line(&format!("{}={}", option.key, option.values.join(",")));
+ write!(w, "{} ", option)?;
+ }
+
write!(w, "] ")?;
};
- write!(w, "{} ", repo.uris[0])?;
- write!(w, "{} ", repo.suites[0])?;
- writeln!(w, "{}", repo.components.join(" "))?;
+ write!(w, "{} ", quote_for_one_line(&repo.uris[0]))?;
+ write!(w, "{} ", quote_for_one_line(&repo.suites[0]))?;
+ writeln!(
+ w,
+ "{}",
+ repo.components
+ .iter()
+ .map(|comp| quote_for_one_line(comp))
+ .collect::<Vec<String>>()
+ .join(" ")
+ )?;
writeln!(w)?;
diff --git a/tests/sources.list.d.expected/cdroms.list b/tests/sources.list.d.expected/cdroms.list
new file mode 100644
index 0000000..7d75573
--- /dev/null
+++ b/tests/sources.list.d.expected/cdroms.list
@@ -0,0 +1,10 @@
+# deb [ trusted=yes ] cdrom:[Proxmox VE 5.1]/ stretch pve
+
+# deb [ trusted=yes ] cdrom:[Proxmox VE 5.1]/proxmox/packages/ /
+
+deb [ trusted=yes ] cdrom:[Proxmox VE 7.0 BETA]/ bullseye pve
+
+deb cdrom:[Proxmox VE 7.0 BETA]/proxmox/packages/ /
+
+deb [ trusted=yes ] cdrom:[Debian GNU/Linux 10.6.0 _Buster_ - Official amd64 NETINST 20200926-10:16]/ buster main
+
diff --git a/tests/sources.list.d.expected/files.list b/tests/sources.list.d.expected/files.list
new file mode 100644
index 0000000..5e77023
--- /dev/null
+++ b/tests/sources.list.d.expected/files.list
@@ -0,0 +1,4 @@
+deb [ trusted=yes ] "file:///some/spacey/mount point/" bullseye pve
+
+deb [ lang=it ] "file:///some/spacey/mount point/proxmox/packages/" /
+
diff --git a/tests/sources.list.d/cdroms.list b/tests/sources.list.d/cdroms.list
new file mode 100644
index 0000000..3b12626
--- /dev/null
+++ b/tests/sources.list.d/cdroms.list
@@ -0,0 +1,7 @@
+#deb [trusted=yes] cdrom:[Proxmox VE 5.1]/ stretch pve
+#deb [trusted=yes] cdrom:[Proxmox VE 5.1]/proxmox/packages/ /
+
+deb [trusted=yes] cdrom:[Proxmox VE 7.0 BETA]/ bullseye pve
+deb cdrom:[Proxmox VE 7.0 BETA]/proxmox/packages/ /
+
+deb [ "trusted=yes" ] cdrom:[Debian GNU/Linux 10.6.0 _Buster_ - Official amd64 NETINST 20200926-10:16]/ buster main
diff --git a/tests/sources.list.d/files.list b/tests/sources.list.d/files.list
new file mode 100644
index 0000000..4a5e4c2
--- /dev/null
+++ b/tests/sources.list.d/files.list
@@ -0,0 +1,2 @@
+deb [trusted=yes] "file:///some/spacey/mount point/" bullseye pve
+deb [lang="it"] file:///some/spacey/"mount point"/proxmox/packages/ /
--
2.30.2
More information about the pve-devel
mailing list