[pbs-devel] [PATCH v2 proxmox-apt 01/10] initial commit
Wolfgang Bumiller
w.bumiller at proxmox.com
Wed Mar 10 16:14:30 CET 2021
comments inline
On Fri, Feb 26, 2021 at 04:09:50PM +0100, Fabian Ebner wrote:
> This create is inteded to hold the code to interact with APT repositories and
> packages.
>
> Starting off with code for parsing and writing .list and .sources repository
> configuration files. The parser is partially inspired by the network
> configuration parser in proxmox-backup, but it does not use a lexer,
> because there's not much variation in how things are to be parsed.
>
> Signed-off-by: Fabian Ebner <f.ebner at proxmox.com>
> ---
>
> Changes from v1:
> * Switched from Perl to Rust.
> * Split things by whitespace instead of using regexes everywhere,
> being the cleaner approach.
> * Used PascalCase for all keys for consistency.
> * File type is now also explicitly returned.
> * Added a writer and tests.
> * The options are not a hash anymore, because it's necessary to keep
> the same order when writing them back out (i.e. Languages-Add
> Languages-Remove)
>
> .cargo/config | 5 +
> .gitignore | 3 +
> Cargo.toml | 22 ++
> rustfmt.toml | 1 +
> src/lib.rs | 3 +
> src/repositories/check.rs | 47 ++++
> src/repositories/list_parser.rs | 181 +++++++++++++
> src/repositories/mod.rs | 246 ++++++++++++++++++
> src/repositories/sources_parser.rs | 217 +++++++++++++++
> src/repositories/writer.rs | 89 +++++++
> src/types.rs | 197 ++++++++++++++
> tests/repositories.rs | 73 ++++++
> .../sources.list.d.expected/multiline.sources | 8 +
> .../options_comment.list | 3 +
> .../pbs-enterprise.list | 2 +
> tests/sources.list.d.expected/pve.list | 13 +
> tests/sources.list.d.expected/standard.list | 7 +
> .../sources.list.d.expected/standard.sources | 11 +
> tests/sources.list.d/multiline.sources | 9 +
> tests/sources.list.d/options_comment.list | 2 +
> tests/sources.list.d/pbs-enterprise.list | 1 +
> tests/sources.list.d/pve.list | 10 +
> tests/sources.list.d/standard.list | 6 +
> tests/sources.list.d/standard.sources | 10 +
> 24 files changed, 1166 insertions(+)
> create mode 100644 .cargo/config
> create mode 100644 .gitignore
> create mode 100644 Cargo.toml
> create mode 100644 rustfmt.toml
> create mode 100644 src/lib.rs
> create mode 100644 src/repositories/check.rs
> create mode 100644 src/repositories/list_parser.rs
> create mode 100644 src/repositories/mod.rs
> create mode 100644 src/repositories/sources_parser.rs
> create mode 100644 src/repositories/writer.rs
> create mode 100644 src/types.rs
> create mode 100644 tests/repositories.rs
> create mode 100644 tests/sources.list.d.expected/multiline.sources
> create mode 100644 tests/sources.list.d.expected/options_comment.list
> create mode 100644 tests/sources.list.d.expected/pbs-enterprise.list
> create mode 100644 tests/sources.list.d.expected/pve.list
> create mode 100644 tests/sources.list.d.expected/standard.list
> create mode 100644 tests/sources.list.d.expected/standard.sources
> create mode 100644 tests/sources.list.d/multiline.sources
> create mode 100644 tests/sources.list.d/options_comment.list
> create mode 100644 tests/sources.list.d/pbs-enterprise.list
> create mode 100644 tests/sources.list.d/pve.list
> create mode 100644 tests/sources.list.d/standard.list
> create mode 100644 tests/sources.list.d/standard.sources
>
> diff --git a/.cargo/config b/.cargo/config
> new file mode 100644
> index 0000000..3b5b6e4
> --- /dev/null
> +++ b/.cargo/config
> @@ -0,0 +1,5 @@
> +[source]
> +[source.debian-packages]
> +directory = "/usr/share/cargo/registry"
> +[source.crates-io]
> +replace-with = "debian-packages"
> diff --git a/.gitignore b/.gitignore
> new file mode 100644
> index 0000000..0d00c41
> --- /dev/null
> +++ b/.gitignore
> @@ -0,0 +1,3 @@
> +Cargo.lock
> +target/
> +tests/sources.list.d.actual
> diff --git a/Cargo.toml b/Cargo.toml
> new file mode 100644
> index 0000000..a0ecc26
> --- /dev/null
> +++ b/Cargo.toml
> @@ -0,0 +1,22 @@
> +[package]
> +name = "proxmox-apt"
> +version = "0.1.0"
> +authors = [
> + "Fabian Ebner <f.ebner at proxmox.com>",
> + "Proxmox Support Team <support at proxmox.com>",
> +]
> +edition = "2018"
> +license = "AGPL-3"
> +description = "Proxmox library for APT"
> +homepage = "https://www.proxmox.com"
> +
> +exclude = [ "debian" ]
> +
> +[lib]
> +name = "proxmox_apt"
> +path = "src/lib.rs"
> +
> +[dependencies]
> +anyhow = "1.0"
> +proxmox = { version = "0.11.0", features = [ "api-macro" ] }
> +serde = { version = "1.0", features = ["derive"] }
> diff --git a/rustfmt.toml b/rustfmt.toml
> new file mode 100644
> index 0000000..32a9786
> --- /dev/null
> +++ b/rustfmt.toml
> @@ -0,0 +1 @@
> +edition = "2018"
> diff --git a/src/lib.rs b/src/lib.rs
> new file mode 100644
> index 0000000..b065c0f
> --- /dev/null
> +++ b/src/lib.rs
> @@ -0,0 +1,3 @@
> +pub mod types;
> +
> +pub mod repositories;
> diff --git a/src/repositories/check.rs b/src/repositories/check.rs
> new file mode 100644
> index 0000000..87fbbac
> --- /dev/null
> +++ b/src/repositories/check.rs
> @@ -0,0 +1,47 @@
> +use anyhow::{bail, Error};
> +
> +use crate::types::{APTRepository, APTRepositoryFileType};
> +
> +impl APTRepository {
> + /// Makes sure that all basic properties of a repository are present and
> + /// not obviously invalid.
> + pub fn basic_check(&self) -> Result<(), Error> {
> + if self.types.is_empty() {
> + bail!("missing package type(s)");
> + }
> + if self.uris.is_empty() {
> + bail!("missing URI(s)");
> + }
> + if self.suites.is_empty() {
> + bail!("missing suite(s)");
> + }
> +
> + for uri in self.uris.iter() {
> + if !uri.contains(':') || uri.len() < 3 {
> + bail!("invalid URI: '{}'", uri);
> + }
> + }
> +
> + for suite in self.suites.iter() {
> + if !suite.ends_with('/') && self.components.is_empty() {
> + bail!("missing component(s)");
> + } else if suite.ends_with('/') && !self.components.is_empty() {
> + bail!("absolute suite '{}' does not allow component(s)", suite);
> + }
> + }
> +
> + if self.file_type == APTRepositoryFileType::List {
> + if self.types.len() > 1 {
> + bail!("more than one package type");
> + }
> + if self.uris.len() > 1 {
> + bail!("more than one URI");
> + }
> + if self.suites.len() > 1 {
> + bail!("more than one suite");
> + }
> + }
> +
> + Ok(())
> + }
> +}
> diff --git a/src/repositories/list_parser.rs b/src/repositories/list_parser.rs
> new file mode 100644
> index 0000000..b26fed2
> --- /dev/null
> +++ b/src/repositories/list_parser.rs
> @@ -0,0 +1,181 @@
> +use std::convert::TryInto;
> +use std::io::BufRead;
> +use std::iter::{Iterator, Peekable};
> +use std::str::SplitAsciiWhitespace;
> +
> +use anyhow::{bail, Error};
> +
> +use super::APTRepositoryParser;
> +use crate::types::{APTRepository, APTRepositoryFileType, APTRepositoryOption};
> +
> +pub struct APTListFileParser<R: BufRead> {
> + path: String,
> + input: R,
> + line_nr: usize,
> + comment: String,
> +}
> +
> +impl<R: BufRead> APTListFileParser<R> {
> + pub fn new(path: String, reader: R) -> Self {
> + Self {
> + path,
> + input: reader,
> + line_nr: 0,
> + comment: String::new(),
> + }
> + }
> +
> + /// Helper to parse options from the existing token stream.
> + /// Also returns `Ok(())` if there are no options.
> + /// Errors when options are invalid or not closed by `']'`.
> + fn parse_options(
> + options: &mut Vec<APTRepositoryOption>,
> + tokens: &mut Peekable<SplitAsciiWhitespace>,
> + ) -> Result<(), Error> {
> + let mut option = match tokens.peek() {
> + Some(token) => {
> + match token.strip_prefix('[') {
> + Some(option) => option,
> + None => return Ok(()), // doesn't look like options
> + }
> + }
> + None => return Ok(()),
> + };
> +
> + tokens.next(); // avoid reading the beginning twice
> +
> + let mut finished = false;
> + loop {
> + if let Some(stripped) = option.strip_suffix(']') {
> + option = stripped;
> + if option.is_empty() {
> + break;
> + }
> + finished = true; // but still need to handle the last one
> + };
> +
> + if let Some(mid) = option.find('=') {
> + let (key, mut value_str) = option.split_at(mid);
> + value_str = &value_str[1..];
> +
> + if key.is_empty() {
> + bail!("option with value '{}' has no key", value_str);
Seems weird to be using `value_str` here right before checking whether
it is empty. Maybe use `("option has no key: '{}'", option)`?
> + } else if value_str.is_empty() {
(And is there a reason you wrote this as an `else if` rather than its
own condition?)
> + bail!("option with key '{}' has no value", key);
> + }
> +
> + let values: Vec<String> = value_str
> + .split(',')
> + .map(|value| value.to_string())
> + .collect();
> +
> + options.push(APTRepositoryOption {
> + key: key.to_string(),
> + values,
> + });
> + } else if !option.is_empty() {
> + bail!("got invalid option - '{}'", option);
> + }
> +
> + if finished {
> + break;
> + }
> +
> + option = match tokens.next() {
> + Some(option) => option,
> + None => bail!("options not closed by ']'"),
> + }
> + }
> +
> + Ok(())
> + }
> +
> + /// Parse a repository or comment in one-line format.
> + /// Commented out repositories are also detected and returned with the
> + /// `enabled` property set to `false`.
> + /// If the line contains a repository, `self.comment` is added to the
> + /// `comment` property.
> + /// If the line contains a comment, it is added to `self.comment`.
> + fn parse_one_line(&mut self, mut line: &str) -> Result<Option<APTRepository>, Error> {
> + line = line.trim_matches(|c| char::is_ascii_whitespace(&c));
> +
> + // check for commented out repository first
> + if let Some(commented_out) = line.strip_prefix('#') {
> + if let Ok(Some(mut repo)) = self.parse_one_line(commented_out) {
> + repo.set_enabled(false);
> + return Ok(Some(repo));
> + }
> + }
> +
> + let mut repo =
> + APTRepository::new(self.path.clone(), self.line_nr, APTRepositoryFileType::List);
> +
> + // now handle "real" comment
> + if let Some(comment_start) = line.find('#') {
> + let (line_start, comment) = line.split_at(comment_start);
> + self.comment = format!("{}{}\n", self.comment, &comment[1..]);
> + line = line_start;
> + }
> +
> + let mut tokens = line.split_ascii_whitespace().peekable();
> +
> + match tokens.next() {
> + Some(package_type) => {
> + repo.types.push(package_type.try_into()?);
> + }
> + None => return Ok(None), // empty line
> + }
> +
> + Self::parse_options(&mut repo.options, &mut tokens)?;
> +
> + match tokens.next() {
> + Some(uri) => repo.uris.push(uri.to_string()),
> + None => bail!("got no URI"),
> + };
> +
> + match tokens.next() {
> + Some(suite) => repo.suites.push(suite.to_string()),
> + None => bail!("got no suite"),
> + };
> +
> + for component in tokens {
> + repo.components.push(component.to_string());
> + }
Nit: not sure if this matters but for "required" next iterator items
like this IMO the surrounding 'match' makes a bit of noise, and the
final loop is basically an `.extend()`. Luckily *all* of these want to
convert the token to a String, so *personally* I would prefer this:
// the rest of the line is just '<uri> <suite> [<components>...]'
let mut tokens = tokens.map(str::to_string);
repo.uris
.push(tokens.next().ok_or_else(|| format_err!("missing URI"))?;
repo.suites
.push(tokens.next().ok_or_else(|| format_err!("missing suite"))?;
repo.components.extend(tokens);
(untested (only compile-tested))
> +
> + repo.comment = self.comment.clone();
String implements Default, so you could use
repo.comment = std::mem::take(&mut self.comment);
instead, since when you return `Some` here you `.clear()` the comment
anyway.
> +
> + Ok(Some(repo))
> + }
> +}
> +
> +impl<R: BufRead> APTRepositoryParser for APTListFileParser<R> {
> + fn parse_repositories(&mut self) -> Result<Vec<APTRepository>, Error> {
> + let mut repositories = Vec::<APTRepository>::new();
> + let mut line = String::new();
> +
> + loop {
> + self.line_nr += 1;
> + line.clear();
> +
> + match self.input.read_line(&mut line) {
> + Err(err) => bail!("input error for '{}' - {}", self.path, err),
> + Ok(0) => break,
> + Ok(_) => match self.parse_one_line(&line) {
> + Ok(Some(repo)) => {
> + repositories.push(repo);
> + self.comment.clear();
> + }
> + Ok(None) => continue,
> + Err(err) => bail!(
> + "malformed entry in '{}' line {} - {}",
> + self.path,
> + self.line_nr,
> + err,
> + ),
> + },
> + }
> + }
> +
> + Ok(repositories)
> + }
> +}
> diff --git a/src/repositories/mod.rs b/src/repositories/mod.rs
> new file mode 100644
> index 0000000..123222a
> --- /dev/null
> +++ b/src/repositories/mod.rs
> @@ -0,0 +1,246 @@
> +use std::collections::BTreeMap;
> +use std::convert::TryFrom;
> +use std::ffi::OsString;
> +use std::path::{Path, PathBuf};
> +
> +use anyhow::{bail, format_err, Error};
> +
> +use crate::types::{APTRepository, APTRepositoryFileType, APTRepositoryOption};
> +
> +mod list_parser;
> +use list_parser::APTListFileParser;
> +
> +mod sources_parser;
> +use sources_parser::APTSourcesFileParser;
> +
> +mod check;
> +mod writer;
> +
> +const APT_SOURCES_LIST_FILENAME: &str = "/etc/apt/sources.list";
> +const APT_SOURCES_LIST_DIRECTORY: &str = "/etc/apt/sources.list.d/";
> +
> +impl APTRepository {
> + /// Crates an empty repository with infomration that is known before parsing.
> + fn new(path: String, number: usize, file_type: APTRepositoryFileType) -> Self {
> + Self {
> + types: vec![],
> + uris: vec![],
> + suites: vec![],
> + components: vec![],
> + options: vec![],
> + comment: String::new(),
> + path,
> + number,
> + file_type,
> + enabled: true,
> + }
> + }
> +
> + /// Changes the `enabled` flag and makes sure the `Enabled` option for
> + /// `APTRepositoryPackageType::Sources` repositories is updated too.
> + fn set_enabled(&mut self, enabled: bool) {
> + self.enabled = enabled;
> +
> + if self.file_type == APTRepositoryFileType::Sources {
> + let enabled_string = match enabled {
> + true => "true".to_string(),
> + false => "false".to_string(),
> + };
> + for option in self.options.iter_mut() {
> + if option.key == "Enabled" {
> + option.values = vec![enabled_string];
> + return;
> + }
> + }
> + self.options.push(APTRepositoryOption {
> + key: "Enabled".to_string(),
> + values: vec![enabled_string],
> + });
> + }
> + }
> +}
> +
> +trait APTRepositoryParser {
> + /// Parse all repositories including the disabled ones.
> + fn parse_repositories(&mut self) -> Result<Vec<APTRepository>, Error>;
> +}
> +
> +/// Helper to decide whether a file name is considered valid by APT and to
> +/// extract its file type and the path as a string.
> +/// Hidden files yield `Ok(None)`, while invalid file names yield an error.
> +fn check_filename<P: AsRef<Path>>(
> + path: P,
> +) -> Result<Option<(APTRepositoryFileType, String)>, OsString> {
> + let path: PathBuf = path.as_ref().to_path_buf();
> + let path_string = path.clone().into_os_string().into_string()?;
> +
> + let file_name = match path.file_name() {
> + Some(file_name) => file_name.to_os_string().into_string()?,
> + None => return Err(OsString::from(path_string)),
> + };
> +
> + // APT silently ignores hidden files
> + if file_name.starts_with('.') {
> + return Ok(None);
> + }
> +
> + let extension = match path.extension() {
> + Some(extension) => extension.to_os_string().into_string()?,
> + None => return Err(OsString::from(path_string)),
> + };
> +
> + let file_type = match APTRepositoryFileType::try_from(&extension[..]) {
> + Ok(file_type) => file_type,
> + _ => return Err(OsString::from(path_string)),
> + };
> +
> + // APT ignores such files but issues a warning
> + if !file_name
> + .chars()
> + .all(|x| x.is_ascii_alphanumeric() || x == '_' || x == '-' || x == '.')
> + {
> + return Err(OsString::from(path_string));
> + }
> +
> + Ok(Some((file_type, path_string)))
> +}
> +
> +/// Returns all APT repositories configured in the specified files including
> +/// disabled ones.
> +/// Warns about invalid file names and some format violations, while other
> +/// format violations result in an error.
> +pub fn repositories_from_files<P: AsRef<Path>>(paths: &[P]) -> Result<Vec<APTRepository>, Error> {
> + let mut repos = Vec::<APTRepository>::new();
> +
> + for path in paths.iter() {
Nit: no need for the .iter() here (and a few other places)
> + let path = path.as_ref();
Nit: given this is a generic but wants to work with &Path, I'd love for
the loop's body here to be its own function to monomorphize the bigger
parts of the code.
> +
> + if !path.is_file() {
> + eprintln!("Ignoring {:?} - not a file", path);
> + continue;
> + }
> +
> + let file_type;
> + let path_string;
> +
> + match check_filename(path) {
> + Ok(Some(res)) => {
> + file_type = res.0;
> + path_string = res.1;
> + }
> + Ok(None) => continue,
> + Err(path) => {
> + eprintln!("Ignoring {:?} - invalid file name", path);
> + continue;
> + }
> + }
> +
> + let contents = std::fs::read(path)
> + .map_err(|err| format_err!("unable to read {:?} - {}", path, err))?;
> +
> + let mut parser: Box<dyn APTRepositoryParser> = match file_type {
> + APTRepositoryFileType::List => {
> + Box::new(APTListFileParser::new(path_string, &contents[..]))
> + }
> + APTRepositoryFileType::Sources => {
> + Box::new(APTSourcesFileParser::new(path_string, &contents[..]))
> + }
> + };
> +
> + repos.append(&mut parser.parse_repositories()?);
Given that `parse_repositories()` comes from a private trait, I think it
would make sense to let it take an `&mut Vec<>` and pass `repos` here
rather than creating a new `Vec` and using `append`.
> + }
> +
> + for repo in repos.iter() {
> + repo.basic_check().map_err(|err| {
> + format_err!("check for {}:{} failed - {}", repo.path, repo.number, err)
> + })?;
> + }
> +
> + Ok(repos)
> +}
> +
> +/// Returns all APT repositories configured in `/etc/apt/sources.list` and
> +/// in `/etc/apt/sources.list.d` including disabled repositories.
> +/// Warns about invalid file names and some format violations, while other
> +/// format violations result in an error.
> +pub fn repositories() -> Result<Vec<APTRepository>, Error> {
> + let mut paths = Vec::<PathBuf>::new();
> +
> + let sources_list_path = PathBuf::from(APT_SOURCES_LIST_FILENAME);
> + if sources_list_path.is_file() {
> + paths.push(sources_list_path)
> + };
> +
> + let sources_list_d_path = PathBuf::from(APT_SOURCES_LIST_DIRECTORY);
> + if sources_list_d_path.is_dir() {
> + for entry in std::fs::read_dir(sources_list_d_path)? {
> + paths.push(entry?.path());
> + }
> + }
> +
> + let repos = repositories_from_files(&paths)?;
> +
> + Ok(repos)
> +}
> +
> +/// Write the repositories to the respective files specified by their
> +/// `path` property and in the order determined by their `number` property.
> +/// Does a `check::basic_check(repository)` for each repository first.
> +pub fn write_repositories<A: AsRef<APTRepository>>(repos: &[A]) -> Result<(), Error> {
> + let mut repos: Vec<&APTRepository> = repos.iter().map(|repo| repo.as_ref()).collect();
Nit: the rest could be its own non-generic function for
monomorphization. Fewer instances of the exact same code ;-)
> +
> + // check before writing
> + for repo in repos.iter() {
> + repo.basic_check().map_err(|err| {
> + format_err!("check for {}:{} failed - {}", repo.path, repo.number, err)
> + })?;
> + }
> +
> + repos.sort_by(|a, b| {
> + if a.path == b.path {
Rather than doing `==` plus `.cmp` I think it may be nicer to write this
as a `match` on `a.path.cmp(&b.path)` and with
`Ordering::Equal => a.number.cmp(&b.number)`...
> + a.number.cmp(&b.number)
> + } else {
> + a.path.cmp(&b.path)
> + }
> + });
> +
> + let mut files = BTreeMap::<String, Vec<u8>>::new();
> +
> + for repo in repos.iter() {
> + let raw = match files.get_mut(&repo.path) {
> + Some(raw) => raw,
> + None => {
> + files.insert(repo.path.clone(), vec![]);
> + files.get_mut(&repo.path).unwrap()
> + }
> + };
^ This makes me cringe but I just have to accept it... :-(
That is until they fix the `Entry` API to work with borrowed keys >.>
(This REALLY should just be writable as
`files.entry(&repo.path).or_default()`, but for now this would require
cloning the path -_-)
> +
> + repo.write(&mut *raw)
> + .map_err(|err| format_err!("writing {}:{} failed - {}", repo.path, repo.number, err))?;
> + }
> +
> + for (path, content) in files.iter() {
> + let path = PathBuf::from(path);
> + let dir = path.parent().unwrap();
> +
> + std::fs::create_dir_all(dir)
> + .map_err(|err| format_err!("unable to create dir {:?} - {}", dir, err))?;
> +
> + let pid = std::process::id();
> + let mut tmp_path = path.clone();
> + tmp_path.set_extension("tmp");
> + tmp_path.set_extension(format!("{}", pid));
> +
> + if let Err(err) = std::fs::write(&tmp_path, &content) {
> + let _ = std::fs::remove_file(&tmp_path);
> + bail!("write failed: {}", err);
> + }
> +
> + if let Err(err) = std::fs::rename(&tmp_path, &path) {
> + let _ = std::fs::remove_file(&tmp_path);
> + bail!("rename failed for {:?} - {}", path, err);
> + }
> + }
> +
> + Ok(())
> +}
> diff --git a/src/repositories/sources_parser.rs b/src/repositories/sources_parser.rs
> new file mode 100644
> index 0000000..fdfe603
> --- /dev/null
> +++ b/src/repositories/sources_parser.rs
> @@ -0,0 +1,217 @@
> +use std::convert::TryInto;
> +use std::io::BufRead;
> +use std::iter::Iterator;
> +
> +use anyhow::{bail, Error};
> +
> +use crate::types::{
> + APTRepository, APTRepositoryFileType, APTRepositoryOption, APTRepositoryPackageType,
> +};
> +
> +use super::APTRepositoryParser;
> +
> +pub struct APTSourcesFileParser<R: BufRead> {
> + path: String,
> + input: R,
> + stanza_nr: usize,
> + comment: String,
> +}
> +
> +/// See `man sources.list` and `man deb822` for the format specification.
> +impl<R: BufRead> APTSourcesFileParser<R> {
> + pub fn new(path: String, reader: R) -> Self {
> + Self {
> + path,
> + input: reader,
> + stanza_nr: 1,
> + comment: String::new(),
> + }
> + }
> +
> + /// Based on APT's `StringToBool` in `strutl.cc`
> + fn string_to_bool(string: &str, default: bool) -> bool {
> + let string = string.trim_matches(|c| char::is_ascii_whitespace(&c));
> + let string = string.to_lowercase();
> +
> + match &string[..] {
> + "1" | "yes" | "true" | "with" | "on" | "enable" => true,
> + "0" | "no" | "false" | "without" | "off" | "disable" => false,
> + _ => default,
> + }
> + }
> +
> + /// Checks if `key` is valid according to deb822
> + fn valid_key(key: &str) -> bool {
> + if key.starts_with('-') {
> + return false;
> + };
> + return key.chars().all(|c| matches!(c, '!'..='9' | ';'..='~'));
> + }
> +
> + /// Try parsing a repository in stanza format from `lines`.
> + /// Returns `Ok(None)` when no stanza can be found.
> + /// Comments are added to `self.comments`. If a stanza can be found,
> + /// `self.comment` is added to the repository's `comment` property.
> + /// Fully commented out stanzas are treated as comments.
> + fn parse_stanza(&mut self, lines: &str) -> Result<Option<APTRepository>, Error> {
> + let mut repo = APTRepository::new(
> + self.path.clone(),
> + self.stanza_nr,
> + APTRepositoryFileType::Sources,
> + );
> +
> + // Values may be folded into multiple lines.
> + // Those lines have to start with a space or a tab.
> + let lines = lines.replace("\n ", " ");
> + let lines = lines.replace("\n\t", " ");
> +
> + let mut got_something = false;
> +
> + for line in lines.lines() {
> + let line = line.trim_matches(|c| char::is_ascii_whitespace(&c));
> + if line.is_empty() {
> + continue;
> + }
> +
> + if let Some(commented_out) = line.strip_prefix('#') {
> + self.comment = format!("{}{}\n", self.comment, commented_out);
> + continue;
> + }
> +
> + if let Some(mid) = line.find(':') {
> + let (key, value_str) = line.split_at(mid);
> + let value_str = &value_str[1..];
> + let key = key.trim_matches(|c| char::is_ascii_whitespace(&c));
> +
> + if key.is_empty() {
> + bail!("option with value '{}' has no key", value_str);
> + } else if value_str.is_empty() {
> + // ignored by APT
> + eprintln!("option with key '{}' has no value", key);
> + continue;
> + }
> +
> + if !Self::valid_key(key) {
> + // ignored by APT
> + eprintln!("option with invalid key '{}'", key);
> + continue;
> + }
> +
> + let values: Vec<String> = value_str
> + .split_ascii_whitespace()
> + .map(|value| value.to_string())
> + .collect();
> +
> + match key {
> + "Types" => {
> + let mut types = Vec::<APTRepositoryPackageType>::new();
> + for package_type in values {
> + types.push((&package_type[..]).try_into()?);
> + }
> + if !repo.types.is_empty() {
Would it make sense to check this first, like the other cases, or do you
explicitly want the errors from `try_into()` to take precedence?
> + eprintln!("key 'Types' was defined twice");
> + }
> + repo.types = types;
> + }
> + "URIs" => {
> + if !repo.uris.is_empty() {
> + eprintln!("key 'URIs' was defined twice");
> + }
> + repo.uris = values;
> + }
> + "Suites" => {
> + if !repo.suites.is_empty() {
> + eprintln!("key 'Suites' was defined twice");
> + }
> + repo.suites = values;
> + }
> + "Components" => {
> + if !repo.components.is_empty() {
> + eprintln!("key 'Components' was defined twice");
> + }
> + repo.components = values;
> + }
> + "Enabled" => {
> + repo.set_enabled(Self::string_to_bool(value_str, true));
> + }
> + _ => repo.options.push(APTRepositoryOption {
> + key: key.to_string(),
> + values,
> + }),
> + }
> + } else {
> + bail!("got invalid line - '{:?}'", line);
> + }
> +
> + got_something = true;
> + }
> +
> + if !got_something {
> + return Ok(None);
> + }
> +
> + repo.comment = self.comment.clone();
^ same as earlier (std::mem::take())
> +
> + Ok(Some(repo))
> + }
> +
> + /// Helper function for `parse_repositories`
> + fn try_parse_stanza(
> + &mut self,
> + lines: &str,
> + repos: &mut Vec<APTRepository>,
> + ) -> Result<(), Error> {
> + match self.parse_stanza(&lines[..]) {
> + Ok(Some(repo)) => {
> + repos.push(repo);
> + self.comment.clear();
> + self.stanza_nr += 1;
> + }
> + Ok(None) => (),
> + Err(err) => {
> + bail!(
> + "malformed entry in '{}' stanza {} - {}",
> + self.path,
> + self.stanza_nr,
> + err,
> + );
> + }
> + }
> +
> + Ok(())
> + }
> +}
> +
> +impl<R: BufRead> APTRepositoryParser for APTSourcesFileParser<R> {
> + fn parse_repositories(&mut self) -> Result<Vec<APTRepository>, Error> {
> + let mut repos = Vec::<APTRepository>::new();
> + let mut lines = String::new();
> + let mut offset: usize = 0;
> +
> + loop {
Can we just
let offset = lines.len();
here or am I missing something? (Maybe call it "previous_len" though?)
> + match self.input.read_line(&mut lines) {
> + Err(err) => bail!("input error for '{}' - {}", self.path, err),
> + Ok(0) => {
> + self.try_parse_stanza(&lines[..], &mut repos)?;
> + break;
> + }
> + Ok(bytes_read) => {
> + if (&lines[offset..])
> + .trim_matches(|c| char::is_ascii_whitespace(&c))
> + .is_empty()
> + {
> + // detected end of stanza
> +
> + self.try_parse_stanza(&lines[..], &mut repos)?;
> + lines.clear();
> + offset = 0;
> + } else {
> + offset += bytes_read;
> + }
> + }
> + }
> + }
> +
> + Ok(repos)
> + }
> +}
> diff --git a/src/repositories/writer.rs b/src/repositories/writer.rs
> new file mode 100644
> index 0000000..6feb1c7
> --- /dev/null
> +++ b/src/repositories/writer.rs
> @@ -0,0 +1,89 @@
> +use std::io::Write;
> +
> +use anyhow::{bail, Error};
> +
> +use crate::types::{APTRepository, APTRepositoryFileType};
> +
> +impl APTRepository {
> + /// Writes a repository in the corresponding format followed by a blank.
> + /// Expects that `basic_check()` for the repository was successful.
> + pub fn write(&self, w: &mut dyn Write) -> Result<(), Error> {
> + match self.file_type {
> + APTRepositoryFileType::List => write_one_line(self, w),
> + APTRepositoryFileType::Sources => write_stanza(self, w),
> + }
> + }
> +}
> +
> +/// Writes a repository in one-line format followed by a blank line.
> +/// Expects that `repo.file_type == APTRepositoryFileType::List`.
> +/// Expects that `basic_check()` for the repository was successful.
> +fn write_one_line(repo: &APTRepository, w: &mut dyn Write) -> Result<(), Error> {
> + if repo.file_type != APTRepositoryFileType::List {
> + bail!("not a .list repository");
> + }
> +
> + if !repo.comment.is_empty() {
> + for line in repo.comment.lines() {
> + writeln!(w, "#{}", line)?;
> + }
> + }
> +
> + if !repo.enabled {
> + write!(w, "# ")?;
> + }
> +
> + write!(w, "{} ", String::from(repo.types[0]))?;
> +
> + if !repo.options.is_empty() {
> + let option_string = repo
> + .options
> + .iter()
> + .map(|option| format!("{}={}", option.key, option.values.join(",")))
> + .collect::<Vec<String>>()
> + .join(" ");
^ I'd prefer a `fold()` instead of `.collect().join()`, seems like
useless extra allocation with the Vec ;-)
In fact, the `fold` could probably be where the `.map()` is already?
Alternatively, you could just write!() directly to `w`?
> +
> + write!(w, "[ {} ] ", option_string)?;
> + };
> +
> + write!(w, "{} ", repo.uris[0])?;
> + write!(w, "{} ", repo.suites[0])?;
> + writeln!(w, "{}", repo.components.join(" "))?;
> +
> + writeln!(w)?;
> +
> + Ok(())
> +}
> +
> +/// Writes a single stanza followed by a blank line.
> +/// Expects that `repo.file_type == APTRepositoryFileType::Sources`.
> +fn write_stanza(repo: &APTRepository, w: &mut dyn Write) -> Result<(), Error> {
> + if repo.file_type != APTRepositoryFileType::Sources {
> + bail!("not a .sources repository");
> + }
> +
> + if !repo.comment.is_empty() {
> + for line in repo.comment.lines() {
> + writeln!(w, "#{}", line)?;
> + }
> + }
> +
> + let type_strings = repo
> + .types
> + .iter()
> + .map(|package_type| String::from(*package_type))
> + .collect::<Vec<String>>();
^ Similar case, `fold()` while including the `join` you do in the next
line for this one.
> +
> + writeln!(w, "Types: {}", type_strings.join(" "))?;
> + writeln!(w, "URIs: {}", repo.uris.join(" "))?;
> + writeln!(w, "Suites: {}", repo.suites.join(" "))?;
> + writeln!(w, "Components: {}", repo.components.join(" "))?;
> +
> + for option in repo.options.iter() {
> + writeln!(w, "{}: {}", option.key, option.values.join(" "))?;
> + }
> +
> + writeln!(w)?;
> +
> + Ok(())
> +}
> diff --git a/src/types.rs b/src/types.rs
> new file mode 100644
> index 0000000..7a95daf
> --- /dev/null
> +++ b/src/types.rs
> @@ -0,0 +1,197 @@
> +use std::convert::{From, TryFrom};
> +
> +use anyhow::{bail, Error};
> +use serde::{Deserialize, Serialize};
> +
> +use proxmox::api::api;
> +
> +#[api()]
Nit: you can skip the `()` here.
> +#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq)]
> +#[serde(rename_all = "lowercase")]
> +pub enum APTRepositoryFileType {
> + /// One-line-style format
> + List,
> + /// DEB822-style format
> + Sources,
> +}
> +
> +impl TryFrom<&str> for APTRepositoryFileType {
> + type Error = Error;
> +
> + fn try_from(string: &str) -> Result<Self, Error> {
> + match &string[..] {
> + "list" => Ok(APTRepositoryFileType::List),
> + "sources" => Ok(APTRepositoryFileType::Sources),
> + _ => bail!("invalid file type '{}'", string),
> + }
> + }
> +}
> +
> +impl From<APTRepositoryFileType> for String {
Please implement `std::fmt::Display` for APTRepositoryFileType instead.
> + fn from(file_type: APTRepositoryFileType) -> String {
> + match file_type {
> + APTRepositoryFileType::List => "list".to_string(),
> + APTRepositoryFileType::Sources => "sources".to_string(),
> + }
> + }
> +}
> +
> +#[api()]
> +#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq)]
> +#[serde(rename_all = "kebab-case")]
> +pub enum APTRepositoryPackageType {
> + /// Debian package
> + Deb,
> + /// Debian source package
> + DebSrc,
> +}
> +
> +impl TryFrom<&str> for APTRepositoryPackageType {
> + type Error = Error;
> +
> + fn try_from(string: &str) -> Result<Self, Error> {
> + match &string[..] {
> + "deb" => Ok(APTRepositoryPackageType::Deb),
> + "deb-src" => Ok(APTRepositoryPackageType::DebSrc),
> + _ => bail!("invalid file type '{}'", string),
> + }
> + }
> +}
> +
> +impl From<APTRepositoryPackageType> for String {
Please implement `std::fmt::Display` for APTRepositoryPackageType
instead.
> + fn from(package_type: APTRepositoryPackageType) -> String {
> + match package_type {
> + APTRepositoryPackageType::Deb => "deb".to_string(),
> + APTRepositoryPackageType::DebSrc => "deb-src".to_string(),
> + }
> + }
> +}
> +
> +#[api(
> + properties: {
> + Key: {
> + description: "Option key.",
> + type: String,
Nit: simple standard types can be left out. Less typing ;-)
> + },
> + Values: {
> + description: "Option values.",
> + type: Array,
> + items: {
> + description: "Value.",
> + type: String,
> + },
> + },
> + },
> +)]
> +#[derive(Debug, Clone, Serialize, Deserialize)]
> +#[serde(rename_all = "PascalCase")] // for consistency
> +/// Additional options for an APT repository.
> +/// Used for both single- and mutli-value options.
> +pub struct APTRepositoryOption {
> + /// Option key.
> + pub key: String,
> + /// Option value(s).
> + pub values: Vec<String>,
> +}
> +
> +#[api(
> + properties: {
> + Types: {
> + description: "List of package types.",
> + type: Array,
> + items: {
> + type: APTRepositoryPackageType,
> + },
> + },
> + URIs: {
> + description: "List of repository URIs.",
> + type: Array,
> + items: {
> + description: "Repository URI.",
> + type: String,
> + },
> + },
> + Suites: {
> + description: "List of distributions.",
> + type: Array,
> + items: {
> + description: "Package distribution.",
> + type: String,
> + },
> + },
> + Components: {
> + description: "List of repository components.",
> + type: Array,
> + items: {
> + description: "Repository component.",
> + type: String,
> + },
> + },
> + Options: {
> + type: Array,
> + optional: true,
> + items: {
> + type: APTRepositoryOption,
> + },
> + },
> + Comment: {
> + description: "Associated comment.",
> + type: String,
> + optional: true,
> + },
> + Path: {
> + description: "Path to the defining file.",
> + type: String,
> + },
> + Number: {
> + description: "Line or stanza number.",
> + type: Integer,
> + },
> + FileType: {
> + type: APTRepositoryFileType,
> + },
> + Enabled: {
> + description: "Whether the repository is enabled or not.",
> + type: Boolean,
> + },
> + },
> +)]
> +#[derive(Debug, Clone, Serialize, Deserialize)]
> +#[serde(rename_all = "PascalCase")]
> +/// Describes an APT repository.
> +pub struct APTRepository {
Nit: With all those attributes I find it easier to read with newlines
between the fields...
> + /// List of package types.
> + #[serde(skip_serializing_if = "Vec::is_empty")]
> + pub types: Vec<APTRepositoryPackageType>,
> + /// List of repository URIs.
> + #[serde(skip_serializing_if = "Vec::is_empty")]
> + #[serde(rename = "URIs")]
> + pub uris: Vec<String>,
> + /// List of package distributions.
> + #[serde(skip_serializing_if = "Vec::is_empty")]
> + pub suites: Vec<String>,
> + /// List of repository components.
> + #[serde(skip_serializing_if = "Vec::is_empty")]
> + pub components: Vec<String>,
> + /// Additional options.
> + #[serde(skip_serializing_if = "Vec::is_empty")]
> + pub options: Vec<APTRepositoryOption>,
> + /// Associated comment.
> + #[serde(skip_serializing_if = "String::is_empty")]
> + pub comment: String,
> + /// Path to the defining file.
> + #[serde(skip_serializing_if = "String::is_empty")]
> + pub path: String,
> + /// Line or stanza number.
> + pub number: usize,
> + /// Format of the defining file.
> + pub file_type: APTRepositoryFileType,
> + /// Whether the repository is enabled or not.
> + pub enabled: bool,
> +}
> +
> +impl AsRef<APTRepository> for APTRepository {
This one deserves a comment as it is not clear why this is necessary.
Looks like an ergonomic decision since this crate itself doesn't really
*need* this (or rather, could be written without requiring this).
> + fn as_ref(&self) -> &APTRepository {
> + &self
> + }
> +}
> diff --git a/tests/repositories.rs b/tests/repositories.rs
> new file mode 100644
> index 0000000..020e133
> --- /dev/null
> +++ b/tests/repositories.rs
> @@ -0,0 +1,73 @@
> +use std::collections::HashMap;
> +use std::path::PathBuf;
> +
> +use anyhow::{bail, format_err, Error};
> +
> +use proxmox_apt::repositories::{repositories_from_files, write_repositories};
> +use proxmox_apt::types::APTRepository;
> +
> +#[test]
> +fn test_parse_write() -> Result<(), Error> {
> + let test_dir = std::env::current_dir()?.join("tests");
> + let read_dir = test_dir.join("sources.list.d");
> + let write_dir = test_dir.join("sources.list.d.actual");
> + let expected_dir = test_dir.join("sources.list.d.expected");
> +
> + let mut paths = Vec::<PathBuf>::new();
> + for entry in std::fs::read_dir(read_dir)? {
> + paths.push(entry?.path());
> + }
> +
> + let repos = repositories_from_files(&paths)?;
> +
> + // used to mess up the order from parsing and to check that each repo has a
> + // unique path:number
> + let mut repo_hash = HashMap::<String, APTRepository>::new();
> +
> + for mut repo in repos {
> + let path = PathBuf::from(repo.path);
> + let new_path = write_dir.join(path.file_name().unwrap());
> +
> + repo.path = new_path.into_os_string().into_string().unwrap();
> +
> + let key = format!("{}:{}", repo.path, repo.number);
> +
> + if repo_hash.insert(key.clone(), repo).is_some() {
> + bail!("key '{}' is not unique!", key);
> + }
> + }
> +
> + if write_dir.is_dir() {
> + std::fs::remove_dir_all(&write_dir)
> + .map_err(|err| format_err!("unable to remove dir {:?} - {}", write_dir, err))?;
> + }
> +
> + std::fs::create_dir_all(&write_dir)
> + .map_err(|err| format_err!("unable to create dir {:?} - {}", write_dir, err))?;
> +
> + let repos_vec: Vec<&APTRepository> = repo_hash.values().collect();
> + write_repositories(&repos_vec)?;
> +
> + let mut expected_count = 0;
> +
> + for entry in std::fs::read_dir(expected_dir)? {
> + expected_count += 1;
> +
> + let expected_path = entry?.path();
> + let actual_path = write_dir.join(expected_path.file_name().unwrap());
> +
> + let expected_contents = std::fs::read(&expected_path)
> + .map_err(|err| format_err!("unable to read {:?} - {}", expected_path, err))?;
> +
> + let actual_contents = std::fs::read(&actual_path)
> + .map_err(|err| format_err!("unable to read {:?} - {}", actual_path, err))?;
> +
> + assert_eq!(expected_contents, actual_contents);
> + }
> +
> + let actual_count = std::fs::read_dir(write_dir)?.count();
> +
> + assert_eq!(expected_count, actual_count);
> +
> + Ok(())
> +}
> diff --git a/tests/sources.list.d.expected/multiline.sources b/tests/sources.list.d.expected/multiline.sources
> new file mode 100644
> index 0000000..91f53c2
> --- /dev/null
> +++ b/tests/sources.list.d.expected/multiline.sources
> @@ -0,0 +1,8 @@
> +# comment in here
> +Types: deb deb-src
> +URIs: http://ftp.at.debian.org/debian
> +Suites: buster buster-updates
> +Components: main contrib
> +Languages: it de fr
> +Enabled: false
> +
> diff --git a/tests/sources.list.d.expected/options_comment.list b/tests/sources.list.d.expected/options_comment.list
> new file mode 100644
> index 0000000..f0952e4
> --- /dev/null
> +++ b/tests/sources.list.d.expected/options_comment.list
> @@ -0,0 +1,3 @@
> +# comment
> +deb [ lang=it,de arch=amd64 ] http://ftp.at.debian.org/debian buster main contrib
> +
> diff --git a/tests/sources.list.d.expected/pbs-enterprise.list b/tests/sources.list.d.expected/pbs-enterprise.list
> new file mode 100644
> index 0000000..acb2990
> --- /dev/null
> +++ b/tests/sources.list.d.expected/pbs-enterprise.list
> @@ -0,0 +1,2 @@
> +deb https://enterprise.proxmox.com/debian/pbs buster pbs-enterprise
> +
> diff --git a/tests/sources.list.d.expected/pve.list b/tests/sources.list.d.expected/pve.list
> new file mode 100644
> index 0000000..127a49a
> --- /dev/null
> +++ b/tests/sources.list.d.expected/pve.list
> @@ -0,0 +1,13 @@
> +deb http://ftp.debian.org/debian buster main contrib
> +
> +deb http://ftp.debian.org/debian buster-updates main contrib
> +
> +# PVE pve-no-subscription repository provided by proxmox.com,
> +# NOT recommended for production use
> +deb http://download.proxmox.com/debian/pve buster pve-no-subscription
> +
> +# deb https://enterprise.proxmox.com/debian/pve buster pve-enterprise
> +
> +# security updates
> +deb http://security.debian.org/debian-security buster/updates main contrib
> +
> diff --git a/tests/sources.list.d.expected/standard.list b/tests/sources.list.d.expected/standard.list
> new file mode 100644
> index 0000000..63c1b60
> --- /dev/null
> +++ b/tests/sources.list.d.expected/standard.list
> @@ -0,0 +1,7 @@
> +deb http://ftp.at.debian.org/debian buster main contrib
> +
> +deb http://ftp.at.debian.org/debian buster-updates main contrib
> +
> +# security updates
> +deb http://security.debian.org buster/updates main contrib
> +
> diff --git a/tests/sources.list.d.expected/standard.sources b/tests/sources.list.d.expected/standard.sources
> new file mode 100644
> index 0000000..56ce280
> --- /dev/null
> +++ b/tests/sources.list.d.expected/standard.sources
> @@ -0,0 +1,11 @@
> +Types: deb
> +URIs: http://ftp.at.debian.org/debian
> +Suites: buster buster-updates
> +Components: main contrib
> +
> +# security updates
> +Types: deb
> +URIs: http://security.debian.org
> +Suites: buster/updates
> +Components: main contrib
> +
> diff --git a/tests/sources.list.d/multiline.sources b/tests/sources.list.d/multiline.sources
> new file mode 100644
> index 0000000..c3a1ff0
> --- /dev/null
> +++ b/tests/sources.list.d/multiline.sources
> @@ -0,0 +1,9 @@
> +Types: deb deb-src
> +URIs: http://ftp.at.debian.org/debian
> +Suites: buster buster-updates
> +# comment in here
> +Components: main contrib
> +Languages: it
> + de
> + fr
> +Enabled: off
> diff --git a/tests/sources.list.d/options_comment.list b/tests/sources.list.d/options_comment.list
> new file mode 100644
> index 0000000..e3f4112
> --- /dev/null
> +++ b/tests/sources.list.d/options_comment.list
> @@ -0,0 +1,2 @@
> +deb [ lang=it,de arch=amd64 ] http://ftp.at.debian.org/debian buster main contrib # comment
> +
> diff --git a/tests/sources.list.d/pbs-enterprise.list b/tests/sources.list.d/pbs-enterprise.list
> new file mode 100644
> index 0000000..5f8763c
> --- /dev/null
> +++ b/tests/sources.list.d/pbs-enterprise.list
> @@ -0,0 +1 @@
> +deb https://enterprise.proxmox.com/debian/pbs buster pbs-enterprise
> diff --git a/tests/sources.list.d/pve.list b/tests/sources.list.d/pve.list
> new file mode 100644
> index 0000000..6213f72
> --- /dev/null
> +++ b/tests/sources.list.d/pve.list
> @@ -0,0 +1,10 @@
> +deb http://ftp.debian.org/debian buster main contrib
> +deb http://ftp.debian.org/debian buster-updates main contrib
> +
> +# PVE pve-no-subscription repository provided by proxmox.com,
> +# NOT recommended for production use
> +deb http://download.proxmox.com/debian/pve buster pve-no-subscription
> +# deb https://enterprise.proxmox.com/debian/pve buster pve-enterprise
> +
> +# security updates
> +deb http://security.debian.org/debian-security buster/updates main contrib
> diff --git a/tests/sources.list.d/standard.list b/tests/sources.list.d/standard.list
> new file mode 100644
> index 0000000..26db887
> --- /dev/null
> +++ b/tests/sources.list.d/standard.list
> @@ -0,0 +1,6 @@
> +deb http://ftp.at.debian.org/debian buster main contrib
> +
> +deb http://ftp.at.debian.org/debian buster-updates main contrib
> +
> +# security updates
> +deb http://security.debian.org buster/updates main contrib
> diff --git a/tests/sources.list.d/standard.sources b/tests/sources.list.d/standard.sources
> new file mode 100644
> index 0000000..605202e
> --- /dev/null
> +++ b/tests/sources.list.d/standard.sources
> @@ -0,0 +1,10 @@
> +Types: deb
> +URIs: http://ftp.at.debian.org/debian
> +Suites: buster buster-updates
> +Components: main contrib
> +
> +# security updates
> +Types: deb
> +URIs: http://security.debian.org
> +Suites: buster/updates
> +Components: main contrib
> --
> 2.20.1
>
>
>
> _______________________________________________
> pbs-devel mailing list
> pbs-devel at lists.proxmox.com
> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel
>
>
More information about the pbs-devel
mailing list