Make it a real CLI app
continuous-integration/drone/push Build is passing Details

Signed-off-by: Jacob Kiers <jacob@jacobkiers.net>
This commit is contained in:
Jacob Kiers 2022-08-06 23:01:24 +02:00
parent 24fb0c1ba6
commit f529e0ceb6
8 changed files with 504 additions and 179 deletions

138
Cargo.lock generated
View File

@ -30,6 +30,17 @@ dependencies = [
"quick-xml",
]
[[package]]
name = "atty"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
"hermit-abi",
"libc",
"winapi",
]
[[package]]
name = "autocfg"
version = "1.1.0"
@ -101,6 +112,45 @@ dependencies = [
"winapi",
]
[[package]]
name = "clap"
version = "3.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3dbbb6653e7c55cc8595ad3e1f7be8f32aba4eb7ff7f0fd1163d4f3d137c0a9"
dependencies = [
"atty",
"bitflags",
"clap_derive",
"clap_lex",
"indexmap",
"once_cell",
"strsim",
"termcolor",
"textwrap",
]
[[package]]
name = "clap_derive"
version = "3.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ba52acd3b0a5c33aeada5cdaa3267cdc7c594a98731d4268cdc1532f4264cb4"
dependencies = [
"heck",
"proc-macro-error",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clap_lex"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5"
dependencies = [
"os_str_bytes",
]
[[package]]
name = "cpufeatures"
version = "0.2.2"
@ -230,6 +280,27 @@ dependencies = [
"version_check",
]
[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
[[package]]
name = "heck"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9"
[[package]]
name = "hermit-abi"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
dependencies = [
"libc",
]
[[package]]
name = "ident_case"
version = "1.0.1"
@ -260,6 +331,16 @@ dependencies = [
"nom",
]
[[package]]
name = "indexmap"
version = "1.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e"
dependencies = [
"autocfg",
"hashbrown",
]
[[package]]
name = "js-sys"
version = "0.3.59"
@ -327,11 +408,12 @@ checksum = "c96aba5aa877601bb3f6dd6a63a969e1f82e60646e81e71b14496995e9853c91"
[[package]]
name = "newsletter-to-web"
version = "0.1.0"
version = "0.0.1"
dependencies = [
"atom_syndication",
"base16ct",
"chrono",
"clap",
"imap",
"mail-parser",
"rustls-connector",
@ -374,6 +456,36 @@ version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1"
[[package]]
name = "os_str_bytes"
version = "6.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "648001efe5d5c0102d8cea768e348da85d90af8ba91f0bea908f157951493cd4"
[[package]]
name = "proc-macro-error"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
dependencies = [
"proc-macro-error-attr",
"proc-macro2",
"quote",
"syn",
"version_check",
]
[[package]]
name = "proc-macro-error-attr"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
dependencies = [
"proc-macro2",
"quote",
"version_check",
]
[[package]]
name = "proc-macro2"
version = "1.0.43"
@ -534,6 +646,21 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "termcolor"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755"
dependencies = [
"winapi-util",
]
[[package]]
name = "textwrap"
version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb"
[[package]]
name = "time"
version = "0.1.44"
@ -674,6 +801,15 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
dependencies = [
"winapi",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"

View File

@ -1,8 +1,11 @@
[package]
name = "newsletter-to-web"
version = "0.1.0"
version = "0.0.1"
edition = "2021"
description = "Converts email newsletters to static HTML files"
authors = [
"Jacob Kiers <code@kiers.eu>"
]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@ -10,6 +13,7 @@ description = "Converts email newsletters to static HTML files"
atom_syndication = "^0.11.0"
base16ct = { version = "^0.1.0", features = [ "alloc" ] }
chrono = "^0.4"
clap = { version = "^3.2.16", features = [ "derive" ] }
imap = { version = "^2.4.1", default-features = false }
mail-parser = "^0.5.0"
rustls-connector = { version = "^0.16.1", default-features = false, features = [ "webpki-roots-certs", "quic" ] }

48
bin/src/cli.rs Normal file
View File

@ -0,0 +1,48 @@
use std::path::PathBuf;
use clap::{Parser, Subcommand};
// Top-level command-line interface of the program, parsed with clap's derive API.
//
// NOTE: plain `//` comments are used here on purpose. clap's derive macros turn
// `///` doc comments into help/about text, so adding them would change the
// output of `--help`.
//
// `author`, `version` and `about` are given without explicit values;
// `long_about = None` is set explicitly.
#[derive(Parser)]
#[clap(author, version, about, long_about = None)]
pub(crate) struct Cli {
// The subcommand selected on the command line.
#[clap(subcommand)]
pub command: Command,
}
// The subcommands understood by the program.
//
// The `///` comments below are not only documentation: clap's derive macros
// use them as the help text shown by `--help`. Every argument therefore carries
// one, so that no option shows up without a description.
#[derive(Subcommand)]
pub(crate) enum Command {
    /// Fetch emails from an IMAP server
    FetchFromImap {
        /// Host name of the IMAP server
        #[clap(short, long, value_parser)]
        server: String,
        /// Port of the IMAP server
        #[clap(long, value_parser, default_value_t = 993)]
        port: u16,
        /// User name used to log in to the mailbox
        #[clap(short, long, value_parser)]
        username: String,
        // NOTE(review): a password passed as an argument ends up in the shell
        // history and is visible in the process list. Consider reading it from
        // an environment variable, a file or a prompt instead.
        /// Password used to log in to the mailbox
        #[clap(short, long, value_parser)]
        password: String,
    },
    /// Fetch an email from a .eml file
    FetchFromFile {
        /// Path of the .eml file to read
        #[clap(value_parser)]
        filename: PathBuf,
    },
    /// Build an ATOM feed containing the full contents of the email
    BuildFeed {
        /// Feed file
        #[clap(value_parser, default_value = "output/feed.atom")]
        filename: PathBuf,
    },
    /// Exports the emails as HTML files
    ExportHtml {
        /// The directory in which the emails will be stored
        #[clap(value_parser, default_value = "output/")]
        directory: PathBuf,
    },
    /// Fetches and exports all mails from an IMAP mailbox. Requires configuration
    FetchAndExport {
        /// The directory in which the output will be stored
        #[clap(value_parser, default_value = "output/")]
        directory: PathBuf,
    },
}

17
bin/src/debug.rs Normal file
View File

@ -0,0 +1,17 @@
mod debug;
pub(crate) fn write_to_test_path(msg: &Message) {
let test_path: PathBuf = [
Path::new("tests/data"),
Path::new(&format!("{}.eml", &msg.get_uid())),
]
.iter()
.collect();
let _ = OpenOptions::new()
.write(true)
.create(true)
.open(test_path)
.expect("Could not open file fir writing")
.write_all(&msg.data);
}

108
bin/src/feed.rs Normal file
View File

@ -0,0 +1,108 @@
use crate::Message;
use atom_syndication::{
ContentBuilder, Entry, EntryBuilder, Feed, FeedBuilder, Generator, LinkBuilder, Person,
};
use chrono::{DateTime, Utc, TimeZone};
use mail_parser::HeaderValue;
/// Appends an entry for `message` to `feed`, using `processed_html` as the
/// entry's HTML content.
///
/// The entry URL (also used as its id) is built from the path returned by
/// `crate::get_path`. The message date is used for both the `published` and
/// the `updated` timestamp. A message without a subject gets the title
/// "No subject". The author name falls back to the sender's address and then
/// to an empty string.
///
/// Messages whose `From` header is not a single address are skipped silently.
///
/// # Panics
///
/// Panics when the message cannot be parsed or has no date.
pub(crate) fn add_entry_to_feed(feed: &mut Feed, message: &Message, processed_html: &str) {
    let parsed = message
        .get_parsed()
        .expect("Could not parse the message");
    let date = parsed.get_date().expect("Could not extract date");
    let from = match parsed.get_from() {
        HeaderValue::Address(e) => e,
        _ => return,
    };

    let path = crate::get_path(&parsed, message);
    let url = format!("https://newsletters.kiers.eu/{}", path);

    // Computed once; used for both the `published` and the `updated` field.
    let published = Utc.timestamp(date.to_timestamp(), 0);

    let author_email = from.address.as_deref().map(str::to_string);
    let author_name = from
        .name
        .as_deref()
        .map(str::to_string)
        .or_else(|| author_email.clone())
        .unwrap_or_default();

    let mut entry: Entry = Newsletter {
        author: Person {
            name: author_name,
            email: author_email,
            uri: None,
        },
        title: parsed.get_subject().unwrap_or("No subject").to_string(),
        content: Some(processed_html.to_owned()),
        id: url.clone(),
        published,
        url,
    }
    .into();
    entry.set_updated(published);

    feed.entries.push(entry);
}
/// Creates the feed skeleton without entries: title, id, an "alternate" link
/// to the site, a "self" link to the feed file and the generator information.
pub(crate) fn build_atom_feed() -> Feed {
    let site_link = LinkBuilder::default()
        .href("https://newsletters.kiers.eu/")
        .rel("alternate")
        .build();

    let feed_link = LinkBuilder::default()
        .href("https://newsletters.kiers.eu/feed.atom")
        .rel("self")
        .build();

    let generator = Generator {
        value: "newsletter-to-web".to_string(),
        uri: None,
        version: Some("0.0.1".to_string()),
    };

    FeedBuilder::default()
        .title("JJKiers Newsletters")
        .id("https://newsletters.kiers.eu/feed.atom")
        .link(site_link)
        .link(feed_link)
        .generator(generator)
        .build()
}
/// A single newsletter issue, holding the data needed to build one feed entry.
///
/// It is converted into an `Entry` through `From<Newsletter> for Entry`.
//#[derive(Serialize, Deserialize, Debug)]
pub(crate) struct Newsletter {
/// Becomes the id of the entry.
id: String,
/// Becomes the href of the entry's "alternate" link.
url: String,
/// Becomes the title of the entry.
title: String,
/// When present, becomes the entry content with content type "html".
content: Option<String>,
/// Becomes the author of the entry.
author: Person,
/// Becomes the published timestamp of the entry.
published: DateTime<Utc>,
}
impl From<Newsletter> for Entry {
fn from(post: Newsletter) -> Self {
let content = post.content.map(|v| {
ContentBuilder::default()
.value(v)
.content_type(Some("html".to_string()))
.build()
});
EntryBuilder::default()
.title(post.title)
.id(post.id)
.published(Some(post.published.clone().into()))
.author(post.author.into())
.content(content)
.link(
LinkBuilder::default()
.href(post.url)
.rel("alternate")
.build(),
)
.build()
}
}

View File

@ -1,124 +1,174 @@
mod cli;
mod feed;
mod message;
mod message_reader;
use chrono::Utc;
use clap::Parser;
use mail_parser::Message as ParsedMessage;
use message_reader::{DataDirectoryMessageReader, EmailReader, ImapReader};
use sha2::{Digest, Sha256};
use std::{
error::Error,
fs::{File, OpenOptions},
io::Write,
path::{Path, PathBuf},
};
use atom_syndication::{
ContentBuilder, Entry, EntryBuilder, Feed, FeedBuilder, Generator, LinkBuilder, Person,
};
use chrono::{DateTime, TimeZone, Utc};
use mail_parser::{HeaderValue, Message as MpMessage};
pub(crate) use message::Message;
use sha2::{Digest, Sha256};
fn main() -> Result<(), Box<dyn Error>> {
let cli = cli::Cli::parse();
use message_reader::{EmailReader, TestMessagesReader};
let data_directory = "data";
pub struct Message {
uid: String,
data: Vec<u8>,
let result = match &cli.command {
cli::Command::FetchFromImap {
server,
port,
username,
password,
} => fetch_from_imap(
data_directory,
server.to_owned(),
*port,
username.to_owned(),
password.to_owned(),
),
cli::Command::BuildFeed { filename } => build_feed(&filename),
_ => unimplemented!("This method is not yet implemented."),
};
result
}
impl Message {
pub fn new(uid: String, data: Vec<u8>) -> Message {
Message { uid, data }
/// Makes sure that the directory `dir` exists, creating it together with any
/// missing parent directories.
///
/// An already existing directory is not an error. Neither is the empty path,
/// which is what `Path::parent` yields for a bare file name such as
/// `feed.atom`; it refers to the current directory.
///
/// # Errors
///
/// Returns the underlying I/O error when the directory cannot be created, for
/// example because of missing permissions or because `dir` exists but is not a
/// directory.
fn create_directory<P: AsRef<Path>>(dir: P) -> Result<(), std::io::Error> {
    std::fs::create_dir_all(dir)
}
fn main() {
let dir = Path::new("data");
if !dir.exists() {
std::fs::create_dir(&dir).expect("Could not create directory");
}
/// Builds an Atom feed from all messages stored in the `data` directory and
/// writes it to `filename`.
///
/// The parent directory of `filename` is created when needed. The feed file is
/// only written when at least one entry was added.
///
/// # Errors
///
/// Returns an error when
/// - `filename` has no parent directory or that directory cannot be created,
/// - a message cannot be parsed, has no date or has no HTML body,
/// - the HTML of a message cannot be processed,
/// - the feed file cannot be created or written.
fn build_feed(filename: &Path) -> Result<(), Box<dyn Error>> {
    let dir = filename.parent().ok_or_else(|| {
        format!(
            "Could not get parent directory of {}",
            filename.display()
        )
    })?;

    println!(
        "Building the feed to {} in {}/",
        filename.display(),
        dir.display()
    );

    create_directory(dir)?;

    let mut feed = feed::build_atom_feed();

    let mut reader = DataDirectoryMessageReader::new(PathBuf::from("data"));

    for msg in reader.read_rfc822_messages() {
        let parsed = msg.get_parsed().ok_or_else(|| {
            format!("Could not parse the message with id {}", msg.get_uid())
        })?;

        let date = parsed.get_date().ok_or_else(|| {
            format!("Could not get the date of message {}", msg.get_uid())
        })?;

        let subject = parsed.get_subject().unwrap_or("No subject");

        println!(
            "Processing message {} from {} with subject {}",
            msg.get_uid(),
            date.to_string(),
            subject
        );

        let html_body = parsed.get_html_body(0).ok_or_else(|| {
            format!("Could not read html body of message {}", msg.get_uid())
        })?;
        // `process_html` reports failure as `()`, which carries no details and
        // is not an `Error`; attach the message id instead.
        let processed_html = process_html(&html_body).map_err(|_| {
            format!("Could not process the HTML of message {}", msg.get_uid())
        })?;

        feed::add_entry_to_feed(&mut feed, &msg, &processed_html);
    }

    if !feed.entries.is_empty() {
        feed.set_updated(Utc::now());
        println!("Writing feed to {}", filename.display());
        // Propagate both creation and write failures; a silently missing or
        // truncated feed file would otherwise go unnoticed.
        feed.write_to(File::create(filename)?)?;
    }

    println!("Finished building the feed.");

    Ok(())
}
fn fetch_from_imap(
data_directory: &str,
server: String,
port: u16,
username: String,
password: String,
) -> Result<(), Box<dyn Error>> {
create_directory(data_directory)?;
print!("Getting mail from {} for mailbox {}", server, username);
let mut reader = ImapReader::new(
String::from(server),
port,
String::from(username),
String::from(password),
);
for msg in reader.read_rfc822_messages() {
let parsed = msg.get_parsed().ok_or(format!(
"Could not parse the message with id {}",
msg.get_uid()
))?;
let date = parsed.get_date().ok_or(format!(
"Could not get the date of message {}",
msg.get_uid()
))?;
let subject = match parsed.get_subject() {
Some(subject) => subject,
None => "No subject",
};
println!(
"Processing message {} from {} with subject {}",
msg.get_uid(),
date.to_string(),
subject
);
let path = get_path(&parsed, &msg);
let html_path: PathBuf = [dir, Path::new(&path)].iter().collect();
println!("Storing to {}", &html_path.display());
let html_path: PathBuf = [
Path::new(data_directory),
Path::new(&format!("{}.eml", path)),
]
.iter()
.collect();
add_entry_to_feed(&mut feed, &msg, &processed_html);
println!("Storing to {}", &html_path.display());
OpenOptions::new()
.write(true)
.create(true)
.open(&html_path)
.expect(format!("Could not open file '{}' for writing", &html_path.display()).as_str())
.write_all(&html_bytes)
.write_all(msg.get_data())
.expect(format!("Could not write html to file '{}'.", &html_path.display()).as_str());
println!();
}
if feed.entries.len() > 0 {
feed.set_updated(Utc::now());
let _ = feed.write_to(File::create(format!("{}/feed.atom", dir.display())).unwrap());
}
Ok(())
}
fn add_entry_to_feed(feed: &mut Feed, message: &Message, processed_html: &String) {
let parsed = message.get_parsed().unwrap();
let date = parsed.get_date().expect("Could not extract date");
let from = match parsed.get_from() {
HeaderValue::Address(e) => e,
_ => return,
};
let path = get_path(&parsed, message);
let url = format!("https://newsletters.kiers.eu/{}", &path);
let mut entry : Entry = Newsletter {
author: Person {
name: match &from.name {
Some(n) => n.to_string(),
_ => match &from.address {
Some(e) => e.to_string(),
_ => "".to_string(),
},
},
email: match &from.address {
Some(e) => Some(e.to_string()),
_ => None,
},
uri: None,
},
title: parsed
.get_subject()
.expect("Expected a subject")
.to_string(),
content: Some(processed_html.clone()),
id: url.clone(),
published: Utc.timestamp(date.to_timestamp(), 0), //(format!("{}{}", &date.to_iso8601(), "+00:00").as_str()).`unwrap(),
url: url,
}
.into();
entry.set_updated(Utc.timestamp(date.to_timestamp(), 0));
feed.entries.push(entry);
}
fn get_path(parsed: &MpMessage, msg: &Message) -> String {
fn get_path(parsed: &ParsedMessage, msg: &Message) -> String {
let date = parsed.get_date().expect("Could not extract date");
let date_str = format!(
"{:04}{:02}{:02}{:02}{:02}{:02}",
@ -129,95 +179,13 @@ fn get_path(parsed: &MpMessage, msg: &Message) -> String {
&parsed.get_html_body(0).expect("Expected a body").as_bytes(),
));
let uid: i32 = msg
.get_uid()
let uid: i32 = msg.get_uid()
.parse()
.expect("Could not convert message uid to an i32.");
//format!("{}_{}_{}.html", &date_str, &file_name, msg.get_uid()).to_owned()
.expect(&format!("Could not convert message id {} to an i32.", msg.get_uid()));
format!("{:05}_{}_{}.html", uid, date_str, &hash).to_owned()
}
/// Returns a copy of `input` in which every occurrence of the text `src` is
/// replaced by `data-source`.
///
/// The replacement is purely textual: it is applied to every match in the
/// input, not only to attribute names. The function never returns `Err`.
fn process_html(input: &str) -> Result<String, ()> {
    let rewritten = input.replace("src", "data-source");

    Ok(rewritten)
}
fn build_atom_feed() -> Feed {
FeedBuilder::default()
.title("JJKiers Newsletters")
.id("https://newsletters.kiers.eu/feed.atom")
.link(
LinkBuilder::default()
.href("https://newsletters.kiers.eu/")
.rel("alternate")
.build(),
)
.link(
LinkBuilder::default()
.href("https://newsletters.kiers.eu/feed.atom")
.rel("self")
.build(),
)
.generator(Generator {
value: String::from("newsletter-to-web"),
uri: None,
version: Some(String::from("0.0.1")),
})
.build()
}
fn write_to_test_path(msg: &Message) {
let test_path: PathBuf = [
Path::new("tests/data"),
Path::new(&format!("{}.eml", &msg.get_uid())),
]
.iter()
.collect();
let _ = OpenOptions::new()
.write(true)
.create(true)
.open(test_path)
.expect("Could not open file fir writing")
.write_all(&msg.data);
}
//#[derive(Serialize, Deserialize, Debug)]
struct Newsletter {
id: String,
url: String,
title: String,
content: Option<String>,
author: Person,
published: DateTime<Utc>,
}
impl From<Newsletter> for Entry {
fn from(post: Newsletter) -> Self {
let content = post.content.map(|v| {
ContentBuilder::default()
.value(v)
.content_type(Some("html".to_string()))
.build()
});
EntryBuilder::default()
.title(post.title)
.id(post.id)
.published(Some(post.published.clone().into()))
.author(post.author.into())
.content(content)
.link(
LinkBuilder::default()
.href(post.url)
.rel("alternate")
.build(),
)
.build()
}
}
// pub fn parse_datetime(s: &str) -> Option<DateTime<FixedOffset>> {
// DateTime::<FixedOffset>::from(s)
// .ok()
// .map(|d| d.with_timezone(&Utc.fix()))
// }

24
bin/src/message.rs Normal file
View File

@ -0,0 +1,24 @@
use mail_parser::Message as ParsedMessage;
/// A raw email message together with the identifier it is known by.
pub(crate) struct Message {
    /// Identifier of the message, as handed to `Message::new`.
    uid: String,
    /// The unparsed bytes of the message.
    data: Vec<u8>,
}

impl Message {
    /// Creates a message from its identifier and its raw bytes.
    pub fn new(uid: String, data: Vec<u8>) -> Message {
        Self { uid, data }
    }

    /// Parses the raw bytes; yields `None` when the parser returns nothing.
    pub(crate) fn get_parsed(&self) -> Option<ParsedMessage> {
        let raw = &self.data;
        ParsedMessage::parse(raw)
    }

    /// The identifier of the message.
    pub fn get_uid(&self) -> &String {
        let Self { uid, .. } = self;
        uid
    }

    /// The raw bytes of the message.
    pub fn get_data(&self) -> &Vec<u8> {
        let Self { data, .. } = self;
        data
    }
}

View File

@ -12,25 +12,29 @@ use rustls_connector::RustlsConnector;
use crate::Message;
pub trait EmailReader {
pub(crate) trait EmailReader {
fn read_rfc822_messages(&mut self) -> Box<IntoIter<Message>>;
}
pub(crate) struct TestMessagesReader {
pub(crate) struct DataDirectoryMessageReader {
path: PathBuf,
}
impl TestMessagesReader {
impl DataDirectoryMessageReader {
pub fn new(path: PathBuf) -> Self {
TestMessagesReader { path }
DataDirectoryMessageReader { path }
}
}
impl EmailReader for TestMessagesReader {
impl EmailReader for DataDirectoryMessageReader {
fn read_rfc822_messages(&mut self) -> Box<IntoIter<Message>> {
println!("Reading files in {}", &self.path.display());
let reader = match read_dir(&self.path) {
Ok(r) => r,
Err(_) => return Box::new(Vec::new().into_iter()),
Err(e) => {
dbg!(e);
return Box::new(Vec::new().into_iter());
}
};
let items = reader
@ -41,9 +45,20 @@ impl EmailReader for TestMessagesReader {
None => false,
})
.map(|i| {
let uid = i.path().file_stem().unwrap().to_owned();
let uid = i
.path()
.file_stem()
.unwrap()
.to_owned()
.into_string()
.expect("Could not convert filename to string.")
.split("_")
.collect::<Vec<&str>>()[0]
.trim_start_matches("0")
.to_string();
if let Ok(data) = std::fs::read(i.path()) {
Some((uid.into_string().unwrap(), data))
Some((uid, data))
} else {
None
}
@ -135,10 +150,15 @@ impl EmailReader for ImapReader {
fn read_rfc822_messages(&mut self) -> Box<IntoIter<Message>> {
let msgs = match self.connect() {
Ok(m) => m,
Err(_) => return Box::new(Vec::new().into_iter()),
Err(e) => {
dbg!(e);
return Box::new(Vec::new().into_iter());
}
};
let items = msgs.iter().map(|i| Message::new(i.0.to_owned(), i.1.to_owned()));
let items = msgs
.iter()
.map(|i| Message::new(i.0.to_owned(), i.1.to_owned()));
let iter = items.collect::<Vec<Message>>().into_iter();