From 431c26a35860b6f7333c29660083535d82695097 Mon Sep 17 00:00:00 2001 From: Jacob Kiers Date: Tue, 7 Jun 2022 22:27:28 +0200 Subject: [PATCH] Initial commit Signed-off-by: Jacob Kiers --- .gitignore | 1 + Cargo.lock | 483 ++++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 19 +++ README.md | 24 +++ src/main.rs | 125 ++++++++++++++ 5 files changed, 652 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 README.md create mode 100644 src/main.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..8951f33 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,483 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "aho-corasick" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +dependencies = [ + "memchr", +] + +[[package]] +name = "arrayvec" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "base64" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bufstream" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40e38929add23cdf8a366df9b0e088953150724bcbe5fc330b0d8eb3b328eec8" + +[[package]] +name = "bumpalo" +version = "3.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37ccbd214614c6783386c1af30caf03192f17891059cecc394b4fb119e363de3" + +[[package]] +name = "cc" +version = "1.0.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +dependencies = [ + "libc", + "num-integer", + "num-traits", + "time", + "winapi", +] + +[[package]] +name = "encoding_rs" +version = "0.8.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9852635589dc9f9ea1b6fe9f05b50ef208c85c834a562f0c6abb1c475736ec2b" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "imap" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c617c55def8c42129e0dd503f11d7ee39d73f5c7e01eff55768b3879ff1d107d" +dependencies = [ + "base64", + "bufstream", + "chrono", + "imap-proto", + "lazy_static", + "nom", + "regex", +] + +[[package]] +name = "imap-proto" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16a6def1d5ac8975d70b3fd101d57953fe3278ef2ee5d7816cba54b1d1dfc22f" +dependencies = [ + "nom", +] + +[[package]] +name = "js-sys" +version = "0.3.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "671a26f820db17c2a2750743f1dd03bafd15b98c9f30c7c2628c024c05d73397" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "lexical-core" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6607c62aa161d23d17a9072cc5da0be67cdfc89d3afb1e8d9c842bebc2525ffe" +dependencies = [ + "arrayvec", + "bitflags", + "cfg-if", + "ryu", + "static_assertions", +] + +[[package]] +name = "libc" +version = "0.2.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" + +[[package]] +name = "log" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "mail-parser" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c46a841ae5276aba5218ade7bb76896358f9f95a925c7b3deea6a0ec0fb8e2a7" +dependencies = [ + "encoding_rs", + "serde", +] + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "newsletter-to-web" +version = "0.1.0" +dependencies = [ + "imap", + "mail-parser", + "rustls-connector", +] + +[[package]] +name = "nom" +version = "5.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af" +dependencies = [ + "lexical-core", + "memchr", + "version_check", +] + +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225" + +[[package]] +name = "proc-macro2" +version = "1.0.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d83f127d94bdbcda4c8cc2e50f6f84f4b611f69c902699ca385a39c3a75f9ff1" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64" + +[[package]] +name = "ring" +version = "0.16.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" +dependencies = [ + "cc", + "libc", + "once_cell", + "spin", + "untrusted", + "web-sys", + "winapi", +] + +[[package]] +name = "rustls" +version = "0.20.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aab8ee6c7097ed6057f43c187a62418d0c05a4bd5f18b3571db50ee0f9ce033" +dependencies = [ + "log", + "ring", + "sct", + "webpki", +] + +[[package]] +name = "rustls-connector" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c6a18f8d10f71bce9bca6eaeb80429460e652f3bcf0381f0c5f8954abf7b3b8" +dependencies = [ + "log", + "rustls", + "webpki", + "webpki-roots", +] + +[[package]] +name = "ryu" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" + +[[package]] +name = "sct" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "serde" +version = "1.0.137" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.137" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "spin" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "1.0.96" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0748dd251e24453cb8717f0354206b91557e4ec8703673a4b30208f2abaf1ebf" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "time" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" +dependencies = [ + "libc", + "wasi", + "winapi", +] + +[[package]] +name = "unicode-ident" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" + +[[package]] +name = "untrusted" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "wasi" +version = "0.10.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" + +[[package]] +name = "wasm-bindgen" +version = "0.2.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27370197c907c55e3f1a9fbe26f44e937fe6451368324e009cba39e139dc08ad" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53e04185bfa3a779273da532f5025e33398409573f348985af9a1cbf3774d3f4" +dependencies = [ + "bumpalo", + "lazy_static", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17cae7ff784d7e83a2fe7611cfe766ecf034111b49deb850a3dc7699c08251f5" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99ec0dc7a4756fffc231aab1b9f2f578d23cd391390ab27f952ae0c9b3ece20b" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d554b7f530dee5964d9a9468d95c1f8b8acae4f282807e7d27d4b03099a46744" + +[[package]] +name = "web-sys" +version = "0.3.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b17e741662c70c8bd24ac5c5b18de314a2c26c32bf8346ee1e6f53de919c283" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "webpki" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f095d78192e208183081cc07bc5515ef55216397af48b873e5edcd72637fa1bd" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "webpki-roots" +version = "0.22.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d8de8415c823c8abd270ad483c6feeac771fad964890779f9a8cb24fbbc1bf" +dependencies = [ + "webpki", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..3b00a87 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "newsletter-to-web" +version = "0.1.0" +edition = "2021" +description = "Converts email newsletters to static HTML files" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +mail-parser = "^0.4.8" + +[dependencies.imap] +version = "^2.4.1" +default-features = false + +[dependencies.rustls-connector] +version = "^0.16.1" +default-features = false +features = [ "webpki-roots-certs", "quic" ] \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..e1d9f63 --- /dev/null +++ b/README.md @@ -0,0 +1,24 @@ +# Newsletter to HTML + +Converts a newsletter to static HTML files. + +## Features + +### Receive email + +* [X] IMAP integration +* [ ] Direct feeding of email message +* [ ] Read from Maildir +* [ ] Read from MBOX + +### Convert email to HTML + +* [X] Generate static HTML files +* [ ] Parse and add unsubscribe link +* [ ] Where possible: remove tracking pixels / images +* [ ] Prevent loading of scripts / images without user interaction + +### Create ATOM feed + +* [ ] One feed per newsletter +* [ ] Index feed (containing all newsletters)? diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..04f9d3b --- /dev/null +++ b/src/main.rs @@ -0,0 +1,125 @@ +use std::{ + collections::HashMap, + error::Error, + fs::OpenOptions, + io::Write, + net::TcpStream, + path::{Path, PathBuf}, + }; + +use imap::Session; +use mail_parser::Message; +use rustls_connector::RustlsConnector; + +extern crate imap; +extern crate mail_parser; +extern crate rustls_connector; + +fn main() { + let dir = Path::new("data"); + if !dir.exists() { + std::fs::create_dir(&dir).expect("Could not create directory"); + } + + let messsages = connect("my.kiers.eu", 993, "newsletters@kie.rs", "Jjkcloudron1!") + .expect("A list of messages"); + + for (uid, message) in messsages { + println!("Processing message {}\n", &uid); + + let parsed = Message::parse(&message).expect("A parsed messsage."); + + // let from = match parsed.get_from() { + // mail_parser::HeaderValue::Address(a) => Some(a), + // _ => None, + // } + // .expect("An address"); + + // println!( + // "{} <{}>", + // from.name.as_ref().unwrap(), + // from.address.as_ref().unwrap() + // ); + // println!( + // "HML: {}, Text: {}", + // parsed.get_html_body_count(), + // parsed.get_text_body_count() + // ); + + let html_body = parsed.get_html_body(0).expect("Could not read body"); + let html_bytes = html_body.as_bytes(); + + println!("{}", &html_body); + + let path: PathBuf = [dir, Path::new(&format!("{}.html", uid))].iter().collect(); + let mut file = OpenOptions::new() + .write(true) + .create(true) + .open(path) + .expect("Could not open file fir writing"); + + file.write_all(html_bytes) + .expect("Could not write to file."); + } +} + +fn connect( + server: &str, + port: u16, + username: &str, + password: &str, +) -> Result>, Box> { + let mut session = open_session(server, port, username, password)?; + + session.examine("INBOX")?; + + let items = match session.uid_search("ALL") { + Ok(i) => i, + Err(e) => return Err(Box::new(e)), + }; + + let mut msgs = HashMap::with_capacity(items.len()); + + //println!("# of messages: {}", &items.len()); + + for item in items { + let msg = session.uid_fetch(&item.to_string(), "(BODY.PEEK[] UID)")?; + + let message = if let Some(m) = msg.iter().next() { + m + } else { + continue; + }; + + let body = message.body().expect("Message did not have a body."); + + msgs.insert(item, body.to_owned()); + } + + session.logout().expect("Could not log out"); + + Ok(msgs) +} + +fn open_session( + server: &str, + port: u16, + username: &str, + password: &str, +) -> Result< + Session< + rustls_connector::rustls::StreamOwned< + rustls_connector::rustls::ClientConnection, + TcpStream, + >, + >, + Box, +> { + let stream = TcpStream::connect((server, port))?; + let tls = RustlsConnector::new_with_webpki_roots_certs(); + let tls_stream = tls.connect(server, stream)?; + + let client = imap::Client::new(tls_stream); + + Ok(client.login(username, password).map_err(|e| e.0)?) +}