newsletter-to-web/bin/src/main.rs

185 lines
4.9 KiB
Rust
Raw Normal View History

use std::{
collections::HashMap,
error::Error,
fs::{read_dir, OpenOptions},
io::Write,
net::TcpStream,
path::{Path, PathBuf},
};
use imap::Session;
use mail_parser::Message;
use rustls_connector::RustlsConnector;
use sanitize_html::{rules::Element, sanitize_str};
use sha2::{Digest, Sha256};
extern crate imap;
extern crate mail_parser;
extern crate rustls_connector;
extern crate sanitize_html;
extern crate sha2;
/// Converts every stored newsletter message into a sanitized HTML file.
///
/// Messages are loaded from `tests/data`, parsed, their HTML body is run
/// through `process_html`, and the result is written to
/// `data/<subject>.html`. The subject and the SHA-256 digest (lower-case hex)
/// of the sanitized HTML are printed for each message.
///
/// # Panics
///
/// Panics when the output directory cannot be created, when a message cannot
/// be parsed or lacks a subject or HTML body, or when an output file cannot be
/// written.
fn main() {
    let dir = Path::new("data");
    // `create_dir_all` succeeds when the directory already exists, so no
    // separate existence check is needed.
    std::fs::create_dir_all(dir).expect("Could not create directory");

    // To fetch from a live mailbox instead, call
    // `connect(server, port, username, password)` here. Credentials must be
    // supplied at runtime (environment/config) and never be kept in source.
    let messages = messages_from_tests(Path::new("tests/data")).expect("A list of messages");

    for (uid, message) in messages {
        println!("Processing message {}", &uid);

        let parsed = Message::parse(&message).expect("A parsed messsage.");
        let title = parsed.get_subject().expect("Expected a subject");
        println!("{}", &title);

        let html_body = parsed.get_html_body(0).expect("Could not read html body");
        let processed_html = process_html(&html_body).expect("Could not process the HTML");
        let html_bytes = processed_html.as_bytes();

        let hash = base16ct::lower::encode_string(&Sha256::digest(html_bytes));
        println!("{}", hash);

        // The subject comes from the e-mail and is therefore untrusted: path
        // separators and control characters are replaced so the output file
        // always ends up directly inside `dir`.
        let file_name: String = title
            .chars()
            .map(|c| {
                if c == '/' || c == '\\' || c.is_control() {
                    '_'
                } else {
                    c
                }
            })
            .collect();
        let html_path: PathBuf = dir.join(format!("{}.html", file_name));

        // `truncate(true)` prevents stale trailing bytes when an existing,
        // longer file is overwritten.
        let mut file = OpenOptions::new()
            .write(true)
            .create(true)
            .truncate(true)
            .open(&html_path)
            .unwrap_or_else(|err| {
                panic!(
                    "Could not open file '{}' for writing: {:?}",
                    html_path.display(),
                    err
                )
            });
        file.write_all(html_bytes).unwrap_or_else(|err| {
            panic!(
                "Could not write html to file '{}'.: {:?}",
                html_path.display(),
                err
            )
        });

        println!();
    }
}
/// Downloads every message found in the `INBOX` mailbox of an IMAP account.
///
/// A session is opened with `open_session`, the mailbox is opened with
/// `EXAMINE`, and each UID returned by a `UID SEARCH ALL` is fetched
/// individually with `BODY.PEEK[]`.
///
/// Returns a map from the message UID (as a decimal string) to the raw
/// message bytes. UIDs for which the server returns no fetch result are
/// skipped.
///
/// # Errors
///
/// Returns an error when the session cannot be opened, when any IMAP command
/// (including the final logout) fails, or when a fetched message has no body.
/// NOTE(review): on an early error return the session is dropped without an
/// explicit logout.
fn connect(
    server: &str,
    port: u16,
    username: &str,
    password: &str,
) -> Result<HashMap<String, Vec<u8>>, Box<dyn Error>> {
    let mut session = open_session(server, port, username, password)?;
    session.examine("INBOX")?;

    let items = session.uid_search("ALL")?;
    let mut msgs = HashMap::<String, Vec<u8>>::with_capacity(items.len());

    for item in items {
        let uid = item.to_string();
        let fetched = session.uid_fetch(&uid, "(BODY.PEEK[] UID)")?;
        let message = match fetched.iter().next() {
            Some(m) => m,
            // Nothing was returned for this UID; move on to the next one.
            None => continue,
        };
        // Report a missing body through the `Result` instead of panicking.
        let body = message.body().ok_or("Message did not have a body.")?;
        msgs.insert(uid, body.to_owned());
    }

    session.logout()?;
    Ok(msgs)
}
/// Establishes an authenticated IMAP session over TLS.
///
/// Connects a TCP stream to `server:port`, wraps it in TLS using a connector
/// built by `RustlsConnector::new_with_webpki_roots_certs()`, and logs in
/// with `username` / `password`.
///
/// # Errors
///
/// Returns the underlying error when the TCP connection or the TLS handshake
/// fails. When the login fails, only the first field of the failure value is
/// returned as the error.
fn open_session(
    server: &str,
    port: u16,
    username: &str,
    password: &str,
) -> Result<
    Session<
        rustls_connector::rustls::StreamOwned<
            rustls_connector::rustls::ClientConnection,
            TcpStream,
        >,
    >,
    Box<dyn Error + 'static>,
> {
    let tcp = TcpStream::connect((server, port))?;
    let connector = RustlsConnector::new_with_webpki_roots_certs();
    let secured = connector.connect(server, tcp)?;

    let session = imap::Client::new(secured)
        .login(username, password)
        .map_err(|failure| failure.0)?;
    Ok(session)
}
/// Sanitizes the HTML body of a newsletter.
///
/// The rule set starts from the sanitizer's predefined `relaxed` rules with
/// `style` deleted, and is then adjusted in two ways:
///
/// * on `img` elements the `src` attribute is renamed to `data-source`
///   (presumably so images are not loaded automatically; confirm against the
///   sanitizer's documentation);
/// * a `span` rule is registered whose `style` attribute value is replaced by
///   an empty string.
///
/// # Errors
///
/// Returns whatever error `sanitize_str` reports for `input`.
///
/// # Panics
///
/// Panics if the rule set contains no entry for `img`.
fn process_html(input: &str) -> Result<String, sanitize_html::errors::SanitizeError> {
    let mut ruleset = sanitize_html::rules::predefined::relaxed().delete("style");

    let img = ruleset.allowed_elements.get_mut("img").unwrap();
    img.attribute_rules.rename("src", "data-source");

    let mut span_rule = Element::new("span");
    span_rule
        .attribute_rules
        .modify("style", Box::new(|_| String::new()));

    sanitize_str(&ruleset.element(span_rule), input)
}
/// Loads all `*.eml` files that live directly inside `path`.
///
/// Returns a map from each file's stem (the file name without `.eml`) to the
/// file's raw bytes. File names that are not valid UTF-8 are converted
/// lossily instead of causing a panic.
///
/// Directory entries that cannot be inspected and files that cannot be read
/// are skipped on a best-effort basis, as are entries whose extension is not
/// exactly `eml`.
///
/// # Errors
///
/// Returns an error when `path` itself cannot be read as a directory.
fn messages_from_tests(path: &Path) -> Result<HashMap<String, Vec<u8>>, Box<dyn Error>> {
    let mut messages: HashMap<String, Vec<u8>> = HashMap::new();

    // `flatten` drops directory entries that yielded an I/O error.
    for entry in read_dir(path)?.flatten() {
        let file = entry.path();

        if file.extension().map_or(true, |ext| ext != "eml") {
            continue;
        }

        let uid = match file.file_stem() {
            Some(stem) => stem.to_string_lossy().into_owned(),
            None => continue,
        };

        if let Ok(data) = std::fs::read(&file) {
            messages.insert(uid, data);
        }
    }

    Ok(messages)
}
/// Stores a raw message as `tests/data/<uid>.eml`.
///
/// An existing file is truncated first, so the file never keeps bytes from a
/// previous, longer message. A failed write is reported on standard error
/// rather than being silently discarded; it does not abort the program.
///
/// # Panics
///
/// Panics when the file cannot be opened for writing.
fn write_to_test_path(uid: &str, message: &[u8]) {
    let test_path: PathBuf = Path::new("tests/data").join(format!("{}.eml", uid));

    let mut file = OpenOptions::new()
        .write(true)
        .create(true)
        .truncate(true)
        .open(&test_path)
        .expect("Could not open file fir writing");

    if let Err(err) = file.write_all(message) {
        eprintln!(
            "Could not write message to '{}': {}",
            test_path.display(),
            err
        );
    }
}