newsletter-to-web/bin/src/main.rs

133 lines
3.6 KiB
Rust

mod message_reader;
use std::{
fs::OpenOptions,
io::Write,
path::{Path, PathBuf},
};
use mail_parser::Message as MpMessage;
use sanitize_html::{rules::Element, sanitize_str};
use sha2::{Digest, Sha256};
extern crate imap;
extern crate mail_parser;
extern crate rustls_connector;
extern crate sanitize_html;
extern crate sha2;
use message_reader::{EmailReader, TestMessagesReader};
/// A single e-mail message: its identifier plus the raw message bytes.
///
/// The common traits are derived so that messages can be debug-printed,
/// duplicated and compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Message {
    /// Identifier of the message. `get_path` parses it as an integer and
    /// `write_to_test_path` uses it as the stem of the `.eml` file name.
    uid: String,
    /// Raw message bytes, handed to the `mail_parser` parser by `get_parsed`.
    data: Vec<u8>,
}
impl Message {
    /// Builds a message from its identifier and its raw bytes.
    pub fn new(uid: String, data: Vec<u8>) -> Self {
        Self { uid, data }
    }

    /// Runs the `mail_parser` parser over the stored bytes and returns
    /// whatever it yields (`None` when it produces no message).
    pub(crate) fn get_parsed(&self) -> Option<MpMessage> {
        let raw: &[u8] = &self.data;
        MpMessage::parse(raw)
    }

    /// Returns a reference to the identifier this message was created with.
    pub fn get_uid(&self) -> &String {
        let Self { uid, .. } = self;
        uid
    }
}
/// Entry point: turns every message found under `tests/data` into a
/// sanitized HTML file inside the `data` directory.
///
/// For each message yielded by `TestMessagesReader` the message is parsed,
/// its subject is printed, its first HTML body part is run through
/// `process_html`, and the result is written to `data/<name>` where `<name>`
/// comes from `get_path`.
///
/// # Panics
///
/// Panics when the output directory cannot be created, when a message cannot
/// be parsed or lacks a subject or an HTML body, when sanitizing fails, or
/// when the output file cannot be opened or written.
fn main() {
    let dir = Path::new("data");
    // `create_dir_all` succeeds when the directory already exists, so no
    // separate (racy) existence check is needed.
    std::fs::create_dir_all(dir).expect("Could not create directory");

    let mut reader = TestMessagesReader::new(PathBuf::from("tests/data"));

    for msg in reader.read_rfc822_messages() {
        println!("Processing message {}", msg.get_uid());

        let parsed = msg.get_parsed().expect("A parsed message.");
        let title = parsed.get_subject().expect("Expected a subject");
        println!("{}", title);

        let html_body = parsed.get_html_body(0).expect("Could not read html body");
        let processed_html = process_html(&html_body).expect("Could not process the HTML");

        let html_path = dir.join(get_path(&parsed, &msg));
        println!("Storing to {}", html_path.display());

        OpenOptions::new()
            .write(true)
            .create(true)
            // Without truncation, rewriting an existing file with shorter
            // content would leave stale bytes at its end.
            .truncate(true)
            .open(&html_path)
            // The closures build the panic message only on failure; the text
            // mirrors what `expect` would print (`<message>: <error:?>`).
            .unwrap_or_else(|err| {
                panic!(
                    "Could not open file '{}' for writing: {:?}",
                    html_path.display(),
                    err
                )
            })
            .write_all(processed_html.as_bytes())
            .unwrap_or_else(|err| {
                panic!(
                    "Could not write html to file '{}'.: {:?}",
                    html_path.display(),
                    err
                )
            });

        println!();
    }
}
/// Builds the output file name for a message.
///
/// The name has the shape `<uid>_<timestamp>_<hash>.html`, where
/// * `<uid>` is the message uid, zero-padded to at least five digits,
/// * `<timestamp>` is the message date rendered as `YYYYMMDDhhmmss`,
/// * `<hash>` is the lowercase hex SHA-256 digest of the first HTML body part.
///
/// # Panics
///
/// Panics when the message has no date, has no HTML body, or when the uid is
/// not a valid unsigned 32-bit integer.
fn get_path(parsed: &MpMessage, msg: &Message) -> String {
    let date = parsed.get_date().expect("Could not extract date");
    let date_str = format!(
        "{:04}{:02}{:02}{:02}{:02}{:02}",
        date.year, date.month, date.day, date.hour, date.minute, date.second
    );

    let body = parsed.get_html_body(0).expect("Expected a body");
    let hash = base16ct::lower::encode_string(&Sha256::digest(body.as_bytes()));

    // IMAP defines UIDs as unsigned 32-bit values, so parse as `u32`; an
    // `i32` would reject every uid above `i32::MAX`.
    let uid: u32 = msg
        .get_uid()
        .parse()
        .expect("Could not convert message uid to a u32.");

    format!("{:05}_{}_{}.html", uid, date_str, hash)
}
/// Sanitizes `input` with a customised version of the `sanitize_html`
/// "relaxed" rule set and returns the sanitized markup.
///
/// Customisations applied on top of the predefined rules:
/// * `style` is passed to `delete` (presumably dropping `<style>` elements;
///   see the `sanitize_html` docs for the exact semantics),
/// * on `img`, the `src` attribute is renamed to `data-source`,
/// * on `span`, the value of the `style` attribute is replaced by an empty
///   string.
///
/// # Panics
///
/// Panics if the relaxed rule set contains no entry for `img`.
fn process_html(input: &str) -> Result<String, sanitize_html::errors::SanitizeError> {
    let mut base = sanitize_html::rules::predefined::relaxed().delete("style");

    let img = base.allowed_elements.get_mut("img").unwrap();
    img.attribute_rules.rename("src", "data-source");

    let mut span = Element::new("span");
    span.attribute_rules
        .modify("style", Box::new(|_value| String::new()));

    let rules = base.element(span);
    sanitize_str(&rules, input)
}
fn write_to_test_path(msg: &Message) {
let test_path: PathBuf = [
Path::new("tests/data"),
Path::new(&format!("{}.eml", &msg.get_uid())),
]
.iter()
.collect();
let _ = OpenOptions::new()
.write(true)
.create(true)
.open(test_path)
.expect("Could not open file fir writing")
.write_all(&msg.data);
}