2022-06-07 20:27:28 +00:00
|
|
|
use std::{
|
|
|
|
collections::HashMap,
|
|
|
|
error::Error,
|
2022-06-12 19:39:19 +00:00
|
|
|
fs::{read_dir, DirEntry, OpenOptions},
|
2022-06-07 20:27:28 +00:00
|
|
|
io::Write,
|
|
|
|
net::TcpStream,
|
|
|
|
path::{Path, PathBuf},
|
2022-06-12 19:39:19 +00:00
|
|
|
vec::IntoIter,
|
2022-06-10 11:46:07 +00:00
|
|
|
};
|
2022-06-07 20:27:28 +00:00
|
|
|
|
|
|
|
use imap::Session;
|
|
|
|
use mail_parser::Message;
|
|
|
|
use rustls_connector::RustlsConnector;
|
|
|
|
|
2022-06-10 22:28:28 +00:00
|
|
|
use sanitize_html::{rules::Element, sanitize_str};
|
2022-06-10 11:46:07 +00:00
|
|
|
use sha2::{Digest, Sha256};
|
|
|
|
|
2022-06-07 20:27:28 +00:00
|
|
|
extern crate imap;
|
|
|
|
extern crate mail_parser;
|
|
|
|
extern crate rustls_connector;
|
2022-06-10 22:28:28 +00:00
|
|
|
extern crate sanitize_html;
|
2022-06-10 11:46:07 +00:00
|
|
|
extern crate sha2;
|
2022-06-07 20:27:28 +00:00
|
|
|
|
|
|
|
fn main() {
|
|
|
|
let dir = Path::new("data");
|
|
|
|
if !dir.exists() {
|
|
|
|
std::fs::create_dir(&dir).expect("Could not create directory");
|
|
|
|
}
|
|
|
|
|
2022-06-12 19:39:19 +00:00
|
|
|
// let messsages = messages_from_tests(&Path::new("tests/data")) //connect("my.kiers.eu", 993, "newsletters@kie.rs", "Jjkcloudron1!")
|
|
|
|
// .expect("A list of messages");
|
2022-06-07 20:27:28 +00:00
|
|
|
|
2022-06-12 19:39:19 +00:00
|
|
|
// for (uid, message) in messages {
|
|
|
|
|
|
|
|
let mut reader = TestMessagesReader::new((&Path::new("tests/data")).to_path_buf());
|
|
|
|
|
|
|
|
for (uid, message) in reader.read_rfc822_messages() {
|
2022-06-10 22:28:28 +00:00
|
|
|
println!("Processing message {}", &uid);
|
2022-06-07 20:27:28 +00:00
|
|
|
|
|
|
|
let parsed = Message::parse(&message).expect("A parsed messsage.");
|
2022-06-10 22:28:28 +00:00
|
|
|
let title = parsed.get_subject().expect("Expected a subject");
|
|
|
|
|
|
|
|
println!("{}", &title);
|
2022-06-07 20:27:28 +00:00
|
|
|
|
2022-06-12 18:39:35 +00:00
|
|
|
let html_body = parsed.get_html_body(0).expect("Could not read html body");
|
2022-06-10 22:28:28 +00:00
|
|
|
let processed_html = process_html(&html_body).expect("Could not process the HTML");
|
|
|
|
let html_bytes = processed_html.as_bytes();
|
2022-06-10 11:46:07 +00:00
|
|
|
|
|
|
|
let hash = base16ct::lower::encode_string(&Sha256::digest(&html_bytes));
|
|
|
|
println!("{}", hash);
|
|
|
|
|
2022-06-12 18:39:35 +00:00
|
|
|
let html_path: PathBuf = [dir, Path::new(&format!("{}.html", &title))]
|
2022-06-10 11:46:07 +00:00
|
|
|
.iter()
|
|
|
|
.collect();
|
2022-06-12 18:39:35 +00:00
|
|
|
|
|
|
|
OpenOptions::new()
|
2022-06-07 20:27:28 +00:00
|
|
|
.write(true)
|
|
|
|
.create(true)
|
2022-06-12 18:39:35 +00:00
|
|
|
.open(&html_path)
|
|
|
|
.expect(format!("Could not open file '{}' for writing", &html_path.display()).as_str())
|
|
|
|
.write_all(&html_bytes)
|
|
|
|
.expect(format!("Could not write html to file '{}'.", &html_path.display()).as_str());
|
2022-06-10 22:28:28 +00:00
|
|
|
|
|
|
|
println!();
|
2022-06-07 20:27:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn connect(
|
|
|
|
server: &str,
|
|
|
|
port: u16,
|
|
|
|
username: &str,
|
|
|
|
password: &str,
|
2022-06-10 22:28:28 +00:00
|
|
|
) -> Result<HashMap<String, Vec<u8>>, Box<dyn Error>> {
|
2022-06-07 20:27:28 +00:00
|
|
|
let mut session = open_session(server, port, username, password)?;
|
|
|
|
|
|
|
|
session.examine("INBOX")?;
|
|
|
|
|
|
|
|
let items = match session.uid_search("ALL") {
|
|
|
|
Ok(i) => i,
|
|
|
|
Err(e) => return Err(Box::new(e)),
|
|
|
|
};
|
|
|
|
|
2022-06-10 22:28:28 +00:00
|
|
|
let mut msgs = HashMap::<String, Vec<u8>>::with_capacity(items.len());
|
2022-06-07 20:27:28 +00:00
|
|
|
|
|
|
|
//println!("# of messages: {}", &items.len());
|
|
|
|
|
|
|
|
for item in items {
|
|
|
|
let msg = session.uid_fetch(&item.to_string(), "(BODY.PEEK[] UID)")?;
|
|
|
|
|
|
|
|
let message = if let Some(m) = msg.iter().next() {
|
|
|
|
m
|
|
|
|
} else {
|
|
|
|
continue;
|
|
|
|
};
|
|
|
|
|
|
|
|
let body = message.body().expect("Message did not have a body.");
|
|
|
|
|
2022-06-10 22:28:28 +00:00
|
|
|
msgs.insert(item.to_string(), body.to_owned());
|
2022-06-07 20:27:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
session.logout().expect("Could not log out");
|
|
|
|
|
|
|
|
Ok(msgs)
|
|
|
|
}
|
|
|
|
|
|
|
|
fn open_session(
|
|
|
|
server: &str,
|
|
|
|
port: u16,
|
|
|
|
username: &str,
|
|
|
|
password: &str,
|
|
|
|
) -> Result<
|
|
|
|
Session<
|
|
|
|
rustls_connector::rustls::StreamOwned<
|
|
|
|
rustls_connector::rustls::ClientConnection,
|
|
|
|
TcpStream,
|
|
|
|
>,
|
|
|
|
>,
|
|
|
|
Box<dyn Error + 'static>,
|
|
|
|
> {
|
|
|
|
let stream = TcpStream::connect((server, port))?;
|
|
|
|
let tls = RustlsConnector::new_with_webpki_roots_certs();
|
|
|
|
let tls_stream = tls.connect(server, stream)?;
|
|
|
|
|
|
|
|
let client = imap::Client::new(tls_stream);
|
|
|
|
|
|
|
|
Ok(client.login(username, password).map_err(|e| e.0)?)
|
|
|
|
}
|
2022-06-10 22:28:28 +00:00
|
|
|
|
|
|
|
fn process_html(input: &str) -> Result<String, sanitize_html::errors::SanitizeError> {
|
|
|
|
let mut rules = sanitize_html::rules::predefined::relaxed().delete("style");
|
|
|
|
|
|
|
|
rules
|
|
|
|
.allowed_elements
|
|
|
|
.get_mut("img")
|
|
|
|
.unwrap()
|
|
|
|
.attribute_rules
|
|
|
|
.rename("src", "data-source");
|
|
|
|
|
|
|
|
let mut span = Element::new("span");
|
|
|
|
|
|
|
|
span.attribute_rules
|
|
|
|
.modify("style", Box::new(|_i| "".to_string()));
|
|
|
|
|
|
|
|
let rules = rules.element(span);
|
|
|
|
|
|
|
|
//rules.allowed_elements.remove_entry("img");
|
|
|
|
|
|
|
|
sanitize_str(&rules, input)
|
|
|
|
//Ok(input.to_owned())
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Stores a raw message as `tests/data/<uid>.eml`, replacing any previous
/// file with the same name.
///
/// The `tests/data` directory is created when it does not exist yet. A
/// failure while writing the message is reported on stderr and otherwise
/// ignored.
///
/// # Panics
///
/// Panics when the directory cannot be created or the file cannot be opened
/// for writing.
fn write_to_test_path(uid: &str, message: &[u8]) {
    let test_dir = Path::new("tests/data");
    std::fs::create_dir_all(test_dir).expect("Could not create test data directory");

    let test_path: PathBuf = test_dir.join(format!("{}.eml", uid));

    let mut file = OpenOptions::new()
        .write(true)
        .create(true)
        // Drop old contents so a shorter message does not keep stale trailing bytes.
        .truncate(true)
        .open(&test_path)
        .unwrap_or_else(|e| {
            panic!(
                "Could not open file '{}' for writing: {:?}",
                test_path.display(),
                e
            )
        });

    // Writing stays best-effort, but the failure is no longer silent.
    if let Err(e) = file.write_all(message) {
        eprintln!(
            "Could not write message to file '{}': {}",
            test_path.display(),
            e
        );
    }
}
|
2022-06-12 19:39:19 +00:00
|
|
|
|
|
|
|
/// A source of raw e-mail messages.
pub trait EmailReader {
    /// Reads the messages available from this source.
    ///
    /// Each item is a pair of a message identifier and the raw bytes of that
    /// message.
    fn read_rfc822_messages(&mut self) -> Box<IntoIter<(String, Vec<u8>)>>;
}
|
|
|
|
|
|
|
|
/// Reads e-mail messages from files stored in a directory.
struct TestMessagesReader {
    /// Directory that is scanned for message files.
    path: PathBuf,
}
|
|
|
|
|
|
|
|
impl TestMessagesReader {
|
|
|
|
pub fn new(path: PathBuf) -> Self {
|
|
|
|
TestMessagesReader { path }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl EmailReader for TestMessagesReader {
|
|
|
|
fn read_rfc822_messages(&mut self) -> Box<IntoIter<(String, Vec<u8>)>> {
|
|
|
|
let reader = match read_dir(&self.path) {
|
|
|
|
Ok(r) => r,
|
|
|
|
Err(_) => return Box::new(Vec::new().into_iter()),
|
|
|
|
};
|
|
|
|
|
|
|
|
let items = reader
|
|
|
|
.filter(|i| i.is_ok())
|
|
|
|
.map(|i| i.unwrap() as DirEntry)
|
|
|
|
.filter(|d| match d.path().extension() {
|
|
|
|
Some(ext) => ext == "eml",
|
|
|
|
None => false,
|
|
|
|
})
|
|
|
|
.map(|i| {
|
|
|
|
let uid = i.path().file_stem().unwrap().to_owned();
|
|
|
|
if let Ok(data) = std::fs::read(i.path()) {
|
|
|
|
Some((uid.into_string().unwrap(), data))
|
|
|
|
} else {
|
|
|
|
None
|
|
|
|
}
|
|
|
|
})
|
|
|
|
.filter(|i| i.is_some())
|
|
|
|
.map(|i| i.unwrap());
|
|
|
|
|
|
|
|
let iter = items.collect::<Vec<(String, Vec<u8>)>>().into_iter();
|
|
|
|
|
|
|
|
Box::new(iter)
|
|
|
|
}
|
|
|
|
}
|