2022-06-12 19:42:18 +00:00
|
|
|
mod message_reader;
|
|
|
|
|
2022-06-07 20:27:28 +00:00
|
|
|
use std::{
|
2022-08-05 00:27:10 +00:00
|
|
|
fs::{File, OpenOptions},
|
2022-06-07 20:27:28 +00:00
|
|
|
io::Write,
|
|
|
|
path::{Path, PathBuf},
|
2022-06-10 11:46:07 +00:00
|
|
|
};
|
2022-06-07 20:27:28 +00:00
|
|
|
|
2022-08-05 00:27:10 +00:00
|
|
|
use atom_syndication::{
|
|
|
|
ContentBuilder, Entry, EntryBuilder, Feed, FeedBuilder, Generator, LinkBuilder, Person,
|
|
|
|
};
|
2022-08-05 10:44:03 +00:00
|
|
|
use chrono::{DateTime, TimeZone, Utc};
|
|
|
|
use mail_parser::{HeaderValue, Message as MpMessage};
|
2022-06-07 20:27:28 +00:00
|
|
|
|
2022-06-10 11:46:07 +00:00
|
|
|
use sha2::{Digest, Sha256};
|
|
|
|
|
2022-06-12 19:42:18 +00:00
|
|
|
use message_reader::{EmailReader, TestMessagesReader};
|
|
|
|
|
2022-07-20 21:03:23 +00:00
|
|
|
/// A raw e-mail message together with the identifier it was read under.
///
/// `Debug`, `Clone` and equality are derived so that messages can be logged,
/// duplicated and compared (for example in tests) without extra boilerplate.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Message {
    /// Identifier of the message as handed over by the reader.
    uid: String,
    /// The unparsed bytes of the message.
    data: Vec<u8>,
}
|
|
|
|
|
|
|
|
impl Message {
|
|
|
|
pub fn new(uid: String, data: Vec<u8>) -> Message {
|
2022-08-02 21:07:01 +00:00
|
|
|
Message { uid, data }
|
2022-07-20 21:03:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
pub(crate) fn get_parsed(&self) -> Option<MpMessage> {
|
|
|
|
MpMessage::parse(&self.data)
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn get_uid(&self) -> &String {
|
|
|
|
&self.uid
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-06-07 20:27:28 +00:00
|
|
|
fn main() {
|
|
|
|
let dir = Path::new("data");
|
|
|
|
if !dir.exists() {
|
|
|
|
std::fs::create_dir(&dir).expect("Could not create directory");
|
|
|
|
}
|
|
|
|
|
2022-08-05 00:27:10 +00:00
|
|
|
let mut feed = build_atom_feed();
|
|
|
|
|
2022-06-12 19:39:19 +00:00
|
|
|
let mut reader = TestMessagesReader::new((&Path::new("tests/data")).to_path_buf());
|
|
|
|
|
2022-07-20 21:03:23 +00:00
|
|
|
for msg in reader.read_rfc822_messages() {
|
|
|
|
println!("Processing message {}", msg.get_uid());
|
2022-06-07 20:27:28 +00:00
|
|
|
|
2022-07-20 21:03:23 +00:00
|
|
|
let parsed = msg.get_parsed().expect("A parsed messsage.");
|
2022-06-07 20:27:28 +00:00
|
|
|
|
2022-06-12 18:39:35 +00:00
|
|
|
let html_body = parsed.get_html_body(0).expect("Could not read html body");
|
2022-06-10 22:28:28 +00:00
|
|
|
let processed_html = process_html(&html_body).expect("Could not process the HTML");
|
|
|
|
let html_bytes = processed_html.as_bytes();
|
2022-06-10 11:46:07 +00:00
|
|
|
|
2022-08-05 00:27:10 +00:00
|
|
|
let path = get_path(&parsed, &msg);
|
|
|
|
let html_path: PathBuf = [dir, Path::new(&path)].iter().collect();
|
2022-08-02 21:07:01 +00:00
|
|
|
println!("Storing to {}", &html_path.display());
|
2022-06-12 18:39:35 +00:00
|
|
|
|
2022-08-05 00:27:10 +00:00
|
|
|
add_entry_to_feed(&mut feed, &msg, &processed_html);
|
|
|
|
|
2022-06-12 18:39:35 +00:00
|
|
|
OpenOptions::new()
|
2022-06-07 20:27:28 +00:00
|
|
|
.write(true)
|
|
|
|
.create(true)
|
2022-06-12 18:39:35 +00:00
|
|
|
.open(&html_path)
|
|
|
|
.expect(format!("Could not open file '{}' for writing", &html_path.display()).as_str())
|
|
|
|
.write_all(&html_bytes)
|
|
|
|
.expect(format!("Could not write html to file '{}'.", &html_path.display()).as_str());
|
2022-06-10 22:28:28 +00:00
|
|
|
|
|
|
|
println!();
|
2022-06-07 20:27:28 +00:00
|
|
|
}
|
2022-08-05 00:27:10 +00:00
|
|
|
|
|
|
|
if feed.entries.len() > 0 {
|
|
|
|
feed.set_updated(Utc::now());
|
|
|
|
let _ = feed.write_to(File::create(format!("{}/feed.atom", dir.display())).unwrap());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn add_entry_to_feed(feed: &mut Feed, message: &Message, processed_html: &String) {
|
|
|
|
let parsed = message.get_parsed().unwrap();
|
|
|
|
let date = parsed.get_date().expect("Could not extract date");
|
|
|
|
let from = match parsed.get_from() {
|
|
|
|
HeaderValue::Address(e) => e,
|
|
|
|
_ => return,
|
|
|
|
};
|
|
|
|
|
|
|
|
let path = get_path(&parsed, message);
|
|
|
|
|
|
|
|
let url = format!("https://newsletters.kiers.eu/{}", &path);
|
|
|
|
|
|
|
|
let mut entry : Entry = Newsletter {
|
|
|
|
author: Person {
|
|
|
|
name: match &from.name {
|
|
|
|
Some(n) => n.to_string(),
|
|
|
|
_ => match &from.address {
|
|
|
|
Some(e) => e.to_string(),
|
|
|
|
_ => "".to_string(),
|
|
|
|
},
|
|
|
|
},
|
|
|
|
email: match &from.address {
|
|
|
|
Some(e) => Some(e.to_string()),
|
|
|
|
_ => None,
|
|
|
|
},
|
|
|
|
uri: None,
|
|
|
|
},
|
|
|
|
title: parsed
|
|
|
|
.get_subject()
|
|
|
|
.expect("Expected a subject")
|
|
|
|
.to_string(),
|
|
|
|
content: Some(processed_html.clone()),
|
|
|
|
id: url.clone(),
|
|
|
|
published: Utc.timestamp(date.to_timestamp(), 0), //(format!("{}{}", &date.to_iso8601(), "+00:00").as_str()).`unwrap(),
|
|
|
|
url: url,
|
|
|
|
}
|
|
|
|
.into();
|
|
|
|
entry.set_updated(Utc.timestamp(date.to_timestamp(), 0));
|
|
|
|
feed.entries.push(entry);
|
2022-06-07 20:27:28 +00:00
|
|
|
}
|
|
|
|
|
2022-08-02 21:07:01 +00:00
|
|
|
fn get_path(parsed: &MpMessage, msg: &Message) -> String {
|
|
|
|
let date = parsed.get_date().expect("Could not extract date");
|
|
|
|
let date_str = format!(
|
|
|
|
"{:04}{:02}{:02}{:02}{:02}{:02}",
|
|
|
|
&date.year, &date.month, &date.day, &date.hour, &date.minute, &date.second
|
|
|
|
);
|
|
|
|
|
|
|
|
let hash = base16ct::lower::encode_string(&Sha256::digest(
|
|
|
|
&parsed.get_html_body(0).expect("Expected a body").as_bytes(),
|
|
|
|
));
|
|
|
|
|
|
|
|
let uid: i32 = msg
|
|
|
|
.get_uid()
|
|
|
|
.parse()
|
|
|
|
.expect("Could not convert message uid to an i32.");
|
|
|
|
//format!("{}_{}_{}.html", &date_str, &file_name, msg.get_uid()).to_owned()
|
|
|
|
format!("{:05}_{}_{}.html", uid, date_str, &hash).to_owned()
|
|
|
|
}
|
|
|
|
|
2022-08-02 21:11:55 +00:00
|
|
|
/// Rewrites the HTML so that `src` no longer appears in it.
///
/// Every occurrence of the text `src` is replaced by `data-source`. This is a
/// plain text substitution: it is not limited to attribute names, so other
/// text containing `src` is rewritten as well.
///
/// The current implementation always returns `Ok`.
fn process_html(input: &str) -> Result<String, ()> {
    let rewritten = input.replace("src", "data-source");
    Ok(rewritten)
}
|
|
|
|
|
2022-08-05 00:27:10 +00:00
|
|
|
fn build_atom_feed() -> Feed {
|
|
|
|
FeedBuilder::default()
|
|
|
|
.title("JJKiers Newsletters")
|
|
|
|
.id("https://newsletters.kiers.eu/feed.atom")
|
|
|
|
.link(
|
|
|
|
LinkBuilder::default()
|
|
|
|
.href("https://newsletters.kiers.eu/")
|
|
|
|
.rel("alternate")
|
|
|
|
.build(),
|
|
|
|
)
|
|
|
|
.link(
|
|
|
|
LinkBuilder::default()
|
|
|
|
.href("https://newsletters.kiers.eu/feed.atom")
|
|
|
|
.rel("self")
|
|
|
|
.build(),
|
|
|
|
)
|
|
|
|
.generator(Generator {
|
|
|
|
value: String::from("newsletter-to-web"),
|
|
|
|
uri: None,
|
|
|
|
version: Some(String::from("0.0.1")),
|
|
|
|
})
|
|
|
|
.build()
|
|
|
|
}
|
|
|
|
|
2022-08-02 21:17:59 +00:00
|
|
|
fn write_to_test_path(msg: &Message) {
|
|
|
|
let test_path: PathBuf = [
|
|
|
|
Path::new("tests/data"),
|
|
|
|
Path::new(&format!("{}.eml", &msg.get_uid())),
|
|
|
|
]
|
|
|
|
.iter()
|
|
|
|
.collect();
|
2022-06-10 22:28:28 +00:00
|
|
|
|
|
|
|
let _ = OpenOptions::new()
|
|
|
|
.write(true)
|
|
|
|
.create(true)
|
|
|
|
.open(test_path)
|
|
|
|
.expect("Could not open file fir writing")
|
2022-08-02 21:17:59 +00:00
|
|
|
.write_all(&msg.data);
|
2022-06-10 22:28:28 +00:00
|
|
|
}
|
2022-08-05 00:27:10 +00:00
|
|
|
|
|
|
|
//#[derive(Serialize, Deserialize, Debug)]
|
|
|
|
struct Newsletter {
|
|
|
|
id: String,
|
|
|
|
url: String,
|
|
|
|
title: String,
|
|
|
|
content: Option<String>,
|
|
|
|
author: Person,
|
|
|
|
published: DateTime<Utc>,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl From<Newsletter> for Entry {
|
|
|
|
fn from(post: Newsletter) -> Self {
|
|
|
|
let content = post.content.map(|v| {
|
|
|
|
ContentBuilder::default()
|
|
|
|
.value(v)
|
|
|
|
.content_type(Some("html".to_string()))
|
|
|
|
.build()
|
|
|
|
});
|
|
|
|
|
|
|
|
EntryBuilder::default()
|
|
|
|
.title(post.title)
|
|
|
|
.id(post.id)
|
|
|
|
.published(Some(post.published.clone().into()))
|
|
|
|
.author(post.author.into())
|
|
|
|
.content(content)
|
|
|
|
.link(
|
|
|
|
LinkBuilder::default()
|
|
|
|
.href(post.url)
|
|
|
|
.rel("alternate")
|
|
|
|
.build(),
|
|
|
|
)
|
|
|
|
.build()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// pub fn parse_datetime(s: &str) -> Option<DateTime<FixedOffset>> {
|
|
|
|
// DateTime::<FixedOffset>::from(s)
|
|
|
|
// .ok()
|
|
|
|
// .map(|d| d.with_timezone(&Utc.fix()))
|
|
|
|
// }
|