Generate an Atom feed from the newsletters
continuous-integration/drone/pr Build encountered an error Details
continuous-integration/drone/push Build is passing Details

Signed-off-by: Jacob Kiers <jacob@jacobkiers.net>
This commit is contained in:
Jacob Kiers 2022-08-05 02:27:10 +02:00
parent 144df61af2
commit 118a30df67
4 changed files with 280 additions and 30 deletions

167
Cargo.lock generated
View File

@ -17,6 +17,19 @@ version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
[[package]]
name = "atom_syndication"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21fb6a0b39c6517edafe46f8137e53c51742425a4dae1c73ee12264a37ad7541"
dependencies = [
"chrono",
"derive_builder",
"diligent-date-parser",
"never",
"quick-xml",
]
[[package]]
name = "autocfg"
version = "1.1.0"
@ -76,14 +89,15 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.19"
version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
checksum = "6127248204b9aba09a362f6c930ef6a78f2c1b2215f8a7b398c06e1083f17af0"
dependencies = [
"libc",
"js-sys",
"num-integer",
"num-traits",
"time",
"wasm-bindgen",
"winapi",
]
@ -106,6 +120,72 @@ dependencies = [
"typenum",
]
[[package]]
name = "darling"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f2c43f534ea4b0b049015d00269734195e6d3f0f6635cb692251aca6f9f8b3c"
dependencies = [
"darling_core",
"darling_macro",
]
[[package]]
name = "darling_core"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e91455b86830a1c21799d94524df0845183fa55bafd9aa137b01c7d1065fa36"
dependencies = [
"fnv",
"ident_case",
"proc-macro2",
"quote",
"strsim",
"syn",
]
[[package]]
name = "darling_macro"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29b5acf0dea37a7f66f7b25d2c5e93fd46f8f6968b1a5d7a3e02e97768afc95a"
dependencies = [
"darling_core",
"quote",
"syn",
]
[[package]]
name = "derive_builder"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d13202debe11181040ae9063d739fa32cfcaaebe2275fe387703460ae2365b30"
dependencies = [
"derive_builder_macro",
]
[[package]]
name = "derive_builder_core"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "66e616858f6187ed828df7c64a6d71720d83767a7f19740b2d1b6fe6327b36e5"
dependencies = [
"darling",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "derive_builder_macro"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58a94ace95092c5acb1e97a7e846b310cfbd499652f72297da7493f618a98d73"
dependencies = [
"derive_builder_core",
"syn",
]
[[package]]
name = "digest"
version = "0.10.3"
@ -116,6 +196,15 @@ dependencies = [
"crypto-common",
]
[[package]]
name = "diligent-date-parser"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2d0fd95c7c02e2d6c588c6c5628466fff9bdde4b8c6196465e087b08e792720"
dependencies = [
"chrono",
]
[[package]]
name = "encoding_rs"
version = "0.8.31"
@ -125,6 +214,12 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "generic-array"
version = "0.14.6"
@ -135,6 +230,12 @@ dependencies = [
"version_check",
]
[[package]]
name = "ident_case"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "imap"
version = "2.4.1"
@ -189,9 +290,9 @@ dependencies = [
[[package]]
name = "libc"
version = "0.2.126"
version = "0.2.127"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
checksum = "505e71a4706fa491e9b1b55f51b95d4037d0821ee40131190475f692b35b009b"
[[package]]
name = "log"
@ -218,11 +319,19 @@ version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "never"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c96aba5aa877601bb3f6dd6a63a969e1f82e60646e81e71b14496995e9853c91"
[[package]]
name = "newsletter-to-web"
version = "0.1.0"
dependencies = [
"atom_syndication",
"base16ct",
"chrono",
"imap",
"mail-parser",
"rustls-connector",
@ -267,18 +376,28 @@ checksum = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1"
[[package]]
name = "proc-macro2"
version = "1.0.42"
version = "1.0.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c278e965f1d8cf32d6e0e96de3d3e79712178ae67986d9cf9151f51e95aac89b"
checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.20"
name = "quick-xml"
version = "0.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804"
checksum = "8533f14c8382aaad0d592c812ac3b826162128b65662331e1127b45c3d18536b"
dependencies = [
"encoding_rs",
"memchr",
]
[[package]]
name = "quote"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
dependencies = [
"proc-macro2",
]
@ -341,9 +460,9 @@ dependencies = [
[[package]]
name = "ryu"
version = "1.0.10"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695"
checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09"
[[package]]
name = "sct"
@ -357,18 +476,18 @@ dependencies = [
[[package]]
name = "serde"
version = "1.0.141"
version = "1.0.142"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7af873f2c95b99fcb0bd0fe622a43e29514658873c8ceba88c4cb88833a22500"
checksum = "e590c437916fb6b221e1d00df6e3294f3fccd70ca7e92541c475d6ed6ef5fee2"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.141"
version = "1.0.142"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75743a150d003dd863b51dc809bcad0d73f2102c53632f1e954e738192a3413f"
checksum = "34b5b8d809babe02f538c2cfec6f2c1ed10804c0e5a6a041a049a4f5588ccc2e"
dependencies = [
"proc-macro2",
"quote",
@ -399,10 +518,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "syn"
version = "1.0.98"
name = "strsim"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "syn"
version = "1.0.99"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58dbef6ec655055e20b86b15a8cc6d439cca19b667537ac6a1369572d151ab13"
dependencies = [
"proc-macro2",
"quote",
@ -428,9 +553,9 @@ checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987"
[[package]]
name = "unicode-ident"
version = "1.0.2"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15c61ba63f9235225a22310255a29b806b907c9b8c964bcbd0a2c70f3f2deea7"
checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf"
[[package]]
name = "untrusted"

View File

@ -19,6 +19,7 @@ Converts a newsletter to static HTML files.
* [X] Prevent loading of scripts / images without user interaction
### Create ATOM feed
* [X] Single feed for everything
* [ ] One feed per newsletter
* [ ] Index feed (containing all newsletters)?

View File

@ -7,7 +7,9 @@ description = "Converts email newsletters to static HTML files"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
atom_syndication = "^0.11.0"
base16ct = { version = "^0.1.0", features = [ "alloc" ] }
chrono = "^0.4"
imap = { version = "^2.4.1", default-features = false }
mail-parser = "^0.5.0"
rustls-connector = { version = "^0.16.1", default-features = false, features = [ "webpki-roots-certs", "quic" ] }

View File

@ -1,20 +1,26 @@
// extern crate atom_syndication;
// extern crate imap;
// extern crate mail_parser;
// extern crate rustls_connector;
// extern crate sha2;
mod message_reader;
use std::{
fs::OpenOptions,
borrow::Cow,
fs::{File, OpenOptions},
io::Write,
path::{Path, PathBuf},
};
use mail_parser::Message as MpMessage;
use atom_syndication::{
ContentBuilder, Entry, EntryBuilder, Feed, FeedBuilder, Generator, LinkBuilder, Person,
};
use chrono::{DateTime, FixedOffset, Offset, TimeZone, Utc};
use mail_parser::{HeaderName, HeaderValue, Message as MpMessage, RfcHeader};
use sha2::{Digest, Sha256};
extern crate imap;
extern crate mail_parser;
extern crate rustls_connector;
extern crate sha2;
use message_reader::{EmailReader, TestMessagesReader};
pub struct Message {
@ -42,6 +48,8 @@ fn main() {
std::fs::create_dir(&dir).expect("Could not create directory");
}
let mut feed = build_atom_feed();
let mut reader = TestMessagesReader::new((&Path::new("tests/data")).to_path_buf());
for msg in reader.read_rfc822_messages() {
@ -53,9 +61,12 @@ fn main() {
let processed_html = process_html(&html_body).expect("Could not process the HTML");
let html_bytes = processed_html.as_bytes();
let html_path: PathBuf = [dir, Path::new(&get_path(&parsed, &msg))].iter().collect();
let path = get_path(&parsed, &msg);
let html_path: PathBuf = [dir, Path::new(&path)].iter().collect();
println!("Storing to {}", &html_path.display());
add_entry_to_feed(&mut feed, &msg, &processed_html);
OpenOptions::new()
.write(true)
.create(true)
@ -66,6 +77,52 @@ fn main() {
println!();
}
if feed.entries.len() > 0 {
feed.set_updated(Utc::now());
let _ = feed.write_to(File::create(format!("{}/feed.atom", dir.display())).unwrap());
}
}
/// Appends one Atom entry describing `message` to `feed`.
///
/// The entry's author comes from the message's `From` header, its title from
/// the subject, and its `published` and `updated` timestamps from the message
/// date. `processed_html` becomes the entry content. The entry id and its
/// `alternate` link both point at the message's path (as returned by
/// `get_path`) below `https://newsletters.kiers.eu/`.
///
/// If the `From` header is not a `HeaderValue::Address`, the function returns
/// early and `feed` is left untouched.
///
/// # Panics
///
/// Panics if `message.get_parsed()` yields nothing, or if the parsed message
/// has no date or no subject.
fn add_entry_to_feed(feed: &mut Feed, message: &Message, processed_html: &str) {
    let parsed = message.get_parsed().unwrap();
    let date = parsed.get_date().expect("Could not extract date");
    let from = match parsed.get_from() {
        HeaderValue::Address(e) => e,
        _ => return,
    };

    let path = get_path(&parsed, message);
    let url = format!("https://newsletters.kiers.eu/{}", &path);

    let author = Person {
        // Prefer the display name; fall back to the bare address, then to "".
        name: from
            .name
            .as_ref()
            .or(from.address.as_ref())
            .map(|s| s.to_string())
            .unwrap_or_default(),
        email: from.address.as_ref().map(|e| e.to_string()),
        uri: None,
    };
    let title = parsed
        .get_subject()
        .expect("Expected a subject")
        .to_string();
    // Computed once: the same instant serves as both `published` and `updated`.
    let published = Utc.timestamp(date.to_timestamp(), 0);

    let mut entry: Entry = Newsletter {
        author,
        title,
        content: Some(processed_html.to_owned()),
        id: url.clone(),
        published,
        url,
    }
    .into();
    entry.set_updated(published);

    feed.entries.push(entry);
}
fn get_path(parsed: &MpMessage, msg: &Message) -> String {
@ -91,6 +148,30 @@ fn process_html(input: &str) -> Result<String, ()> {
Ok(input.replace("src", "data-source"))
}
/// Builds the empty top-level Atom feed for the newsletter site.
///
/// The feed carries a title, an id equal to the feed's own URL, an
/// `alternate` link to the site root, a `self` link to the feed document and
/// a generator element naming this tool. Entries are added by the caller.
fn build_atom_feed() -> Feed {
    let site_link = LinkBuilder::default()
        .href("https://newsletters.kiers.eu/")
        .rel("alternate")
        .build();
    let self_link = LinkBuilder::default()
        .href("https://newsletters.kiers.eu/feed.atom")
        .rel("self")
        .build();

    let generator = Generator {
        value: String::from("newsletter-to-web"),
        uri: None,
        // Take the version from the Cargo package metadata instead of a
        // hard-coded literal, so it cannot drift from the crate version.
        // `option_env!` keeps the code compiling when built without Cargo,
        // in which case the version attribute is simply omitted.
        version: option_env!("CARGO_PKG_VERSION").map(String::from),
    };

    FeedBuilder::default()
        .title("JJKiers Newsletters")
        .id("https://newsletters.kiers.eu/feed.atom")
        .link(site_link)
        .link(self_link)
        .generator(generator)
        .build()
}
fn write_to_test_path(msg: &Message) {
let test_path: PathBuf = [
Path::new("tests/data"),
@ -106,3 +187,44 @@ fn write_to_test_path(msg: &Message) {
.expect("Could not open file fir writing")
.write_all(&msg.data);
}
/// A single newsletter issue, holding the data needed to build one Atom
/// [`Entry`] (see the `From<Newsletter> for Entry` conversion).
#[derive(Debug, Clone)]
struct Newsletter {
    /// Identifier used as the Atom entry id.
    id: String,
    /// Address used as the entry's `alternate` link.
    url: String,
    /// Title of the entry.
    title: String,
    /// Entry body, emitted with content type `html`; `None` produces an entry
    /// without content.
    content: Option<String>,
    /// Author of the entry.
    author: Person,
    /// Publication time of the entry.
    published: DateTime<Utc>,
}
impl From<Newsletter> for Entry {
fn from(post: Newsletter) -> Self {
let content = post.content.map(|v| {
ContentBuilder::default()
.value(v)
.content_type(Some("html".to_string()))
.build()
});
EntryBuilder::default()
.title(post.title)
.id(post.id)
.published(Some(post.published.clone().into()))
.author(post.author.into())
.content(content)
.link(
LinkBuilder::default()
.href(post.url)
.rel("alternate")
.build(),
)
.build()
}
}
// pub fn parse_datetime(s: &str) -> Option<DateTime<FixedOffset>> {
// DateTime::<FixedOffset>::from(s)
// .ok()
// .map(|d| d.with_timezone(&Utc.fix()))
// }