Compare commits

..

1 Commits

Author SHA1 Message Date
Jacob Kiers 3df9434a90 WIP: Message store
Signed-off-by: Jacob Kiers <jacob@jacobkiers.net>
2022-08-06 21:32:56 +02:00
24 changed files with 510 additions and 1801 deletions

View File

@ -1,3 +0,0 @@
[profile.release]
lto = "thin"
strip = true

View File

@ -1,119 +0,0 @@
// Base name of the binary produced by the build.
local executableName = 'newsletter-to-web';
// Image used by the wait, build and listing steps below.
local cross_image = 'img.kie.rs/jjkiers/rust-dind-cross:1.66-full';
// Build matrix: `target` is passed to `cross build --target`, `short` is used in step names.
local archs = [
  { target: 'aarch64-unknown-linux-musl', short: 'arm64-musl' },
  { target: 'x86_64-pc-windows-gnu', short: 'windows' },
  { target: 'x86_64-unknown-linux-musl', short: 'amd64-musl' },
];
// Name of the Drone step that builds `arch`; also used for `depends_on` wiring.
local getStepName(arch) = 'Build for ' + arch.short;
// std.findSubstr takes the pattern first and the string to search second.
// The original call had them swapped, which only worked because the short
// name is exactly 'windows'; a label such as 'windows-gnu' would have lost
// its '.exe' suffix.
local isWindows(arch) = std.length(std.findSubstr('windows', arch.short)) > 0;
// File extension of the produced executable for `arch`.
local exeSuffix(arch) = if isWindows(arch) then '.exe' else '';
// File name as it is produced in target/<triple>/release/.
local builtExecutableName(arch) = executableName + exeSuffix(arch);
// File name under artifacts/, made unique by embedding the target triple.
local targetExecutableName(arch) = executableName + '-' + arch.target + exeSuffix(arch);
// Returns one Drone step per entry in `archs`. Each step cross-compiles the
// release binary for its target, copies it into artifacts/ under a
// target-specific name and then empties that target's release directory.
local add_build_steps() =
  local dockerSocket = { name: 'dockersock', path: '/var/run' };
  std.map(
    function(arch) {
      name: getStepName(arch),
      image: cross_image,
      volumes: [dockerSocket],
      commands: [
        'echo Hello World from Jsonnet on ' + arch.target + '!',
        'cross build --release --target ' + arch.target,
        'cp target/' + arch.target + '/release/' + builtExecutableName(arch) + ' artifacts/' + targetExecutableName(arch),
        'rm -rf target/' + arch.target + '/release/*',
      ],
      environment: {
        CROSS_REMOTE: true,
      },
      // Every build waits for the step that confirms the Docker daemon is up.
      depends_on: ['Wait for Docker'],
    },
    archs
  );
// Drone pipeline: wait for the Docker service, run one build step per entry
// in `archs`, list the produced artifacts and, on tag/promote events, upload
// them as a Gitea release.
{
kind: 'pipeline',
type: 'docker',
name: 'default',
platform: {
arch: 'amd64',
},
steps:
[{
name: 'Wait for Docker',
image: cross_image,
commands: [
// artifacts/ is the directory the build steps copy their binaries into.
'mkdir artifacts',
'echo Using image: ' + cross_image,
// Retry once per second until `docker image ls` succeeds.
'while ! docker image ls; do sleep 1; done',
'cargo --version',
'rustc --version',
'docker info',
'docker pull hello-world:latest',
],
environment: {
CROSS_REMOTE: true,
},
volumes: [{
name: 'dockersock',
path: '/var/run',
}],
}] +
add_build_steps() +
[
{
name: 'Show built artifacts',
image: cross_image,
commands: [
'ls -lah artifacts',
],
// Runs only after every per-architecture build step.
depends_on: [getStepName(a) for a in archs],
},
{
name: 'Create release on gitea',
image: 'plugins/gitea-release',
settings: {
api_key: {
from_secret: 'gitea_token',
},
base_url: 'https://code.kiers.eu',
files: 'artifacts/*',
checksum: 'sha256',
},
// Releases are only created for tag and promote events.
when: {
event: ['tag', 'promote'],
},
depends_on: ['Show built artifacts'],
},
],
// Privileged docker:dind service; it mounts the same `dockersock` volume at
// /var/run as the steps above.
services: [{
name: 'docker',
image: 'docker:dind',
privileged: true,
volumes: [
{
name: 'dockersock',
path: '/var/run',
},
{
name: 'docker-storage',
path: '/var/lib/docker',
},
],
}],
// NOTE(review): only `dockersock` is declared here, although the service
// above also references a `docker-storage` volume - confirm this is intended.
volumes: [
{
name: 'dockersock',
temp: {},
},
],
image_pull_secrets: ['docker_private_repo'],
}

13
.drone.yml Normal file
View File

@ -0,0 +1,13 @@
kind: pipeline
name: test-on-amd64
platform:
arch: amd64
steps:
- name: test
image: rust:1.62
commands:
- cargo build --verbose --all
- cargo test --verbose --all

1
.gitignore vendored
View File

@ -1,4 +1,3 @@
/target
/data
/output
/tests

View File

@ -1,44 +0,0 @@
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
## [0.2.3] - 2022-12-29
### Added
* Added `update` subcommand to update to the latest version.
### Fixed
* Truncate feed file before writing, to prevent corruption from leftover data.
* Ensure the feed file name is part of the self URL. This was still hardcoded to `feed.atom`.
## [0.2.2] - 2022-12-16
### Changed
* Updated build pipeline to generate much smaller binaries
## [0.2.1] - 2022-12-13
### Changed
* Updated the [mail-parser](https://docs.rs/crate/mail-parser/0.8.0) crate to v0.8.0.
* Removed two builds from the build pipeline.
## [0.2.0] - 2022-11-30
### Added
* A style sheet is added to the feed for easy readability in a web browser (#4).
* The output directory will now contain an index.html file with some information.
### Changed
* By default, the name of the feed is now feed.xml instead of feed.atom.

1122
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,25 +1,5 @@
[package]
name = "newsletter-to-web"
version = "0.2.3"
edition = "2021"
description = "Converts email newsletters to static HTML files"
homepage = "https://code.kiers.eu/newsletter-to-web/newsletter-to-web"
authors = [
"Jacob Kiers <code@kiers.eu>"
[workspace]
members = [
"bin",
]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
atom_syndication = "^0.11.0"
base16ct = { version = "^0.1.0", features = [ "alloc" ] }
chrono = "^0.4"
clap = { version = "^4.0.22", features = [ "derive" ] }
imap = { version = "^2.4.1", default-features = false }
mail-parser = "^0.8.0"
rustls-connector = { version = "^0.16.1", default-features = false, features = [ "webpki-roots-certs", "quic" ] }
sha2 = "^0.10.2"
self_update = { version = "0.33.0", default-features = false, features = ["rustls"] }
[patch.crates-io]
atom_syndication = { git = "https://github.com/rust-syndication/atom", rev = "5cf8d161e5e5af7d93cca8d2c117b7af879a99b7" }

View File

@ -1,41 +1,6 @@
# Newsletter to Web
# Newsletter to HTML
Converts a newsletter to an Atom feed and static HTML files.
## Usage
Get the latest release [from the releases page](https://code.kiers.eu/newsletter-to-web/newsletter-to-web/releases/latest).
### Getting help
For help, use
* `newsletter-to-web help`
* `newsletter-to-web help <subcommand>`.
### Basic usage
First, download all messages from the IMAP mail server
and store them in the `data/` directory:
```sh
newsletter-to-web fetch-from-imap -s <imap.example.com> -u <email@example.com> -p <password>
```
Then, convert them to an Atom feed, using
`newsletters.example.org` as the base domain:
```sh
newsletter-to-web build-feed --include-html newsletters.example.org
```
This will put the output in the `output/` directory. The Atom
feed will be in `output/feed.xml`, together with a very simple
`index.html` file pointing to the feed. It will also add an HTML
file for every email with the HTML content.
The feed will already contain the full HTML, so it can easily be
read from a feed reader.
Converts a newsletter to static HTML files.
## Features

18
bin/Cargo.toml Normal file
View File

@ -0,0 +1,18 @@
[package]
name = "newsletter-to-web"
version = "0.1.0"
edition = "2021"
description = "Converts email newsletters to static HTML files"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
atom_syndication = "^0.11.0"
base16ct = { version = "^0.1.0", features = [ "alloc" ] }
bincode = "^1.3.3"
chrono = "^0.4"
imap = { version = "^2.4.1", default-features = false }
mail-parser = "^0.5.0"
rustls-connector = { version = "^0.16.1", default-features = false, features = [ "webpki-roots-certs", "quic" ] }
sha2 = "^0.10.2"
sled = "^0.34.7"

230
bin/src/main.rs Normal file
View File

@ -0,0 +1,230 @@
mod message_reader;
mod storage;
use std::{
fs::{File, OpenOptions},
io::Write,
path::{Path, PathBuf}, error::Error,
};
use atom_syndication::{
ContentBuilder, Entry, EntryBuilder, Feed, FeedBuilder, Generator, LinkBuilder, Person,
};
use chrono::{DateTime, TimeZone, Utc};
use mail_parser::{HeaderValue, Message as MpMessage};
use sha2::{Digest, Sha256};
use message_reader::{EmailReader, TestMessagesReader};
use storage::Store;
/// A raw e-mail message together with the identifier it was read under.
pub struct Message {
    /// Identifier assigned by the reader that produced this message.
    uid: String,
    /// Unparsed message bytes, handed to the mail parser on demand.
    data: Vec<u8>,
}

impl Message {
    /// Wraps the raw bytes `data` under the identifier `uid`.
    pub fn new(uid: String, data: Vec<u8>) -> Self {
        Self { uid, data }
    }

    /// Parses the stored bytes on every call and returns whatever
    /// `MpMessage::parse` yields for them.
    pub(crate) fn get_parsed(&self) -> Option<MpMessage> {
        MpMessage::parse(self.data.as_slice())
    }

    /// Identifier this message was created with.
    pub fn get_uid(&self) -> &String {
        &self.uid
    }
}
/// Reads every message from `tests/data`, stores it in the mail database,
/// writes its processed HTML body to `output/` and finally writes the Atom
/// feed to `output/feed.atom`.
///
/// Errors from the directory creation, the store and the feed writer are
/// returned; a message that cannot be parsed or lacks an HTML body still
/// panics, as before.
fn main() -> Result<(), Box<dyn Error>> {
    let dir = Path::new("output");
    // create_dir_all succeeds when the directory already exists, so no
    // separate (racy) existence check is needed.
    std::fs::create_dir_all(dir)?;

    let mut feed = build_atom_feed();

    let mut reader = TestMessagesReader::new(PathBuf::from("tests/data"));
    let store = Store::load_database_for_mailbox("newsletters@kie.rs")?;

    for msg in reader.read_rfc822_messages() {
        println!("Processing message {}", msg.get_uid());
        store.store_mail(&msg)?;

        let parsed = msg.get_parsed().expect("A parsed messsage.");
        let html_body = parsed.get_html_body(0).expect("Could not read html body");
        let processed_html = process_html(&html_body).expect("Could not process the HTML");

        let html_path = dir.join(get_path(&parsed, &msg));
        println!("Storing to {}", html_path.display());

        add_entry_to_feed(&mut feed, &msg, &processed_html);

        // Without `truncate`, rewriting an existing file with shorter content
        // would leave the tail of the old content behind.
        OpenOptions::new()
            .write(true)
            .create(true)
            .truncate(true)
            .open(&html_path)
            .unwrap_or_else(|e| {
                panic!(
                    "Could not open file '{}' for writing: {}",
                    html_path.display(),
                    e
                )
            })
            .write_all(processed_html.as_bytes())
            .unwrap_or_else(|e| {
                panic!(
                    "Could not write html to file '{}': {}",
                    html_path.display(),
                    e
                )
            });

        println!();
    }

    if !feed.entries.is_empty() {
        feed.set_updated(Utc::now());
        // Propagate failures instead of silently dropping them: a feed that
        // could not be written should not look like a successful run.
        feed.write_to(File::create(dir.join("feed.atom"))?)?;
    }

    Ok(())
}
fn add_entry_to_feed(feed: &mut Feed, message: &Message, processed_html: &String) {
let parsed = message.get_parsed().unwrap();
let date = parsed.get_date().expect("Could not extract date");
let from = match parsed.get_from() {
HeaderValue::Address(e) => e,
_ => return,
};
let path = get_path(&parsed, message);
let url = format!("https://newsletters.kiers.eu/{}", &path);
let mut entry : Entry = Newsletter {
author: Person {
name: match &from.name {
Some(n) => n.to_string(),
_ => match &from.address {
Some(e) => e.to_string(),
_ => "".to_string(),
},
},
email: match &from.address {
Some(e) => Some(e.to_string()),
_ => None,
},
uri: None,
},
title: parsed
.get_subject()
.expect("Expected a subject")
.to_string(),
content: Some(processed_html.clone()),
id: url.clone(),
published: Utc.timestamp(date.to_timestamp(), 0), //(format!("{}{}", &date.to_iso8601(), "+00:00").as_str()).`unwrap(),
url: url,
}
.into();
entry.set_updated(Utc.timestamp(date.to_timestamp(), 0));
feed.entries.push(entry);
}
/// Derives the output file name for a message:
/// `<uid, zero-padded to 5 digits>_<YYYYMMDDhhmmss>_<sha256 of the HTML body>.html`.
///
/// Panics when the message has no date, no HTML body, or a uid that does not
/// parse as an `i32`.
fn get_path(parsed: &MpMessage, msg: &Message) -> String {
    let date = parsed.get_date().expect("Could not extract date");
    let timestamp = format!(
        "{:04}{:02}{:02}{:02}{:02}{:02}",
        date.year, date.month, date.day, date.hour, date.minute, date.second
    );

    let body = parsed.get_html_body(0).expect("Expected a body");
    let digest = base16ct::lower::encode_string(&Sha256::digest(body.as_bytes()));

    let numeric_uid = msg
        .get_uid()
        .parse::<i32>()
        .expect("Could not convert message uid to an i32.");

    format!("{:05}_{}_{}.html", numeric_uid, timestamp, digest)
}
/// Returns `input` with every occurrence of the text `src` replaced by
/// `data-source`. The replacement is purely textual and never fails.
fn process_html(input: &str) -> Result<String, ()> {
    let rewritten = input.replace("src", "data-source");
    Ok(rewritten)
}
/// Creates the empty feed skeleton: title, id, an `alternate` link to the
/// site root, a `self` link to the feed file and the generator tag.
fn build_atom_feed() -> Feed {
    let site_link = LinkBuilder::default()
        .href("https://newsletters.kiers.eu/")
        .rel("alternate")
        .build();
    let feed_link = LinkBuilder::default()
        .href("https://newsletters.kiers.eu/feed.atom")
        .rel("self")
        .build();
    let generator = Generator {
        value: String::from("newsletter-to-web"),
        uri: None,
        version: Some(String::from("0.0.1")),
    };

    FeedBuilder::default()
        .title("JJKiers Newsletters")
        .id("https://newsletters.kiers.eu/feed.atom")
        .link(site_link)
        .link(feed_link)
        .generator(generator)
        .build()
}
fn _write_to_test_path(msg: &Message) {
let test_path: PathBuf = [
Path::new("tests/data"),
Path::new(&format!("{}.eml", &msg.get_uid())),
]
.iter()
.collect();
let _ = OpenOptions::new()
.write(true)
.create(true)
.open(test_path)
.expect("Could not open file fir writing")
.write_all(&msg.data);
}
//#[derive(Serialize, Deserialize, Debug)]
/// Intermediate representation of one newsletter issue; converted into an
/// Atom `Entry` through the `From<Newsletter>` implementation.
struct Newsletter {
/// Used as the Atom entry id.
id: String,
/// Target of the entry's `alternate` link.
url: String,
/// Entry title.
title: String,
/// HTML content of the entry, if any.
content: Option<String>,
/// Entry author.
author: Person,
/// Publication time of the entry.
published: DateTime<Utc>,
}
impl From<Newsletter> for Entry {
fn from(post: Newsletter) -> Self {
let content = post.content.map(|v| {
ContentBuilder::default()
.value(v)
.content_type(Some("html".to_string()))
.build()
});
EntryBuilder::default()
.title(post.title)
.id(post.id)
.published(Some(post.published.clone().into()))
.author(post.author.into())
.content(content)
.link(
LinkBuilder::default()
.href(post.url)
.rel("alternate")
.build(),
)
.build()
}
}
// pub fn parse_datetime(s: &str) -> Option<DateTime<FixedOffset>> {
// DateTime::<FixedOffset>::from(s)
// .ok()
// .map(|d| d.with_timezone(&Utc.fix()))
// }

View File

@ -12,29 +12,25 @@ use rustls_connector::RustlsConnector;
use crate::Message;
pub(crate) trait EmailReader {
pub trait EmailReader {
fn read_rfc822_messages(&mut self) -> Box<IntoIter<Message>>;
}
pub(crate) struct DataDirectoryMessageReader {
pub(crate) struct TestMessagesReader {
path: PathBuf,
}
impl DataDirectoryMessageReader {
impl TestMessagesReader {
pub fn new(path: PathBuf) -> Self {
DataDirectoryMessageReader { path }
TestMessagesReader { path }
}
}
impl EmailReader for DataDirectoryMessageReader {
impl EmailReader for TestMessagesReader {
fn read_rfc822_messages(&mut self) -> Box<IntoIter<Message>> {
println!("Reading files in {}", &self.path.display());
let reader = match read_dir(&self.path) {
Ok(r) => r,
Err(e) => {
dbg!(e);
return Box::new(Vec::new().into_iter());
}
Err(_) => return Box::new(Vec::new().into_iter()),
};
let items = reader
@ -44,25 +40,16 @@ impl EmailReader for DataDirectoryMessageReader {
Some(ext) => ext == "eml",
None => false,
})
.filter_map(|i| {
let uid = i
.path()
.file_stem()
.unwrap()
.to_owned()
.into_string()
.expect("Could not convert filename to string.")
.split('_')
.collect::<Vec<&str>>()[0]
.trim_start_matches('0')
.to_string();
.map(|i| {
let uid = i.path().file_stem().unwrap().to_owned();
if let Ok(data) = std::fs::read(i.path()) {
Some((uid, data))
Some((uid.into_string().unwrap(), data))
} else {
None
}
})
.filter(|i| i.is_some())
.map(|i| i.unwrap())
.map(|i| Message::new(i.0, i.1));
let iter = items.collect::<Vec<Message>>().into_iter();
@ -79,7 +66,7 @@ pub struct ImapReader {
}
impl ImapReader {
pub fn new(host: String, port: u16, username: String, password: String) -> Self {
pub fn _new(host: String, port: u16, username: String, password: String) -> Self {
ImapReader {
host,
port,
@ -148,15 +135,10 @@ impl EmailReader for ImapReader {
fn read_rfc822_messages(&mut self) -> Box<IntoIter<Message>> {
let msgs = match self.connect() {
Ok(m) => m,
Err(e) => {
dbg!(e);
return Box::new(Vec::new().into_iter());
}
Err(_) => return Box::new(Vec::new().into_iter()),
};
let items = msgs
.iter()
.map(|i| Message::new(i.0.to_owned(), i.1.to_owned()));
let items = msgs.iter().map(|i| Message::new(i.0.to_owned(), i.1.to_owned()));
let iter = items.collect::<Vec<Message>>().into_iter();

51
bin/src/storage.rs Normal file
View File

@ -0,0 +1,51 @@
use sled::{Db, Transactional};
use crate::Message;
use std::str::FromStr;
/// Handle to the mail database plus the mailbox whose tree is used for
/// storing messages.
pub(crate) struct Store {
// Database handle returned by `sled::open`.
db: Db,
// Mailbox name; used as the name of the sled tree that holds the messages.
mailbox: String,
}
// Result of an operation that only signals success or failure.
type ER = Result<(), sled::Error>;
// Result of a yes/no query against the database.
type BR = Result<bool, sled::Error>;
impl Store {
    /// Value stored under a uid in the `feed` tree to mark that message as
    /// being part of the feed.
    const IN_FEED_MARKER: &'static [u8] = &[1];

    /// Opens the sled database at `data/maildb` and binds the store to the
    /// given mailbox.
    pub fn load_database_for_mailbox<S: Into<String>>(mailbox: S) -> Result<Self, sled::Error> {
        let db = sled::open("data/maildb")?;

        Ok(Store {
            db,
            mailbox: mailbox.into(),
        })
    }

    /// Stores the raw bytes of `message` under its uid in the mailbox tree.
    pub fn store_mail(&self, message: &Message) -> ER {
        self.mb()?.insert(message.get_uid(), &*message.data)?;
        Ok(())
    }

    /// Reports whether a message with this uid is present in the mailbox tree.
    pub fn has_mail<S: Into<String>>(&self, uid: S) -> BR {
        self.mb()?.contains_key(uid.into())
    }

    /// Marks the message with this uid as included in the feed.
    pub fn mark_in_feed<S: Into<String>>(&self, uid: S) -> ER {
        self.feed()?.insert(uid.into(), Self::IN_FEED_MARKER)?;
        Ok(())
    }

    /// Reports whether the message with this uid was marked via
    /// `mark_in_feed`.
    ///
    /// The stored value is compared against the exact marker that
    /// `mark_in_feed` writes. Previously the raw byte was decoded through
    /// bincode and any value that did not decode as a `bool` caused a panic;
    /// such values are now simply reported as "not in feed".
    pub fn is_in_feed<S: Into<String>>(&self, uid: S) -> BR {
        let stored = self.feed()?.get(uid.into())?;
        Ok(stored.map_or(false, |v| &*v == Self::IN_FEED_MARKER))
    }

    /// Tree holding the messages of this store's mailbox.
    fn mb(&self) -> Result<sled::Tree, sled::Error> {
        self.db.open_tree(&self.mailbox)
    }

    /// Tree holding the "is in feed" markers.
    fn feed(&self) -> Result<sled::Tree, sled::Error> {
        self.db.open_tree("feed")
    }
}

View File

@ -1,78 +0,0 @@
<xsl:stylesheet
version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:atom="http://www.w3.org/2005/Atom"
exclude-result-prefixes="atom"
>
<xsl:output method="html" version="1.0" encoding="UTF-8" indent="yes"/>
<xsl:template match="/">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1"/>
<title>Web Feed • <xsl:value-of select="atom:feed/atom:title"/></title>
<style type="text/css">
body{max-width:768px;margin:0 auto;font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol";font-size:16px;line-height:1.5em}section{margin:30px 15px}h1{font-size:2em;margin:.67em 0;line-height:1.125em}h2{border-bottom:1px solid #eaecef;padding-bottom:.3em}.alert{background:#fff5b1;padding:4px 12px;margin:0 -12px}a{text-decoration:none}.entry h3{margin-bottom:0}.entry p{margin:4px 0}
</style>
</head>
<body>
<section>
<div class="alert">
<p><strong>This is a web feed</strong>, also known as an RSS feed. <strong>Subscribe</strong> by copying the URL from the address bar into your newsreader app.</p>
</div>
</section>
<section>
<xsl:apply-templates select="atom:feed" />
</section>
<section>
<h2>Recent Items</h2>
<p>Last updated on <xsl:apply-templates select="atom:feed/atom:updated" /></p>
<xsl:apply-templates select="atom:feed/atom:entry" />
</section>
</body>
</html>
</xsl:template>
<xsl:template match="atom:feed">
<h1><xsl:value-of select="atom:title"/>'s Web Feed Preview</h1>
<p>This RSS feed provides the latest posts from <xsl:value-of select="atom:title"/>'s blog.
<a class="head_link" target="_blank">
<xsl:attribute name="href">
<xsl:value-of select="atom:link[@rel='alternate']/@href"/>
</xsl:attribute>
Visit Website &#x2192;
</a>
</p>
<h2>What is an RSS feed?</h2>
<p>An RSS feed is a data format that contains the latest content from a website, blog, or podcast. You can use feeds to <strong>subscribe</strong> to websites and get the <strong>latest content in one place</strong>.</p>
<ul>
<li><strong>Feeds put you in control.</strong> Unlike social media apps, there is no algorithm deciding what you see or read. You always get the latest content from the creators you care about.</li>
<li><strong>Feeds are private by design.</strong> No one owns web feeds, so no one is harvesting your personal information and profiting by selling it to advertisers.</li>
<li><strong>Feeds are spam-proof.</strong> Had enough? Easy, just unsubscribe from the feed.</li>
</ul>
<p>All you need to do to get started is to add the URL (web address) for this feed to a special app called a newsreader. Visit <a href="https://aboutfeeds.com/">About Feeds</a> to get started with newsreaders and subscribing. It's free. </p>
</xsl:template>
<xsl:template match="atom:entry">
<div class="entry">
<h3>
<a target="_blank">
<xsl:attribute name="href">
<xsl:value-of select="atom:id"/>
</xsl:attribute>
<xsl:value-of select="atom:title"/>
</a>
</h3>
<p>
<xsl:value-of select="atom:summary" disable-output-escaping="yes" />
</p>
<small>
Published: <xsl:value-of select="atom:updated" />
</small>
</div>
</xsl:template>
</xsl:stylesheet>

View File

@ -1,17 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<title>Newsletters</title>
<link rel="alternate" type="application/atom+xml" title="ATOM feed" href="./{FEED}" />
</head>
<body>
<h1>Newsletters 2 Web</h1>
<p>The real feed can be found at <a href="./{FEED}">{FEED}</a></p>
<p>Nothing to see here.</p>
<p>This is a public landing page for an instance of the
<a href="https://code.kiers.eu/newsletter-to-web/newsletter-to-web">Newsletters 2 Web</a>
project.</p>
<p>It is only here to prevent getting a <em>Not Found</em> page.</p>
</body>
</html>

View File

@ -1,55 +0,0 @@
use std::path::PathBuf;
use clap::{Parser, Subcommand};
// Top-level command line definition, parsed through clap's derive API.
// Plain `//` comments are used on purpose so that the generated help text
// stays exactly as it is.
#[derive(Parser)]
#[clap(author, version, about, long_about = None)]
pub(crate) struct Cli {
// The subcommand selected by the user.
#[clap(subcommand)]
pub command: Command,
}
// All subcommands understood by the binary. The existing `///` comments
// below are left untouched; only `//` notes are added.
#[derive(Subcommand)]
pub(crate) enum Command {
/// Fetch emails from an IMAP server
FetchFromImap {
#[clap(short, long, value_parser)]
server: String,
// Defaults to 993 when not given.
#[clap(long, value_parser, default_value_t = 993)]
port: u16,
#[clap(short, long, value_parser)]
username: String,
#[clap(short, long, value_parser)]
password: String,
},
/// Fetch an email from a .eml file
FetchFromFile {
#[clap(value_parser)]
filename: PathBuf,
},
/// Build an ATOM feed containing the full contents of the email
BuildFeed {
/// Host name hosting the feed
hostname: String,
/// Feed file
#[clap(value_parser, default_value = "output/feed.xml")]
filename: PathBuf,
/// Create an HTML file for each message
#[clap(short, long, value_parser, default_value_t = false)]
include_html: bool,
},
/// Exports the emails as HTML files
ExportHtml {
/// The directory in which the emails will be stored
#[clap(value_parser, default_value = "output/")]
directory: PathBuf,
},
/// Fetches and exports all mails from an IMAP mailbox. Requires configuration
FetchAndExport {
/// The directory in which the output will be stored
#[clap(value_parser, default_value = "output/")]
directory: PathBuf,
},
/// Update to the latest version
Update,
}

View File

@ -1 +0,0 @@
pub(crate) mod update;

View File

@ -1,17 +0,0 @@
use self_update::cargo_crate_version;
use std::error::Error;
/// Updates the running binary through the Gitea release backend configured
/// for `code.kiers.eu` (owner and repository `newsletter-to-web`), showing
/// download progress, and prints the version reported by the update status.
pub(crate) fn self_update() -> Result<(), Box<dyn Error>> {
    let mut builder = self_update::backends::gitea::Update::configure();
    builder
        .with_host("https://code.kiers.eu")
        .repo_owner("newsletter-to-web")
        .repo_name("newsletter-to-web")
        .bin_name("newsletter-to-web")
        .show_download_progress(true)
        .current_version(cargo_crate_version!());

    let updater = builder.build()?;
    let outcome = updater.update()?;

    println!("Update status: `{}`!", outcome.version());
    Ok(())
}

View File

@ -1,17 +0,0 @@
mod debug;
pub(crate) fn write_to_test_path(msg: &Message) {
let test_path: PathBuf = [
Path::new("tests/data"),
Path::new(&format!("{}.eml", &msg.get_uid())),
]
.iter()
.collect();
let _ = OpenOptions::new()
.write(true)
.create(true)
.open(test_path)
.expect("Could not open file fir writing")
.write_all(&msg.data);
}

View File

@ -1,131 +0,0 @@
use crate::Message;
use atom_syndication::{
ContentBuilder, Entry, EntryBuilder, Feed, FeedBuilder, Generator, LinkBuilder, Person,
WriteConfig,
};
use chrono::{DateTime, TimeZone, Utc};
use mail_parser::HeaderValue;
pub(crate) fn add_entry_to_feed(
feed: &mut Feed,
message: &Message,
processed_html: &String,
hostname: &String,
include_html: bool,
) {
let parsed = message.get_parsed().unwrap();
let date = parsed.date().expect("Could not extract date");
let from = match parsed.from() {
HeaderValue::Address(e) => e,
_ => return,
};
let path = crate::get_path(&parsed, message);
let url = format!("https://{}/{}", hostname, &path);
let mut entry: Entry = Newsletter {
author: Person {
name: match &from.name {
Some(n) => n.to_string(),
_ => match &from.address {
Some(e) => e.to_string(),
_ => "".to_string(),
},
},
email: from.address.as_ref().map(|e| e.to_string()),
uri: None,
},
title: parsed.subject().expect("Expected a subject").to_string(),
content: Some(processed_html.clone()),
id: url.clone(),
published: Utc.timestamp_opt(date.to_timestamp(), 0).unwrap(),
url: match include_html {
true => url,
false => "".to_string(),
},
}
.into();
entry.set_updated(Utc.timestamp_opt(date.to_timestamp(), 0).unwrap());
feed.entries.push(entry);
}
/// Creates the empty feed skeleton for `hostname`: title, id, an `alternate`
/// link to `https://<hostname>/`, a `self` link to
/// `https://<hostname>/<feed_file>` and the generator tag.
pub(crate) fn build_atom_feed(hostname: &String, feed_file: &str) -> Feed {
    let feed_url = format!("https://{}/{}", hostname, feed_file);
    let site_url = format!("https://{}/", hostname);

    let site_link = LinkBuilder::default()
        .href(site_url)
        .rel("alternate".to_string())
        .build();
    let feed_link = LinkBuilder::default()
        .href(&feed_url)
        .rel("self".to_string())
        .build();
    let generator = Generator {
        value: String::from("newsletter-to-web"),
        uri: None,
        version: Some(String::from("0.0.1")),
    };

    FeedBuilder::default()
        .title("JJKiers Newsletters")
        .id(&feed_url)
        .link(site_link)
        .link(feed_link)
        .generator(generator)
        .build()
}
/// Serialises `feed` to `out`, preceded by a hand-written XML declaration and
/// an `xml-stylesheet` instruction pointing at `feed.xsl`.
///
/// The library's own document declaration is switched off because the
/// prologue is emitted here. Failures while writing the two prologue lines
/// are ignored; only the result of serialising the feed is returned.
pub(crate) fn write_feed<W: std::io::Write>(
    feed: Feed,
    mut out: W,
) -> Result<W, atom_syndication::Error> {
    let without_declaration = WriteConfig {
        write_document_declaration: false,
        ..Default::default()
    };

    let prologue = [
        r#"<?xml version="1.0"?>"#,
        r#"<?xml-stylesheet href="feed.xsl" type="text/xsl"?>"#,
    ];
    for line in prologue {
        let _ = writeln!(out, "{}", line);
    }

    feed.write_with_config(out, without_declaration)
}
//#[derive(Serialize, Deserialize, Debug)]
/// Intermediate representation of one newsletter issue; converted into an
/// Atom `Entry` through the `From<Newsletter>` implementation.
pub(crate) struct Newsletter {
/// Used as the Atom entry id.
id: String,
/// Target of the entry's `alternate` link; the link is only added when this
/// string is longer than one character.
url: String,
/// Entry title.
title: String,
/// HTML content of the entry, if any.
content: Option<String>,
/// Entry author.
author: Person,
/// Publication time of the entry.
published: DateTime<Utc>,
}
impl From<Newsletter> for Entry {
fn from(post: Newsletter) -> Self {
let content = post.content.map(|v| {
ContentBuilder::default()
.value(v)
.content_type(Some("html".to_string()))
.build()
});
let mut eb = EntryBuilder::default();
eb.title(post.title)
.id(post.id)
.published(Some(post.published.into()))
.author(post.author)
.content(content);
if post.url.len() > 1 {
eb.link(
LinkBuilder::default()
.href(post.url)
.rel("alternate".to_string())
.build(),
);
}
eb.build()
}
}

View File

@ -1,222 +0,0 @@
#[warn(missing_docs)]
#[doc = include_str!("../README.md")]
mod cli;
mod command;
mod feed;
mod message;
mod message_reader;
use chrono::Utc;
use clap::Parser;
use mail_parser::Message as ParsedMessage;
use message_reader::{DataDirectoryMessageReader, EmailReader, ImapReader};
use sha2::{Digest, Sha256};
use std::{
error::Error,
fs::OpenOptions,
io::Write,
path::{Path, PathBuf},
};
pub(crate) use message::Message;
const INDEX_HTML: &str = include_str!("../resources/index.html");
const FEED_STYLESHEET: &str = include_str!("../resources/feed.xsl");
/// Parses the command line and dispatches to the matching command.
///
/// Only `fetch-from-imap`, `build-feed` and `update` are wired up; every
/// other subcommand hits `unimplemented!`.
fn main() -> Result<(), Box<dyn Error>> {
    let cli = cli::Cli::parse();

    // Directory in which fetched messages are stored.
    let data_directory = "data";

    match &cli.command {
        cli::Command::FetchFromImap {
            server,
            port,
            username,
            password,
        } => fetch_from_imap(
            data_directory,
            server.clone(),
            *port,
            username.clone(),
            password.clone(),
        ),

        cli::Command::BuildFeed {
            filename,
            hostname,
            include_html,
        } => build_feed(filename, hostname, *include_html),

        cli::Command::Update => command::update::self_update(),

        _ => unimplemented!("This method is not yet implemented."),
    }
}
/// Ensures that `dir` exists as a directory, creating it and any missing
/// parent directories.
///
/// Succeeds when the directory is already present and when `dir` is the empty
/// path (the parent of a bare file name such as `feed.xml`). The previous
/// `exists()` + `create_dir` combination failed for nested paths and for the
/// empty path, and could race with a concurrent creator. If `dir` exists but
/// is not a directory, an error is returned.
fn create_directory<P: AsRef<Path>>(dir: P) -> Result<(), std::io::Error> {
    std::fs::create_dir_all(dir)
}
/// Builds the Atom feed from the messages in the `data` directory.
///
/// `filename` is the feed file to write, `hostname` is used for the URLs in
/// the feed, and `include_html` additionally writes one HTML file per message
/// next to the feed. When at least one entry was produced, the feed, the
/// `feed.xsl` stylesheet and an `index.html` landing page are written to the
/// feed's directory.
///
/// Returns an error when the parent directory cannot be determined or
/// created, when a message has no date, or when writing fails. A message that
/// cannot be parsed or has no HTML body causes a panic.
fn build_feed(
filename: &PathBuf,
hostname: &String,
include_html: bool,
) -> Result<(), Box<dyn Error>> {
// All output files are placed in the directory that contains the feed file.
let dir = filename.parent().ok_or(format!(
"Could not get parent directory of {}",
filename.display()
))?;
println!(
"Building the feed to {} in {}/",
filename.display(),
dir.display()
);
create_directory(dir)?;
// Bare file name of the feed; used for the feed's self link and in index.html.
let feed_file = filename
.file_name()
.expect("Feed path should have a file name")
.to_str()
.expect("Feed path should be printable.");
let mut feed = feed::build_atom_feed(&hostname, feed_file);
let mut reader = DataDirectoryMessageReader::new(Path::new("data").to_path_buf());
for msg in reader.read_rfc822_messages() {
let parsed = msg.get_parsed().expect("A parsed messsage.");
let date = parsed.date().ok_or(format!(
"Could not get the date of message {}",
msg.get_uid()
))?;
let subject = parsed.subject().unwrap_or("No subject");
println!(
"Processing message {} from {} with subject {}",
msg.get_uid(),
date,
subject
);
let html_body = parsed.body_html(0).expect("Could not read html body");
let processed_html = process_html(&html_body).expect("Could not process the HTML");
// The per-message HTML file is optional; the feed entry is always added.
if include_html {
let path: PathBuf = [dir, Path::new(&get_path(&parsed, &msg))].iter().collect();
write_file(&path, processed_html.as_bytes())?;
}
feed::add_entry_to_feed(&mut feed, &msg, &processed_html, &hostname, include_html);
}
// Nothing is written when no message produced an entry.
if !feed.entries.is_empty() {
feed.set_updated(Utc::now());
println!("Writing feed to {}", filename.display());
// NOTE(review): a failure to open the feed file panics here instead of being returned.
feed::write_feed(feed, open_file(filename).unwrap())?;
write_file(dir.join("feed.xsl"), FEED_STYLESHEET)?;
write_file(
dir.join("index.html"),
INDEX_HTML.replace("{FEED}", feed_file),
)?;
}
println!("Finished building the feed.");
Ok(())
}
fn fetch_from_imap(
data_directory: &str,
server: String,
port: u16,
username: String,
password: String,
) -> Result<(), Box<dyn Error>> {
create_directory(data_directory)?;
print!("Getting mail from {} for mailbox {}", server, username);
let mut reader = ImapReader::new(server, port, username, password);
for msg in reader.read_rfc822_messages() {
let parsed = msg.get_parsed().ok_or(format!(
"Could not parse the message with id {}",
msg.get_uid()
))?;
let date = parsed.date().ok_or(format!(
"Could not get the date of message {}",
msg.get_uid()
))?;
let subject = parsed.subject().unwrap_or("No subject");
println!(
"Processing message {} from {} with subject {}",
msg.get_uid(),
date,
subject
);
let path = get_path(&parsed, &msg);
let html_path: PathBuf = [
Path::new(data_directory),
Path::new(&format!("{}.eml", path)),
]
.iter()
.collect();
println!("Storing to {}", &html_path.display());
write_file(&html_path, msg.get_data())?;
}
Ok(())
}
/// Derives the output file name for a message:
/// `<uid, zero-padded to 5 digits>_<YYYYMMDDhhmmss>_<sha256 of the HTML body>.html`.
///
/// Panics when the message has no date, no HTML body, or a uid that does not
/// parse as an `i32`.
fn get_path(parsed: &ParsedMessage, msg: &Message) -> String {
    let date = parsed.date().expect("Could not extract date");
    let timestamp = format!(
        "{:04}{:02}{:02}{:02}{:02}{:02}",
        date.year, date.month, date.day, date.hour, date.minute, date.second
    );

    let body = parsed.body_html(0).expect("Expected a body");
    let digest = base16ct::lower::encode_string(&Sha256::digest(body.as_bytes()));

    let numeric_uid = match msg.get_uid().parse::<i32>() {
        Ok(value) => value,
        Err(_) => panic!("Could not convert message id {} to an i32.", msg.get_uid()),
    };

    format!("{:05}_{}_{}.html", numeric_uid, timestamp, digest)
}
/// Returns `input` with every occurrence of the text `src` replaced by
/// `data-source`. The replacement is purely textual and never fails.
fn process_html(input: &str) -> Result<String, ()> {
    let rewritten = input.replace("src", "data-source");
    Ok(rewritten)
}
/// Opens `path` for writing, creating the file when it is missing and
/// discarding any previous contents.
fn open_file<P: Into<PathBuf>>(path: P) -> std::io::Result<std::fs::File> {
    let target: PathBuf = path.into();

    let mut options = OpenOptions::new();
    options.write(true).truncate(true).create(true);
    options.open(target)
}
/// Writes `data` to `path`, replacing any existing file.
///
/// Both failure modes are reported through the returned `io::Result`. A
/// failure to open the file used to panic even though the function returns a
/// `Result`; it is now returned as an error of the same kind whose message
/// names the offending path.
fn write_file<P: Into<PathBuf>, D: AsRef<[u8]>>(path: P, data: D) -> Result<(), std::io::Error> {
    let path: PathBuf = path.into();

    let mut file = open_file(path.clone()).map_err(|err| {
        std::io::Error::new(
            err.kind(),
            format!(
                "Could not open file '{}' for writing: {}",
                path.display(),
                err
            ),
        )
    })?;

    file.write_all(data.as_ref())
}

View File

@ -1,24 +0,0 @@
use mail_parser::Message as ParsedMessage;
/// A raw e-mail message together with the identifier it was read under.
pub(crate) struct Message {
    /// Identifier assigned by the reader that produced this message.
    uid: String,
    /// Unparsed message bytes, handed to the mail parser on demand.
    data: Vec<u8>,
}

impl Message {
    /// Wraps the raw bytes `data` under the identifier `uid`.
    pub fn new(uid: String, data: Vec<u8>) -> Self {
        Self { uid, data }
    }

    /// Parses the stored bytes on every call and returns whatever
    /// `ParsedMessage::parse` yields for them.
    pub(crate) fn get_parsed(&self) -> Option<ParsedMessage> {
        ParsedMessage::parse(self.data.as_slice())
    }

    /// Identifier this message was created with.
    pub fn get_uid(&self) -> &String {
        &self.uid
    }

    /// The raw, unparsed message bytes.
    pub fn get_data(&self) -> &Vec<u8> {
        &self.data
    }
}

View File

@ -1,18 +0,0 @@
#!/bin/bash
set -euo pipefail
N2W='/path/to/newsletter-to-web'
# I used davfs2 to mount a DAV filesystem, so that the feed is
# automatically published.
#
# This symlinked the output/ folder to dav/
#mount $(pwd)/dav || true
# Change the credentials
${N2W} fetch-from-imap -s mail.example.org --username newsletters@example.org --password 'SuperSecret'
# -i: Also create an HTML file for each message
# newsletters.example.org: hostname where the feed is hosted
${N2W} build-feed -i newsletters.example.org

View File

@ -1,7 +0,0 @@
[Unit]
Description=Create newsletter feed
[Service]
Type=oneshot
WorkingDirectory=/home/n2w
ExecStart=/home/n2w/build-feed.sh

View File

@ -1,10 +0,0 @@
[Unit]
Description=Publish newsletters
[Timer]
# Run two times a day, at 09:38 and 21:38
OnCalendar=*-*-* 09/12:38:00
Persistent=true
[Install]
WantedBy=default.target