Compare commits

...

8 Commits

Author SHA1 Message Date
4069d8ac31 Update changelog
All checks were successful
continuous-integration/drone/push Build is passing
Signed-off-by: Jacob Kiers <jacob@jacobkiers.net>
2022-12-27 23:00:45 +01:00
7407654e60 Fix CS and some other small things
Useful tool, that clippy...

Signed-off-by: Jacob Kiers <jacob@jacobkiers.net>
2022-12-27 23:00:45 +01:00
e7fd41ff95 Truncate the feed file before writing
Otherwise, the feed may be overwritten, but if it is shorter, it may
contain some leftover data from a previous run. In that case, the file
will be invalid XML, thereby failing to be parsed.

Signed-off-by: Jacob Kiers <jacob@jacobkiers.net>
2022-12-27 23:00:45 +01:00
c2d09621aa Add style sheet to the feed
Based on the code proposed in PR 70 of the atom-syndication crate.

Signed-off-by: Jacob Kiers <jacob@jacobkiers.net>
2022-12-27 22:59:45 +01:00
9129f7e11b Put real feed url into feed
Instead of hardcoding the feed file name to be feed.atom, it has been
configurable for a while. This is now also reflected in the feed itself.

Signed-off-by: Jacob Kiers <jacob@jacobkiers.net>
2022-12-27 22:50:50 +01:00
71371cb3e1 Go back to docker-in-docker
All checks were successful
continuous-integration/drone/push Build is passing
Signed-off-by: Jacob Kiers <jacob@jacobkiers.net>
2022-12-27 21:07:41 +01:00
f61e635721 Add link to releases page
All checks were successful
continuous-integration/drone/push Build is passing
Signed-off-by: Jacob Kiers <jacob@jacobkiers.net>
2022-12-27 12:27:27 +01:00
d3e4c9e790 Add usage instruction to README
All checks were successful
continuous-integration/drone/push Build is passing
Signed-off-by: Jacob Kiers <jacob@jacobkiers.net>
2022-12-27 12:21:21 +01:00
10 changed files with 144 additions and 76 deletions

View File

@ -19,7 +19,7 @@ local add_build_steps() = [
volumes: [
{
name: 'dockersock',
path: '/var/run/docker.sock',
path: '/var/run',
},
],
commands: [
@ -29,7 +29,7 @@ local add_build_steps() = [
'rm -rf target/' + arch.target + '/release/*',
],
environment: {
CROSS_DOCKER_IN_DOCKER: true,
CROSS_REMOTE: true,
},
depends_on: ['Wait for Docker'],
}
@ -57,11 +57,11 @@ local add_build_steps() = [
'docker pull hello-world:latest',
],
environment: {
CROSS_DOCKER_IN_DOCKER: true,
CROSS_REMOTE: true,
},
volumes: [{
name: 'dockersock',
path: '/var/run/docker.sock',
path: '/var/run',
}],
}] +
add_build_steps() +
@ -92,12 +92,26 @@ local add_build_steps() = [
},
],
services: [{
name: 'docker',
image: 'docker:dind',
privileged: true,
volumes: [
{
name: 'dockersock',
path: '/var/run',
},
{
name: 'docker-storage',
path: '/var/lib/docker',
},
],
}],
volumes: [
{
name: 'dockersock',
host: {
path: '/var/run/docker.sock',
},
temp: {},
},
],

View File

@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Fixed
* Truncate feed file before writing, to prevent corruption from leftover data.
* Ensure the feed file name is part of the self URL. This was still hardcoded to `feed.atom`.
## [0.2.2] - 2022-12-16
### Changed
@ -30,4 +35,4 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed
* By default the name of the feed is now feed.xml instead of feed.atom.
* By default, the name of the feed is now feed.xml instead of feed.atom.

3
Cargo.lock generated
View File

@ -29,8 +29,7 @@ checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
[[package]]
name = "atom_syndication"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21fb6a0b39c6517edafe46f8137e53c51742425a4dae1c73ee12264a37ad7541"
source = "git+https://github.com/jacobkiers/atom-syndication?rev=add7083b56b9d737f0fa1d3383aa82789b6c38ad#add7083b56b9d737f0fa1d3383aa82789b6c38ad"
dependencies = [
"chrono",
"derive_builder",

View File

@ -19,3 +19,6 @@ imap = { version = "^2.4.1", default-features = false }
mail-parser = "^0.8.0"
rustls-connector = { version = "^0.16.1", default-features = false, features = [ "webpki-roots-certs", "quic" ] }
sha2 = "^0.10.2"
[patch.crates-io]
atom_syndication = { git = "https://github.com/jacobkiers/atom-syndication", rev = "add7083b56b9d737f0fa1d3383aa82789b6c38ad" }

View File

@ -1,6 +1,41 @@
# Newsletter 2 Web
# Newsletter to Web
Converts a newsletter to static HTML files.
Converts a newsletter to an Atom feed and static HTML files.
## Usage
Get the latest release [from the releases page](https://code.kiers.eu/newsletter-to-web/newsletter-to-web/releases/latest).
### Getting help
For help, use
* `newsletter-to-web help`
* `newsletter-to-web help <subcommand>`.
### Basic usage
First, download all messages from the IMAP mail server
and store them in the `data/` directory:
```sh
newsletter-to-web fetch-from-imap -s <imap.example.com> -u <email@example.com> -p <password>
```
Then, convert them to an Atom feed, using
`newsletters.example.org` as the base domain:
```sh
newsletter-to-web --include-html build-feed newsletters.example.org
```
This will put the output in the `output/` directory. The Atom
feed will be in `output/feed.xml`, together with a very simple
`index.html` file pointing to the feed. It will also add an HTML
file for every email with the HTML content.
The feed will already contain the full HTML, so it can easily be
read from a feed reader.
## Features

View File

@ -26,6 +26,7 @@
</section>
<section>
<h2>Recent Items</h2>
<p>Last updated on <xsl:apply-templates select="atom:feed/atom:updated" /></p>
<xsl:apply-templates select="atom:feed/atom:entry" />
</section>
</body>

View File

@ -2,6 +2,7 @@ use crate::Message;
use atom_syndication::{
ContentBuilder, Entry, EntryBuilder, Feed, FeedBuilder, Generator, LinkBuilder, Person,
WriteConfig,
};
use chrono::{DateTime, TimeZone, Utc};
@ -34,19 +35,13 @@ pub(crate) fn add_entry_to_feed(
_ => "".to_string(),
},
},
email: match &from.address {
Some(e) => Some(e.to_string()),
_ => None,
},
email: from.address.as_ref().map(|e| e.to_string()),
uri: None,
},
title: parsed
.subject()
.expect("Expected a subject")
.to_string(),
title: parsed.subject().expect("Expected a subject").to_string(),
content: Some(processed_html.clone()),
id: url.clone(),
published: Utc.timestamp_opt(date.to_timestamp(), 0).unwrap(), //(format!("{}{}", &date.to_iso8601(), "+00:00").as_str()).`unwrap(),
published: Utc.timestamp_opt(date.to_timestamp(), 0).unwrap(),
url: match include_html {
true => url,
false => "".to_string(),
@ -57,10 +52,11 @@ pub(crate) fn add_entry_to_feed(
feed.entries.push(entry);
}
pub(crate) fn build_atom_feed(hostname: &String) -> Feed {
pub(crate) fn build_atom_feed(hostname: &String, feed_file: &str) -> Feed {
let feed_url = format!("https://{}/{}", hostname, feed_file);
FeedBuilder::default()
.title("JJKiers Newsletters")
.id(format!("https://{}/feed.atom", hostname))
.id(&feed_url)
.link(
LinkBuilder::default()
.href(format!("https://{}/", hostname))
@ -69,7 +65,7 @@ pub(crate) fn build_atom_feed(hostname: &String) -> Feed {
)
.link(
LinkBuilder::default()
.href(format!("https://{}/feed.atom", hostname))
.href(&feed_url)
.rel("self".to_string())
.build(),
)
@ -81,6 +77,19 @@ pub(crate) fn build_atom_feed(hostname: &String) -> Feed {
.build()
}
/// Serializes `feed` to `out`, preceded by a hand-written XML prologue.
///
/// Two lines are written before the feed itself: the XML declaration and an
/// `xml-stylesheet` processing instruction that points at `feed.xsl`. The
/// feed is then written with `write_document_declaration` turned off, so the
/// declaration is not emitted a second time.
///
/// Errors from writing the two prologue lines are discarded (`let _ =`);
/// only the result of `feed.write` is returned to the caller.
pub(crate) fn write_feed<W: std::io::Write>(
    feed: Feed,
    mut out: W,
) -> Result<W, atom_syndication::Error> {
    let prologue = [
        r#"<?xml version="1.0"?>"#,
        r#"<?xml-stylesheet href="feed.xsl" type="text/xsl"?>"#,
    ];
    for line in prologue.iter() {
        let _ = writeln!(out, "{}", line);
    }

    // The declaration was already written above, so suppress the built-in one.
    let config = WriteConfig {
        write_document_declaration: false,
        ..Default::default()
    };

    feed.write(out, config)
}
//#[derive(Serialize, Deserialize, Debug)]
pub(crate) struct Newsletter {
id: String,
@ -104,8 +113,8 @@ impl From<Newsletter> for Entry {
eb.title(post.title)
.id(post.id)
.published(Some(post.published.clone().into()))
.author(post.author.into())
.published(Some(post.published.into()))
.author(post.author)
.content(content);
if post.url.len() > 1 {

View File

@ -20,8 +20,8 @@ use std::{
pub(crate) use message::Message;
const INDEX_HTML: & 'static str = include_str!("../resources/index.html");
const FEED_STYLESHEET: & 'static str = include_str!("../resources/feed.xsl");
const INDEX_HTML: &str = include_str!("../resources/index.html");
const FEED_STYLESHEET: &str = include_str!("../resources/feed.xsl");
fn main() -> Result<(), Box<dyn Error>> {
let cli = cli::Cli::parse();
@ -41,7 +41,11 @@ fn main() -> Result<(), Box<dyn Error>> {
username.to_owned(),
password.to_owned(),
),
cli::Command::BuildFeed { filename , hostname, include_html } => build_feed(&filename, hostname.to_owned(), *include_html),
cli::Command::BuildFeed {
filename,
hostname,
include_html,
} => build_feed(filename, hostname, *include_html),
_ => unimplemented!("This method is not yet implemented."),
};
@ -56,7 +60,11 @@ fn create_directory<P: AsRef<Path>>(dir: P) -> Result<(), std::io::Error> {
Ok(())
}
fn build_feed(filename: &PathBuf, hostname: String, include_html: bool) -> Result<(), Box<dyn Error>> {
fn build_feed(
filename: &PathBuf,
hostname: &String,
include_html: bool,
) -> Result<(), Box<dyn Error>> {
let dir = filename.parent().ok_or(format!(
"Could not get parent directory of {}",
filename.display()
@ -70,9 +78,15 @@ fn build_feed(filename: &PathBuf, hostname: String, include_html: bool) -> Resul
create_directory(dir)?;
let mut feed = feed::build_atom_feed(&hostname);
let feed_file = filename
.file_name()
.expect("Feed path should have a file name")
.to_str()
.expect("Feed path should be printable.");
let mut reader = DataDirectoryMessageReader::new((&Path::new("data")).to_path_buf());
let mut feed = feed::build_atom_feed(&hostname, feed_file);
let mut reader = DataDirectoryMessageReader::new(Path::new("data").to_path_buf());
for msg in reader.read_rfc822_messages() {
let parsed = msg.get_parsed().expect("A parsed messsage.");
@ -82,15 +96,12 @@ fn build_feed(filename: &PathBuf, hostname: String, include_html: bool) -> Resul
msg.get_uid()
))?;
let subject = match parsed.subject() {
Some(subject) => subject,
None => "No subject",
};
let subject = parsed.subject().unwrap_or("No subject");
println!(
"Processing message {} from {} with subject {}",
msg.get_uid(),
date.to_string(),
date,
subject
);
@ -98,26 +109,23 @@ fn build_feed(filename: &PathBuf, hostname: String, include_html: bool) -> Resul
let processed_html = process_html(&html_body).expect("Could not process the HTML");
if include_html {
let path : PathBuf = [dir, Path::new(&get_path(&parsed, &msg))].iter().collect();
let path: PathBuf = [dir, Path::new(&get_path(&parsed, &msg))].iter().collect();
write_file(&path, processed_html.as_bytes())?;
}
feed::add_entry_to_feed(&mut feed, &msg, &processed_html, &hostname, include_html);
}
if feed.entries.len() > 0 {
if !feed.entries.is_empty() {
feed.set_updated(Utc::now());
println!("Writing feed to {}", filename.display());
// TODO: Ugly hack because atom_syndication crate does not support style sheets.
let feed_str = feed.to_string().as_str().replace(">\n<feed", ">\n<?xml-stylesheet href=\"feed.xsl\" type=\"text/xsl\"?>\n<feed");
let _ = write_file(filename, feed_str)?;
let _ = write_file(dir.join("feed.xsl"), FEED_STYLESHEET)?;
// Another ugly hack, but I don't know how to do this better...
let file_name = format!("{:?}", filename.file_name().unwrap()).replace('"', "");
write_file(dir.join("index.html"), INDEX_HTML.replace("{FEED}", file_name.as_str()))?;
feed::write_feed(feed, open_file(filename).unwrap())?;
write_file(dir.join("feed.xsl"), FEED_STYLESHEET)?;
write_file(
dir.join("index.html"),
INDEX_HTML.replace("{FEED}", feed_file),
)?;
}
println!("Finished building the feed.");
@ -136,12 +144,7 @@ fn fetch_from_imap(
print!("Getting mail from {} for mailbox {}", server, username);
let mut reader = ImapReader::new(
String::from(server),
port,
String::from(username),
String::from(password),
);
let mut reader = ImapReader::new(server, port, username, password);
for msg in reader.read_rfc822_messages() {
let parsed = msg.get_parsed().ok_or(format!(
@ -154,15 +157,12 @@ fn fetch_from_imap(
msg.get_uid()
))?;
let subject = match parsed.subject() {
Some(subject) => subject,
None => "No subject",
};
let subject = parsed.subject().unwrap_or("No subject");
println!(
"Processing message {} from {} with subject {}",
msg.get_uid(),
date.to_string(),
date,
subject
);
@ -182,8 +182,6 @@ fn fetch_from_imap(
Ok(())
}
fn get_path(parsed: &ParsedMessage, msg: &Message) -> String {
let date = parsed.date().expect("Could not extract date");
let date_str = format!(
@ -192,26 +190,32 @@ fn get_path(parsed: &ParsedMessage, msg: &Message) -> String {
);
let hash = base16ct::lower::encode_string(&Sha256::digest(
&parsed.body_html(0).expect("Expected a body").as_bytes(),
parsed.body_html(0).expect("Expected a body").as_bytes(),
));
let uid: i32 = msg.get_uid()
let uid: i32 = msg
.get_uid()
.parse()
.expect(&format!("Could not convert message id {} to an i32.", msg.get_uid()));
.unwrap_or_else(|_| panic!("Could not convert message id {} to an i32.", msg.get_uid()));
format!("{:05}_{}_{}.html", uid, date_str, &hash).to_owned()
format!("{:05}_{}_{}.html", uid, date_str, &hash)
}
/// Rewrites the HTML of a message before it is published.
///
/// Every occurrence of the substring `src` in `input` is replaced by
/// `data-source`. The replacement is purely textual: it is not limited to
/// attribute names. This function always returns `Ok`.
fn process_html(input: &str) -> Result<String, ()> {
    // Splitting on the needle and re-joining with the replacement yields the
    // same text as a plain substring replacement.
    let pieces: Vec<&str> = input.split("src").collect();
    let rewritten = pieces.join("data-source");
    Ok(rewritten)
}
fn write_file<P: Into<PathBuf>, D: AsRef<[u8]>>(html_path: P, data: D) -> Result<(), std::io::Error> {
let path : PathBuf = html_path.into();
fn open_file<P: Into<PathBuf>>(path: P) -> std::io::Result<std::fs::File> {
OpenOptions::new()
.write(true)
.truncate(true)
.create(true)
.open(&path)
.expect(format!("Could not open file '{}' for writing", &path.display()).as_str())
.open(path.into())
}
/// Writes `data` to the file at `path`.
///
/// The file is opened through `open_file` and then receives all of `data`
/// in one `write_all` call.
///
/// # Errors
///
/// Returns an `std::io::Error` when the file cannot be opened or when the
/// write fails. An open failure keeps the original error kind and carries a
/// message naming the path, so callers using `?` get a useful report instead
/// of a panic.
fn write_file<P: Into<PathBuf>, D: AsRef<[u8]>>(path: P, data: D) -> Result<(), std::io::Error> {
    let path: PathBuf = path.into();

    let mut file = open_file(&path).map_err(|err| {
        // Keep the path in the message: the bare OS error does not mention it.
        std::io::Error::new(
            err.kind(),
            format!(
                "Could not open file '{}' for writing: {}",
                path.display(),
                err
            ),
        )
    })?;

    file.write_all(data.as_ref())
}
}

View File

@ -21,4 +21,4 @@ impl Message {
pub fn get_data(&self) -> &Vec<u8> {
&self.data
}
}
}

View File

@ -44,7 +44,7 @@ impl EmailReader for DataDirectoryMessageReader {
Some(ext) => ext == "eml",
None => false,
})
.map(|i| {
.filter_map(|i| {
let uid = i
.path()
.file_stem()
@ -52,9 +52,9 @@ impl EmailReader for DataDirectoryMessageReader {
.to_owned()
.into_string()
.expect("Could not convert filename to string.")
.split("_")
.split('_')
.collect::<Vec<&str>>()[0]
.trim_start_matches("0")
.trim_start_matches('0')
.to_string();
if let Ok(data) = std::fs::read(i.path()) {
@ -63,8 +63,6 @@ impl EmailReader for DataDirectoryMessageReader {
None
}
})
.filter(|i| i.is_some())
.map(|i| i.unwrap())
.map(|i| Message::new(i.0, i.1));
let iter = items.collect::<Vec<Message>>().into_iter();