pleroma-archive/src/main.rs

297 lines
9.3 KiB
Rust
Raw Normal View History

2024-11-24 03:37:18 +00:00
use jiff::tz::TimeZone;
use jiff::Timestamp;
use pleroma::Activities;
use std::ffi::OsStr;
2024-11-24 03:37:18 +00:00
use std::sync::OnceLock;
use std::{
collections::HashMap,
env, fs,
2024-11-24 03:37:18 +00:00
fs::File,
io::{self, BufReader, BufWriter},
path::{Path, PathBuf},
process::ExitCode,
};
use url::Url;
// Project-local module providing the archive data types used below
// (`Actor`, `Activities`, `Activity`, and the `activity` submodule).
mod pleroma;
/// Catch-all boxed error type returned by `try_main`.
type BoxError = Box<dyn std::error::Error>;
/// Map from an id string to the public URL it resolved to.
///
/// A value of `None` records that resolution was attempted but no redirect
/// was obtained, so the lookup is not repeated.
type Mappings = HashMap<String, Option<Url>>;
/// Contents of `../style.css`, embedded into the binary at compile time.
const STYLE: &str = include_str!("../style.css");
2024-11-24 03:37:18 +00:00
/// Time zone used when formatting publication dates; initialised once in `main`.
static TZ: OnceLock<TimeZone> = OnceLock::new();
/// Final id -> public URL mappings; initialised once in `try_main` and read
/// afterwards when rendering (readers `unwrap` it, so it must be set first).
static MAPPINGS: OnceLock<Mappings> = OnceLock::new();
/// Program entry point.
///
/// Expects exactly two command-line arguments: the path to the Pleroma
/// archive directory and the output directory. Prints a usage message and
/// fails if the argument count is wrong; otherwise records the system time
/// zone in `TZ` and delegates to `try_main`, reporting any error on stderr.
fn main() -> ExitCode {
    let cli_args: Vec<_> = env::args_os().skip(1).collect();

    // Anything other than exactly two arguments is a usage error.
    let [archive, output] = cli_args.as_slice() else {
        eprintln!("Usage: pleroma-archive path/to/pleroma-archive output/path");
        return ExitCode::FAILURE;
    };
    let archive_path = Path::new(archive);
    let output_path = Path::new(output);

    // `main` runs once, so the cell is guaranteed to be empty here.
    TZ.set(TimeZone::system()).unwrap();

    if let Err(err) = try_main(archive_path, output_path) {
        eprintln!("Error: {err}");
        return ExitCode::FAILURE;
    }
    ExitCode::SUCCESS
}
fn try_main(path: &Path, output_path: &Path) -> Result<(), BoxError> {
2024-11-24 03:37:18 +00:00
let actor_path = path.join("actor.json");
let outbox_path = path.join("outbox.json");
let mappings_path = path.join("mappings.json");
let file = BufReader::new(File::open(&actor_path)?);
let actor: pleroma::Actor = serde_json::from_reader(file)?;
let file = BufReader::new(File::open(&outbox_path)?);
let activities: Activities = serde_json::from_reader(file)?;
// Load mappings of ids to public URLs
let mut mappings: Mappings = match File::open(&mappings_path) {
Ok(file) => serde_json::from_reader(BufReader::new(file))?,
Err(err) if err.kind() == io::ErrorKind::NotFound => HashMap::new(),
Err(err) => return Err(err.into()),
};
// Ensure output path exists, and write out stylesheet
fs::create_dir_all(output_path)?;
let style_path = output_path.join("style.css");
fs::write(&style_path, STYLE)?;
2024-11-24 03:37:18 +00:00
let agent = ureq::AgentBuilder::new().redirects(0).build();
// Process posts
2024-11-24 03:37:18 +00:00
let mut posts = Vec::with_capacity(activities.ordered_items.len());
for item in &activities.ordered_items {
if item.direct_message {
continue;
}
2024-11-24 03:37:18 +00:00
match &item.object {
pleroma::activity::ObjectUnion::ObjectClass(activity) => {
let id: Url = item.id.parse()?;
if !mappings.contains_key(id.as_str()) {
let response = match agent.head(id.as_str()).call() {
Ok(res) => res,
Err(ureq::Error::Status(status, _res)) => {
eprintln!("expected 3xx response, got {} for {}", status, id);
mappings.insert(item.id.clone(), None);
continue;
}
Err(err) => return Err(err.into()),
};
if !(300..400).contains(&response.status()) {
eprintln!(
"expected 3xx response, got {} for {}",
response.status(),
id
);
mappings.insert(item.id.clone(), None);
continue;
}
let Some(location) = response.header("location") else {
return Err("expected a Location header, but it's missing".into());
};
let url = id.join(location)?;
mappings.insert(item.id.clone(), Some(url));
}
posts.push(activity);
}
pleroma::activity::ObjectUnion::String(s) => {
eprintln!("ObjectUnion::String: {s}");
// TODO
}
}
}
let mappings_writer = BufWriter::new(File::create(&mappings_path)?);
serde_json::to_writer_pretty(mappings_writer, &mappings)?;
MAPPINGS.set(mappings).unwrap();
// Generate index.html
let index_html = Layout {
title: "Pleroma Archive",
body: Index {
2024-11-24 03:37:18 +00:00
actor: &actor,
activities: &posts,
},
actor: &actor,
}
.to_string();
let index_path = output_path.join("index.html");
println!("Writing {}", index_path.display());
fs::write(&index_path, index_html.as_bytes())?;
// Generate individual post pages
for post in posts {
let Some(Some(url)) = MAPPINGS.get().unwrap().get(&post.id) else {
continue;
};
let mut post_path = output_path
.iter()
.chain(
url.path_segments()
.ok_or_else(|| BoxError::from("unable to get path segments of {url}"))?
.map(OsStr::new),
)
.collect::<PathBuf>();
post_path.set_extension("html");
let post_html = Layout {
title: &format!(
"Post from {} on {}",
actor.preferred_username,
post.human_published()
),
body: Show {
actor: &actor,
activity: post,
},
actor: &actor,
2024-11-24 03:37:18 +00:00
}
.to_string();
println!("Writing {}", post_path.display());
fs::create_dir_all(&post_path.parent().expect("post has parent dir"))?;
fs::write(&post_path, post_html.as_bytes())?;
}
2024-11-24 03:37:18 +00:00
Ok(())
}
// HTML templates, declared with the `markup` crate's `define!` macro. Each
// entry below becomes a struct whose fields are the listed parameters;
// `try_main` builds these structs and renders them with `to_string()`.
markup::define! {
// Page skeleton: doctype, <head> (charset, viewport, title, stylesheet link)
// and a <body> consisting of the Header, the supplied `body`, and the Footer.
Layout<'a, Body: markup::Render>(title: &'a str, body: Body, actor: &'a pleroma::Actor) {
2024-11-24 03:37:18 +00:00
@markup::doctype()
html {
head {
meta[charset="utf-8"];
meta[name="viewport", content="width=device-width, initial-scale=1"];
title { @title }
link[rel="stylesheet", type="text/css", href="/style.css"];
2024-11-24 03:37:18 +00:00
}
body {
2024-11-24 03:51:51 +00:00
@Header { title, actor }
@body
2024-11-24 03:37:18 +00:00
@Footer { }
}
}
}
// Index page body: the number of posts followed by every activity, iterated
// in the reverse of the slice's order.
Index<'a>(actor: &'a pleroma::Actor, activities: &'a [&'a pleroma::Activity]) {
main {
p { @activities.len() " posts:" }
@for activity in activities.iter().rev() {
@Activity { actor, activity }
}
}
}
// Single-post page body: a link back to "/" followed by the activity.
Show<'a>(actor: &'a pleroma::Actor, activity: &'a pleroma::Activity) {
a[href="/"] { "☜ Back to home page" }
@Activity { actor, activity }
}
2024-11-24 03:51:51 +00:00
// Page header: the title, a notice naming the archived account, and the
// actor's summary. The summary goes through `markup::raw`, so it is
// presumably emitted without HTML escaping (i.e. expected to already be HTML).
Header<'a>(title: &'a str, actor: &'a pleroma::Actor) {
2024-11-24 03:37:18 +00:00
header {
h1 { @title }
2024-11-24 03:51:51 +00:00
p { "This is a static archive of " @actor.username() }
p { @markup::raw(&actor.summary) }
2024-11-24 03:37:18 +00:00
}
}
// Page footer crediting the generator.
Footer() {
footer {
"Generated by "
a[href="https://forge.wezm.net/wezm/pleroma-archive"] { "pleroma-archive" }
}
}
// One post: the actor icon, the publication date linking to the post's page,
// an optional "reply to" link, the post content (via `markup::raw`) and any
// attachments, followed by a horizontal rule.
Activity<'a>(actor: &'a pleroma::Actor, activity: &'a pleroma::Activity) {
div[class=activity_class(&activity.object_type)] {
@Actor { actor }
div[class="activity-content"] {
// The href is an Option: it is None when the activity id has no entry in
// MAPPINGS or the entry holds no URL. Unwrapping MAPPINGS panics if it has
// not been set before rendering.
a[href=MAPPINGS.get().unwrap().get(&activity.id).and_then(|url| url.as_ref().map(|url| url.path()))] {
2024-11-24 03:37:18 +00:00
time[datetime=&activity.published] { @activity.human_published() }
}
@if let Some(in_reply_to) = &activity.in_reply_to {
a[href=in_reply_to, class="activity-reply-to"] { "↩ reply to" }
" "
}
@markup::raw(&activity.content)
@if !activity.attachment.is_empty() {
ul[class="activity-attachments"] {
@for attachment in activity.attachment.iter() {
li { @Attachment { attachment } }
}
}
}
2024-11-24 03:37:18 +00:00
}
}
hr;
}
// Actor avatar: rendered only when an icon exists and its type is "Image".
Actor<'a>(actor: &'a pleroma::Actor) {
@if let Some(icon) = &actor.icon {
@if icon.icon_type == "Image" {
img[src=&icon.url, alt=&actor.preferred_username, class="actor-icon"];
}
}
}
// Attachment: GIF/JPEG/PNG are shown inline as lazily loaded images; any
// other media type becomes a link labelled with that media type.
Attachment<'a>(attachment: &'a pleroma::activity::Attachment) {
@match attachment.media_type.as_str() {
"image/gif" |"image/jpeg" | "image/png" => {
img[src=&attachment.url, loading="lazy"];
}
_ => {
a[href=&attachment.url] { @attachment.media_type " attachment" }
}
}
}
2024-11-24 03:37:18 +00:00
}
/// Returns the CSS class list for an activity's wrapper `div`, chosen by the
/// kind of object the activity carries.
fn activity_class(object_type: &pleroma::activity::OneOfType) -> &'static str {
    use pleroma::activity::OneOfType::{Note, Question};

    match *object_type {
        Note => "activity activity-note",
        Question => "activity activity-question",
    }
}
impl pleroma::Activity {
    /// Formats the `published` timestamp as `DD Mon YYYY` in the time zone
    /// stored in `TZ`.
    ///
    /// # Panics
    ///
    /// Panics if `published` cannot be parsed as a timestamp, or if `TZ` has
    /// not been initialised.
    fn human_published(&self) -> String {
        let instant: Timestamp = self.published.parse().expect("invalid published value");
        // Only reached after a successful parse, so a bad timestamp is always
        // reported in preference to a missing time zone.
        let zone = TZ.get().unwrap().clone();
        let local = instant.to_zoned(zone);
        local.strftime("%d %b %Y").to_string()
    }
}
2024-11-24 03:51:51 +00:00
impl pleroma::Actor {
    /// Returns the actor's webfinger address with a leading `acct:` removed,
    /// or the address unchanged when it has no such prefix.
    fn username(&self) -> &str {
        if let Some(account) = self.webfinger.strip_prefix("acct:") {
            return account;
        }
        &self.webfinger
    }
}