aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jonas Smedegaard <dr@jones.dk> 2024-02-21 18:55:52 +0100
committer Jonas Smedegaard <dr@jones.dk> 2024-02-21 19:28:04 +0100
commit d41622674b2998e178a53fed02351ba2ae1379b8 (patch)
tree d0942b88fbfa3518af13fb3cd7cd9f10b886e932
parent 8cd66a1a8729d8ef0bce89408b0733926d258958 (diff)
add helper script xmp2rdfxml, and make targets turtle-from-* dotgraph-from-*
-rw-r--r-- Makefile 10
-rwxr-xr-x bin/xmp2rdfxml 25
2 files changed, 35 insertions, 0 deletions
diff --git a/Makefile b/Makefile
index af1651b..beaa1f7 100644
--- a/Makefile
+++ b/Makefile
@@ -1,3 +1,5 @@
+BASE_IRI = https://thoughtroam.abcdefghijklmnopqrstuvxyzæøå.dk/
+
ARTICLES = learn code use
all: preview
@@ -23,3 +25,11 @@ $(ARTICLES:%=wordcount-of-%): wordcount-of-%:
QUARTO_LOG_LEVEL=quiet \
quarto render $*/index.qmd --to plain --columns=9999 --output - \
| perl -nE 'next if /^-*$$/; $$bilag += $$_ eq "Bilag 1\n"; $$chars += length unless $$bilag; END { say $$chars }'
+
+$(ARTICLES:%=turtle-from-%): turtle-from-%: _site/%/index.pdf # pipe the XMP metadata of _site/<article>/index.pdf through rapper to print it as Turtle; $(BASE_IRI)<article>/ is passed as rapper's base URI argument
+ @bin/xmp2rdfxml _site/$*/index.pdf \
+ | rapper -i rdfxml -o turtle - $(BASE_IRI)$*/
+
+$(ARTICLES:%=dotgraph-from-%): dotgraph-from-%: _site/%/index.pdf # pipe the XMP metadata of _site/<article>/index.pdf through rapper to print it in rapper's "dot" output format; $(BASE_IRI)<article>/ is passed as rapper's base URI argument
+ @bin/xmp2rdfxml _site/$*/index.pdf \
+ | rapper -i rdfxml -o dot - $(BASE_IRI)$*/
diff --git a/bin/xmp2rdfxml b/bin/xmp2rdfxml
new file mode 100755
index 0000000..81011b9
--- /dev/null
+++ b/bin/xmp2rdfxml
@@ -0,0 +1,25 @@
+#!/usr/bin/perl
+
+# Usage: xmp2rdfxml FILE.pdf - read the XMP metadata of the PDF named by the first argument (via PDF::API2) and print it to stdout unwrapped as generic RDF/XML
+
+use v5.36;
+use strict;    # NOTE(review): redundant - "use v5.36" already enables strict (and warnings)
+
+use PDF::API2;
+# NOTE(review): nothing below checks that an argument was given or that xml_metadata() returned a value
+my $xml = PDF::API2->open( $ARGV[0] )->xml_metadata();
+
+# strip noise: drop a leading "PDF version of requested feature ..." message, up to (not including) the first "<" - presumably emitted by PDF::API2 ahead of the packet; TODO confirm
+$xml =~ s,^PDF version of requested feature [^<]*,,;
+
+# remove the first <?xpacket ...?> instruction together with its surrounding whitespace, then replace the opening <x:xmpmeta ...> tag with an XML declaration
+$xml =~ s,\s*<\?xpacket [^>]+>\s*,,;
+$xml =~ s,<x:xmpmeta [^>]+>,<?xml version="1.0" encoding="utf-8"?>,;
+
+# strip trailing boilerplate: the closing </x:xmpmeta> tag and the first remaining <?xpacket ...?> instruction (the leading one was removed above), each with its preceding whitespace
+$xml =~ s,\s*</x:xmpmeta>,,;
+$xml =~ s,\s*<\?xpacket [^>]+>,,;
+
+say $xml;
+
+1;