aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile1
-rw-r--r--_make/rdf.mk37
2 files changed, 38 insertions, 0 deletions
diff --git a/Makefile b/Makefile
index 990dd19..d528ebc 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,5 @@
DOCUMENTS = report
+PDF_DOCUMENTS = $(wildcard _site/*.pdf)
PAD_LIST = \
plan-copyleft.md:ruc-copyleft-plan \
diff --git a/_make/rdf.mk b/_make/rdf.mk
new file mode 100644
index 0000000..0586ade
--- /dev/null
+++ b/_make/rdf.mk
@@ -0,0 +1,37 @@
+# Make snippet for extracting RDF data from PDF documents
+#
+# Copyright 2024, Jonas Smedegaard <dr@jones.dk>
+# SPDX-License-Identifier: GPL-3+
+#
+# Setup:
+# In main Makefile...
+# * set variable PDF_DOCUMENTS or use simple and slow default
+# * set variable BASE_IRI or use default file URI
+# * include this make snippet
+#
+# Dependencies:
+# * pdfinfo (e.g. Debian package poppler-utils)
+# * rapper (e.g. Debian package raptor2-utils)
+# * perl v5.36.0 or newer
+
+# list of relative paths to PDF documents
+#PDF_DOCUMENTS = \
+# main_paper.pdf \
+# promo_article.pdf \
+# research/deep/superconductors/report.pdf \
+# research/deep/fringe/index.pdf
+
+# default: every PDF file directly in the current directory;
+# "?=" leaves alone any value set before this snippet is included
+PDF_DOCUMENTS ?= $(wildcard *.pdf)
+
+# Phony targets turtle-from-<path>, one per entry in PDF_DOCUMENTS:
+# extract XMP metadata from PDF document <path>,
+# and print it to stdout converted to
+# the human-friendlier RDF/Turtle serialization.
+#
+# Pipeline:
+#  1. pdfinfo -meta dumps the metadata of the PDF document
+#  2. perl (-g slurps the whole input, hence the v5.36.0 requirement)
+#     strips the leading and trailing xpacket processing instructions
+#     and swaps the x:xmpmeta wrapper for an XML declaration,
+#     presumably leaving a bare RDF/XML document
+#  3. rapper parses RDF/XML from stdin and emits Turtle,
+#     using $(BASE_IRI) + directory part of <path> as base IRI
+#
+# The path and the base IRI are single-quoted,
+# so that shell metacharacters in them reach the tools literally.
+$(PDF_DOCUMENTS:%=turtle-from-%): turtle-from-%:
+	@pdfinfo -meta '$*' \
+		| perl -gp \
+		-e 's,\s*<\?xpacket [^>]+>\s*,,;' \
+		-e 's,<x:xmpmeta [^>]+>,<?xml version="1.0" encoding="utf-8"?>,;' \
+		-e 's,\s*</x:xmpmeta>,,;' \
+		-e 's,\s*<\?xpacket [^>]+>,,;' \
+		| rapper -q -i rdfxml -o turtle - '$(BASE_IRI)$(dir $*)'
+
+.PHONY: $(PDF_DOCUMENTS:%=turtle-from-%)