From 93937682aa8e62bdc7b0e3b54aaf09c993cbadb7 Mon Sep 17 00:00:00 2001
From: Jonas Smedegaard
Date: Tue, 9 Dec 2025 22:52:10 +0100
Subject: add and enable rdf make snippet

---
 Makefile     |  1 +
 _make/rdf.mk | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)
 create mode 100644 _make/rdf.mk

diff --git a/Makefile b/Makefile
index 990dd19..d528ebc 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,5 @@
 DOCUMENTS = report
+PDF_DOCUMENTS = $(wildcard _site/*.pdf)
 
 PAD_LIST = \
 	plan-copyleft.md:ruc-copyleft-plan \
diff --git a/_make/rdf.mk b/_make/rdf.mk
new file mode 100644
index 0000000..0586ade
--- /dev/null
+++ b/_make/rdf.mk
@@ -0,0 +1,37 @@
+# Make snippet for extracting RDF data from PDF documents
+#
+# Copyright 2024, Jonas Smedegaard
+# SPDX-License-Identifier: GPL-3+
+#
+# Setup:
+# In main Makefile...
+# * set variable PDF_DOCUMENTS or use simple and slow default
+# * set variable BASE_IRI or use default file URI
+# * include this make snippet
+#
+# Dependencies:
+# * pdfinfo (e.g. Debian package poppler-utils)
+# * rapper (e.g. Debian package raptor2-utils)
+# * perl v5.36.0 or newer (the recipe relies on the -g switch)
+
+# list of relative paths to PDF documents
+#PDF_DOCUMENTS = \
+#	main_paper.pdf \
+#	promo_article.pdf \
+#	research/deep/superconductors/report.pdf \
+#	research/deep/fringe/index.pdf
+
+PDF_DOCUMENTS ?= $(wildcard *.pdf)
+
+# extract XMP metadata from PDF document, strip the xpacket and x:xmpmeta
+# wrappers, and convert to the human-friendlier RDF/Turtle serialization
+$(PDF_DOCUMENTS:%=turtle-from-%): turtle-from-%:
+	@pdfinfo -meta $* \
+		| perl -gp \
+			-e 's,\s*<\?xpacket [^>]+>\s*,,;' \
+			-e 's,<x:xmpmeta [^>]+>,,;' \
+			-e 's,\s*</x:xmpmeta>,,;' \
+			-e 's,\s*<\?xpacket [^>]+>,,;' \
+		| rapper -q -i rdfxml -o turtle - $(BASE_IRI)$(dir $*)
+
+.PHONY: $(PDF_DOCUMENTS:%=turtle-from-%)
-- 
cgit v1.2.3