diff options
Diffstat (limited to '_make/rdf.mk')
| -rw-r--r-- | _make/rdf.mk | 37 |
1 files changed, 37 insertions, 0 deletions
diff --git a/_make/rdf.mk b/_make/rdf.mk
new file mode 100644
index 0000000..0586ade
--- /dev/null
+++ b/_make/rdf.mk
@@ -0,0 +1,37 @@
+# Make snippet for extracting RDF data from PDF documents
+#
+# Copyright 2024, Jonas Smedegaard <dr@jones.dk>
+# SPDX-License-Identifier: GPL-3+
+#
+# Setup:
+# In main Makefile...
+# * set variable PDF_DOCUMENTS or use simple and slow default
+# * set variable BASE_IRI or use default file URI
+# * include this make snippet
+#
+# Dependencies:
+# * pdfinfo (e.g. Debian package poppler-utils)
+# * rapper (e.g. Debian package raptor2-utils)
+# * perl v5.36.0 or newer (needed for the -g slurp switch)
+
+# list of relative paths to PDF documents
+#PDF_DOCUMENTS = \
+# main_paper.pdf \
+# promo_article.pdf \
+# research/deep/superconductors/report.pdf \
+# research/deep/fringe/index.pdf
+
+PDF_DOCUMENTS ?= $(wildcard *.pdf)
+
+# turtle-from-<path>: print XMP metadata of PDF <path> as RDF/Turtle on stdout;
+# perl strips the xpacket/xmpmeta wrapper so rapper receives plain RDF/XML
+$(PDF_DOCUMENTS:%=turtle-from-%): turtle-from-%:
+	@pdfinfo -meta "$*" \
+	  | perl -gp \
+	    -e 's,\s*<\?xpacket [^>]+>\s*,,;' \
+	    -e 's,<x:xmpmeta [^>]+>,<?xml version="1.0" encoding="utf-8"?>,;' \
+	    -e 's,\s*</x:xmpmeta>,,;' \
+	    -e 's,\s*<\?xpacket [^>]+>,,;' \
+	  | rapper -q -i rdfxml -o turtle - "$(BASE_IRI)$(dir $*)"
+
+.PHONY: $(PDF_DOCUMENTS:%=turtle-from-%)
