diff options
author | Jonas Smedegaard <dr@jones.dk> | 2024-02-21 18:55:52 +0100 |
---|---|---|
committer | Jonas Smedegaard <dr@jones.dk> | 2024-02-21 19:28:04 +0100 |
commit | d41622674b2998e178a53fed02351ba2ae1379b8 (patch) | |
tree | d0942b88fbfa3518af13fb3cd7cd9f10b886e932 | |
parent | 8cd66a1a8729d8ef0bce89408b0733926d258958 (diff) |
add helper script xmp2rdfxml, and make targets turtle-from-* dotgraph-from-*
-rw-r--r-- | Makefile | 10 | ||||
-rwxr-xr-x | bin/xmp2rdfxml | 25 |
2 files changed, 35 insertions, 0 deletions
@@ -1,3 +1,5 @@
+BASE_IRI = https://thoughtroam.abcdefghijklmnopqrstuvxyzæøå.dk/
+
 ARTICLES = learn code use
 
 all: preview
@@ -23,3 +25,11 @@ $(ARTICLES:%=wordcount-of-%): wordcount-of-%:
 	QUARTO_LOG_LEVEL=quiet \
 	quarto render $*/index.qmd --to plain --columns=9999 --output - \
 	| perl -nE 'next if /^-*$$/; $$bilag += $$_ eq "Bilag 1\n"; $$chars += length unless $$bilag; END { say $$chars }'
+
+$(ARTICLES:%=turtle-from-%): turtle-from-%: _site/%/index.pdf
+	@bin/xmp2rdfxml _site/$*/index.pdf \
+	| rapper -i rdfxml -o turtle - $(BASE_IRI)$*/
+
+$(ARTICLES:%=dotgraph-from-%): dotgraph-from-%: _site/%/index.pdf
+	@bin/xmp2rdfxml _site/$*/index.pdf \
+	| rapper -i rdfxml -o dot - $(BASE_IRI)$*/
diff --git a/bin/xmp2rdfxml b/bin/xmp2rdfxml
new file mode 100755
index 0000000..81011b9
--- /dev/null
+++ b/bin/xmp2rdfxml
@@ -0,0 +1,25 @@
+#!/usr/bin/perl
+
+# extract XMP data from PDF file and unwrap as generic RDF/XML
+
+use v5.36;
+use strict;
+
+use PDF::API2;
+
+my $xml = PDF::API2->open( $ARGV[0] )->xml_metadata();
+
+# strip noise
+$xml =~ s,^PDF version of requested feature [^<]*,,;
+
+# replace whitespace and surrounding XMP boilerplate with XML boilerplate
+$xml =~ s,\s*<\?xpacket [^>]+>\s*,,;
+$xml =~ s,<x:xmpmeta [^>]+>,<?xml version="1.0" encoding="utf-8"?>,;
+
+# strip trailing boilerplate
+$xml =~ s,\s*</x:xmpmeta>,,;
+$xml =~ s,\s*<\?xpacket [^>]+>,,;
+
+say $xml;
+
+1;