summaryrefslogtreecommitdiff
path: root/bin
diff options
context:
space:
mode:
Diffstat (limited to 'bin')
-rwxr-xr-x bin/xmp2rdfxml 25
1 files changed, 25 insertions, 0 deletions
diff --git a/bin/xmp2rdfxml b/bin/xmp2rdfxml
new file mode 100755
index 0000000..81011b9
--- /dev/null
+++ b/bin/xmp2rdfxml
@@ -0,0 +1,25 @@
+#!/usr/bin/perl
+
+# Extract XMP data from a PDF file and unwrap it as generic RDF/XML.
+# Usage: xmp2rdfxml FILE.pdf   (the result is written to standard output)
+
+use v5.36;    # implies strict and warnings
+use PDF::API2;
+
+my $file = shift @ARGV // die "usage: xmp2rdfxml FILE.pdf\n";
+# a PDF without an XMP stream presumably yields undef here; fail clearly
+my $xml = PDF::API2->open($file)->xml_metadata()
+    // die "$file: no XMP metadata found\n";
+
+# strip noise that may precede the first tag
+$xml =~ s{\APDF version of requested feature [^<]*}{};
+
+# replace whitespace and surrounding XMP boilerplate with XML boilerplate
+$xml =~ s{\s*<\?xpacket [^>]+>\s*}{};
+$xml =~ s{<x:xmpmeta [^>]+>}{<?xml version="1.0" encoding="utf-8"?>};
+
+# strip trailing boilerplate: closing x:xmpmeta tag, then the end xpacket
+$xml =~ s{\s*</x:xmpmeta>}{};
+$xml =~ s{\s*<\?xpacket [^>]+>}{};
+
+say $xml;