diff options
Diffstat (limited to 'bin/xmp2rdfxml')
-rwxr-xr-x | bin/xmp2rdfxml | 25 |
1 files changed, 25 insertions, 0 deletions
#!/usr/bin/perl

# xmp2rdfxml - extract XMP data from a PDF file and unwrap it as generic
# RDF/XML.
#
# Usage: xmp2rdfxml FILE.pdf
#
# The XMP packet stored in the PDF is printed to standard output with the
# <?xpacket ...?> processing instructions and the <x:xmpmeta> wrapper element
# removed, and an XML declaration put in front instead.

use v5.36;    # also turns on strict and warnings, and provides say()
use strict;

use PDF::API2;

# Fail early with a usage hint instead of handing undef to PDF::API2.
my ($pdf_file) = @ARGV;
die "Usage: $0 FILE.pdf\n" unless defined $pdf_file;

my $xml = PDF::API2->open($pdf_file)->xml_metadata();

# Without this guard an undefined result would only produce "uninitialized
# value" warnings below, followed by an empty line and a zero exit status.
die "$0: no XMP metadata found in $pdf_file\n"
    unless defined $xml and length $xml;

# strip noise
$xml =~ s{^PDF version of requested feature [^<]*}{};

# replace leading whitespace and the opening XMP boilerplate with an
# XML declaration
$xml =~ s{\s*<\?xpacket [^>]+>\s*}{};
$xml =~ s{<x:xmpmeta [^>]+>}{<?xml version="1.0" encoding="utf-8"?>};

# strip trailing boilerplate: the closing wrapper element, then the remaining
# (end) xpacket instruction -- the opening one was already removed above
$xml =~ s{\s*</x:xmpmeta>}{};
$xml =~ s{\s*<\?xpacket [^>]+>}{};

say $xml;

1;