summaryrefslogtreecommitdiff
path: root/bin/xmp2rdfxml
blob: 81011b911fb9ec560daaec7894d81b1a0b0a8331 (plain)
  #!/usr/bin/perl
  # extract XMP data from PDF file and unwrap as generic RDF/XML
  #
  # Usage: xmp2rdfxml FILE.pdf
  #
  # Reads the XML metadata stream of FILE.pdf through PDF::API2, removes
  # the <?xpacket ...?> processing instructions and the <x:xmpmeta>
  # wrapper element, and prints what is left -- prefixed with an XML
  # declaration -- to standard output.
  #
  # Dies with a short message when no file is given or when the PDF
  # yields no metadata to unwrap.
  use v5.36;
  use strict;
  use PDF::API2;

  # fail with a usage hint rather than handing undef to PDF::API2
  my $file = $ARGV[0]
    // die "Usage: $0 FILE.pdf\n";

  my $xml = PDF::API2->open($file)->xml_metadata();

  # without this guard an undefined result would only trigger
  # "uninitialized value" warnings below and print an empty line
  die "$file: no XMP metadata found\n"
    unless defined $xml and length $xml;

  # strip noise
  $xml =~ s{^PDF version of requested feature [^<]*}{};

  # replace whitespace and surrounding XMP boilerplate with XML boilerplate:
  # the first xpacket instruction (and the whitespace around it) is dropped,
  # then the opening x:xmpmeta tag becomes the XML declaration
  $xml =~ s{\s*<\?xpacket [^>]+>\s*}{};
  $xml =~ s{<x:xmpmeta [^>]+>}{<?xml version="1.0" encoding="utf-8"?>};

  # strip trailing boilerplate: the closing x:xmpmeta tag, then the next
  # remaining xpacket instruction together with the whitespace before it
  $xml =~ s{\s*</x:xmpmeta>}{};
  $xml =~ s{\s*<\?xpacket [^>]+>}{};

  say $xml;