aboutsummaryrefslogtreecommitdiff
path: root/bin/xmp2rdfxml
blob: 05a567f4bdab1b6a964510943adb1487afab3b68 (plain)
  #!/usr/bin/perl
  # xmp2rdfxml - extract XMP data from a PDF file and unwrap it as generic
  # RDF/XML, printed on standard output.
  #
  # Usage: xmp2rdfxml FILE.pdf
  #
  # Exits non-zero with a message on STDERR when no file is given or when the
  # PDF yields no XMP metadata.
  use v5.36;
  use PDF::API2;

  # Fail early with a usage hint rather than handing undef to PDF::API2.
  # Only the first argument is used; any further arguments are ignored.
  my $pdf_file = $ARGV[0]
      // die "Usage: $0 FILE.pdf\n";

  my $xml = PDF::API2->open($pdf_file)->xml_metadata();

  # Without this guard the substitutions below would run on undef and the
  # script would print an empty line while exiting successfully.
  # NOTE(review): assumes xml_metadata() yields undef or '' when the document
  # carries no XMP stream -- confirm against the installed PDF::API2.
  unless ( defined $xml and length $xml ) {
      die "$pdf_file: no XMP metadata found\n";
  }

  # Strip noise: a leading "PDF version of requested feature ..." line, up to
  # the first "<".
  # NOTE(review): presumably a diagnostic that ends up in front of the packet;
  # its origin is not visible from this script.
  $xml =~ s{^PDF version of requested feature [^<]*}{};

  # Replace whitespace and the surrounding XMP boilerplate with XML
  # boilerplate: drop the first <?xpacket ...?> instruction together with the
  # whitespace around it, then turn the opening <x:xmpmeta ...> tag into an
  # XML declaration.
  $xml =~ s{\s*<\?xpacket [^>]+>\s*}{};
  $xml =~ s{<x:xmpmeta [^>]+>}{<?xml version="1.0" encoding="utf-8"?>};

  # Strip trailing boilerplate: the closing </x:xmpmeta> tag and the remaining
  # <?xpacket ...?> instruction, each with the whitespace preceding it.
  $xml =~ s{\s*</x:xmpmeta>}{};
  $xml =~ s{\s*<\?xpacket [^>]+>}{};

  say $xml;