blob: 81011b911fb9ec560daaec7894d81b1a0b0a8331 (
plain)
#!/usr/bin/perl
# extract XMP data from PDF file and unwrap as generic RDF/XML
#
# Usage: pass the path of a PDF file as the only argument.  The XMP packet
# stored in the PDF is printed to STDOUT with the <?xpacket ...?> and
# <x:xmpmeta ...> wrappers removed and an XML declaration put in their place.
use v5.36;
use strict;
use PDF::API2;

# Fail early with a usage hint instead of handing undef to PDF::API2->open.
my $pdf_path = $ARGV[0]
    // die "usage: $0 FILE.pdf\n";

my $xml = PDF::API2->open($pdf_path)->xml_metadata();

# NOTE(review): xml_metadata() presumably yields nothing when the PDF carries
# no metadata stream -- confirm against the PDF::API2 docs.  Without this
# guard every substitution below would warn about an uninitialized value and
# the script would print a blank line while still exiting successfully.
die "$pdf_path: no XMP metadata found\n"
    unless defined $xml and length $xml;

# strip noise: a leading "PDF version of requested feature ..." message that
# can precede the first tag
$xml =~ s{^PDF version of requested feature [^<]*}{};

# replace whitespace and surrounding XMP boilerplate with XML boilerplate:
# drop the first <?xpacket ...?> processing instruction (plus the whitespace
# around it), then turn the opening <x:xmpmeta ...> tag into an XML declaration
$xml =~ s{\s*<\?xpacket [^>]+>\s*}{};
$xml =~ s{<x:xmpmeta [^>]+>}{<?xml version="1.0" encoding="utf-8"?>};

# strip trailing boilerplate: the closing </x:xmpmeta> tag and the remaining
# <?xpacket ...?> instruction, each together with the whitespace before it
$xml =~ s{\s*</x:xmpmeta>}{};
$xml =~ s{\s*<\?xpacket [^>]+>}{};

say $xml;

1;
|