#!/usr/bin/perl

# extract XMP data from PDF file and unwrap as generic RDF/XML

use v5.36;
use strict;

use PDF::API2;

# The only command-line argument is the path of the PDF to read.
my ($pdf_file) = @ARGV;
die "Usage: $0 FILE.pdf\n"
    unless defined $pdf_file && length $pdf_file;

# Fetch the document-level XMP metadata stream as one string.
my $xml = PDF::API2->open($pdf_file)->xml_metadata();

# NOTE(review): xml_metadata() appears to return undef when the PDF carries
# no metadata stream -- confirm against the PDF::API2 docs.  Bail out with a
# clear message instead of running the substitutions below on undef, which
# would only produce "uninitialized value" warnings and a blank line.
die "$pdf_file: no XMP metadata found\n"
    unless defined $xml;

# strip noise: drop a leading "PDF version of requested feature ..." text
# run, up to (not including) the first "<"
$xml =~ s{^PDF version of requested feature [^<]*}{};

# replace whitespace and surrounding XMP boilerplate with XML boilerplate:
# the first <?xpacket ...?> instruction (plus whitespace around it) is
# removed, and the opening <x:xmpmeta ...> tag becomes an XML declaration
$xml =~ s{\s*<\?xpacket [^>]+>\s*}{};
$xml =~ s{<x:xmpmeta [^>]+>}{<?xml version="1.0" encoding="utf-8"?>};

# strip trailing boilerplate: the closing </x:xmpmeta> tag and the next
# remaining <?xpacket ...?> instruction, each with the whitespace before it
$xml =~ s{\s*</x:xmpmeta>}{};
$xml =~ s{\s*<\?xpacket [^>]+>}{};

say $xml;

1;