#! /bin/sh
eval '(exit $?0)' && eval 'PERL_BADLANG=x;export PERL_BADLANG;: \
;exec perl -x -S -- "$0" ${1+"$@"};#'if 0;
eval 'setenv PERL_BADLANG x;exec perl -x -S -- "$0" $argv:q;#'.q
#!perl -w
+($0=~/(.*)/s);do$1;die$@if$@;__END__+if 0;
# Don't touch/remove lines 1--7: http://www.inf.bme.hu/~pts/Magic.Perl.Header

use strict;

# Change by Thomas Esser, Sept. 1998: The above lines allows us to find
# perl along $PATH rather than guessing a fixed location. The above
# construction should work with most shells.
# Modified by pts@fazekas.hu at and before Sun Feb  2 12:57:10 CET 2003

# A script to transform an EPS file so that:
#   a) it is guarenteed to start at the 0,0 coordinate
#   b) it sets a page size exactly corresponding to the BoundingBox
# This means that when Ghostscript renders it, the result needs no
# cropping, and the PDF MediaBox is correct.
#   c) the result is piped to Ghostscript and a PDF version written
#
# -- It needs a Level 2 PS interpreter.
# -- If the bounding box is not right, of course, you have problems...
# -- BoundingBox: (atend) is perfectly supported.
# -- recommended options: --below --hires
#
# Sebastian Rahtz, for Elsevier Science
#
# now with extra tricks from Hans Hagen's texutil.
#
# History
#  1999/05/06 v2.5 (Heiko Oberdiek)
#    * New options: --hires, --exact, --filter, --help.
#    * Many cosmetics: title, usage, ...
#    * New code for debug, warning, error
#    * Detecting of cygwin perl
#    * Scanning for %%{Hires,Exact,}BoundingBox.
#    * Scanning only the header in order not to get a wrong
#      BoundingBox of an included file.
#    * (atend) supported.
#    * uses strict; (earlier error detecting).
#    * changed first comment from '%!PS' to '%!';
#    * corrected (atend) pattern: '\s*\(atend\)'
#    * using of $bbxpat in all BoundingBox cases,
#      correct the first white space to '...Box:\s*$bb...'
#    * corrected first line (one line instead of two before 'if 0;';
#  2000/11/05 v2.6 (Heiko Oberdiek)
#    * %%HiresBoundingBox corrected to %%HiResBoundingBox
#  2001/03/05 v2.7 (Heiko Oberdiek)
#    * Newline before grestore for the case that there is no
#      whitespace at the end of the eps file.
#  2003/02/02 (Szab� P�ter)
#    * option --below
#    * removes DOS EPSF binary junk correctly
#    * adds all 3 BoundingBox DSC comments
#    * reads all 3 BoundingBox DSC comments, and picks the best
#    * forces BoundingBox to be an integer
#    * adds %%EndComments and proper %!PS-Adobe-?-? EPSF-?.? header
#    * adds %%Pages:
#
# Dat: calling ``showpage'' is not required, gs 6.50 works without it
# Imp: detect error messages from GS, abort...
# Imp: make it work for PDF input
# Imp: make it work for PS non-EPS files

### program identification
my $program = "epstopdf";
my $filedate="2003/02/02"; # my $filedate="2001/03/05";
my $fileversion="2.7p";
my $copyright = "Copyright 1998-2001 by Sebastian Rahtz et al.\nContains modifications by pts\@fazekas.hu";
my $title = "\U$program\E $fileversion, $filedate - $copyright\n";

### ghostscript command name
my($quote,$GS)=("'","gs");
($quote,$GS) = ("\"","gswin32c") if $^O eq 'MSWin32' or $^O =~ /cygwin/i;

### options
$::opt_help=0;
$::opt_debug=0;
$::opt_compress=1;
$::opt_gs=1;
$::opt_hires=0;
$::opt_exact=0;
$::opt_filter=0;
$::opt_outfile="";
$::opt_below=0;

### usage
my @bool = ("false", "true");
my $usage = <<"END_OF_USAGE";
${title}Syntax:  $program [options] <eps file>
Options:
  --help:           print usage
  --outfile=<file>: write result to <file>
  --(no)filter:     read standard input           (default: $bool[$::opt_filter])
  --(no)gs:         run Ghostscript to create PDF (default: $bool[$::opt_gs])
  --(no)compress:   use compression               (default: $bool[$::opt_compress])
  --(no)hires:      scan HiResBoundingBox         (default: $bool[$::opt_hires])
  --(no)exact:      scan ExactBoundingBox         (default: $bool[$::opt_exact])
  --(no)debug:      debug informations            (default: $bool[$::opt_debug])
  --(no)below:      allow below baseline          (default: $bool[$::opt_below])
Examples for producing 'test.pdf':
  * $program test.eps
  * produce postscript | $program --filter >test.pdf
  * produce postscript | $program -f -d -o=test.pdf
Example: look for HiResBoundingBox and produce corrected PostScript:
  * $program -d --nogs -hires test.ps>testcorr.ps
END_OF_USAGE

### process options
use Getopt::Long;
GetOptions (
  "help!",
  "debug!",
  "filter!",
  "compress!",
  "gs!",
  "hires!",
  "below!",
  "exact!",
  "outfile=s",
) or die $usage;

### help functions
sub debug {
  print STDERR "* @_\n" if $::opt_debug;
}
sub warning {
  print STDERR "==> Warning: @_!\n";
}
sub error {
  die "$title!!! Error: @_!\n";
}
sub errorUsage {
  die "$usage\n!!! Error: @_!\n";
}

### option help
die $usage if $::opt_help;

### get input filename
my $InputFilename = "";
if ($::opt_filter) {
  @ARGV == 0 or
    errorUsage "Input file cannot be used with filter option";
  $InputFilename = "-";
  debug "Input file: standard input";
} else {
  @ARGV > 0 or errorUsage "Input filename missing";
  @ARGV < 2 or errorUsage "Unknown option or too many input files";
  $InputFilename = $ARGV[0];
  if ($ARGV[0] eq '-') {
    $::opt_filter=1;
    debug "Input file: standard input";
  } else {
    -f $InputFilename or error "'$InputFilename' does not exist";
    debug "Input filename:", $InputFilename;
  }
}

### option compress
my $GSOPTS;
$GSOPTS = "-dUseFlateCompression=".($::opt_compress?"true":"false")." ";
# $GSOPTS.=" -r72 -g99999x99999 ";
$GSOPTS.=" -r72 -sPAPERSIZE=a4 "; # default
$GSOPTS.=" -dCompatibilityLevel=1.2";

### option BoundingBox types
#**** pts ****
# scan all of them and find the best
{ my $BBprint = "%%BoundingBox:";
  $BBprint = "%%HiResBoundingBox:" if $::opt_hires;
  $BBprint = "%%ExactBoundingBox:" if $::opt_exact;
  debug "strongest BoundingBox comment:", $BBprint;
}
my $BBregex='%%(Hi[Rr]es|Exact|)BoundingBox:';

### option outfile
my $OutputFilename = $::opt_outfile;
if ($OutputFilename eq "") {
  if ($::opt_gs) {
    $OutputFilename = $InputFilename;
    if (!$::opt_filter) {
      $OutputFilename =~ s/\.[^\.]*$//;
      $OutputFilename .= ".pdf";
    }
  }
  else {
    $OutputFilename = "-"; # standard output
  }
}
if ($::opt_filter) {
  debug "Output file: standard output";
}
else {
  debug "Output filename:", $OutputFilename;
}

### option gs
if ($::opt_gs) {
  debug "Ghostscript command:", $GS;
  debug "Compression:", ($::opt_compress) ? "on" : "off";
}

### open input file
if ($::opt_filter) {
  open(IN, "<&STDIN") or error "Cannot open standard input";
} else {
  open(IN,"< $InputFilename") or error "Cannot open '$InputFilename'";
}
binmode IN;


#**** pts ****
sub read_error() { error "read $InputFilename: $!" }
my $bytes_left=-1;
#** @param $_[0] number of bytes to read, or undef to read a line
#** @return the string read
sub readIN(;$) {
  my $S;
  return "" if $bytes_left==0;
  ## print STDERR "READ\n";
  if (defined $_[0]) { read_error if 0>read IN, $S, $_[0] }
  else {
    $S=<IN>;
    read_error if !defined($S) and $!;
    $S="" if !defined $S;
  }
  if ($bytes_left<0) { # unlimited
  } elsif (length($S)>=$bytes_left) {
    $S=substr($S, 0, $bytes_left);
    $bytes_left=0;
  } else { $bytes_left-=length($S) }
  $S
}

### scan first line, check for DOS eps (and remove DOS headers)
#**** pts ****
my $header="";
$_="";
{ my $S;
  read_error if 0>read IN, $S, 1;
  error "$InputFilename: empty file" if 0==length($S);
  if ($S eq "\305") { # DOS EPSF header
    read_error if 29>read IN, $S, 29, 1;
    my ($eheader,$ps_ofs,$ps_len,$wmf_ofs,$wmf_len,$tif_ofs,$tif_len,$checksum)=
      unpack"A4VVVVVVv", $S;
    error "$InputFilename: bad DOS EPS" if $eheader ne "\305\320\323\306" or $ps_ofs<30;
    my($ps_end, $wmf_end, $tif_end)=($ps_ofs+$ps_len, $wmf_ofs+$wmf_len, $tif_ofs+$tif_len);
    $ps_ofs-=30;
    if (!seek IN, $ps_ofs, 1) {
      while ($ps_ofs>4096) { $ps_ofs-=4096; readIN 4096 }
      read_in $ps_ofs if $ps_ofs>0;
    }
    $bytes_left=($ps_end>$wmf_end and $ps_end>$tif_end) ? -1 : $ps_len;
    $S=readIN(1);
  }
  if ($S eq '%') {
    $S.=readIN;
    error "$InputFilename: won't read a PDF file" if substr($S,0,4)eq'%PDF';
    error "$InputFilename: EPS DSC must be %!PS-Adobe" if substr($S,0,4)ne'%!PS';
    # ^^^ Dat: mpost outputs "%!PS\n"
  } else {
    warning "$InputFilename: no PS ADSC header, BoundingBox not found\n"
  }
  $header=$S;
}

### open output file
if ($::opt_gs) {
  my $fn=$OutputFilename;
  $fn=~s@'@\\'@g if $quote eq "'";
  my $pipe = "$GS -q -dBATCH -sDEVICE=pdfwrite $GSOPTS " .
          "-sOutputFile=$quote$OutputFilename$quote -";
  debug "Ghostscript pipe:", $pipe;
  open(OUT,"|$pipe") or error "Cannot open Ghostscript for piped input";
} else {
  open(OUT,"> $OutputFilename") or error "Cannot write '$OutputFilename";
}
die unless binmode OUT;

### variables and pattern for BoundingBox search
my $bbxpatt = '[0-9eE\.\-]';
               # protect backslashes: "\\" gets '\'
my $BBValues = "\\s*($bbxpatt+)\\s+($bbxpatt+)\\s+($bbxpatt+)\\s+($bbxpatt+)";

#** @return arg rounded down to int
sub floor($) {
  # Dat: Perl int() rounds towards zero
  $_[0] < 0 ? -int(-$_[0]) : int($_[0])
}

#** @return arg rounded up to int
sub ceil($) {
  $_[0] > 0 ? -int(-$_[0]) : int($_[0])
}

my ($xoffset, $yoffset) = (0, 0);
my $do_translate=0;
#** @return PostScript code to be printed after the header
sub CorrectBoundingBox($$$$) {
  my ($llx, $lly, $urx, $ury) = @_;
  debug "Old BoundingBox:", $llx, $lly, $urx, $ury;
  my ($width, $height) = ($urx - $llx, $ury - $lly);
  ($xoffset, $yoffset) = (-$llx, -$lly);
  # Dat: it is inherently impossible to tell GS that it shouldn't
  #      recompress the images already compressed in the EPS file, but keep
  #      them in their original, compressed form. So we rather instruct GS to
  #      recompress
  my $pagedevice2set="
/AutoRotatePages /None
/CompatibilityLevel 1.2
/UseFlateCompression true
/AutoPositionEPSFiles false
/AutoFilterGrayImages false
/ConvertImagesToIndexed false
/DownsampleMonoImages false
/DownsampleGrayImages false
/DownsampleColorImages false
/AutoFilterColorImages false
/EncodeMonoImages true
/EncodeGrayImages true
/EncodeColorImages true
/AntiAliasMonoImages false
/AntiAliasGrayImages false
/AntiAliasColorImages false\n";

  $::opt_below=0 if $lly>=0;
  ($llx,$lly,$urx,$ury)=(0,0,$urx-$llx,$ury-$lly) if !$::opt_below;
  my $bbx="%%BoundingBox: ".floor($llx)." ".floor($lly)." ".
                            ceil ($urx)." ".ceil ($ury)."\n".
     "%%HiResBoundingBox: $llx $lly $urx $ury\n".
     "%%ExactBoundingBox: $llx $lly $urx $ury\n%%EndComments\n";
  if ($::opt_below) { #**** pts ****
    # vvv we output a second /MediaBox here, and we'll remove the first one
    #     (written by GS) later
    return "$bbx<< /PageSize [$width $height] $pagedevice2set >> setpagedevice
mark /MediaBox [$llx $lly $urx $ury] /PAGE pdfmark
mark /CropBox  [$llx $lly $urx $ury] /PAGE pdfmark\n";
  }
  debug "New BoundingBox: 0 0", $width, $height;
  debug "Offset:", $xoffset, $yoffset;
  if ($xoffset==0 and $yoffset==0) {
    return "$bbx<< /PageSize [$width $height] $pagedevice2set >> setpagedevice
mark /CropBox [0 0 $width $height] /PAGE pdfmark\n"
  }
  $xoffset=0 if $xoffset==0; # get rid of `-0'
  $yoffset=0 if $yoffset==0; # get rid of `-0'
  $do_translate=1;
  "$bbx<< /PageSize [$width $height] $pagedevice2set >> setpagedevice
mark /CropBox [0 0 $width $height] /PAGE pdfmark
gsave $xoffset $yoffset translate\n"
}

### scan header
if (1<length($header)) {
  my($llx,$lly,$urx,$ury);
  my($bbtype)='-'; # None
  debug "Scanning header for BoundingBox";
  $header=~s@\r\n?\Z(?!\n)@@;
  { my $headEPSF="EPSF-3.0";
    my $headPS="PS-Adobe-3.0";
    $headEPSF=$1 if $header=~s/\s+(EPSF-[.\d]+)$//;
    $headPS=$1 if $header=~s/(PS-Adobe-[.\d+])$//;
    print OUT "%!$headPS $headEPSF\n";
  }
  my $BBCorrected="";
  my $do_atend=0;
  my $had_pages=0;
  my $doing_atend=0;
  my $saved_pos;
  my $saved_bytes_left;
  read_again: while (length($_=readIN)) {
    ## print STDERR "(($_))\n";
    ### end of header
    if (!$doing_atend) {
      if (/^%%EndComments/i) { last }
      if (substr($_,0,2)ne'%%' or substr($_,0,5)eq'%%End') { $BBCorrected=$_; last }
    }

    ### BoundingBox with values
    if (/^$BBregex$BBValues/oi) {
      if ($bbtype eq '-'
       or $::opt_hires and $1 eq 'H'
       or $::opt_exact and $1 eq 'E'
       or $::opt_hires and $1 eq 'E' and $bbtype ne 'H'
       or $::opt_exact and $1 eq 'H' and $bbtype ne 'E'
         ) {
        ($bbtype,$llx,$lly,$urx,$ury)=($1,$2+0,$3+0,$4+0,$5+0);
      }
    } elsif (/^$BBregex\s*\(atend\)/oi) {
      ### BoundingBox with (atend)
      debug "BoundingBox $1 (atend)";
      if ($::opt_filter) {
        warning "Cannot look for BoundingBox in the trailer",
                "with option --filter";
      } else { $do_atend=1 }
    } elsif ($doing_atend) {
    } elsif (/^%%Pages:/) {
      print OUT; $had_pages=1;
    } else { print OUT }
  }
  if ($doing_atend) { # already read (atend); restore file position
    seek(IN, $saved_pos, 0) or error "Cannot go back to line `: (atend)'";
    $bytes_left=$saved_bytes_left;
  } elsif ($do_atend) { # seek to near EOF and try again
    error "Cannot leave line `: (atend)'" if ($saved_pos=tell IN)<0;
    $saved_bytes_left=$bytes_left;
    # vvv get the very last *BoundingBox entry from the last 4096 bytes
    if ($bytes_left>4096) {
      die unless seek(IN, $bytes_left-4096, 1);
      $bytes_left=4096;
    } elsif ($bytes_left<0) {
      die unless seek(IN, -4096, 2);
    }
    $doing_atend=1; goto read_again;
  }
  print OUT "%%Pages: 1\n" if !$had_pages;
  if ($bbtype ne'-') { print OUT CorrectBoundingBox $llx, $lly, $urx, $ury }
  else { warning "BoundingBox not found"; print OUT "%%EndComments\n" }
  print OUT $BBCorrected;
}

### print rest of file
print OUT while length($_=readIN 4096);

### close files
error "running gs" unless close(IN);
# ^^^ SUXX: gs always exit(0), if exists
print OUT "\ngrestore\n" if $do_translate;
close(OUT);

if ($::opt_below and $::opt_gs) {
  ### ****pts**** remove incorrect /MediaBox produced by gs
  die unless open F, "> $OutputFilename.tgs"; # Imp: in temp dir
  die unless print F '
  % this is temporary gs command file created by epstopdf
  GS_PDF_ProcSet begin
  pdfdict begin
  FN (r) file pdfopen begin
  % vvv keep file offsets, because `pdffindpageref` overrides it with contents
  /OFT Objects 0 get dup length array copy def
  % vvv Dat: the generation number is assumed to be 0
  % vvv Dat: modifies Objects[0]
  1 pdffindpageref 0 get
  Objects 0 OFT put
  readxrefentry ===
  currentdict pdfclose end end end
  ';
  die unless close F;

  my $offset=`$GS -dNODISPLAY -dSAFER -dBATCH -sFN=$quote$OutputFilename$quote -q $quote$OutputFilename.tgs$quote`;
  chomp $offset;
  unlink "$OutputFilename.tgs";
  die unless $offset=~/\A\d+\Z(?!\n)/;
  # Dat: now $offsets is a file position in $OutputFilename containing our /Page object

  die unless open F, "+< $OutputFilename";
  die unless binmode F;
  die unless seek F, $offset+=0, 0;
  my $pageobj;
  die unless 32<read F, $pageobj, 4096;
  if ($pageobj=~m@\A(.*?/Type\s*/Page\b.*?)(/MediaBox\s*\[[^\]]*\]).*?/MediaBox@s) {
    substr($pageobj, length($1), length($2))=" "x length($2);
    # ^^^ overwrite first buggy /MediaBox definition with spaces
    die unless seek F, $offset, 0;
    die unless print F $pageobj;
    debug "Below correction ($xoffset $yoffset) applied.";
  } else {
    debug "warning: double /MediaBox not found.";
  }
  die unless close F;
}

debug "Ready.";