#!perl
# qx2html.pl
# by Jon C. Jensen
# created 1998 August
# updated 1999 February 9
#
# Converts QuarkXPress "XPress Tags" export files into one HTML file.
#
# Usage: qx2html.pl FILE [FILE ...]
#
# Every input file is read line by line; each line that still has content
# after tag conversion is written as one <p>...</p> paragraph to
# "from-quark.html" in the current directory.  Progress and diagnostics are
# printed to STDOUT.
#
# The conversion works in two stages: XPress Tags that have an HTML
# equivalent are first rewritten as placeholder tags of the form <@name>,
# then every remaining <...> tag is deleted, and finally the placeholders
# are turned into real HTML tags.

use strict;
use warnings;

# HTML preferences
my $allow_nbsp = 1;    # emit &nbsp; for nonbreaking spaces (else a plain space)
my $allow_wbr  = 1;    # emit <wbr> for discretionary breaks (else nothing)

# debugging output option: when true, HTML comments marking the run and each
# input file are written into the output
my $debug_info = 0;

# name of the generated HTML file
my $OUT = "from-quark.html";

# Replacement text for individual high-bit input bytes.  The byte values
# are presumably Mac OS Roman codes (TODO confirm against the exporting
# system's encoding).
my %replacement_for = (
    212 => "'",           # real apostrophes / single quotes
    213 => "'",
    208 => "-",           # en dash
    209 => "--",          # em dash
    165 => "*",           # bullet
    210 => "\"",          # real double quotes
    211 => "\"",
    168 => "&reg;",       # circled R
    218 => "/",           # fraction slash
    193 => "&iexcl;",     # upside-down exclamation mark
    143 => "&egrave;",    # e with accent grave
    142 => "&eacute;",    # e with accent acute
    150 => "&ntilde;",    # n with tilde
    151 => "&oacute;",    # o with accent acute
    135 => "&aacute;",    # a with accent acute
);

# Replace or strip characters outside printable ASCII (32..126).
#
#   $text   - string to clean
# Returns the cleaned string.
#
# Control characters (< 32) and unmapped codes 127..159 are removed, and
# unmapped codes >= 160 become numeric character references; both cases
# print a diagnostic line to STDOUT.
sub replace_odd_chars {
    my ($text) = @_;

    my @pieces;
    foreach my $char (split //, $text) {
        my $code = ord $char;

        if ($code < 32) {
            print "* nuke $code *\n";        # strip control chars
        }
        elsif ($code <= 126) {
            push @pieces, $char;             # printable ASCII passes through
        }
        elsif (exists $replacement_for{$code}) {
            push @pieces, $replacement_for{$code};
        }
        elsif ($code < 160) {
            print "* nuke $code *\n";        # strip high control chars
        }
        else {
            # NOTE(review): the reference uses the raw input byte value,
            # which is only correct if the input encoding agrees with the
            # HTML character set for this code.
            print "* unknown $code *\n";
            push @pieces, "&#$code;";
        }
    }

    return join '', @pieces;
}

# Convert one line of an XPress Tags file to HTML paragraph content.
#
#   $line   - the input line (a line terminator, if present, is harmless)
#   $state  - hash ref carrying state between consecutive lines:
#               in_definition  a style sheet definition continues on this line
#               in_tag         a tag left open on the previous line ends here
# Returns the HTML text (possibly the empty string), or undef when the line
# must be skipped entirely.
sub convert_line {
    my ($line, $state) = @_;
    my $text = $line;

    # skip lines that are style sheet definitions  @stylename=<...>
    # (not selectors  @stylename: )
    if ($state->{in_definition}) {
        # second line of a definition whose <...> part was left open;
        # only a single continuation line is handled
        $state->{in_definition} = 0;
        return;
    }
    if ($text =~ /^\@[^:=]+=[^<]*<[^>]+$/) {
        $state->{in_definition} = 1;
        return;
    }
    return if $text =~ /^\@[^:=]+=/;

    # remove carriage returns
    $text =~ s/\x0d//g;

    # finish a tag that was left open on the previous line
    if ($state->{in_tag}) {
        $text =~ s/^[^>]*>//;
        $state->{in_tag} = 0;
    }

    # A tag left open at the end of the line: remember it and skip the line.
    # NOTE(review): this drops any text that precedes the open tag on the
    # same line; kept as-is because such lines presumably hold only a style
    # selection -- confirm with real export files.
    if ($text =~ s/<[^>]+$//) {
        $state->{in_tag} = 1;
        return;
    }

    # remove style sheet selections
    $text =~ s/^\@[^:=]*://g;    # @stylename: at the beginning of a line
    $text =~ s/<\@>//g;          # <@> tags

    # Escape ampersands first, so that entities generated by the steps
    # below are not escaped a second time.
    $text =~ s/&/&amp;/g;

    # convert XTags-escaped characters
    $text =~ s/<\\\@>/\@/g;      # <\@>  @
    $text =~ s/<\\\\>/\\/g;      # <\\>  \
    $text =~ s/<\\<>/&lt;/g;     # <\<>  <

    # convert simple one-spot tags
    $text =~ s/<\\n>/<\@br>/ig;            # <\n> new line (soft return)
    $text =~ s/<\\m>/--/ig;                # <\m> em dash
    $text =~ s/<\\!->/-/g;                 # <\!-> nonbreaking hyphen
    $text =~ s/<\\[sfpq]>/ /ig;            # <\s> <\f> <\p> <\q> various spaces
    $text =~ s/<\\[234]>/#?/g;             # <\2> <\3> <\4> box numbers
    $text =~ s/<\\[cb]>/<\@\/p><\@p>/ig;   # <\c> <\b> new column, box

    # <\#???> character by decimal code; the backslash is optional so that
    # both <\#65> and <#65> are accepted
    $text =~ s/<\\?\#(\d+)>/chr($1)/ge;

    # convert tabs to spaces and collapse extraneous whitespace
    $text =~ s/\s+/ /g;

    # convert bold, italic, and underline attributes; the run is closed by
    # either <$> or <P>
    $text =~ s{<([BbIiUu])>([^<]+)<[\$P]>}{<\@$1>$2<\@/$1>}g;

    my $wbr  = $allow_wbr  ? '<@wbr>' : '';
    my $nbsp = $allow_nbsp ? '&nbsp;' : ' ';
    $text =~ s/<\\d>/$wbr/ig;              # <\d> discretionary return
    $text =~ s/<\\->/-$wbr/g;              # <\-> regular (breaking) hyphen
    $text =~ s/<\\![sfpq]>/$nbsp/ig;       # <\!s> <\!f> <\!p> <\!q>

    # remove all remaining tags, but not <@...>, as these are new HTML tags
    $text =~ s/<[^\@][^>]*>//g;

    # correct HTML tags: <@name> becomes <name>
    $text =~ s/<\@([^>]*>)/<$1/g;

    # convert right angle brackets that are not part of a tag, wherever
    # they appear relative to the tags
    $text =~ s/(<[^>]*>)|>/defined $1 ? $1 : '&gt;'/ge;

    # catch odd characters
    $text = replace_odd_chars($text);

    # collapse extraneous whitespace to single space
    $text =~ s/\s+/ /g;    # multiple spaces
    $text =~ s/^ //;       # space at line beginning
    $text =~ s/ $//;       # space at line end

    return $text;
}

# Convert every file named in the argument list and write the HTML output.
# Dies when no files are given or when a file cannot be opened, read or
# written.
sub main {
    my @paths = @_;

    die "No files to process" unless @paths;

    open my $out_fh, '>', $OUT
        or die "Can't open output file $OUT: $!\n";

    # NOTE(review): minimal HTML page header; adjust if a different
    # skeleton is wanted.
    print {$out_fh} <<"HTML";
<html>
<head>
<title>Converted from QuarkXPress</title>
</head>
<body>
HTML
    print {$out_fh} "<!-- qx2html.pl debugging output -->\n\n" if $debug_info;

    print "\n\nConverting files from XPress Tags to HTML...\n";

    FILE:
    foreach my $path (sort @paths) {

        # Show only the part after the last colon (the file name in a
        # colon-separated path), but open the path exactly as given.
        my $name = ($path =~ /:([^:]+)$/) ? $1 : $path;

        open my $in_fh, '<', $path
            or die "Unable to open file $name: $!\n";
        print "Processing file $name\n";

        # Read the whole file and split on CR, LF or CRLF, so the line
        # structure does not depend on the platform running the script.
        my $content = do { local $/; <$in_fh> };
        close $in_fh or die "Unable to read file $name: $!\n";
        my @lines = defined $content
                  ? split(/\x0d\x0a|\x0d|\x0a/, $content)
                  : ();

        # verify that this is an XPress Tags file
        # NOTE(review): assumes the first line starts with a version tag
        # such as <v1.70> -- confirm against real exports.
        my $first = shift @lines;
        unless (defined $first && $first =~ /^<v[\d.]+>/i) {
            warn "File $name doesn't appear to be in XPress Tags format\n";
            next FILE;
        }

        print {$out_fh} "\n\n<!-- file: $name -->\n" if $debug_info;

        my %state = (in_definition => 0, in_tag => 0);
        foreach my $line (@lines) {
            my $html = convert_line($line, \%state);
            next unless defined $html && length $html;

            # output line as HTML paragraph
            print {$out_fh} "<p>\n$html\n</p>\n\n";
        }
    }

    print {$out_fh} "\n</body>\n</html>\n";
    close $out_fh or die "Can't close output file $OUT: $!\n";

    print "Done.\n";
    return;
}

# Run the conversion only when executed as a script, so that the helper
# subroutines can be loaded (e.g. by tests) without side effects.
main(@ARGV) unless caller;

1;

# Be it here proclaimed: QuarkXPress 4 is EVIL. Bugs, bugs, bugs.
# The XPress Tags export routine is no exception.
# If at all possible, use 3.32r5 to export XPress Tags files.