#!/usr/bin/perl -C0
#
# Filter that re-encodes a byte stream of mixed/unknown encoding as UTF-8.
#
# Input is read line by line from the files named on the command line (or
# from STDIN) and written to STDOUT:
#   * a line consisting of exactly  SET client_encoding = 'SQL_ASCII';  is
#     rewritten to declare UTF8 instead (presumably the header of a
#     PostgreSQL dump -- confirm against the intended input);
#   * a line that already decodes as UTF-8 (which includes pure ASCII) is
#     passed through;
#   * in any other line, the valid UTF-8 portions are kept and each run of
#     bytes that is not valid UTF-8 is interpreted as $default_charset.
#
# The -C0 switch asks perl not to apply any Unicode I/O handling of its own,
# so the script works on raw bytes.
use strict;
use warnings;
use Encode;

# Charset assumed for bytes that are not part of a valid UTF-8 sequence.
my $default_charset = 'cp-1252';

# recode_line($bytes) -> $utf8_bytes
#
# Converts a single input line (a byte string) and returns the converted byte
# string.  Dies if the decoder reports an error even in "quiet" mode, or if
# it stops on an ASCII byte; neither is expected to happen.
#
# NOTE(review): decode_utf8 is Encode's lax "utf8" variant; if the consumer
# of the output is strict about UTF-8, consider decode('UTF-8', ...) instead
# -- confirm before changing, strictness details vary between Encode versions.
sub recode_line {
    my ($line) = @_;

    # The encoding declaration has to match what this filter emits.
    return $line
        if $line =~ s/^(SET client_encoding = ')SQL_ASCII(';)$/${1}UTF8$2/;

    # Fast path: the whole line is ASCII or UTF-8.  LEAVE_SRC keeps $line
    # intact so it can be reused by the slow path when decoding fails.
    my $decoded
        = eval { decode_utf8($line, Encode::FB_CROAK | Encode::LEAVE_SRC) };
    return encode_utf8($decoded) if defined $decoded;

    # Slow path: walk through the line in chunks, alternating between a valid
    # UTF-8 stretch and a run of bytes taken from the fallback charset.
    my $converted = '';
    my $pending   = $line;

    # Test the length, not the truth value: a remainder of "0" is false as a
    # string but still has to be emitted.
    while (length $pending) {

        # FB_QUIET returns the leading part that decoded cleanly and leaves
        # the unprocessed remainder in $pending.  The prefix may be empty
        # (line starts with an invalid byte); that is not an error.
        my $valid_prefix = eval { decode_utf8($pending, Encode::FB_QUIET) };
        die "Unexpected error on second utf8 decode attempt!\n"
            unless defined $valid_prefix;
        $converted .= encode_utf8($valid_prefix);
        last unless length $pending;

        # $pending now starts at the byte the decoder rejected.  Take the
        # whole run of non-ASCII bytes as fallback-charset text.
        if ($pending =~ s/\A([^\x00-\x7F]+)//) {
            my $legacy_run = $1;
            $converted .= encode_utf8(decode($default_charset, $legacy_run));
        }
        else {
            die "ASCII character caused decode error?!\n";
        }
    }

    return $converted;
}

# Reads every input line, converts it and writes the result to STDOUT.
sub main {
    while (my $orig_line = <>) {
        print STDOUT recode_line($orig_line)
            or die "Cannot write to STDOUT: $!\n";
    }

    # Buffered write errors (e.g. a full disk) only surface on close.
    close STDOUT or die "Cannot write to STDOUT: $!\n";
    return;
}

# Run as a filter when executed directly; stay passive when loaded by a test.
main() unless caller;