#!/usr/bin/perl
# Convert a UTF-8 plain-text file (file.txt) into an RTF document (file.rtf).
# Each run of text between newlines becomes one RTF paragraph, and every
# non-ASCII character is written as an RTF Unicode escape.
use strict;
use warnings;
use RTF::Writer;

# Return the RTF escape sequence for one non-ASCII character.
#
# RTF's \uN control word takes a *signed* 16-bit decimal number, so values
# above 32767 are written as (value - 65536).  Code points above U+FFFF do
# not fit in 16 bits at all and are emitted as a UTF-16 surrogate pair, each
# half as its own \uN.  Every \uN is followed by \'5f ("_"), the fallback
# character that readers without Unicode support display instead.
sub rtf_unicode_escape {
    my ($char) = @_;
    my $codepoint = ord $char;

    my @units;
    if ( $codepoint > 0xFFFF ) {
        my $offset = $codepoint - 0x10000;
        @units = ( 0xD800 + ( $offset >> 10 ), 0xDC00 + ( $offset & 0x3FF ) );
    }
    else {
        @units = ($codepoint);
    }

    return join '',
        map { sprintf( "\\u%d\\'5f", $_ > 0x7FFF ? $_ - 0x10000 : $_ ) } @units;
}

die "Usage: $0 file.txt\n (this will create file.rtf)\n"
    unless ( @ARGV and $ARGV[0] =~ /\.txt$/ and -f $ARGV[0] );

# input file is expected to be utf8
open( my $in_fh, "<:encoding(UTF-8)", $ARGV[0] ) or die "$ARGV[0]: $!";
my $utf = do { local $/; <$in_fh> };    # slurp it
close $in_fh;
$utf = '' unless defined $utf;

# The paragraphs below are handed to RTF::Writer as raw RTF, so characters
# that are special in RTF must be escaped here.  This has to happen before
# the Unicode step, which itself introduces backslashes.
$utf =~ s/([\\{}])/\\$1/g;

# here's the magic part: replace each wide character with
# "\uN\'5f", where "N" is the (signed 16-bit) decimal numeric codepoint:
$utf =~ s/([^[:ascii:]])/rtf_unicode_escape($1)/eg;

( my $out = $ARGV[0] ) =~ s/txt$/rtf/;
my $rtf  = RTF::Writer->new_to_file($out);
my @pars = split( /\n+/, $utf );
$rtf->prolog( title => $out );
for my $par (@pars) {
    # Pass $par by reference: RTF::Writer treats a scalar ref as ready-made
    # RTF code and would otherwise re-escape the backslashes added above.
    $rtf->paragraph( \$par );
}
$rtf->close;
Is that easy, or what?
++ - I wish I'd seen this about a year ago ;-)
Bear in mind that RTF expects signed 16-bit values for character codes (so a code point above 32767 has to be written as a negative number, i.e. the code point minus 65536), which has bitten me more than once in the past. It's also worth pointing out that MS Word, in particular, will choke if you don't follow each Unicode character with its "ANSI equivalent", which you're achieving with the "\'5f" in your sprintf; most of the other RTF-aware editors I experimented with showed no such restriction.
perlmonks.org content © perlmonks.org and graff, john_oshea
prlmnks.org © 2006 edmund von der burg (eccles & toad)
v 0.03