Thursday, October 22, 2009

Read an MS Word document with Win32::OLE

this worked:

#!/usr/bin/perl
#
# Print the paragraph text of every file found in $dir, using Word via
# OLE automation (Windows only; requires Word to be installed).
#
# For each file:
#   * the file is opened with Win32::OLE->GetObject (the script dies with
#     the OLE error text if that fails),
#   * its paragraphs are walked in order until one whose local style name
#     is 'Footer' is reached,
#   * each paragraph's text is cleaned up and printed prefixed with '='.

use strict;
use warnings;

use Win32::OLE;
use Win32::OLE::Enum;
#use Data::Dumper;
#use SQL_MACROS qw(&sql_init &sql_query);

# Directory whose entries are processed. Every entry matched by the glob
# is handed to GetObject, so it is expected to contain only documents.
my $dir = "C:/cygwin/home/yazlovb/cmdb/data";

for my $infile ( glob "$dir/*" ) {
    my $document = Win32::OLE->GetObject($infile)
        or die "Could not GetObject $infile: " . Win32::OLE->LastError() . "\n";
    print "Extracting $infile ...\n";

    my $paragraphs = $document->Paragraphs();
    my $enumerate  = Win32::OLE::Enum->new($paragraphs);

    while ( defined( my $paragraph = $enumerate->Next() ) ) {
        # Fetch the intermediate objects separately: under strict refs,
        # dereferencing an undefined Style/Range would be a fatal error.
        my $style_obj = $paragraph->{Style};
        my $style     = ref $style_obj ? $style_obj->{NameLocal} : undef;

        # Stop at the first paragraph styled 'Footer'; nothing after it
        # in this document is printed.
        last if defined $style && $style eq 'Footer';
        # print "+$style\n";

        my $range = $paragraph->{Range};
        my $text  = ref $range ? $range->{Text} : undef;
        $text = '' unless defined $text;

        $text =~ s/[\n\r]//g;     # drop paragraph terminators
        $text =~ s/\x0b/\n/g;     # vertical tab -> newline (presumably Word's manual line break)
        $text =~ s/\x07//g;       # get rid of ^G's
        print "=$text\n";
    }

    # Release the document so Word does not keep every processed file open
    # for the rest of the run. 0 == wdDoNotSaveChanges.
    # NOTE(review): if the file was already open in a running Word session,
    # GetObject may attach to it and this will close it there too -- confirm
    # that is acceptable for your workflow.
    $document->Close(0);
}

# Final true value retained from the original script.
1;

Labels: , , , ,

0 Comments:

Post a Comment

<< Home