Jea*_* T. 5 python docx python-docx
我希望能够用Python接受MS Word(.docx)文档中的所有修订(更改),最好使用python-docx模块.
我知道如何在Perl中实现(参见下面的代码),但希望在我的Python程序中使用原生代码来完成同样的事情.
任何的想法?
use strict;
use Win32::OLE qw(in with valof OVERLOAD);
use Win32::OLE::Const 'Microsoft.Word'; # wd constants
use Win32::OLE::Variant;
# Warn level 3 asks Win32::OLE to croak on OLE errors instead of returning silently.
$Win32::OLE::Warn = 3;
# VT_BOOL variants used when an OLE property expects a real boolean.
my ($true, $false) = map { Variant(VT_BOOL, $_) } (1, 0);
use File::Spec;
use File::Basename;
## Original & New File
# Usage: <script> <docfile> [<newfile>]
# When <newfile> is omitted the document is saved back over <docfile>.
@ARGV or die "Usage: $0 <docfile> [<newfile>]\n";
my $DocFile = transform_path($ARGV[0]);
# Test for a non-empty string rather than truthiness, so that a file
# literally named "0" is still honoured as the output name.
my $NewFile = ( defined $ARGV[1] && length $ARGV[1] )
            ? transform_path($ARGV[1])
            : $DocFile;
# NB: "[ -e $file ]" would build an anonymous array reference, which is
# always true; the bare file-test operator is what actually checks existence.
-e $DocFile or die "*** Cannot open '$DocFile'\n";
### Transform path
# Normalise a file name before it is handed to Word.
#   - relative path: returned as resolved by File::Spec->rel2abs
#   - absolute path: every run of forward slashes becomes one backslash
# Takes the path as its only argument and returns the transformed string.
sub transform_path {
    my ($candidate) = @_;

    unless ( File::Spec->file_name_is_absolute($candidate) ) {
        my $absolute = File::Spec->rel2abs($candidate);
        return $absolute;
    }

    ( my $converted = $candidate ) =~ s{/+}{\\}g;
    return $converted;
}
## Autoflush
$| = 1;    # flush output after every print so progress messages appear at once

### Connect to Word: reuse an already running instance if there is one,
### otherwise try to start a new one (registered with the 'Quit' destructor).
my $Word;
eval {
    $Word = Win32::OLE->GetActiveObject('Word.Application')
         || Win32::OLE->new('Word.Application', 'Quit');
};
# Fall back to a manual start when the attempt died OR quietly yielded no object.
if ( $@ || !$Word ) {
    print "Please open MS Word manually before continuing\n";
    print "...Press ENTER to continue...\n";
    <STDIN>;
    $Word = Win32::OLE->GetActiveObject('Word.Application', 'Quit');
}
# Without this check a missing Word instance would surface later as an
# obscure "Can't call method ... on an undefined value" error.
die "*** Cannot connect to MS Word\n" unless $Word;

print "Opening '$DocFile'\n";
my $document = $Word->Documents->Open({FileName => $DocFile, ConfirmConversions => 0});
die "Cannot open '$DocFile'\n" unless defined $document;
# The calls below go through ActiveWindow / ActiveDocument, so the freshly
# opened document has to be the active one.
$document->Activate();
$Word->ActiveWindow->ActivePane->View->{Type} = wdPrintView;

### Accept all changes
print("Accepting all changes\n");
# Switch change tracking off first, then accept every tracked change.
$Word->ActiveDocument->{TrackRevisions} = $false;
$Word->WordBasic->AcceptAllChangesInDoc();

### Save and Close
if ($NewFile eq $DocFile) {
    # Same name: save in place.
    $document->Save();
    $document->Close();
}
else {
    # Different name: write the copy, then close without saving again.
    $document->SaveAs($NewFile);
    $document->Close(wdDoNotSaveChanges);
}
print "Saving in '$NewFile'\n";
## END ##
Run Code Online (Sandbox Code Playgroud)
经过尝试,我设法将我的脚本改写为Python(使用Pywin32).
得到的代码如下:
from docx import Document
try:
from xml.etree.cElementTree import XML
except ImportError:
from xml.etree.ElementTree import XML
# Namespace of WordprocessingML elements, in the "{uri}" prefix form that
# ElementTree uses for qualified tag names.
WORD_NAMESPACE = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"
# Qualified tag name of a text element (<w:t>).
TEXT = WORD_NAMESPACE + "t"


def get_accepted_text(p):
    """Return the text of a paragraph as it reads once all changes are accepted.

    Args:
        p: a paragraph object exposing its raw XML as ``p._p.xml`` and its
           plain text as ``p.text`` (e.g. a python-docx paragraph).

    Returns:
        str: when the XML mentions ``w:del`` or ``w:ins``, the concatenation
        of every non-empty ``<w:t>`` element in document order; inserted runs
        are therefore kept, while text stored under any other tag (deleted
        runs use ``<w:delText>``) is left out. Otherwise ``p.text`` unchanged.
    """
    xml = p._p.xml
    if "w:del" in xml or "w:ins" in xml:
        tree = XML(xml)
        # Element.getiterator() was removed in Python 3.9; iter() is the
        # equivalent that works on every supported version.
        return "".join(node.text for node in tree.iter(TEXT) if node.text)
    return p.text
# Demo: print every paragraph of Hello.docx with its tracked changes accepted.
doc = Document("Hello.docx")
for paragraph in doc.paragraphs:
    accepted = get_accepted_text(paragraph)
    print(accepted)
Run Code Online (Sandbox Code Playgroud)