import os
import os.path

from win32com import client as wc
# Module-level list that the directory walk fills with full file paths.
c = []
# Root directories to scan; this path is the one used for experimenting.
rootdir = ["d:/77"]
# Save-format code passed to Word's SaveAs; 4 is wdFormatDOSText.
_WD_FORMAT_DOS_TEXT = 4


def txt(j, c):
    """Save the .docx file at ``c[j]`` as text through Word, then delete it.

    Args:
        j: Index into ``c`` of the document to convert.
        c: List of file paths; ``c[j]`` must end in a five-character
            ``.docx`` suffix, which is stripped to build the output name.

    The output name is the source path without its suffix plus
    ``"(translate txt)"``.
    NOTE(review): the output name carries no extension of its own; Word
    presumably appends one for the chosen format -- confirm.

    Raises:
        Whatever the COM calls or ``os.remove`` raise. The source file is
        removed only after the save and close have succeeded.
    """
    # Word runs as a separate process with its own working directory, so
    # hand it an absolute path instead of one relative to this script.
    source = os.path.abspath(c[j])
    newname = source[:-5] + "(translate txt)"

    word = wc.Dispatch('Word.Application')
    try:
        doc = word.Documents.Open(source)
        try:
            doc.SaveAs(newname, _WD_FORMAT_DOS_TEXT)
        finally:
            doc.Close()
    finally:
        # Always shut Word down, even on failure, so no process is left behind.
        word.Quit()

    os.remove(source)
    print("完成")


def wordt(c):
    """Convert every entry of ``c`` whose name ends in ``.docx``.

    Args:
        c: List of file paths. Entries with any other suffix are ignored.
            The suffix comparison is case-insensitive, so ``.DOCX`` also
            matches.
    """
    for j, path in enumerate(c):
        if path.lower().endswith(".docx"):
            txt(j, c)
# Collect the full path of every file below each root directory.
for i in rootdir:
    for parent, _dirnames, filenames in os.walk(i):
        for filename in filenames:
            c.append(os.path.join(parent, filename))
# Convert once, after the walk has finished: running this inside the loop
# would revisit paths whose source files were already deleted.
wordt(c)
将docx另存为txt，并且删除源文件
涉及到office中docx文档的打开与另存为命令
相关参考
# Reference example: save a Word document as text through COM automation.
from win32com import client as wc
# Obtain the Word application automation object.
word = wc.Dispatch('Word.Application')
doc = word.Documents.Open('c:/test')
# The second argument is the save-format code; 2 is wdFormatText.
doc.SaveAs('c:/test.text', 2)
# Close the document first, then shut Word down.
doc.Close()
word.Quit()
# NOTE(review): this opens 'c:\text', which is not the 'c:/test.text' path
# saved above, and the returned file object is neither used nor closed --
# confirm the intended path and wrap it in a `with` block if it is needed.
open(r'c:\text','r')
# Numeric codes for Word's save formats, listed in ascending order of value.
# One of these is passed as the second argument of ``doc.SaveAs``.
wdFormatDocument = 0
wdFormatDocument97 = 0  # same code as wdFormatDocument
wdFormatTemplate = 1
wdFormatTemplate97 = 1  # same code as wdFormatTemplate
wdFormatText = 2
wdFormatTextLineBreaks = 3
wdFormatDOSText = 4
wdFormatDOSTextLineBreaks = 5
wdFormatRTF = 6
wdFormatEncodedText = 7
wdFormatUnicodeText = 7  # same code as wdFormatEncodedText
wdFormatHTML = 8
wdFormatWebArchive = 9
wdFormatFilteredHTML = 10
wdFormatXML = 11
wdFormatXMLDocument = 12
wdFormatXMLDocumentMacroEnabled = 13
wdFormatXMLTemplate = 14
wdFormatXMLTemplateMacroEnabled = 15
wdFormatDocumentDefault = 16
wdFormatPDF = 17
wdFormatXPS = 18
wdFormatFlatXML = 19
wdFormatFlatXMLMacroEnabled = 20
wdFormatFlatXMLTemplate = 21
wdFormatFlatXMLTemplateMacroEnabled = 22
over!