import itertools from zipfile import ZipFile import shutil
# Script fragment: copy every embedded picture out of the .docx files found in
# `doc_path` (and in `doc_path/temp_dir`) and convert each one to JPEG.
# `doc_path` and `temp_dir` are not defined here; the surrounding code must
# define them before this fragment runs.
import os
from pathlib import Path

from PIL import Image

# Scratch folder for the raw pictures; always start from an empty one.
raw_dir = Path(doc_path) / temp_dir / 'imgs'
if raw_dir.exists():
    shutil.rmtree(raw_dir)
os.makedirs(raw_dir)

i = 1  # running picture number, shared across all documents
for filename in itertools.chain(Path(doc_path).glob('*.docx'),
                                (Path(doc_path) / temp_dir).glob('*.docx')):
    print(filename)
    with ZipFile(filename) as zip_file:
        for names in zip_file.namelist():
            if names.startswith('word/media/image'):
                # Path.suffix yields '' for a dot-less member name, whereas
                # slicing from str.find('.') would slice from index -1.
                new_name = f'{i}{Path(names).suffix}'
                # Stream the member straight to its final location, so no
                # `word/` tree is created (or deleted) inside doc_path.
                with zip_file.open(names) as src, \
                        open(raw_dir / new_name, 'wb') as dst:
                    shutil.copyfileobj(src, dst)
                print('\t', names, new_name)
                i += 1

# The JPEG output folder is not created anywhere else, so make sure it exists.
jpg_dir = Path(doc_path) / 'imgs'
os.makedirs(jpg_dir, exist_ok=True)
for filename in raw_dir.glob('*'):
    file = str(filename)
    with Image.open(file) as im:
        # convert('RGB') before saving with the 'jpeg' format.
        im.convert('RGB').save(str(jpg_dir / f'{filename.stem}.jpg'), 'jpeg')
# Script fragment (repeat of the extraction snippet): copy every embedded
# picture out of the .docx files found in `doc_path` (and in
# `doc_path/temp_dir`) and convert each one to JPEG.
# `doc_path` and `temp_dir` are not defined here; the surrounding code must
# define them before this fragment runs.
import os
from pathlib import Path

from PIL import Image

# Scratch folder for the raw pictures; always start from an empty one.
raw_dir = Path(doc_path) / temp_dir / 'imgs'
if raw_dir.exists():
    shutil.rmtree(raw_dir)
os.makedirs(raw_dir)

i = 1  # running picture number, shared across all documents
for filename in itertools.chain(Path(doc_path).glob('*.docx'),
                                (Path(doc_path) / temp_dir).glob('*.docx')):
    print(filename)
    with ZipFile(filename) as zip_file:
        for names in zip_file.namelist():
            if names.startswith('word/media/image'):
                # Path.suffix yields '' for a dot-less member name, whereas
                # slicing from str.find('.') would slice from index -1.
                new_name = f'{i}{Path(names).suffix}'
                # Stream the member straight to its final location, so no
                # `word/` tree is created (or deleted) inside doc_path.
                with zip_file.open(names) as src, \
                        open(raw_dir / new_name, 'wb') as dst:
                    shutil.copyfileobj(src, dst)
                print('\t', names, new_name)
                i += 1

# The JPEG output folder is not created anywhere else, so make sure it exists.
jpg_dir = Path(doc_path) / 'imgs'
os.makedirs(jpg_dir, exist_ok=True)
for filename in raw_dir.glob('*'):
    file = str(filename)
    with Image.open(file) as im:
        # convert('RGB') before saving with the 'jpeg' format.
        im.convert('RGB').save(str(jpg_dir / f'{filename.stem}.jpg'), 'jpeg')
import itertools import os import shutil from pathlib import Path from zipfile import ZipFile
from PIL import Image from win32com import client as wc # 導(dǎo)入模塊
def word_img_extract(doc_path, temp_dir='temp'):
    """Extract the embedded pictures of all Word files in a folder as JPEGs.

    This is a generator: nothing happens until it is iterated, and each
    yielded item reports progress. The work runs in three stages:

    1. Every ``*.doc`` file in ``doc_path`` is opened through the Word COM
       application and saved as ``<name>.docx`` inside
       ``doc_path/temp_dir``.
    2. Every ``word/media/image*`` member of every ``*.docx`` archive (both
       the ones in ``doc_path`` and the converted ones in the temp folder)
       is copied to ``doc_path/temp_dir/imgs`` as ``<n><suffix>``, where
       ``n`` counts from 1 across all documents.
    3. Every file in ``doc_path/temp_dir/imgs`` is opened with PIL,
       converted to RGB and saved as ``doc_path/imgs/<n>.jpg``.

    Args:
        doc_path: Folder that contains the Word documents.
        temp_dir: Name of the scratch sub-folder of ``doc_path``. It is
            deleted and recreated on every run.

    Yields:
        ``(message, progress)`` tuples, where ``progress`` is the integer
        ``index * 1000 // total`` of the stage that is currently running.

    Raises:
        Exception: If ``doc_path`` contains neither ``*.doc`` nor ``*.docx``
            files.
    """
    # NOTE(review): the message strings in this function are reproduced
    # byte-for-byte from the original. They look like they contain
    # transliteration artifacts such as "(dāng)" - confirm against the
    # upstream source before changing them.

    # Word runs as a separate process; presumably it resolves relative file
    # names against its own working directory, so work with absolute paths.
    root = Path(os.path.abspath(doc_path))
    temp_root = root / temp_dir
    raw_dir = temp_root / 'imgs'
    jpg_dir = root / 'imgs'

    # Start from an empty scratch folder (this also creates temp_root).
    if temp_root.exists():
        shutil.rmtree(temp_root)
    os.makedirs(raw_dir)

    # ---- Stage 1: .doc -> .docx through the Word application ------------
    doc_files = list(root.glob('*.doc'))
    # Stage 2 also reads .docx files directly, so only give up when the
    # folder holds no Word document of either kind.
    if not doc_files and not any(root.glob('*.docx')):
        raise Exception('當(dāng)前目錄中沒(méi)有word文檔')

    if doc_files:
        docx_format = 12  # SaveAs file-format code used for '.docx' output
        word = wc.Dispatch('Word.Application')
        try:
            for i, filename in enumerate(doc_files, 1):
                dest_name = str(temp_root / filename.name) + 'x'
                doc = word.Documents.Open(str(filename))
                try:
                    doc.SaveAs(dest_name, docx_format)
                finally:
                    # Release the document instead of leaving every file
                    # open until Word quits.
                    doc.Close(False)
                yield 'word doc格式轉(zhuǎn)docx格式:', i * 1000 // len(doc_files)
        finally:
            word.Quit()

    # ---- Stage 2: copy the pictures out of each .docx archive -----------
    docx_files = list(itertools.chain(root.glob('*.docx'),
                                      temp_root.glob('*.docx')))
    count = 0  # running picture number, shared across all documents
    for j, filename in enumerate(docx_files, 1):
        with ZipFile(filename) as zip_file:
            for member in zip_file.namelist():
                if not member.startswith('word/media/image'):
                    continue
                count += 1
                # Path.suffix yields '' for a dot-less member name, whereas
                # slicing from str.find('.') would slice from index -1.
                target = raw_dir / f'{count}{Path(member).suffix}'
                # Stream the member straight to its destination, so no
                # `word/` tree is created (or deleted) inside doc_path.
                with zip_file.open(member) as src, open(target, 'wb') as dst:
                    shutil.copyfileobj(src, dst)
        yield 'word提取圖片:', j * 1000 // len(docx_files)

    # ---- Stage 3: convert every extracted picture to JPEG ---------------
    # The output folder is not created anywhere else; make sure it exists.
    os.makedirs(jpg_dir, exist_ok=True)
    raw_images = list(raw_dir.glob('*'))
    for i, filename in enumerate(raw_images, 1):
        with Image.open(str(filename)) as im:
            # convert('RGB') before saving with the 'jpeg' format.
            im.convert('RGB').save(str(jpg_dir / f'{filename.stem}.jpg'),
                                   'jpeg')
        yield '圖片轉(zhuǎn)換為jpg格式:', i * 1000 // len(raw_images)
if __name__ == '__main__':
    # Script entry point: run the extraction on a fixed folder and show the
    # progress on a single console line.
    doc_path = r'E:\tmp\答疑整理'
    progress_events = word_img_extract(doc_path)
    for msg, i in progress_events:
        # '\r' moves the cursor back to the start of the line and end=''
        # suppresses the newline, so each update overwrites the last one.
        print(f'\r {msg}{i}', end='')