用Python拷貝word圖片到指定文件

2021-03-06 快學Python

作者：小小明，Pandas數據處理專家，致力於幫助無數數據從業者解決數據處理難題

編輯：朱小五，一隻不務正業的數據狗

日常工作中，領導要求你將一份 Word 文檔中的圖片存儲到一個文件夾內，你可能會一邊內心崩潰，一邊開始一張張的 另存為。

但假如領導要求你將幾百個word文檔中的圖片全部都拷貝出來，你是不是打算離職不幹了？

就比如下面這些word文檔中的圖片，你能否快速的把所有圖片都拷貝出來呢？

如果老朋友們看過這篇文章《老闆讓我從Word中複製出1000張圖片？》的話，就應該知道怎麼做了。

不過，上次分享的這種方法還是有缺陷的：把word文檔用壓縮軟體打開、逐個解壓的話，依然會耗費較長時間；另外，如果裡面摻雜了doc格式的word文檔，你還需要先將這些03版本的word文檔另存為docx格式。

今天，將給大家展示一下全新版本！！！

寫個程序，十秒內全部給你轉換完畢，並把圖片都提取出來，還能批量真實地轉換圖片格式，而不是簡單地修改一下擴展名。

（文末附帶exe可執行程序）

下面開始展示

doc格式批量轉為docx

python提供了win32com模塊，其中的SaveAs方法可以代替人手批量將文件另存為我們需要的格式。

win32com包含在pypiwin32模塊中，只需安裝pypiwin32模塊即可：

pip install pypiwin32
下面的代碼將指定目錄下的doc文件轉換為docx格式，並放在該目錄的temp_dir下面：
from win32com import client as wc  # 導入模塊
from pathlib import Path
import os
import shutil

# Folder that holds the Word documents, and the name of the scratch
# sub-folder (created inside it) that receives the converted .docx files.
doc_path = r"E:\tmp\答疑整理"
temp_dir = "temp"
# Start from an empty scratch folder so files left over from an earlier run
# are not picked up again.
if os.path.exists(f"{doc_path}/{temp_dir}"):
    shutil.rmtree(f"{doc_path}/{temp_dir}")
os.mkdir(f"{doc_path}/{temp_dir}")

word = wc.Dispatch("Word.Application")  # start the Word application via COM
try:
    for filename in Path(doc_path).glob("*.doc"):
        file = str(filename)
        # Same file name inside the scratch folder, with ".doc" -> ".docx".
        dest_name = str(filename.parent/f"{temp_dir}"/str(filename.name))+"x"
        print(file, dest_name)
        doc = word.Documents.Open(file)  # open the Word document
        try:
            # Save a copy with the ".docx" suffix; format code 12 means docx.
            doc.SaveAs(dest_name, 12)
        finally:
            # Close each document as soon as it is converted; otherwise every
            # file stays open in Word until Quit(). 0 = do not save changes.
            doc.Close(0)
finally:
    # Always shut Word down, even if a conversion failed.
    word.Quit()
運行結果：
轉換得到的文件：
批量提取docx文檔的圖片
docx文檔其實也是一個zip壓縮包，所以我們可以通過zip包解壓它。下面的代碼將解壓每個docx文檔中的圖片，並將其移動到臨時目錄下的imgs目錄下：
import itertools
from zipfile import ZipFile
import shutil

# Start from an empty "<temp_dir>/imgs" folder for the extracted pictures.
if os.path.exists(f"{doc_path}/{temp_dir}/imgs"):
    shutil.rmtree(f"{doc_path}/{temp_dir}/imgs")
os.makedirs(f"{doc_path}/{temp_dir}/imgs")

i = 1  # running number used as the file name of each extracted picture
# Process the .docx files that were already in doc_path as well as the ones
# converted from .doc into the scratch folder.
for filename in itertools.chain(Path(doc_path).glob("*.docx"), (Path(doc_path)/temp_dir).glob("*.docx")):
    print(filename)
    with ZipFile(filename) as zip_file:
        for names in zip_file.namelist():
            # Only archive members under word/media/ whose name starts with
            # "image" are extracted.
            if names.startswith("word/media/image"):
                # extract() recreates the archive path below doc_path, i.e.
                # doc_path/word/media/<name>; move it to imgs/<i><ext>.
                zip_file.extract(names, doc_path)
                new_name = f"{i}{os.path.splitext(names)[1]}"
                os.rename(f"{doc_path}/{names}",
                          f"{doc_path}/{temp_dir}/imgs/{new_name}")
                print("\t", names, new_name)
                i += 1
# The intermediate "word" folder only exists when at least one picture was
# extracted, so check before removing it.
if os.path.exists(f"{doc_path}/word"):
    shutil.rmtree(f"{doc_path}/word")
列印結果：
提取結果：
批量圖片格式轉換
PIL：Python Imaging Library，已經是Python平臺事實上的圖像處理標準庫了。PIL功能非常強大，但API卻非常簡單易用。
由於PIL僅支持到Python 2.7，加上年久失修，於是一群志願者在PIL的基礎上創建了兼容的版本，名字叫Pillow，支持最新Python 3.x，又加入了許多新特性，因此，我們可以直接安裝使用Pillow。
如果安裝了Anaconda，Pillow就已經可用了。否則，需要在命令行下通過pip安裝：
pip install pillow
直接修改文件擴展名並不能真實的修改圖片格式，通過pillow庫我們即可將圖片批量真實的轉換為jpg格式：
from PIL import Image

# Output folder for the converted pictures; created only if it is missing.
jpg_dir = f"{doc_path}/imgs"
if not os.path.exists(jpg_dir):
    os.mkdir(jpg_dir)

# Re-encode every extracted picture as a real JPEG (not just a renamed file).
for src in Path(f"{doc_path}/{temp_dir}/imgs").glob("*"):
    # Keep the part of the file name that precedes the first dot.
    base = src.name[:src.name.find('.')]
    with Image.open(str(src)) as im:
        rgb = im.convert('RGB')
        rgb.save(f"{jpg_dir}/{base}.jpg", 'jpeg')
轉換後：
完整代碼
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# 創建時間：2020/12/25 21:46
__author__ = 'xiaoxiaoming'

import itertools
import os
import shutil
from pathlib import Path
from zipfile import ZipFile

from PIL import Image
from win32com import client as wc  # 導入模塊


def word_img_extract(doc_path, temp_dir="temp"):
    """Extract the pictures of all Word documents in *doc_path* as JPEG files.

    The work is done in three stages:

    1. Every ``*.doc`` file directly inside *doc_path* is saved as ``.docx``
       into ``doc_path/temp_dir`` through the Word COM application.
    2. Every ``*.docx`` file in *doc_path* and in ``doc_path/temp_dir`` is
       opened as a zip archive; members whose name starts with
       ``word/media/image`` are moved to ``doc_path/temp_dir/imgs`` and
       numbered 1, 2, 3, ...
    3. Each extracted picture is converted to RGB and saved as
       ``doc_path/imgs/<number>.jpg``.

    Args:
        doc_path: Folder containing the Word documents.
        temp_dir: Name of the scratch sub-folder inside *doc_path*. It is
            deleted and recreated on every call.

    Progress is reported with ``print``; nothing is returned.
    """
    work_dir = f"{doc_path}/{temp_dir}"
    img_dir = f"{work_dir}/imgs"
    out_dir = f"{doc_path}/imgs"

    # Start from an empty scratch folder so files left over from an earlier
    # run are not processed again.
    if os.path.exists(work_dir):
        shutil.rmtree(work_dir)
    os.mkdir(work_dir)

    # Stage 1: .doc -> .docx. Word is only launched when there is work to do.
    doc_files = list(Path(doc_path).glob("*.doc"))
    if doc_files:
        word = wc.Dispatch("Word.Application")  # start Word via COM
        try:
            for filename in doc_files:
                file = str(filename)
                dest_name = str(filename.parent / temp_dir / (filename.name + "x"))
                print(file, dest_name)
                # NOTE(review): absolute paths are passed because Word is
                # expected to resolve relative paths against its own working
                # directory rather than this script's - confirm.
                doc = word.Documents.Open(os.path.abspath(file))
                try:
                    # Format code 12 means docx.
                    doc.SaveAs(os.path.abspath(dest_name), 12)
                finally:
                    # Close each document right away instead of keeping all
                    # of them open until Quit(). 0 = do not save changes.
                    doc.Close(0)
        finally:
            word.Quit()

    # Stage 2: pull the pictures out of every .docx archive.
    if os.path.exists(img_dir):
        shutil.rmtree(img_dir)
    os.makedirs(img_dir)

    i = 1  # running number used as the file name of each extracted picture
    for filename in itertools.chain(Path(doc_path).glob("*.docx"), (Path(doc_path) / temp_dir).glob("*.docx")):
        print(filename)
        with ZipFile(filename) as zip_file:
            for names in zip_file.namelist():
                if names.startswith("word/media/image"):
                    # extract() recreates the archive path below doc_path
                    # (doc_path/word/media/<name>); move it to imgs/<i><ext>.
                    zip_file.extract(names, doc_path)
                    new_name = f"{i}{os.path.splitext(names)[1]}"
                    os.rename(f"{doc_path}/{names}", f"{img_dir}/{new_name}")
                    print("\t", names, new_name)
                    i += 1
    # The intermediate "word" folder only exists when at least one picture
    # was extracted.
    if os.path.exists(f"{doc_path}/word"):
        shutil.rmtree(f"{doc_path}/word")

    # Stage 3: re-encode every picture as a real JPEG.
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    for filename in Path(img_dir).glob("*"):
        stem = os.path.splitext(filename.name)[0]
        with Image.open(str(filename)) as im:
            im.convert('RGB').save(f"{out_dir}/{stem}.jpg", 'jpeg')


if '__main__' == __name__:
    # Example run: the document folder and the name of the scratch
    # sub-folder that is created inside it.
    doc_path, temp_dir = r"E:\tmp\答疑整理", "temp"
    word_img_extract(doc_path=doc_path, temp_dir=temp_dir)
最終全部執行完成耗時7s：
GUI圖形化工具開發
下面使用PySimpleGUI開發一個圖形化工具，使用以下命令安裝該庫：
pip install PySimpleGUI
如果是下載速度慢的可以用下面的清華鏡像地址下載：
pip install PySimpleGUI -i https://pypi.tuna.tsinghua.edu.cn/simple
以下是完整代碼：
import PySimpleGUI as sg

from word_img_extract import word_img_extract

sg.change_look_and_feel("GreenMono")

# Row 1: folder input with a browse button.
# Row 2: start button and a status text.
layout = [
    [
        sg.Text("請輸入word文檔所在的目錄："),
        sg.In(size=(25, 1), enable_events=True, key="-FOLDER-"),
        sg.FolderBrowse('瀏覽'),
    ], [
        sg.Button('開始抽取', enable_events=True, key="抽取"),
        sg.Text(size=(40, 1), key="-TOUT-")
    ]
]
window = sg.Window('word文檔圖片抽取系統', layout)
while True:
    event, values = window.read()
    if event in (None,):
        break  # the window was closed
    elif event == "抽取":
        if values["-FOLDER-"]:
            window["-TOUT-"].update("準備抽取！！！")
            sg.popup('抽取期間程序將處於假死狀態，請稍等片刻，提取完成後會彈出提示！！！\n點擊ok後開始抽取！！！')
            window["-TOUT-"].update("正在抽取中...")
            # Repaint now: the extraction below blocks the event loop, so
            # without this the status text would not be shown until it ends.
            window.refresh()
            try:
                # The scratch folder name is passed explicitly because the
                # two-argument word_img_extract(doc_path, temp_dir) has no
                # default for it.
                word_img_extract(values["-FOLDER-"], "temp")
            except Exception as e:
                # Top-level boundary: report the failure instead of letting
                # it terminate the GUI.
                window["-TOUT-"].update("抽取失敗！！！")
                sg.popup(f'抽取失敗：{e}')
            else:
                window["-TOUT-"].update("抽取完畢！！！")
                sg.popup('抽取完畢！！！')
        else:
            sg.popup('請先輸入word文檔所在的路徑！！！')
    print(f'Event: {event}， values: {values}')
window.close()
運行效果：
打包exe
創建並激活虛擬環境：
conda create -n gui python=3.6
conda activate gui
注意：創建虛擬環境和激活環境並不是必須，只是為了精簡環境，可以跳過
安裝打包所用的包：
pip install PySimpleGUI
pip install pillow
pip install pywin32
pip install pyinstaller
執行以下命令進行打包：
pyinstaller -F --icon=C:\Users\Think\Pictures\ico\ooopic_1467046829.ico word_img_extract_GUI.py
常用參數說明：
-F 表示打包成單個可執行文件。
-w 表示去掉控制臺窗口，這在GUI界面時非常有用。不過如果是命令行程序的話，那就把這個選項刪除吧！
-p 表示自定義需要加載的模塊搜索路徑，一般情況下用不到。
打包結果：
帶上-w參數打包，可以去掉控制臺：
pyinstaller -wF --icon=C:\Users\Think\Pictures\ico\ooopic_1467046829.ico word_img_extract_GUI.py
給GUI加入進度條
改造處理程序，藉助生成器反饋程序的處理進度，完整代碼如下：
import itertools
import os
import shutil
from pathlib import Path
from zipfile import ZipFile

from PIL import Image
from win32com import client as wc  # 導入模塊

def word_img_extract(doc_path, temp_dir="temp"):
    """Extract the pictures of all Word documents in *doc_path*, with progress.

    This is a generator: no work is done until it is iterated. It runs three
    stages and yields after every processed file:

    1. Every ``*.doc`` file directly inside *doc_path* is saved as ``.docx``
       into ``doc_path/temp_dir`` through the Word COM application.
    2. Every ``*.docx`` file in *doc_path* and in ``doc_path/temp_dir`` is
       opened as a zip archive; members whose name starts with
       ``word/media/image`` are moved to ``doc_path/temp_dir/imgs`` and
       numbered 1, 2, 3, ...
    3. Each extracted picture is converted to RGB and saved as
       ``doc_path/imgs/<number>.jpg``.

    Args:
        doc_path: Folder containing the Word documents.
        temp_dir: Name of the scratch sub-folder inside *doc_path*. It is
            deleted and recreated on every run.

    Yields:
        ``(label, progress)`` tuples, where *label* names the current stage
        and *progress* is an int scaled so that 1000 means the stage is done.

    Raises:
        Exception: If *doc_path* contains neither ``*.doc`` nor ``*.docx``
            files.
    """
    work_dir = f"{doc_path}/{temp_dir}"
    img_dir = f"{work_dir}/imgs"
    out_dir = f"{doc_path}/imgs"

    # Start from an empty scratch folder so files left over from an earlier
    # run are not processed again.
    if os.path.exists(work_dir):
        shutil.rmtree(work_dir)
    os.mkdir(work_dir)

    # Stage 1: .doc -> .docx. A folder that holds only .docx files is valid
    # input, so the "no documents" error requires both kinds to be missing.
    files = list(Path(doc_path).glob("*.doc"))
    if not files and not any(Path(doc_path).glob("*.docx")):
        raise Exception("當前目錄中沒有word文檔")
    if files:
        word = wc.Dispatch("Word.Application")  # start Word via COM
        try:
            for i, filename in enumerate(files, 1):
                # NOTE(review): absolute paths are passed because Word is
                # expected to resolve relative paths against its own working
                # directory rather than this script's - confirm.
                file = os.path.abspath(str(filename))
                dest_name = os.path.abspath(
                    str(filename.parent / temp_dir / (filename.name + "x")))
                doc = word.Documents.Open(file)
                try:
                    doc.SaveAs(dest_name, 12)  # format code 12 means docx
                finally:
                    # Close each document right away instead of keeping all
                    # of them open until Quit(). 0 = do not save changes.
                    doc.Close(0)
                yield "word doc格式轉docx格式：", i * 1000 // len(files)
        finally:
            word.Quit()

    # Stage 2: pull the pictures out of every .docx archive.
    if os.path.exists(img_dir):
        shutil.rmtree(img_dir)
    os.makedirs(img_dir)

    i = 1  # running number used as the file name of each extracted picture
    files = list(itertools.chain(Path(doc_path).glob("*.docx"), (Path(doc_path) / temp_dir).glob("*.docx")))
    for j, filename in enumerate(files, 1):
        with ZipFile(filename) as zip_file:
            for names in zip_file.namelist():
                if names.startswith("word/media/image"):
                    # extract() recreates the archive path below doc_path
                    # (doc_path/word/media/<name>); move it to imgs/<i><ext>.
                    zip_file.extract(names, doc_path)
                    os.rename(f"{doc_path}/{names}",
                              f"{img_dir}/{i}{os.path.splitext(names)[1]}")
                    i += 1
        yield "word提取圖片：", j * 1000 // len(files)
    # The intermediate "word" folder only exists when at least one picture
    # was extracted.
    if os.path.exists(f"{doc_path}/word"):
        shutil.rmtree(f"{doc_path}/word")

    # Stage 3: re-encode every picture as a real JPEG.
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    files = list(Path(img_dir).glob("*"))
    for i, filename in enumerate(files, 1):
        stem = os.path.splitext(filename.name)[0]
        with Image.open(str(filename)) as im:
            im.convert('RGB').save(f"{out_dir}/{stem}.jpg", 'jpeg')
        yield "圖片轉換為jpg格式：", i * 1000 // len(files)


if '__main__' == __name__:
    # Example run: drive the generator and print each progress report on
    # the same console line.
    doc_path = r"E:\tmp\答疑整理"
    progress = word_img_extract(doc_path)
    for msg, i in progress:
        print(f"\r {msg}{i}", end="")
GUI程序的最終完整代碼：
import PySimpleGUI as sg

from word_img_extract import word_img_extract

sg.change_look_and_feel("GreenMono")

# The window has three rows: folder selection, start button with an error
# text, and a stage label with a progress bar whose maximum is 1000.
folder_row = [
    sg.Text("請輸入word文檔所在的目錄："),
    sg.In(size=(25, 1), enable_events=True, key="-FOLDER-"),
    sg.FolderBrowse('瀏覽'),
]
action_row = [
    sg.Button('開始抽取', enable_events=True, key="抽取"),
    sg.Text(text_color="red", size=(47, 2), key="error"),
]
progress_row = [
    sg.Text("準備：", size=(20, 1), key="-TOUT-"),
    sg.ProgressBar(1000, orientation='h', size=(35, 20), key='progressbar'),
]
layout = [folder_row, action_row, progress_row]
window = sg.Window('word文檔圖片抽取系統', layout)
while True:
    event, values = window.read()
    if event is None:
        break  # the window was closed
    if event != "抽取":
        continue  # only the start button triggers any work
    folder = values["-FOLDER-"]
    if not folder:
        sg.popup('請先輸入word文檔所在的路徑！！！')
        continue
    window["error"].update("")
    try:
        # word_img_extract is a generator of (stage label, progress) pairs;
        # each pair is mirrored into the label and the progress bar.
        for msg, i in word_img_extract(folder):
            window["-TOUT-"].update(msg)
            window['progressbar'].UpdateBar(i)
        window["-TOUT-"].update('抽取完畢！！！')
    except Exception as e:
        # Any failure during extraction is shown in the red error text.
        window["error"].update(str(e))
window.close()
重新打包：
pyinstaller -wF --icon=C:\Users\Think\Pictures\ico\ooopic_1467046829.ico word_img_extract_GUI.py
運行效果：
exe下載
如果有小夥伴對代碼不感興趣，想直接使用打包好的exe軟體，掃碼關注「快學Python」，後臺回復「0109」，獲取完整代碼：