PYTHON: textract.exceptions.ShellError: The command antiword d:/doc\300 Dpi.doc failed with exit code 127
Я использую этот код для преобразования .doc и .docx в txt
import os
import textract
# Path to the folder containing .doc and .docx files
input_folder = "d:/doc"
# Path to the folder where .txt files will be saved
output_folder = "d:/doc"
# Get a list of all .doc and .docx files in the input folder
files = [f for f in os.listdir(input_folder) if f.endswith((".doc", ".docx"))]
# Loop through each file and convert it to .txt using Textract
for file in files:
file_path = os.path.join(input_folder, file)
output_file_name = os.path.splitext(file)[0] + ".txt"
output_file_path = os.path.join(output_folder, output_file_name)
text = textract.process(file_path).decode("utf-8")
with open(output_file_path, "w", encoding="utf-8") as txt_file:
txt_file.write(text)
print("Conversion complete!")
И У МЕНЯ ЭТА ОШИБКА НА ВЫВОДЕ
utils.py
*** Remote Interpreter Reinitialized ***
Traceback (most recent call last):
File "C:\Users\Castel\AppData\Roaming\Python\Python310\site-packages\textract\parsers\utils.py", line 87, in run
pipe = subprocess.Popen(
File "C:\Program Files\Python39\lib\subprocess.py", line 966, in __init__
self._execute_child(args, executable, preexec_fn, close_fds,
File "C:\Program Files\Python39\lib\subprocess.py", line 1435, in _execute_child
hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
FileNotFoundError: [WinError 2] The system cannot find the file specified
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "E:\Carte\BB\17 - Site Leadership\alte\Ionel Balauta\Aryeht\Task 1 - Traduce tot site-ul\Doar Google Web\Andreea\Meditatii\2023\Convert Docx and Doc to TXT with textract.py", line 19, in <module>
text = textract.process(file_path).decode("utf-8")
File "C:\Users\Castel\AppData\Roaming\Python\Python310\site-packages\textract\parsers\__init__.py", line 79, in process
return parser.process(filename, input_encoding, output_encoding, **kwargs)
File "C:\Users\Castel\AppData\Roaming\Python\Python310\site-packages\textract\parsers\utils.py", line 46, in process
byte_string = self.extract(filename, **kwargs)
File "C:\Users\Castel\AppData\Roaming\Python\Python310\site-packages\textract\parsers\doc_parser.py", line 9, in extract
stdout, stderr = self.run(['antiword', filename])
File "C:\Users\Castel\AppData\Roaming\Python\Python310\site-packages\textract\parsers\utils.py", line 95, in run
raise exceptions.ShellError(
textract.exceptions.ShellError: The command `antiword d:/doc\300 Dpi.doc` failed with exit code 127
------------- stdout -------------
------------- stderr -------------
>>>
Источник: Stack Overflow на русском