首页 > 英语园地

python读取doc文件_python读取doc

更新时间:2023-06-25 12:35:22 阅读：评论：0

python读取doc文件_python读取doc

import os, time, fnmatch

from docx import Document

class arch:
    """Search ``.docx`` files below a directory for a text string.

    Creating an instance runs the complete search immediately: the
    directory tree is walked, every file whose name matches one of the
    ``fnmatch`` patterns is opened with python-docx, and an excerpt is
    printed for every occurrence of the search string.
    """

    def __init__(self, path, arch_string, file_filter,
                 content_extract=None, max_cutouts=None):
        """Configure the search and run it.

        Args:
            path: Root directory to scan.
            arch_string: Text to look for inside each paragraph.
            file_filter: One fnmatch pattern or an iterable of patterns
                that a file name must match to be scanned.
            content_extract: Number of characters shown before and after
                the start of each match.
            max_cutouts: Maximum number of excerpts printed per paragraph.

        When ``content_extract`` or ``max_cutouts`` is omitted, the
        module-level setting of the same name is used if it exists (the
        script entry point defines both); otherwise 35 and 20 are used.
        """
        self.arch_path = path
        self.arch_string = arch_string
        # A bare string would otherwise be iterated character by character.
        if isinstance(file_filter, str):
            file_filter = (file_filter,)
        self.file_filter = file_filter

        module_settings = globals()
        if content_extract is None:
            content_extract = module_settings.get("content_extract", 35)
        if max_cutouts is None:
            max_cutouts = module_settings.get("max_cutouts", 20)
        self.content_extract = content_extract
        self.max_cutouts = max_cutouts

        print("Search %s in %s..." % (self.arch_string, self.arch_path))
        print("_" * 80)
        time_begin = time.time()
        file_count = self.walk()
        print("_" * 80)
        print("%s files searched in %0.2fsec." % (
            file_count, time.time() - time_begin
        ))

    # Walk every file below the search path and count the scanned files.
    def walk(self):
        """Scan all matching files and return how many were scanned.

        Each file is scanned and counted once, even when its name matches
        more than one pattern in ``file_filter``.
        """
        file_count = 0
        # NOTE: followlinks=True can recurse forever if a link points to one
        # of its own parent directories (see the os.walk documentation).
        for root, _dirnames, filenames in os.walk(self.arch_path,
                                                  followlinks=True):
            for filename in filenames:
                if any(fnmatch.fnmatch(filename, pattern)
                       for pattern in self.file_filter):
                    self.arch_file(os.path.join(root, filename))
                    file_count += 1
        return file_count

    # Look for the search string in one document and print the excerpts.
    def arch_file(self, filepath):
        """Print the path and match excerpts for one document.

        The path is printed once, before the excerpts of the first
        paragraph that contains the search string. A file that cannot be
        opened is reported and skipped so the scan can continue.
        """
        if not self.arch_string:
            # An empty string is "contained" in every paragraph.
            return
        try:
            document = Document(filepath)
        except Exception as exc:  # per-file boundary: report, then go on
            print("Skipping %s: %s" % (filepath, exc))
            return

        path_printed = False
        for para in document.paragraphs:
            content = para.text
            if self.arch_string not in content:
                continue
            if not path_printed:
                print(filepath)
                path_printed = True
            self.cutout_content(content)

    # Cut the excerpts out of the text and print them.
    def cutout_content(self, content):
        """Print one ``>>>`` line per excerpt found in ``content``."""
        # Show line breaks and tabs as escapes so each excerpt stays on one
        # output line.
        escapes = {ord("\n"): "\\n", ord("\r"): "\\r", ord("\t"): "\\t"}
        for window in self._extract_windows(content):
            print(">>> %s" % window.translate(escapes))

    def _extract_windows(self, content):
        """Return the excerpts around each occurrence of the search string.

        Each excerpt spans ``content_extract`` characters before and after
        the start of a match, clipped to the bounds of ``content``. At most
        ``max_cutouts`` excerpts are returned.
        """
        needle = self.arch_string
        if not needle:
            return []
        windows = []
        current_pos = 0
        for _ in range(self.max_cutouts):
            # Look for the next occurrence at or after current_pos.
            pos = content.find(needle, current_pos)
            if pos == -1:
                break
            # Clamp the start: a negative index would wrap to the end of
            # the text and produce an empty or wrong excerpt.
            start = max(0, pos - self.content_extract)
            windows.append(content[start:pos + self.content_extract])
            # Resume right after this match; pos is already absolute.
            current_pos = pos + len(needle)
        return windows

# Main program entry point
if __name__ == "__main__":
    arch_path = r"c:\Users\Administrator\Desktop"
    # fnmatch patterns for the files to scan. Only .docx is listed:
    # python-docx cannot open legacy .doc files.
    file_filter = ("*.docx",)
    arch_string = "history"
    # The two settings below are read by the arch class as module-level
    # settings.
    content_extract = 35  # characters shown before and after each match
    max_cutouts = 20  # maximum number of excerpts shown per paragraph
    arch(arch_path, arch_string, file_filter)

本文发布于:2023-06-25 12:35:22，感谢您对本站的认可！

本文链接：https://www.wtabcd.cn/fanwen/fan/78/1036235.html

上一篇：IMAGE PROCESSING DEVICE AND IMAGE PROCESSING PROGR

下一篇：阀门专业术语

标签：寻找剪切高考

留言与评论（共有 0 条评论）