python读取doc文件_python读取doc

更新时间:2023-06-25 12:35:22 阅读: 评论:0

python读取doc文件_python读取doc

import os, time, fnmatch
from docx import Document
class arch:
    """Search the paragraphs of Word (.docx) files under a directory tree.

    Constructing an instance runs the whole search immediately: it prints a
    header, walks ``path``, prints an excerpt for every hit and finally
    prints how many files matched the filter and how long the run took.

    Args:
        path: Root directory to walk.
        arch_string: Text to look for inside each paragraph.
        file_filter: One fnmatch pattern or an iterable of patterns that a
            file name must match to be opened.
        content_extract: Number of characters of context kept before the
            start of a hit; the excerpt ends the same number of characters
            after the start of the hit. Defaults to the module-level
            ``content_extract`` if one exists, else 35.
        max_cutouts: Maximum number of excerpts printed per paragraph.
            Defaults to the module-level ``max_cutouts`` if one exists,
            else 20.
    """

    # Fallbacks used when neither the constructor nor the module sets a value.
    DEFAULT_CONTENT_EXTRACT = 35
    DEFAULT_MAX_CUTOUTS = 20

    def __init__(self, path, arch_string, file_filter,
                 content_extract=None, max_cutouts=None):
        self.arch_path = path
        self.arch_string = arch_string
        # A lone pattern string would otherwise be iterated one character
        # at a time by walk().
        if isinstance(file_filter, str):
            self.file_filter = (file_filter,)
        else:
            self.file_filter = tuple(file_filter)

        # The script entry point configures these through module-level
        # names; keep honouring them when no explicit argument is given so
        # existing callers behave as before.
        module_settings = globals()
        if content_extract is None:
            content_extract = module_settings.get(
                "content_extract", self.DEFAULT_CONTENT_EXTRACT)
        if max_cutouts is None:
            max_cutouts = module_settings.get(
                "max_cutouts", self.DEFAULT_MAX_CUTOUTS)
        self.content_extract = content_extract
        self.max_cutouts = max_cutouts

        print("Search %s in %s..." % (self.arch_string, self.arch_path))
        print("_" * 80)
        time_begin = time.time()
        file_count = self.walk()
        print("_" * 80)
        print("%s files searched in %0.2f sec." % (
            file_count, time.time() - time_begin))

    def walk(self):
        """Search every file under ``arch_path`` whose name matches a filter.

        Returns:
            The number of files whose name matched at least one pattern.
        """
        file_count = 0
        # NOTE: followlinks=True descends into symlinked directories; a
        # link that points back to a parent directory makes os.walk loop.
        for root, _dirnames, filenames in os.walk(self.arch_path,
                                                  followlinks=True):
            for filename in filenames:
                # any() so that a file matching several patterns is searched
                # and counted once, not once per pattern.
                if any(fnmatch.fnmatch(filename, pattern)
                       for pattern in self.file_filter):
                    self.arch_file(os.path.join(root, filename))
                    file_count += 1
        return file_count

    def arch_file(self, filepath):
        """Print the path and excerpts for each paragraph containing the text.

        Files that python-docx cannot open as a .docx package are reported
        and skipped so that one bad file does not abort the whole walk.
        """
        # Imported here to keep the new dependency local to this class.
        from docx.opc.exceptions import PackageNotFoundError

        try:
            document = Document(filepath)
        except PackageNotFoundError:
            print("Skipping unreadable file: %s" % filepath)
            return

        for para in document.paragraphs:
            text = para.text
            if self.arch_string in text:
                print(filepath)
                self.cutout_content(text)

    def cutout_content(self, content):
        """Print up to ``max_cutouts`` excerpts of ``content`` around each hit.

        Each excerpt starts ``content_extract`` characters before the hit
        (clamped to the start of the text) and ends ``content_extract``
        characters after the start of the hit. A blank line is printed after
        the excerpts.
        """
        needle = self.arch_string
        if not needle:
            # An empty needle "matches" at every position without advancing.
            return

        current_pos = 0
        for _ in range(self.max_cutouts):
            pos = content.find(needle, current_pos)
            if pos == -1:
                break
            # Clamp at 0: a negative start index would wrap around to the
            # end of the string and yield an empty or wrong excerpt.
            start = max(0, pos - self.content_extract)
            content_window = content[start:pos + self.content_extract]
            print(">>>", self._escape_unprintable(content_window))
            # Resume right after this hit (assignment, not accumulation).
            current_pos = pos + len(needle)
        print()

    @staticmethod
    def _escape_unprintable(text):
        """Return ``text`` with non-printable characters backslash-escaped."""
        return "".join(
            ch if ch.isprintable()
            else ch.encode("unicode_escape").decode("ascii")
            for ch in text
        )
# Script entry point
if __name__ == "__main__":
    # Root directory to search (machine-specific; adjust as needed).
    arch_path = r"c:\Users\Administrator\Desktop"
    # fnmatch patterns. python-docx reads only .docx packages, and the old
    # ".doc" entry had no wildcard, so it never matched a real file.
    file_filter = ("*.docx",)
    arch_string = "history"
    content_extract = 35  # characters of context around each hit
    max_cutouts = 20  # maximum number of excerpts shown per paragraph
    arch(arch_path, arch_string, file_filter)

本文发布于:2023-06-25 12:35:22,感谢您对本站的认可!

本文链接:https://www.wtabcd.cn/fanwen/fan/78/1036235.html

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。

标签:寻找   剪切   高考
相关文章
留言与评论(共有 0 条评论)
   
验证码:
推荐文章
排行榜
Copyright ©2019-2022 Comsenz Inc.Powered by © 专利检索| 网站地图