# Reading .docx files with Python
import os, time, fnmatch
from docx import Document
class arch:
    """Search Word (.docx) documents below a directory for a string.

    Creating an instance runs the whole search immediately: it prints a
    header, walks ``path``, prints a context snippet for every hit and
    finishes with the number of files searched and the elapsed time.
    """

    # Defaults used when neither a constructor argument nor a module-level
    # name of the same spelling supplies a value.
    content_extract = 35
    max_cutouts = 20

    def __init__(self, path, arch_string, file_filter,
                 content_extract=None, max_cutouts=None):
        """Store the search settings and run the search.

        Args:
            path: Root directory to walk.
            arch_string: Text to look for inside each paragraph.
            file_filter: One fnmatch pattern or an iterable of patterns
                that a file name must match to be searched.
            content_extract: Number of characters shown on each side of
                the start of a hit.
            max_cutouts: Maximum number of snippets printed per text.

        When ``content_extract`` / ``max_cutouts`` are omitted, a
        module-level variable of the same name is used if one exists
        (that is how the script entry point configures them); otherwise
        the class defaults apply.
        """
        self.arch_path = path
        self.arch_string = arch_string
        # A lone pattern string would otherwise be iterated char by char.
        if isinstance(file_filter, str):
            file_filter = (file_filter,)
        self.file_filter = file_filter

        if content_extract is None:
            content_extract = globals().get(
                "content_extract", self.content_extract
            )
        if max_cutouts is None:
            max_cutouts = globals().get("max_cutouts", self.max_cutouts)
        self.content_extract = content_extract
        self.max_cutouts = max_cutouts

        print("Search %s in %s..." % (self.arch_string, self.arch_path))
        print("_" * 80)
        time_begin = time.time()
        file_count = self.walk()
        print("_" * 80)
        print("%s files searched in %0.2fsec." % (
            file_count, time.time() - time_begin
        ))

    def walk(self):
        """Search every file whose name matches a filter pattern.

        Returns:
            The number of files handed to ``arch_file``. A file that
            matches several patterns is searched and counted once.
        """
        file_count = 0
        # NOTE: followlinks=True descends into symlinked directories; a link
        # that points at one of its own parents makes os.walk recurse forever.
        for root, _dirs, filelist in os.walk(self.arch_path, followlinks=True):
            for filename in filelist:
                if any(fnmatch.fnmatch(filename, pattern)
                       for pattern in self.file_filter):
                    self.arch_file(os.path.join(root, filename))
                    file_count += 1
        return file_count

    def arch_file(self, filepath):
        """Print the path and context snippets for hits in one document.

        The path is printed once, before the snippets of the first
        paragraph that contains the search string.
        """
        if not self.arch_string:
            # An empty string is "found" everywhere; treat it as no match.
            return
        d = Document(filepath)
        path_shown = False
        for para in d.paragraphs:
            text = para.text
            if self.arch_string not in text:
                continue
            if not path_shown:
                print(filepath)
                path_shown = True
            self.cutout_content(text)

    def cutout_content(self, content):
        """Print up to ``max_cutouts`` snippets around hits in ``content``.

        Each snippet spans ``content_extract`` characters before and after
        the position where a hit starts, clipped at the start of the text.
        Control and non-ASCII characters are shown as escape sequences so
        every snippet stays on one line. A blank line ends the output.
        """
        needle = self.arch_string
        if not needle:
            # Searching for "" would report the same position repeatedly.
            return
        radius = self.content_extract
        current_pos = 0
        for _ in range(self.max_cutouts):
            # Look for the next hit at or after current_pos.
            pos = content.find(needle, current_pos)
            if pos < 0:
                break
            # Clamp the start: a negative index would wrap to the end of
            # the text and produce an empty or wrong window.
            content_window = content[max(0, pos - radius):pos + radius]
            print(">>>", content_window.encode("unicode_escape").decode("ascii"))
            # Continue right after this hit (assign, not accumulate).
            current_pos = pos + len(needle)
        print()
# Main program entry point.
if __name__ == "__main__":
    # Root directory whose tree is searched.
    arch_path = r"c:\Users\Administrator\Desktop"
    # fnmatch patterns for candidate files. The former second entry ".doc"
    # had no wildcard, so it could only match a file named exactly ".doc";
    # it is dropped because python-docx opens .docx packages, not legacy
    # .doc files.
    file_filter = ("*.docx",)
    arch_string = "history"
    content_extract = 35  # characters of context shown on each side of a hit
    max_cutouts = 20  # maximum number of snippets printed per searched text
    arch(arch_path, arch_string, file_filter)