scrapy案例爬取数据保存到excel

更新时间:2023-04-04 10:22:33 阅读：评论：0

# -*- coding: utf-8 -*-
import scrapy


class Mkw1Item(scrapy.Item):
    """Container for one course entry scraped by the ``muke`` spider.

    The four fields are also the column headers used when the items are
    written to the spreadsheet.
    """

    # URL fragment pulled out of the entry's inline ``style`` attribute.
    img = scrapy.Field()
    # Text of the entry's first <p>.
    title = scrapy.Field()
    # Text of the entry's second <p>.
    type = scrapy.Field()
    # Text of the first <span> inside the entry's third <p>
    # (presumably the price -- TODO confirm against the page).
    pic = scrapy.Field()

# -*- coding: utf-8 -*-
import re

import scrapy

from .. import items


class MukeSpider(scrapy.Spider):
    """Scrape course entries from imooc.com and yield them as ``Mkw1Item``s."""

    name = 'muke'
    allowed_domains = ['imooc.com']
    # TODO(review): the original start URL was truncated when this snippet was
    # extracted (only a garbled, unterminated fragment survived). The value
    # below is a placeholder on the allowed domain -- replace it with the real
    # course-listing URL before running the spider.
    start_urls = ['https://www.imooc.com/']

    # Raw-string form of the original pattern (same bytes at runtime): grabs
    # everything from the first "//" up to the last "g" in the style value.
    _IMG_URL_RE = re.compile(r'//.*\.*g')

    def parse(self, response):
        """Yield one ``Mkw1Item`` per course anchor found on the page.

        Args:
            response: The downloaded page for one of ``start_urls``.

        Yields:
            A freshly created ``Mkw1Item`` with ``img``, ``title``, ``type``
            and ``pic`` filled in. A field whose node is missing is set to an
            empty string instead of aborting the whole page.
        """
        for card in response.xpath('//*[@id="main"]/div[5]/div[1]/a'):
            # Query relative to this anchor so values cannot be picked up from
            # unrelated <a> elements elsewhere in the document.
            style = card.xpath('./div/@style').extract_first(default='')
            found = self._IMG_URL_RE.search(style)

            # A new item per entry: re-yielding one shared, mutated instance
            # lets later entries overwrite earlier ones downstream.
            item = items.Mkw1Item()
            item['img'] = found.group(0) if found else ''
            item['title'] = card.xpath('./p[1]/text()').extract_first(default='')
            item['type'] = card.xpath('./p[2]/text()').extract_first(default='')
            item['pic'] = card.xpath(
                './p[3]/span[1]/text()').extract_first(default='')
            yield item

# -*- coding: utf-8 -*-
import xlwt


class Mkw1Pipeline(object):
    """Item pipeline that writes each scraped item as one spreadsheet row.

    The workbook is built in memory and saved once, when the spider closes.
    """

    def __init__(self):
        """Create the workbook and sheet, and write the header row."""
        # Index of the next data row; row 0 holds the headers.
        self.num = 1
        self.wb = xlwt.Workbook()
        self.sheet = self.wb.add_sheet('慕课网')
        # Column order of the sheet; also the item fields that get exported.
        self.list = ['img', 'title', 'type', 'pic']
        for col, field in enumerate(self.list):
            self.sheet.write(0, col, field)

    def process_item(self, item, spider):
        """Append ``item`` to the sheet and pass it on.

        Args:
            item: Mapping-like scraped item holding the fields in ``self.list``.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, so later pipeline stages still receive it.
        """
        # Write by header name rather than by the item's own key order, so
        # every value lands under its matching column; a missing field
        # becomes an empty cell.
        for col, field in enumerate(self.list):
            self.sheet.write(self.num, col, item.get(field, ''))
        self.num += 1
        return item

    def close_spider(self, spider):
        """Save the workbook to disk when the spider finishes.

        Args:
            spider: The spider being closed (unused).
        """
        # NOTE(review): xlwt writes the legacy .xls format; the ".xlsx"
        # extension is kept for compatibility with existing consumers, but
        # Excel may reject the file -- consider renaming to ".xls".
        self.wb.save('../mkw.xlsx')