# -*- coding: utf-8 -*-
import scrapy


class Mkw1Item(scrapy.Item):
    """Container for the four values MukeSpider extracts from one entry."""

    # URL fragment pulled out of the entry's inline ``style`` attribute.
    img = scrapy.Field()
    # Text of the entry's first <p>.
    title = scrapy.Field()
    # Text of the entry's second <p>.
    type = scrapy.Field()
    # Text of the first <span> inside the entry's third <p>.
    pic = scrapy.Field()
# -*- coding: utf-8 -*-
import re

import scrapy

from .. import items


class MukeSpider(scrapy.Spider):
    """Spider that yields one ``Mkw1Item`` per anchor of the listing block."""

    name = 'muke'
    allowed_domains = ['imooc.com']
    # NOTE(review): the original start URL was destroyed during extraction;
    # only the fragment '/d/file/titlepic/span' survived, which is not a
    # crawlable URL. The site root is assumed from allowed_domains -- confirm
    # against the page the XPath below was written for.
    start_urls = ['https://www.imooc.com/']

    # Same expression the original used, now a raw string so that ``\.`` is
    # not an invalid string escape. It greedily matches from the first '//'
    # up to the last 'g' in the style text.
    _IMG_PATTERN = re.compile(r'//.*\.*g')

    def parse(self, response):
        """Extract image, title, type and pic from every listed anchor.

        Args:
            response: the downloaded page handed over by Scrapy.

        Yields:
            A new ``Mkw1Item`` for each ``<a>`` found under the listing
            container. A field whose node is missing is set to ``None``
            rather than aborting the whole page.
        """
        for anchor in response.xpath('//*[@id="main"]/div[5]/div[1]/a'):
            # Query relative to this anchor; a document-wide '//a[i]' would
            # also match unrelated anchors elsewhere on the page.
            style = anchor.xpath('./div/@style').extract_first(default='')
            found = self._IMG_PATTERN.search(style)

            # A fresh item per entry: re-yielding one mutated instance lets
            # later entries overwrite items still held downstream.
            item = items.Mkw1Item()
            item['img'] = found.group(0) if found else None
            item['title'] = anchor.xpath('./p[1]/text()').extract_first()
            item['type'] = anchor.xpath('./p[2]/text()').extract_first()
            item['pic'] = anchor.xpath(
                './p[3]/span[1]/text()').extract_first()
            yield item
# -*- coding: utf-8 -*-
import xlwt


class Mkw1Pipeline(object):
    """Item pipeline that stores every item as one row of an Excel sheet."""

    def __init__(self):
        """Create the workbook and write the header row."""
        # Index of the next free data row; row 0 holds the headers.
        self.num = 1
        self.wb = xlwt.Workbook()
        self.sheet = self.wb.add_sheet('慕课网')
        # Column order of the sheet; also the item fields that get exported.
        self.list = ['img', 'title', 'type', 'pic']
        for col, header in enumerate(self.list):
            self.sheet.write(0, col, header)

    def process_item(self, item, spider):
        """Append *item* to the sheet and pass it on.

        Args:
            item: the scraped item; read by field name.
            spider: the spider that produced the item (unused).

        Returns:
            The same item, so that any later pipeline still receives it.
        """
        # Write by header name instead of by the item's own key order, so
        # each value always lands under its matching column heading.
        for col, field in enumerate(self.list):
            self.sheet.write(self.num, col, item.get(field))
        self.num += 1
        return item

    def close_spider(self, spider):
        """Save the workbook once the spider has finished."""
        # NOTE(review): xlwt writes the legacy .xls format, so this file
        # carries an .xlsx name but .xls content -- confirm that is intended.
        self.wb.save('../mkw.xlsx')
本文地址:https://blog.csdn.net/Hoo_ligan/article/details/110261766
本文发布于:2023-04-04 10:22:30,感谢您对本站的认可!
本文链接:https://www.wtabcd.cn/fanwen/zuowen/cc56c90d891198948804d5b17e6d55fc.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
本文word下载地址:scrapy案例 爬取数据保存到excel.doc
本文 PDF 下载地址:scrapy案例 爬取数据保存到excel.pdf
留言与评论(共有 0 条评论)