更新了爬虫部分代码

This commit is contained in:
jackfrued 2018-06-09 10:36:52 +08:00
parent 98dc244c54
commit 3e313ffb91
1 changed file with 7 additions and 2 deletions

View File

@@ -1,5 +1,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from io import StringIO
from urllib.parse import urlencode from urllib.parse import urlencode
import re
import scrapy import scrapy
@@ -26,6 +28,9 @@ class TaobaoSpider(scrapy.Spider):
item = GoodsItem() item = GoodsItem()
item['price'] = goods.xpath('div[5]/div[2]/div[1]/div[1]/strong/text()').extract_first() item['price'] = goods.xpath('div[5]/div[2]/div[1]/div[1]/strong/text()').extract_first()
item['deal'] = goods.xpath('div[5]/div[2]/div[1]/div[2]/text()').extract_first() item['deal'] = goods.xpath('div[5]/div[2]/div[1]/div[2]/text()').extract_first()
item['title'] = goods.xpath('div[6]/div[2]/div[2]/a/text()').extract_first() segments = goods.xpath('div[6]/div[2]/div[2]/a/text()').extract()
title = StringIO()
for segment in segments:
title.write(re.sub('\s', '', segment))
item['title'] = title.getvalue()
yield item yield item