更新了爬虫部分代码
This commit is contained in:
parent
98dc244c54
commit
3e313ffb91
|
|
@@ -1,5 +1,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from io import StringIO
|
||||
from urllib.parse import urlencode
|
||||
import re
|
||||
|
||||
import scrapy
|
||||
|
||||
|
|
@@ -26,6 +28,9 @@ class TaobaoSpider(scrapy.Spider):
|
|||
item = GoodsItem()
|
||||
item['price'] = goods.xpath('div[5]/div[2]/div[1]/div[1]/strong/text()').extract_first()
|
||||
item['deal'] = goods.xpath('div[5]/div[2]/div[1]/div[2]/text()').extract_first()
|
||||
item['title'] = goods.xpath('div[6]/div[2]/div[2]/a/text()').extract_first()
|
||||
segments = goods.xpath('div[6]/div[2]/div[2]/a/text()').extract()
|
||||
title = StringIO()
|
||||
for segment in segments:
|
||||
title.write(re.sub('\s', '', segment))
|
||||
item['title'] = title.getvalue()
|
||||
yield item
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue