更新了爬虫部分代码
This commit is contained in:
parent
98dc244c54
commit
3e313ffb91
|
|
@@ -1,5 +1,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from io import StringIO
|
||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode
|
||||||
|
import re
|
||||||
|
|
||||||
import scrapy
|
import scrapy
|
||||||
|
|
||||||
|
|
@@ -26,6 +28,9 @@ class TaobaoSpider(scrapy.Spider):
|
||||||
item = GoodsItem()
|
item = GoodsItem()
|
||||||
item['price'] = goods.xpath('div[5]/div[2]/div[1]/div[1]/strong/text()').extract_first()
|
item['price'] = goods.xpath('div[5]/div[2]/div[1]/div[1]/strong/text()').extract_first()
|
||||||
item['deal'] = goods.xpath('div[5]/div[2]/div[1]/div[2]/text()').extract_first()
|
item['deal'] = goods.xpath('div[5]/div[2]/div[1]/div[2]/text()').extract_first()
|
||||||
item['title'] = goods.xpath('div[6]/div[2]/div[2]/a/text()').extract_first()
|
segments = goods.xpath('div[6]/div[2]/div[2]/a/text()').extract()
|
||||||
|
title = StringIO()
|
||||||
|
for segment in segments:
|
||||||
|
title.write(re.sub('\s', '', segment))
|
||||||
|
item['title'] = title.getvalue()
|
||||||
yield item
|
yield item
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue