12345678910111213141516171819202122232425262728293031323334353637 |
- # -*- coding: utf-8 -*-
- import scrapy
- import json
- import re
- from scrapy import Request
- from youni_spiders.items import ViceItem
class ViceSpider(scrapy.Spider):
    """Spider that pages through the vice.cn ``dndmore`` AJAX endpoint.

    The first request is sent with an empty ``ids_not_in`` query value.
    Every JSON response is expected to carry a list under the ``items`` key;
    each entry is turned into a ``ViceItem`` and its id is appended to the
    URL for the follow-up request.
    NOTE(review): presumably the server uses ``ids_not_in`` to exclude
    already-seen entries -- confirm against the endpoint's behaviour.
    """

    name = 'vice'
    allowed_domains = ['vice.cn']
    custom_settings = {
        'ITEM_PIPELINES': {'youni_spiders.pipelines.VicePipeline': 1,}
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36',
    }

    def start_requests(self):
        """Yield the initial request, with no ids excluded yet."""
        url = 'http://www.vice.cn/ajax/dndmore?ids_not_in='
        yield Request(url, headers=self.headers)

    def parse(self, response):
        """Yield one ``ViceItem`` per entry, then the request for the next page.

        Args:
            response: Response whose body is a JSON document.

        Yields:
            ``ViceItem`` objects with ``id``, ``content`` and ``img`` filled
            from each entry's ``id``, ``preview`` and ``portrait`` fields,
            followed by a ``Request`` for the current URL extended with the
            ids just seen. Nothing is yielded when the payload has no items.
        """
        datas = json.loads(response.body)
        records = datas.get('items') if isinstance(datas, dict) else None
        if not records:
            # Nothing new came back: stop paginating explicitly instead of
            # re-requesting the identical URL and relying on the dupe filter.
            return

        seen_ids = []
        for data in records:
            # A fresh item per record: re-yielding one shared instance would
            # let later iterations overwrite data still in the pipeline.
            item = ViceItem()
            item['id'] = data['id']
            item['content'] = data['preview']
            item['img'] = data['portrait']
            # str() keeps URL building working when ids arrive as numbers.
            seen_ids.append(str(data['id']))
            yield item

        # Every id is followed by a comma (including the last one), matching
        # the format the URL has been built with so far.
        next_url = response.url + ''.join(seen_id + ',' for seen_id in seen_ids)
        yield Request(next_url, headers=self.headers)
|