# -*- coding: utf-8 -*-
import json
import re

import scrapy
from scrapy import Request

from youni_spiders.items import ViceItem


class ViceSpider(scrapy.Spider):
    """Spider that walks the vice.cn ``dndmore`` AJAX feed page by page.

    Each response is parsed as JSON; every entry under its ``items`` key is
    emitted as a ``ViceItem``. The ids of the entries seen on a page are
    appended to the request URL (after ``ids_not_in=``) to build the request
    for the next page.
    """

    name = 'vice'
    allowed_domains = ['vice.cn']
    # Route the items of this spider through its dedicated pipeline only.
    custom_settings = {
        'ITEM_PIPELINES': {'youni_spiders.pipelines.VicePipeline': 1},
    }
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/76.0.3809.100 Safari/537.36'
        ),
    }

    def start_requests(self):
        """Yield the first feed request, with an empty ``ids_not_in`` list."""
        url = 'http://www.vice.cn/ajax/dndmore?ids_not_in='
        yield Request(url, headers=self.headers)

    def parse(self, response):
        """Yield one ``ViceItem`` per feed entry, then the next-page request.

        Args:
            response: Response whose body is the JSON payload of the feed.

        Yields:
            A ``ViceItem`` for every entry of the payload's ``items`` list,
            followed by a ``Request`` for the next page when the current
            page contained at least one entry.
        """
        datas = json.loads(response.body)
        entries = datas.get('items') if datas else None
        if not entries:
            # Nothing on this page: the follow-up URL would be identical to
            # the current one, so stop paginating explicitly.
            return

        seen_ids = []
        for entry in entries:
            # A fresh item per entry: a single shared instance would be
            # mutated again after it has already been yielded.
            item = ViceItem()
            item['id'] = entry['id']
            item['content'] = entry['preview']
            item['img'] = entry['portrait']
            seen_ids.append(entry['id'])
            yield item

        # Keep the trailing comma so the ids of the next page can be appended
        # directly to this URL.
        # NOTE(review): presumably the endpoint excludes the listed ids from
        # its next answer - confirm against the API.
        next_url = response.url + ','.join(seen_ids) + ','
        yield Request(next_url, headers=self.headers)