# -*- coding: utf-8 -*- # Define your item pipelines here # # Don't forget to add your pipeline to the ITEM_PIPELINES setting # See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html from .settings import MYSQL_HOST from .settings import MYSQL_DBNAME from .settings import MYSQL_USER from .settings import MYSQL_PASSWORD import pymysql.cursors import logging import time class YouniSpidersPipeline(object): def process_item(self, item, spider): return item class BuDeJiePipeline(object): def __init__(self): # 连接数据库 self.connect = pymysql.connect( host=MYSQL_HOST, # 数据库地址 port=3306, # 数据库端口 db=MYSQL_DBNAME, # 数据库名 user=MYSQL_USER, # 数据库用户名 passwd=MYSQL_PASSWORD, # 数据库密码 charset='utf8', # 编码方式 use_unicode=True) # 通过cursor执行增删查改 self.cursor = self.connect.cursor() def process_item(self, item, spider): main_img = '' imgs = [] if type(item['img']).__name__ == "str": logging.debug('main_img is str type') main_img = item['img'] imgs = [item['img']] elif type(item['img']).__name__ == "list": logging.debug('main_img is list type') main_img = item['img'][0] imgs = item['img'] logging.debug(type(item['img'])) logging.debug('main_img=' + main_img) select_sql = "SELECT * FROM post_store WHERE source = %s and source_id=%s" logging.debug("select=" + select_sql) insert_sql = "insert into post_store(source, source_id, type, img ,video, title,content,created_at,updated_at) value (%s, %s, %s, %s, %s, %s,%s,%s,%s)" imgs_sql = "insert into post_store_imgs(post_store_id,img,created_at,updated_at) value (%s,%s,%s,%s)" logging.debug("insert=" + insert_sql) try: self.cursor.execute(select_sql, ('budejie', item['id'])) logging.debug(select_sql) row = self.cursor.fetchone() logging.debug(row) if row is None: self.cursor.execute(insert_sql, ( 'budejie', item['id'], 'image', main_img, '', '', item['content'], time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))) lastrowid = int(self.cursor.lastrowid) if lastrowid: imgs_data = [] for i in imgs: imgs_data.append((lastrowid, i, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))) self.cursor.executemany(imgs_sql, imgs_data) # 提交sql语句 self.connect.commit() except BaseException as e: logging.debug('exception=' + repr(e)) self.connect.rollback() return item # 必须实现返回 class VicePipeline(object): def __init__(self): # 连接数据库 self.connect = pymysql.connect( host=MYSQL_HOST, # 数据库地址 port=3306, # 数据库端口 db=MYSQL_DBNAME, # 数据库名 user=MYSQL_USER, # 数据库用户名 passwd=MYSQL_PASSWORD, # 数据库密码 charset='utf8', # 编码方式 use_unicode=True) # 通过cursor执行增删查改 self.cursor = self.connect.cursor() def process_item(self, item, spider): logging.debug("pipline") if type(item['img']).__name__ == "str": main_img = item['img'] imgs = [item['img']] elif type(item['img']).__name__ == "list": main_img = item['img'][0] imgs = item['img'] select_sql = "SELECT * FROM post_store WHERE source = %s and source_id=%s" insert_sql = "insert into post_store(source, source_id, type, img ,video, title,content,created_at,updated_at) value (%s, %s, %s, %s, %s, %s,%s,%s,%s)" imgs_sql = "insert into post_store_imgs(post_store_id,img,created_at,updated_at) value (%s,%s,%s,%s)" try: self.cursor.execute(select_sql, ('vice', item['id'])) row = self.cursor.fetchone() if row is None: self.cursor.execute(insert_sql, ( 'vice', item['id'], 'image', main_img, '', '', item['content'], time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))) lastrowid = int(self.cursor.lastrowid) if lastrowid: imgs_data = [] for i in imgs: imgs_data.append((lastrowid, i, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))) self.cursor.executemany(imgs_sql, imgs_data) # 提交sql语句 self.connect.commit() except: self.connect.rollback() return item # 必须实现返回