123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123 |
- # -*- coding: utf-8 -*-
- # Define your item pipelines here
- #
- # Don't forget to add your pipeline to the ITEM_PIPELINES setting
- # See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
- from .settings import MYSQL_HOST
- from .settings import MYSQL_DBNAME
- from .settings import MYSQL_USER
- from .settings import MYSQL_PASSWORD
- import pymysql.cursors
- import logging
- import time
class YouniSpidersPipeline(object):
    """Pass-through pipeline that leaves every item untouched."""

    def process_item(self, item, spider):
        """Return ``item`` unchanged so that later pipelines still receive it."""
        return item
class BuDeJiePipeline(object):
    """Store scraped BuDeJie posts in MySQL.

    Each item becomes one row in ``post_store`` (skipped when a row with the
    same ``source``/``source_id`` already exists) plus one row per image in
    ``post_store_imgs``.
    """

    SOURCE = 'budejie'
    TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
    SELECT_SQL = "SELECT * FROM post_store WHERE source = %s and source_id=%s"
    INSERT_SQL = (
        "insert into post_store(source, source_id, type, img ,video, "
        "title,content,created_at,updated_at) "
        "value (%s, %s, %s, %s, %s, %s,%s,%s,%s)"
    )
    IMGS_SQL = (
        "insert into post_store_imgs(post_store_id,img,created_at,updated_at) "
        "value (%s,%s,%s,%s)"
    )

    def __init__(self):
        """Open the MySQL connection and the cursor used for every item."""
        self.connect = pymysql.connect(
            host=MYSQL_HOST,          # database host
            port=3306,                # database port
            db=MYSQL_DBNAME,          # database name
            user=MYSQL_USER,          # database user
            passwd=MYSQL_PASSWORD,    # database password
            charset='utf8',           # connection encoding
            use_unicode=True)
        # All reads and writes go through this single cursor.
        self.cursor = self.connect.cursor()

    def close_spider(self, spider):
        """Release the cursor and the connection once the spider is done."""
        try:
            self.cursor.close()
        finally:
            self.connect.close()

    @staticmethod
    def _split_images(img):
        """Normalize an ``img`` field into ``(main_img, imgs)``.

        A non-empty string is a single image; a non-empty list/tuple yields
        its first element as the main image. Anything else (``None``, empty
        string, empty list, other types) yields ``('', [])``.
        """
        if isinstance(img, str):
            return img, ([img] if img else [])
        if isinstance(img, (list, tuple)) and img:
            return img[0], list(img)
        return '', []

    def process_item(self, item, spider):
        """Insert ``item`` into the database unless it is already stored.

        Args:
            item: mapping with ``id``, ``content`` and optionally ``img``
                (a URL string or a list of URL strings).
            spider: the spider that produced the item (unused).

        Returns:
            The same ``item``, so that later pipelines keep receiving it.
            Storage failures are logged and rolled back, never raised.
        """
        main_img, imgs = self._split_images(item.get('img'))
        logging.debug('main_img=%s', main_img)
        # One timestamp per item keeps created_at/updated_at consistent
        # across the post row and all of its image rows.
        now = time.strftime(self.TIME_FORMAT, time.localtime())
        try:
            self.cursor.execute(self.SELECT_SQL, (self.SOURCE, item['id']))
            if self.cursor.fetchone() is None:
                self.cursor.execute(self.INSERT_SQL, (
                    self.SOURCE, item['id'], 'image', main_img, '', '',
                    item['content'], now, now))
                post_id = int(self.cursor.lastrowid)
                if post_id and imgs:
                    self.cursor.executemany(
                        self.IMGS_SQL,
                        [(post_id, url, now, now) for url in imgs])
            # Commit even when nothing was inserted so the transaction
            # opened by the SELECT does not stay open between items.
            self.connect.commit()
        except Exception:
            # Best effort: keep the crawl running, but make the failure
            # visible and discard the partial transaction.
            logging.exception('failed to store %s item', self.SOURCE)
            self.connect.rollback()
        return item  # pipelines must return the item
class VicePipeline(object):
    """Store scraped Vice posts in MySQL.

    Each item becomes one row in ``post_store`` (skipped when a row with the
    same ``source``/``source_id`` already exists) plus one row per image in
    ``post_store_imgs``.
    """

    SOURCE = 'vice'
    TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
    SELECT_SQL = "SELECT * FROM post_store WHERE source = %s and source_id=%s"
    INSERT_SQL = (
        "insert into post_store(source, source_id, type, img ,video, "
        "title,content,created_at,updated_at) "
        "value (%s, %s, %s, %s, %s, %s,%s,%s,%s)"
    )
    IMGS_SQL = (
        "insert into post_store_imgs(post_store_id,img,created_at,updated_at) "
        "value (%s,%s,%s,%s)"
    )

    def __init__(self):
        """Open the MySQL connection and the cursor used for every item."""
        self.connect = pymysql.connect(
            host=MYSQL_HOST,          # database host
            port=3306,                # database port
            db=MYSQL_DBNAME,          # database name
            user=MYSQL_USER,          # database user
            passwd=MYSQL_PASSWORD,    # database password
            charset='utf8',           # connection encoding
            use_unicode=True)
        # All reads and writes go through this single cursor.
        self.cursor = self.connect.cursor()

    def close_spider(self, spider):
        """Release the cursor and the connection once the spider is done."""
        try:
            self.cursor.close()
        finally:
            self.connect.close()

    @staticmethod
    def _split_images(img):
        """Normalize an ``img`` field into ``(main_img, imgs)``.

        A non-empty string is a single image; a non-empty list/tuple yields
        its first element as the main image. Anything else (``None``, empty
        string, empty list, other types) yields ``('', [])`` instead of
        leaving the values undefined.
        """
        if isinstance(img, str):
            return img, ([img] if img else [])
        if isinstance(img, (list, tuple)) and img:
            return img[0], list(img)
        return '', []

    def process_item(self, item, spider):
        """Insert ``item`` into the database unless it is already stored.

        Args:
            item: mapping with ``id``, ``content`` and optionally ``img``
                (a URL string or a list of URL strings).
            spider: the spider that produced the item (unused).

        Returns:
            The same ``item``, so that later pipelines keep receiving it.
            Storage failures are logged and rolled back, never raised.
        """
        logging.debug("pipline")
        main_img, imgs = self._split_images(item.get('img'))
        # One timestamp per item keeps created_at/updated_at consistent
        # across the post row and all of its image rows.
        now = time.strftime(self.TIME_FORMAT, time.localtime())
        try:
            self.cursor.execute(self.SELECT_SQL, (self.SOURCE, item['id']))
            if self.cursor.fetchone() is None:
                self.cursor.execute(self.INSERT_SQL, (
                    self.SOURCE, item['id'], 'image', main_img, '', '',
                    item['content'], now, now))
                post_id = int(self.cursor.lastrowid)
                if post_id and imgs:
                    self.cursor.executemany(
                        self.IMGS_SQL,
                        [(post_id, url, now, now) for url in imgs])
            # Commit even when nothing was inserted so the transaction
            # opened by the SELECT does not stay open between items.
            self.connect.commit()
        except Exception:
            # Best effort: keep the crawl running, but make the failure
            # visible (it used to be swallowed silently) and discard the
            # partial transaction.
            logging.exception('failed to store %s item', self.SOURCE)
            self.connect.rollback()
        return item  # pipelines must return the item
|