|
@@ -0,0 +1,122 @@
|
|
|
|
+# -*- coding: utf-8 -*-
|
|
|
|
+
|
|
|
|
+# Define your item pipelines here
|
|
|
|
+#
|
|
|
|
+# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
|
|
|
+# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
|
|
|
+from .settings import MYSQL_HOST
|
|
|
|
+from .settings import MYSQL_DBNAME
|
|
|
|
+from .settings import MYSQL_USER
|
|
|
|
+from .settings import MYSQL_PASSWORD
|
|
|
|
+import pymysql.cursors
|
|
|
|
+import logging
|
|
|
|
+import time
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+class YouniSpidersPipeline(object):
|
|
|
|
+ def process_item(self, item, spider):
|
|
|
|
+ return item
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+class BuDeJiePipeline(object):
|
|
|
|
+ def __init__(self):
|
|
|
|
+ # 连接数据库
|
|
|
|
+ self.connect = pymysql.connect(
|
|
|
|
+ host=MYSQL_HOST, # 数据库地址
|
|
|
|
+ port=3306, # 数据库端口
|
|
|
|
+ db=MYSQL_DBNAME, # 数据库名
|
|
|
|
+ user=MYSQL_USER, # 数据库用户名
|
|
|
|
+ passwd=MYSQL_PASSWORD, # 数据库密码
|
|
|
|
+ charset='utf8', # 编码方式
|
|
|
|
+ use_unicode=True)
|
|
|
|
+ # 通过cursor执行增删查改
|
|
|
|
+ self.cursor = self.connect.cursor()
|
|
|
|
+
|
|
|
|
+ def process_item(self, item, spider):
|
|
|
|
+ main_img = ''
|
|
|
|
+ imgs = []
|
|
|
|
+ if type(item['img']).__name__ == "str":
|
|
|
|
+ logging.debug('main_img is str type')
|
|
|
|
+ main_img = item['img']
|
|
|
|
+ imgs = [item['img']]
|
|
|
|
+ elif type(item['img']).__name__ == "list":
|
|
|
|
+ logging.debug('main_img is list type')
|
|
|
|
+ main_img = item['img'][0]
|
|
|
|
+ imgs = item['img']
|
|
|
|
+ logging.debug(type(item['img']))
|
|
|
|
+ logging.debug('main_img=' + main_img)
|
|
|
|
+ select_sql = "SELECT * FROM post_store WHERE source = %s and source_id=%s"
|
|
|
|
+ logging.debug("select=" + select_sql)
|
|
|
|
+ insert_sql = "insert into post_store(source, source_id, type, img ,video, title,content,created_at,updated_at) value (%s, %s, %s, %s, %s, %s,%s,%s,%s)"
|
|
|
|
+ imgs_sql = "insert into post_store_imgs(post_store_id,img,created_at,updated_at) value (%s,%s,%s,%s)"
|
|
|
|
+ logging.debug("insert=" + insert_sql)
|
|
|
|
+ try:
|
|
|
|
+ self.cursor.execute(select_sql, ('budejie', item['id']))
|
|
|
|
+ logging.debug(select_sql)
|
|
|
|
+ row = self.cursor.fetchone()
|
|
|
|
+ logging.debug(row)
|
|
|
|
+ if row is None:
|
|
|
|
+ self.cursor.execute(insert_sql, (
|
|
|
|
+ 'budejie', item['id'], 'image', main_img, '', '', item['content'],
|
|
|
|
+ time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
|
|
|
|
+ time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
|
|
|
|
+ lastrowid = int(self.cursor.lastrowid)
|
|
|
|
+ if lastrowid:
|
|
|
|
+ imgs_data = []
|
|
|
|
+ for i in imgs:
|
|
|
|
+ imgs_data.append((lastrowid, i, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
|
|
|
|
+ time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
|
|
|
|
+ self.cursor.executemany(imgs_sql, imgs_data)
|
|
|
|
+ # 提交sql语句
|
|
|
|
+ self.connect.commit()
|
|
|
|
+ except BaseException as e:
|
|
|
|
+ logging.debug('exception=' + repr(e))
|
|
|
|
+ self.connect.rollback()
|
|
|
|
+ return item # 必须实现返回
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+class VicePipeline(object):
|
|
|
|
+ def __init__(self):
|
|
|
|
+ # 连接数据库
|
|
|
|
+ self.connect = pymysql.connect(
|
|
|
|
+ host=MYSQL_HOST, # 数据库地址
|
|
|
|
+ port=3306, # 数据库端口
|
|
|
|
+ db=MYSQL_DBNAME, # 数据库名
|
|
|
|
+ user=MYSQL_USER, # 数据库用户名
|
|
|
|
+ passwd=MYSQL_PASSWORD, # 数据库密码
|
|
|
|
+ charset='utf8', # 编码方式
|
|
|
|
+ use_unicode=True)
|
|
|
|
+ # 通过cursor执行增删查改
|
|
|
|
+ self.cursor = self.connect.cursor()
|
|
|
|
+
|
|
|
|
+ def process_item(self, item, spider):
|
|
|
|
+ logging.debug("pipline")
|
|
|
|
+ if type(item['img']).__name__ == "str":
|
|
|
|
+ main_img = item['img']
|
|
|
|
+ imgs = [item['img']]
|
|
|
|
+ elif type(item['img']).__name__ == "list":
|
|
|
|
+ main_img = item['img'][0]
|
|
|
|
+ imgs = item['img']
|
|
|
|
+ select_sql = "SELECT * FROM post_store WHERE source = %s and source_id=%s"
|
|
|
|
+ insert_sql = "insert into post_store(source, source_id, type, img ,video, title,content,created_at,updated_at) value (%s, %s, %s, %s, %s, %s,%s,%s,%s)"
|
|
|
|
+ imgs_sql = "insert into post_store_imgs(post_store_id,img,created_at,updated_at) value (%s,%s,%s,%s)"
|
|
|
|
+ try:
|
|
|
|
+ self.cursor.execute(select_sql, ('vice', item['id']))
|
|
|
|
+ row = self.cursor.fetchone()
|
|
|
|
+ if row is None:
|
|
|
|
+ self.cursor.execute(insert_sql, (
|
|
|
|
+ 'vice', item['id'], 'image', main_img, '', '', item['content'],
|
|
|
|
+ time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
|
|
|
|
+ time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
|
|
|
|
+ lastrowid = int(self.cursor.lastrowid)
|
|
|
|
+ if lastrowid:
|
|
|
|
+ imgs_data = []
|
|
|
|
+ for i in imgs:
|
|
|
|
+ imgs_data.append((lastrowid, i, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
|
|
|
|
+ time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
|
|
|
|
+ self.cursor.executemany(imgs_sql, imgs_data)
|
|
|
|
+ # 提交sql语句
|
|
|
|
+ self.connect.commit()
|
|
|
|
+ except:
|
|
|
|
+ self.connect.rollback()
|
|
|
|
+ return item # 必须实现返回
|