pipelines.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
# -*- coding: utf-8 -*-
# Define your item pipelines here.
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting.
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
  6. from .settings import MYSQL_HOST
  7. from .settings import MYSQL_DBNAME
  8. from .settings import MYSQL_USER
  9. from .settings import MYSQL_PASSWORD
  10. import pymysql.cursors
  11. import logging
  12. import time
  13. class YouniSpidersPipeline(object):
  14. def process_item(self, item, spider):
  15. return item
  16. class BuDeJiePipeline(object):
  17. def __init__(self):
  18. # 连接数据库
  19. self.connect = pymysql.connect(
  20. host=MYSQL_HOST, # 数据库地址
  21. port=3306, # 数据库端口
  22. db=MYSQL_DBNAME, # 数据库名
  23. user=MYSQL_USER, # 数据库用户名
  24. passwd=MYSQL_PASSWORD, # 数据库密码
  25. charset='utf8', # 编码方式
  26. use_unicode=True)
  27. # 通过cursor执行增删查改
  28. self.cursor = self.connect.cursor()
  29. def process_item(self, item, spider):
  30. main_img = ''
  31. imgs = []
  32. if type(item['img']).__name__ == "str":
  33. logging.debug('main_img is str type')
  34. main_img = item['img']
  35. imgs = [item['img']]
  36. elif type(item['img']).__name__ == "list":
  37. logging.debug('main_img is list type')
  38. main_img = item['img'][0]
  39. imgs = item['img']
  40. logging.debug(type(item['img']))
  41. logging.debug('main_img=' + main_img)
  42. select_sql = "SELECT * FROM post_store WHERE source = %s and source_id=%s"
  43. logging.debug("select=" + select_sql)
  44. insert_sql = "insert into post_store(source, source_id, type, img ,video, title,content,created_at,updated_at) value (%s, %s, %s, %s, %s, %s,%s,%s,%s)"
  45. imgs_sql = "insert into post_store_imgs(post_store_id,img,created_at,updated_at) value (%s,%s,%s,%s)"
  46. logging.debug("insert=" + insert_sql)
  47. try:
  48. self.cursor.execute(select_sql, ('budejie', item['id']))
  49. logging.debug(select_sql)
  50. row = self.cursor.fetchone()
  51. logging.debug(row)
  52. if row is None:
  53. self.cursor.execute(insert_sql, (
  54. 'budejie', item['id'], 'image', main_img, '', '', item['content'],
  55. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
  56. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
  57. lastrowid = int(self.cursor.lastrowid)
  58. if lastrowid:
  59. imgs_data = []
  60. for i in imgs:
  61. imgs_data.append((lastrowid, i, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
  62. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
  63. self.cursor.executemany(imgs_sql, imgs_data)
  64. # 提交sql语句
  65. self.connect.commit()
  66. except BaseException as e:
  67. logging.debug('exception=' + repr(e))
  68. self.connect.rollback()
  69. return item # 必须实现返回
  70. class VicePipeline(object):
  71. def __init__(self):
  72. # 连接数据库
  73. self.connect = pymysql.connect(
  74. host=MYSQL_HOST, # 数据库地址
  75. port=3306, # 数据库端口
  76. db=MYSQL_DBNAME, # 数据库名
  77. user=MYSQL_USER, # 数据库用户名
  78. passwd=MYSQL_PASSWORD, # 数据库密码
  79. charset='utf8', # 编码方式
  80. use_unicode=True)
  81. # 通过cursor执行增删查改
  82. self.cursor = self.connect.cursor()
  83. def process_item(self, item, spider):
  84. logging.debug("pipline")
  85. if type(item['img']).__name__ == "str":
  86. main_img = item['img']
  87. imgs = [item['img']]
  88. elif type(item['img']).__name__ == "list":
  89. main_img = item['img'][0]
  90. imgs = item['img']
  91. select_sql = "SELECT * FROM post_store WHERE source = %s and source_id=%s"
  92. insert_sql = "insert into post_store(source, source_id, type, img ,video, title,content,created_at,updated_at) value (%s, %s, %s, %s, %s, %s,%s,%s,%s)"
  93. imgs_sql = "insert into post_store_imgs(post_store_id,img,created_at,updated_at) value (%s,%s,%s,%s)"
  94. try:
  95. self.cursor.execute(select_sql, ('vice', item['id']))
  96. row = self.cursor.fetchone()
  97. if row is None:
  98. self.cursor.execute(insert_sql, (
  99. 'vice', item['id'], 'image', main_img, '', '', item['content'],
  100. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
  101. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
  102. lastrowid = int(self.cursor.lastrowid)
  103. if lastrowid:
  104. imgs_data = []
  105. for i in imgs:
  106. imgs_data.append((lastrowid, i, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
  107. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
  108. self.cursor.executemany(imgs_sql, imgs_data)
  109. # 提交sql语句
  110. self.connect.commit()
  111. except:
  112. self.connect.rollback()
  113. return item # 必须实现返回