# Saving Scrapy items to MySQL
# Install pymysql first: pip install pymysql
# Import pymysql
# The database connection settings live in settings.py:
# MYSQL_HOST = "127.0.0.1"
# MYSQL_DBNAME = "test"
# MYSQL_USER = "root"
# MYSQL_PASSWORD = "root"
import pymysql
from twisted.enterprise import adbapi
# Item pipeline registration in settings.py:
# ITEM_PIPELINES = {
# # 'cnblog.pipelines.CnblogPipeline': 300,
# 'cnblog.pipelines.MysqlTwistedPipeline': 1,
# }
# Insert into the database asynchronously
class MysqlTwistedPipeline(object):
    """Scrapy item pipeline that inserts items into MySQL asynchronously.

    Each insert is handed to a Twisted ``adbapi`` connection pool via
    ``runInteraction`` so that the pymysql call does not run inline in
    ``process_item``.
    """

    def __init__(self, dbpool):
        """Keep a reference to the connection pool used for every insert.

        Args:
            dbpool: Connection pool object exposing ``runInteraction`` and
                ``close`` (an ``adbapi.ConnectionPool`` when built through
                ``from_settings``).
        """
        self.dbpool = dbpool

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from a crawler by delegating to ``from_settings``.

        Args:
            crawler: Object whose ``settings`` attribute holds the project
                settings.

        Returns:
            A new pipeline instance.
        """
        return cls.from_settings(crawler.settings)

    @classmethod
    def from_settings(cls, settings):
        """Build the pipeline from the project settings.

        Args:
            settings: Mapping-like settings object providing ``MYSQL_HOST``,
                ``MYSQL_DBNAME``, ``MYSQL_USER`` and ``MYSQL_PASSWORD``.

        Returns:
            A new pipeline instance backed by a pymysql connection pool.
        """
        # Connection parameters read from settings.py.
        dbparms = dict(
            host=settings['MYSQL_HOST'],
            db=settings['MYSQL_DBNAME'],
            user=settings['MYSQL_USER'],
            passwd=settings['MYSQL_PASSWORD'],
            charset='utf8',
            cursorclass=pymysql.cursors.DictCursor,
            use_unicode=True,
        )
        # Let Twisted's adbapi manage a pool of pymysql connections.
        dbpool = adbapi.ConnectionPool("pymysql", **dbparms)
        return cls(dbpool)

    def process_item(self, item, spider):
        """Schedule an asynchronous insert for ``item`` and pass the item on.

        Args:
            item: Scraped item; must provide ``title`` and ``time`` keys.
            spider: Spider that produced the item.

        Returns:
            The same ``item``, so that later pipeline stages still receive it.
        """
        # The pool calls do_insert with a cursor as its first argument.
        query = self.dbpool.runInteraction(self.do_insert, item)
        # On failure the errback receives the failure object first, followed
        # by the extra arguments given here.
        query.addErrback(self.handle_error, item, spider)
        # Returning the item keeps the pipeline chain intact; without it,
        # downstream pipelines would be handed None.
        return item

    def handle_error(self, failure, item, spider):
        """Log a failed asynchronous insert.

        Args:
            failure: Failure object describing the error raised by the insert.
            item: Item whose insert failed.
            spider: Spider that produced the item; its logger is used.
        """
        spider.logger.error(
            "MySQL insert failed for item %r: %s", item, failure
        )

    def do_insert(self, cursor, item):
        """Execute the INSERT statement for one item.

        Args:
            cursor: Database cursor supplied by the connection pool.
            item: Item providing the ``title`` and ``time`` values.
        """
        # Parameterized query: values are passed separately from the SQL text.
        insert_sql = """
            insert into te(title, `time`)
            values (%s, %s)
        """
        cursor.execute(insert_sql, (item["title"], item["time"]))

    def close_spider(self, spider):
        """Close the connection pool when the spider finishes.

        Args:
            spider: Spider being closed (unused).
        """
        self.dbpool.close()