from scrapy import log
from scrapy.exceptions import DropItem
import MySQLdb


class MysqlDBPipeline(object):

    def __init__(self, mysql_host, mysql_db, mysql_user, mysql_passwd):
        self.mysql_host = mysql_host
        self.mysql_db = mysql_db
        self.mysql_user = mysql_user
        self.mysql_passwd = mysql_passwd

    @classmethod
    def from_crawler(cls, crawler):
        # Pull the connection parameters from the project settings.
        return cls(
            mysql_host=crawler.settings.get('MYSQL_HOST'),
            mysql_user=crawler.settings.get('MYSQL_USER'),
            mysql_passwd=crawler.settings.get('MYSQL_PASSWD'),
            mysql_db=crawler.settings.get('MYSQL_DB'),
        )

    def open_spider(self, spider):
        # Open one connection per spider run.
        try:
            self.conn = MySQLdb.connect(
                user=self.mysql_user,
                passwd=self.mysql_passwd,
                db=self.mysql_db,
                host=self.mysql_host,
                charset='utf8',
                use_unicode=True,
            )
            self.cursor = self.conn.cursor()
        except MySQLdb.Error as e:
            print "MySQL error %d: %s" % (e.args[0], e.args[1])

    def close_spider(self, spider):
        self.cursor.close()
        self.conn.close()

    def process_item(self, item, spider):
        # Drop items with any empty field. Iterating an item yields its
        # field names, so test the field's value, not the name itself.
        for field in item:
            if not item[field]:
                raise DropItem("Missing {0}!".format(field))
        # 'title' and 'url' are parallel lists; insert one row per pair.
        for n in range(len(item['title'])):
            try:
                value = (item['title'][n], item['url'][n],
                         item['create_time'])
                self.cursor.execute(
                    'INSERT INTO itunes (title, url, create_time) '
                    'VALUES (%s, %s, %s)', value)
                self.conn.commit()
            except MySQLdb.Error as e:
                print "MySQL error %d: %s" % (e.args[0], e.args[1])
        log.msg("Item added to MySQL database!",
                level=log.DEBUG, spider=spider)
        return item
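The pipeline reads its connection parameters from the Scrapy project settings through from_crawler(). A minimal settings.py sketch that would wire it up is shown below; the setting names match those the pipeline looks up, but the project path, pipeline order, and credential values are assumptions for illustration. (The dict form of ITEM_PIPELINES is the modern one; very old Scrapy releases accepted a plain list.)

# settings.py (sketch -- project path and credential values are hypothetical)
ITEM_PIPELINES = {
    'myproject.pipelines.MysqlDBPipeline': 300,
}

# Connection parameters read by MysqlDBPipeline.from_crawler()
MYSQL_HOST = 'localhost'
MYSQL_DB = 'scrapydb'
MYSQL_USER = 'scrapy'
MYSQL_PASSWD = 'secret'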
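The INSERT in process_item() assumes an itunes table with title, url, and create_time columns. A minimal one-off setup sketch follows; the original source gives no schema, so the column types, key, and credentials here are assumptions to adjust to your data.

import MySQLdb

# One-off table setup; credentials and column types are guesses
# inferred from the INSERT statement in the pipeline above.
conn = MySQLdb.connect(user='scrapy', passwd='secret', db='scrapydb',
                       host='localhost', charset='utf8')
cursor = conn.cursor()
cursor.execute("""
    CREATE TABLE IF NOT EXISTS itunes (
        id INT AUTO_INCREMENT PRIMARY KEY,
        title VARCHAR(255),
        url VARCHAR(512),
        create_time DATETIME
    ) DEFAULT CHARSET=utf8
""")
conn.commit()
cursor.close()
conn.close()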