切换主题
pymysql实现数据存储
前置
首先您应该确定您的计算机上已经安装了 MySQL 数据库,然后再进行如下操作:
# 1. 连接到mysql数据库
mysql -h127.0.0.1 -uroot -p123456
# 2. 建库
create database maoyandb charset utf8;
# 3. 切换数据库
use maoyandb;
# 4. 创建数据表
create table filmtab(
name varchar(100),
star varchar(400),
time varchar(30)
);
基本使用
python
import pymysql
db = pymysql.connect(host='localhost', user='root', password='123456', port=3306, db='maoyandb')
cursor = db.cursor()
# 写入-方式一
sql = "INSERT INTO filmtab VALUES ('%s', '%s', '%s')" % ('test', 'test', 'test')
cursor.execute(sql)
# 写入-方式二
sql = 'insert into filmtab values(%s,%s,%s)'
cursor.execute(sql,['刺杀,小说家','雷佳音','2021'])
# 批量写入
info_list = [('我不是药神','徐峥','2018-07-05'),('你好,李焕英','贾玲','2021-02-12')]
sql = 'insert into movieinfo values(%s,%s,%s)'
cursor.executemany(sql,info_list)
db.commit()
cursor.close()
db.close()
完善 [实例]抓取猫眼电影排行榜 的代码-储存数据
python
from urllib import request
import re
import time
import random
import csv
import pymysql
from public.ua_info import ua_list
class MaoYanTop100:
def __init__(self):
self.url = 'https://maoyan.com/board/4?offset={}'
# 链接数据库
self.conn = pymysql.connect(host='localhost', port=3306, user='root', password='123456', db='maoyandb',
charset='utf8')
self.cursor = self.conn.cursor()
# 请求函数
def get_html(self, url):
headers = {
'User-Agent': random.choice(ua_list),
'Cookie': '__mta=107189309.1714956767314.1714960814725.1714962238422.6; uuid_n_v=v1; uuid=F3D982800B4211EFA7D89144D35F720D375D6C1B69AC445CA0D001CA3BCBE62A; _csrf=da5df0dde006b71428ee8bd8ef228acf56d2170a9e7a866884d865a5e1f82949; _lxsdk_cuid=18f4b638dd7c8-0d181a09707b6f-4c657b58-1fa400-18f4b638dd7c8; _lxsdk=F3D982800B4211EFA7D89144D35F720D375D6C1B69AC445CA0D001CA3BCBE62A; Hm_lvt_703e94591e87be68cc8da0da7cbd0be2=1714956767; Hm_lpvt_703e94591e87be68cc8da0da7cbd0be2=1714962238; _lxsdk_s=18f4b96b4c9-3dd-fb-81d%7C%7C10'
}
req = request.Request(url=url, headers=headers)
res = request.urlopen(req)
html = res.read().decode('utf-8')
# 解析函数
self.parse_html(html)
def parse_html(self, html):
pattern = re.compile(
'<div class="movie-item-info">.*?title="(.*?)".*?class="star">(.*?)</p>.*?releasetime">(.*?)</p>', re.S)
r_list = pattern.findall(html)
# 保存函数
# self.write_csv(r_list)
self.save_db(r_list)
def save_db(self, r_list):
L = []
sql = 'insert into filmtab(name,star,time) values(%s,%s,%s)'
for r in r_list:
name = r[0].strip()
star = r[1].strip()
time = r[2].strip()[5:15]
L.append([name, star, time])
try:
self.cursor.executemany(sql, L)
self.conn.commit()
except:
self.conn.rollback()
def write_csv(self, r_list):
with open('猫眼电影top100.csv', 'a', newline='', encoding='utf-8-sig') as f:
writer = csv.writer(f)
for r in r_list:
name = r[0].strip()
star = r[1].strip()
time = r[2].strip()[5:15]
writer.writerow([name, star, time])
def run(self):
for offset in range(0, 100, 10):
url = self.url.format(offset)
self.get_html(url)
time.sleep(random.randint(1, 2))
self.cursor.close()
self.conn.close()
if __name__ == '__main__':
try:
start = time.time()
spider = MaoYanTop100()
spider.run()
end = time.time()
print('执行时间:%.2f' % (end - start))
except Exception as e:
print(e)