Skip to content

pymysql实现数据存储

前置

首先您应该确定您的计算机上已经安装了 MySQL 数据库,然后再进行如下操作:

# 1. 连接到mysql数据库
mysql -h127.0.0.1 -uroot -p123456
# 2. 建库
create database maoyandb charset utf8;
# 3. 切换数据库
use maoyandb;
# 4. 创建数据表
create table filmtab(
name varchar(100),
star varchar(400),
time varchar(30)
);

基本使用

python
import pymysql

db = pymysql.connect(host='localhost', user='root', password='123456', port=3306, db='maoyandb')

cursor = db.cursor()

# 写入-方式一
sql = "INSERT INTO filmtab VALUES ('%s', '%s', '%s')" % ('test', 'test', 'test')
cursor.execute(sql)
# 写入-方式二
sql = 'insert into filmtab values(%s,%s,%s)'
cursor.execute(sql,['刺杀,小说家','雷佳音','2021'])

# 批量写入
info_list = [('我不是药神','徐峥','2018-07-05'),('你好,李焕英','贾玲','2021-02-12')]
sql = 'insert into movieinfo values(%s,%s,%s)'
cursor.executemany(sql,info_list)

db.commit()


cursor.close()
db.close()

完善 [实例]抓取猫眼电影排行榜 的代码-储存数据

python
from urllib import request
import re
import time
import random
import csv

import pymysql

from public.ua_info import ua_list


class MaoYanTop100:
    def __init__(self):
        self.url = 'https://maoyan.com/board/4?offset={}'
        # 链接数据库
        self.conn = pymysql.connect(host='localhost', port=3306, user='root', password='123456', db='maoyandb',
                                     charset='utf8')
        self.cursor = self.conn.cursor()

    # 请求函数
    def get_html(self, url):
        headers = {
            'User-Agent': random.choice(ua_list),
            'Cookie': '__mta=107189309.1714956767314.1714960814725.1714962238422.6; uuid_n_v=v1; uuid=F3D982800B4211EFA7D89144D35F720D375D6C1B69AC445CA0D001CA3BCBE62A; _csrf=da5df0dde006b71428ee8bd8ef228acf56d2170a9e7a866884d865a5e1f82949; _lxsdk_cuid=18f4b638dd7c8-0d181a09707b6f-4c657b58-1fa400-18f4b638dd7c8; _lxsdk=F3D982800B4211EFA7D89144D35F720D375D6C1B69AC445CA0D001CA3BCBE62A; Hm_lvt_703e94591e87be68cc8da0da7cbd0be2=1714956767; Hm_lpvt_703e94591e87be68cc8da0da7cbd0be2=1714962238; _lxsdk_s=18f4b96b4c9-3dd-fb-81d%7C%7C10'
        }
        req = request.Request(url=url, headers=headers)
        res = request.urlopen(req)
        html = res.read().decode('utf-8')
        # 解析函数
        self.parse_html(html)

    def parse_html(self, html):
        pattern = re.compile(
            '<div class="movie-item-info">.*?title="(.*?)".*?class="star">(.*?)</p>.*?releasetime">(.*?)</p>', re.S)
        r_list = pattern.findall(html)
        # 保存函数
        # self.write_csv(r_list)
        self.save_db(r_list)

    def save_db(self, r_list):
        L = []
        sql = 'insert into filmtab(name,star,time) values(%s,%s,%s)'
        for r in r_list:
            name = r[0].strip()
            star = r[1].strip()
            time = r[2].strip()[5:15]
            L.append([name, star, time])
        try:
            self.cursor.executemany(sql, L)
            self.conn.commit()
        except:
            self.conn.rollback()


    def write_csv(self, r_list):
        with open('猫眼电影top100.csv', 'a', newline='', encoding='utf-8-sig') as f:
            writer = csv.writer(f)
            for r in r_list:
                name = r[0].strip()
                star = r[1].strip()
                time = r[2].strip()[5:15]
                writer.writerow([name, star, time])

    def run(self):
        for offset in range(0, 100, 10):
            url = self.url.format(offset)
            self.get_html(url)
            time.sleep(random.randint(1, 2))

            self.cursor.close()
            self.conn.close()



if __name__ == '__main__':
    try:
        start = time.time()
        spider = MaoYanTop100()
        spider.run()
        end = time.time()
        print('执行时间:%.2f' % (end - start))
    except Exception as e:
        print(e)