背景
社交产品,用户表,用户数据600万条,字段87个(别问为什么这么多,问就是业务需要+祖传屎山);此时需要增加一个geometry类型的字段将用户的经纬度数据存入数据表中并增加空间索引, 且要求必须在原表中新加字段(别杠为什么不开新表,杠就是业务需要)。
机器配置:18核36线程CPU,64GB内存。
表结构如下:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 | CREATE TABLE `july_user` ( `user_id` INT UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '用户ID', `third_user_id` VARCHAR(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '第三方登陆id', `type` VARCHAR(4) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '用户类型;d:dady,b:bady', `age` tinyint NOT NULL DEFAULT '0' COMMENT '用户年龄', `birthday` DATE NOT NULL DEFAULT '1970-01-01' COMMENT '用户生日', `sex` tinyint NOT NULL DEFAULT '0' COMMENT '性别0.女,1男', `sex_filter` tinyint NOT NULL DEFAULT '1' COMMENT '性别筛选1.双性,2男,3女', `country` VARCHAR(120) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '国家', `state` VARCHAR(120) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '省份', `address` VARCHAR(250) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '地址', `city` VARCHAR(250) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '城市', `longitude` DOUBLE(25,20) NOT NULL DEFAULT '0.00000000000000000000' COMMENT '经度', `latitude` DOUBLE(25,20) NOT NULL DEFAULT '0.00000000000000000000' COMMENT '纬度', `occupation` VARCHAR(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '职业', `education` VARCHAR(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '学历', `net_assets` VARCHAR(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '收入', `signature` VARCHAR(1500) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '签名', `need_padded_info` tinyint NOT NULL DEFAULT '1' COMMENT '是否需要补充信息', `is_certified` tinyint NOT NULL DEFAULT '0' COMMENT '审核状态', `certified_date` datetime NOT NULL COMMENT '审核时间', `certified_tag` tinyint NOT NULL DEFAULT '0' COMMENT '审核标签', `certified_tag_time` INT UNSIGNED NOT NULL DEFAULT '0' COMMENT '审核标签通过时间', `is_send_vip` tinyint NOT NULL DEFAULT '0' COMMENT '是否赠送过新注册用户会员', `is_vip` tinyint NOT NULL DEFAULT '0' COMMENT '是否会员', `vip_expiry_date` INT UNSIGNED NOT NULL DEFAULT '0' COMMENT '会员过期时间', `cougar_is_vip` tinyint NOT NULL DEFAULT '0' COMMENT 'cougar是否会员', `cougar_vip_expiry_date` INT NOT NULL DEFAULT '0' COMMENT 'cougar会员过期时间', `gay_is_vip` tinyint NOT NULL DEFAULT '0' COMMENT 'gay是否会员', `gay_vip_expiry_date` INT NOT NULL DEFAULT '0' COMMENT 'gay会员过期时间', `les_is_vip` tinyint NOT NULL DEFAULT '0' COMMENT 'les是否会员', `les_vip_expiry_date` INT NOT NULL DEFAULT '0' COMMENT 'les会员过期时间', `is_deactivate` tinyint NOT NULL DEFAULT '0' COMMENT '是否冻结账号', `is_passed_by_admin` tinyint NOT NULL DEFAULT '0' COMMENT '是否被管理员通过', `is_quickpass` tinyint NOT NULL DEFAULT '0' COMMENT '是否快速通过审核', `is_verify` tinyint NOT NULL DEFAULT '0' COMMENT '是否认证;0.未认证 1.认证中 2.认证通过 3.认证拒绝', `verify_level` tinyint NOT NULL DEFAULT '0' COMMENT '认证等级', `verify_type` VARCHAR(25) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '认证类型', `verify_date` datetime NOT NULL COMMENT '认证时间', `is_publish_verify_moment` tinyint NOT NULL DEFAULT '0' COMMENT '是否发布了认证moment 0.否 1.是 V460版本后认证通过的用户才有该值', `is_quality_user` tinyint NOT NULL DEFAULT '0' COMMENT '是否优质推荐', `like_noti_switch` tinyint NOT NULL DEFAULT '1' COMMENT '是否开启喜欢开关', `message_noti_switch` tinyint NOT NULL DEFAULT '1' COMMENT '是否开启message开关', `avatar` VARCHAR(250) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '头像', `password` VARCHAR(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '密码', `email` VARCHAR(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '邮箱', `realname` VARCHAR(25) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '昵称', `online_date` INT NOT NULL DEFAULT '0' COMMENT '在线时间', `register_date` datetime NOT NULL COMMENT '注册时间', `modify_date` datetime NOT NULL COMMENT '编辑时间', `regist_date_switch_on` tinyint NOT NULL DEFAULT '1' COMMENT '注册时间开关(0:关/1:开)', `third_user_email` VARCHAR(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '第三方用户邮箱', `verify_code` VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '验证码', `verify_code_date` VARCHAR(25) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '验证码时间', `is_certified_email` tinyint NOT NULL DEFAULT '0' COMMENT '是否邮箱认证', `rel_ticket` VARCHAR(25) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT 'rel购买标识', `rel` VARCHAR(25) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT 'rel信息', `info` VARCHAR(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '版本信息', `app_version` VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '0' COMMENT 'app版本', `anonymous_name` VARCHAR(25) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '匿名昵称', `anonymous_avatar` VARCHAR(250) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '匿名头像', `udid` VARCHAR(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '唯一udid', `today_moments_count` tinyint NOT NULL DEFAULT '0' COMMENT '今天moment数量', `is_admin` tinyint NOT NULL DEFAULT '0' COMMENT '控制是否为小助手账户', `beauty_verify_is_disabled` tinyint NOT NULL DEFAULT '0' COMMENT '认证是否被停⽤, 0未停⽤, 1已停⽤', `change_beauty_verify_state` tinyint NOT NULL DEFAULT '0' COMMENT '更改认证状态:0:未提交,1:已提交/审核中,2:审核成功,3:审核失败', `vip_vote_out_times` tinyint NOT NULL DEFAULT '0' COMMENT '会员允许的免审核的机会次数', `third_platform` VARCHAR(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '0' COMMENT '第三方平台', `gay_role` tinyint NOT NULL DEFAULT '0' COMMENT '同性恋角色:0:other,1:button,2:vers,3:top', `words` VARCHAR(1000) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '关键词过滤', `user_language` VARCHAR(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '用户使用的语言', `birthday_timestamp` INT NOT NULL DEFAULT '0' COMMENT '生日timestamp', `language` VARCHAR(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '用户会的语言', `is_open_block_message` tinyint NOT NULL DEFAULT '0' COMMENT 'Block Message开关是否打开 0:关闭 1:打开', `is_open_control_who_see_you` tinyint NOT NULL DEFAULT '0' COMMENT 'is_open_control_who_see_you 开关是否打开 0:关闭 1:打开', `is_certified_student` tinyint NOT NULL DEFAULT '0' COMMENT '用户是否学生认证 0.未认证 1.认证', `email_code` VARCHAR(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL DEFAULT '' COMMENT '用户主动验证邮箱的校验code', `student_verify_time` INT NOT NULL DEFAULT '0' COMMENT '学生认证时间', `is_trace_visit` tinyint NOT NULL DEFAULT '0' COMMENT '是否无痕浏览 0.否 1.是', `risk_score` VARCHAR(6) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL DEFAULT '' COMMENT '用户MaxMind评测风险值', `first_login_sudynew` tinyint NOT NULL DEFAULT '1' COMMENT '是否未登陆过新版sudy, 1是, 0 否', `have_contact` tinyint NOT NULL DEFAULT '0' COMMENT '是否有联系方式', `free_like_count` tinyint UNSIGNED NOT NULL DEFAULT '2' COMMENT '(超级喜欢)免费剩余次数', `buy_like_count` INT NOT NULL DEFAULT '0' COMMENT '(超级喜欢)购买剩余次数', `vip_like_count` tinyint UNSIGNED NOT NULL DEFAULT '0' COMMENT '(超级喜欢)会员剩余次数', `message_count` INT NOT NULL DEFAULT '0' COMMENT '购买的message剩余次数', `video_message_mins` INT NOT NULL DEFAULT '0' COMMENT '够买的视频剩余时长,分钟数', `user_location` geometry NOT NULL DEFAULT (point(0.0,0.0)) COMMENT '用户位置序列化存储', PRIMARY KEY (`user_id`) ) ENGINE=InnoDB AUTO_INCREMENT=6541872 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; |
传统解决方案
由于MySQL自身问题,此处添加geometry字段时不能同时设置not null和默认值,所以需要分两句SQL进行执行。
添加字段:
1 2 3 4 5 | ALTER TABLE `july_user` ADD COLUMN `user_location` geometry DEFAULT (POINT(0.0, 0.0)) COMMENT '用户位置序列化存储' AFTER `video_message_mins`; -- 执行时间91秒 |
设置字段默认值字段:
1 2 3 4 | ALTER TABLE `july_user` MODIFY COLUMN `user_location` geometry NOT NULL DEFAULT (POINT(0.0, 0.0)) COMMENT '用户位置序列化存储'; -- 执行时间53秒 |
将原有数据写入新字段:
1 2 3 4 | UPDATE `july_user` SET user_location = POINT(longitude, latitude); -- 执行时间142秒 |
添加空间索引:
1 2 3 4 | ALTER TABLE `july_user` ADD SPATIAL KEY `user_location_index` (`user_location`) COMMENT '用户位置索引'; -- 执行时间644秒 |
从上面的SQL执行结果可知,虽然达到了目的,但总的执行时长达到了惊人的930秒,整体执行完需要16分钟左右。
而在这期间,虽然MySQL引入了ONLINE DDL,整个表依然有接近8分钟的时间不可使用,这种情况在线上无疑是不可接受的,严重的时候甚至会导致整个服务崩溃。
更优雅的解决方案
①创建一个临时的新表,首先复制旧表的结构(包含索引)
②给新表加上新增的字段
③把旧表的数据复制过来
④重命名旧表为不可用表名,重命名新表的名字为旧表的名字,在适当的时机删除旧表
①创建一个临时的新表,首先复制旧表的结构(包含索引)
1 2 3 | CREATE TABLE july_user_slave LIKE july_user; -- 执行时间0.1毫秒 |
②给新表加上新增的字段并添加索引
1 2 3 4 5 6 7 | ALTER TABLE `july_user_slave` ADD COLUMN `user_location` geometry DEFAULT (POINT(0.0, 0.0)) COMMENT '用户位置序列化存储' AFTER `video_message_mins`; ALTER TABLE `july_user_slave` MODIFY COLUMN `user_location` geometry NOT NULL DEFAULT (POINT(0.0, 0.0)) COMMENT '用户位置序列化存储'; -- 执行时间0.1毫秒 |
③把旧表的数据复制过来并将数据处理正确
1 2 3 4 5 | INSERT INTO `july_user_slave`(user_id, third_user_id, TYPE, age, birthday, sex, sex_filter, country, state, address, city, longitude, latitude, occupation, education, net_assets, signature, need_padded_info, is_certified, certified_date, certified_tag, certified_tag_time, is_send_vip, is_vip, vip_expiry_date, cougar_is_vip, cougar_vip_expiry_date, gay_is_vip, gay_vip_expiry_date, les_is_vip, les_vip_expiry_date, is_deactivate, is_passed_by_admin, is_quickpass, is_verify, verify_level, verify_type, verify_date, is_publish_verify_moment, is_quality_user, like_noti_switch, message_noti_switch, avatar, password, email, realname, online_date, register_date, modify_date, regist_date_switch_on, third_user_email, verify_code, verify_code_date, is_certified_email, rel_ticket, rel, info, app_version, anonymous_name, anonymous_avatar, udid, today_moments_count, is_admin, beauty_verify_is_disabled, change_beauty_verify_state, vip_vote_out_times, third_platform, gay_role, words, user_language, birthday_timestamp, LANGUAGE, is_open_block_message, is_open_control_who_see_you, is_certified_student, email_code, student_verify_time, is_trace_visit, risk_score, first_login_sudynew, have_contact, free_like_count, buy_like_count, vip_like_count, message_count, video_message_mins) SELECT user_id, third_user_id, TYPE, age, birthday, sex, sex_filter, country, state, address, city, longitude, latitude, occupation, education, net_assets, signature, need_padded_info, is_certified, certified_date, certified_tag, certified_tag_time, is_send_vip, is_vip, vip_expiry_date, cougar_is_vip, cougar_vip_expiry_date, gay_is_vip, gay_vip_expiry_date, les_is_vip, les_vip_expiry_date, is_deactivate, is_passed_by_admin, is_quickpass, is_verify, verify_level, verify_type, verify_date, is_publish_verify_moment, is_quality_user, like_noti_switch, message_noti_switch, avatar, password, email, realname, online_date, register_date, modify_date, regist_date_switch_on, third_user_email, verify_code, verify_code_date, is_certified_email, rel_ticket, rel, info, app_version, anonymous_name, anonymous_avatar, udid, today_moments_count, is_admin, beauty_verify_is_disabled, change_beauty_verify_state, vip_vote_out_times, third_platform, gay_role, words, user_language, birthday_timestamp, LANGUAGE, is_open_block_message, is_open_control_who_see_you, is_certified_student, email_code, student_verify_time, is_trace_visit, risk_score, first_login_sudynew, have_contact, free_like_count, buy_like_count, vip_like_count, message_count, video_message_mins FROM july_user; -- 执行时间 181 s UPDATE `july_user_slave` SET user_location = POINT(longitude, latitude); -- 执行时间 147 s |
④重命名旧表为不可用表名,重命名新表的名字为旧表的名字,在适当的时机删除旧表
1 2 3 4 5 | ALTER TABLE july_user RENAME TO july_user_bak; ALTER TABLE july_user_slave RENAME TO july_user; -- 执行时间0.1毫秒 |
总体执行时间330秒,从16分钟缩短到6分钟左右。而且在整个执行过程中不会锁定业务需要使用的表。
需要注意的是在重命名表的时候需要挑一个业务不活跃的时间点,尽量降低业务短时中断的时间到用户不可察觉,同时将数据的不一致风向降到最低。
总结:
一般情况下,十几万的数据量,可以直接进行加字段操作。
如果数据量较大的情况下,整体思路是创建一个新表,复制数据,然后将表名进行更换。
程序猿老龚(龚杰洪)原创,版权所有,转载请注明出处.