【问题标题】:How to optimise a query containing joins and subqueries如何优化包含连接和子查询的查询
【发布时间】:2014-08-10 08:37:31
【问题描述】:

我接手了下面的查询和数据库结构,因为它很慢,我想对它进行优化。它同时包含连接和子查询,而我读到过这并不是好的做法。我尝试了各种方法来改进它,但现在卡住了,毫无头绪。

如果它很好,那么很好,但如果有改进它的建议,我将非常感激......

该查询从各种表格中提取数据以生成有关供应商网站的点击次数、供应商电话号码“显示”以及已向供应商发送电子邮件的报告。

WHERE 子句使用 1=1,因为有时会添加条件来按地区、县和供应商的业务类型过滤报告。

代码复制自 mysql_slow 日志,因此所有 $variables 都已替换为实际值。表结构取自 mysql 转储的输出。

 

查询:

-- Per-business activity report for April 2014: message counts, click-throughs,
-- phone-number reveals, and county/region counts, one output row per business.
-- Every scalar subquery in the SELECT list is correlated to the outer row
-- (Business.id or U.id); EXPLAIN reports each as a DEPENDENT SUBQUERY.
SELECT Business.*, 
       -- Messages sent to or by the business's user within the date range.
       -- messages.from_to is varchar(500) while users.id is int, so the
       -- comparison relies on implicit type conversion.
       ( SELECT Count(Message.id) FROM messages as Message 
         WHERE (U.id = Message.from_to OR U.id = Message.user_id)  
           AND Message.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'
       ) as message_no, 
       -- Distinct user_id values among thread-starting messages (no parent, or
       -- parent equal to the message itself) within the date range.
       ( SELECT Count(DISTINCT(MessageUnique.user_id)) FROM messages as MessageUnique 
         WHERE (U.id = MessageUnique.from_to OR U.id = MessageUnique.user_id) 
           AND (MessageUnique.parent_message_id is null OR MessageUnique.parent_message_id = MessageUnique.id)  
           AND MessageUnique.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'
       ) as message_unique_no, 
       -- Number of business_counties rows for the business (no date filter).
       ( SELECT Count(*) FROM business_counties as bc2 
         WHERE Business.id = bc2.business_id ) as county_no, 
       -- Click-throughs recorded for the business within the date range.
       ( SELECT Count(click.id) FROM business_clickthroughs as click 
         WHERE Business.id = click.business_id  
           AND click.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'
       ) as clicks, 
       -- Number of business_regions rows for the business (no date filter).
       ( SELECT Count(*) FROM business_regions as br2 
         WHERE Business.id = br2.business_id ) as region_no, 
       -- Reveal rows for the business within the date range. The inner alias
       -- reveal_no is overridden by the outer alias reveals_no.
       ( SELECT count(BusinessReveal.id) as reveal_no FROM business_reveals as BusinessReveal
         WHERE 1=1  
           AND BusinessReveal.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59' 
           AND BusinessReveal.business_id = Business.id
       ) as reveals_no 
FROM businesses as Business 
LEFT JOIN users as U ON Business.id = U.business_id  
-- bc, bt and br are not referenced in the SELECT list; per the question text
-- they are joined so that optional county / business-type / region filters can
-- be appended after WHERE 1=1.
LEFT JOIN business_counties as bc ON Business.id = bc.business_id 
LEFT JOIN businesses_business_types as bt ON Business.id = bt.business_id 
LEFT JOIN business_regions as br ON Business.id = br.business_id 
-- 1=1 is a placeholder so generated filters can be added with a leading AND.
WHERE 1=1  
-- Collapses the row multiplication caused by the joins above.
-- NOTE(review): U.id is not in the GROUP BY, so if a business has several
-- users the message subqueries presumably use an arbitrary one - confirm.
Group By Business.id;

 

表结构:

/*
 Navicat MySQL Data Transfer

 Source Server         : _Localhost
 Source Server Type    : MySQL
 Source Server Version : 50530
 Target Server Type    : MySQL
 Target Server Version : 50530
 File Encoding         : utf-8
*/


-- ----------------------------
--  Table structure for `business_clickthroughs`
--  Counted per business within a `created` date range by the report query
--  (`clicks`). Only `business_id` is indexed (`bid`); `created` is not.
--  `business_id` is int(8) unsigned here but int(11) signed in the other
--  business_* tables.
-- ----------------------------
DROP TABLE IF EXISTS `business_clickthroughs`;
CREATE TABLE `business_clickthroughs` (
  `id` bigint(12) unsigned NOT NULL AUTO_INCREMENT,
  `business_id` int(8) unsigned NOT NULL,
  `registered_user` tinyint(1) unsigned DEFAULT '0',
  `created` datetime NOT NULL,
  PRIMARY KEY (`id`),
  KEY `bid` (`business_id`)
) ENGINE=InnoDB AUTO_INCREMENT=29357 DEFAULT CHARSET=utf8 ROW_FORMAT=COMPACT;

-- ----------------------------
--  Table structure for `business_counties`
--  Pairs a `business_id` with a `county_id`. The report query joins it on
--  `business_id` and counts rows per business (`county_no`); the `bcid` index
--  covers that column.
-- ----------------------------
DROP TABLE IF EXISTS `business_counties`;
CREATE TABLE `business_counties` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `business_id` int(11) NOT NULL,
  `county_id` int(11) NOT NULL,
  PRIMARY KEY (`id`),
  KEY `bcid` (`business_id`)
) ENGINE=MyISAM AUTO_INCREMENT=20124 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci ROW_FORMAT=FIXED;

-- ----------------------------
--  Table structure for `business_regions`
--  Pairs a `business_id` with a `region_id`. Only the primary key is indexed:
--  there is no index on `business_id`, the column the report query joins and
--  filters on (EXPLAIN shows type ALL for both `br` and `br2`).
-- ----------------------------
DROP TABLE IF EXISTS `business_regions`;
CREATE TABLE `business_regions` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `business_id` int(11) NOT NULL,
  `region_id` int(11) NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=2719 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci ROW_FORMAT=FIXED;

-- ----------------------------
--  Table structure for `business_reveals`
--  Counted per business within a `created` date range by the report query
--  (`reveals_no`). The `bid` index covers `business_id` only; `created` is
--  not indexed.
-- ----------------------------
DROP TABLE IF EXISTS `business_reveals`;
CREATE TABLE `business_reveals` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `business_id` int(11) NOT NULL,
  `customer_id` int(11) DEFAULT NULL,
  `created` datetime NOT NULL,
  `modified` datetime NOT NULL,
  PRIMARY KEY (`id`),
  KEY `bid` (`business_id`)
) ENGINE=InnoDB AUTO_INCREMENT=3172 DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT;

-- ----------------------------
--  Table structure for `businesses_business_types`
--  Pairs a `business_id` with a `business_type_id` and a `level`. The report
--  query joins it on `business_id`, which the `bid` index covers.
--  NOTE(review): the index COMMENT '(null)' looks like an export-tool
--  artifact - confirm before relying on it.
-- ----------------------------
DROP TABLE IF EXISTS `businesses_business_types`;
CREATE TABLE `businesses_business_types` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `business_id` int(11) NOT NULL,
  `business_type_id` int(11) NOT NULL,
  `level` int(2) NOT NULL DEFAULT '2',
  PRIMARY KEY (`id`),
  KEY `bid` (`business_id`) COMMENT '(null)'
) ENGINE=MyISAM AUTO_INCREMENT=4484 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci ROW_FORMAT=FIXED;

-- ----------------------------
--  Table structure for `messages`
--  The report query matches `users`.`id` (int) against both `user_id` (int)
--  and `from_to` (varchar(500), indexed on its first 255 characters only) and
--  filters on `created`, which has no index. EXPLAIN shows type ALL for both
--  message subqueries even though `fromto` and `uid` are listed as possible
--  keys.
-- ----------------------------
DROP TABLE IF EXISTS `messages`;
CREATE TABLE `messages` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `subject` varchar(500) DEFAULT NULL,
  `message` text,
  `user_id` int(11) DEFAULT NULL,
  `message_folder_id` int(11) DEFAULT NULL,
  `parent_message_id` int(11) DEFAULT NULL,
  `status` int(11) DEFAULT NULL,
  `direction` int(11) DEFAULT NULL,
  `from_to` varchar(500) DEFAULT NULL,
  `attachment` varchar(500) DEFAULT NULL,
  `created` datetime DEFAULT NULL,
  `modified` datetime DEFAULT NULL,
  `guest_sender` varchar(255) DEFAULT NULL,
  PRIMARY KEY (`id`),
  KEY `fromto` (`from_to`(255)),
  KEY `uid` (`user_id`),
  KEY `pmid` (`parent_message_id`)
) ENGINE=InnoDB AUTO_INCREMENT=4582 DEFAULT CHARSET=utf8 ROW_FORMAT=COMPACT;

-- ----------------------------
--  Table structure for `users`
--  Only the primary key is indexed. The report query joins this table on
--  `business_id` (nullable, no index); EXPLAIN shows type ALL for `U`.
-- ----------------------------
DROP TABLE IF EXISTS `users`;
CREATE TABLE `users` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `login` varchar(255) COLLATE latin1_general_ci NOT NULL,
  `password` varchar(255) COLLATE latin1_general_ci NOT NULL,
  `name` varchar(255) COLLATE latin1_general_ci NOT NULL,
  `email` varchar(255) COLLATE latin1_general_ci NOT NULL,
  `title` varchar(20) COLLATE latin1_general_ci NOT NULL,
  `firstname` varchar(255) COLLATE latin1_general_ci NOT NULL,
  `lastname` varchar(255) COLLATE latin1_general_ci NOT NULL,
  `active` tinyint(1) NOT NULL DEFAULT '0',
  `first_visit` tinyint(1) NOT NULL DEFAULT '1',
  `signature` text COLLATE latin1_general_ci,
  `type` varchar(45) COLLATE latin1_general_ci DEFAULT 'customer',
  `business_id` int(11) DEFAULT NULL,
  `admin_monitor` tinyint(1) NOT NULL DEFAULT '0',
  `partner_name` varchar(255) COLLATE latin1_general_ci DEFAULT NULL,
  `postcode` varchar(255) COLLATE latin1_general_ci DEFAULT NULL,
  `venue_postcode` varchar(255) COLLATE latin1_general_ci DEFAULT NULL,
  `wedding_date` datetime DEFAULT NULL,
  `phone` varchar(255) COLLATE latin1_general_ci NOT NULL,
  `register_date` datetime DEFAULT NULL,
  `event` text COLLATE latin1_general_ci,
  `mailing_list` tinyint(1) NOT NULL DEFAULT '0',
  `created` datetime NOT NULL,
  `modified` datetime NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=2854 DEFAULT CHARSET=latin1 COLLATE=latin1_general_ci ROW_FORMAT=DYNAMIC;

 

解释计划。

id  select_type         table           type    possible_keys       key     key_len     ref             rows    Extra

1   PRIMARY             Business        ALL     -                   -       -           -               444     Using temporary; Using filesort
1   PRIMARY             U               ALL     -                   -       -           -               2658    -
1   PRIMARY             bc              ref     bcid                bcid    4           Business.id     7       Using index
1   PRIMARY             bt              ref     bid                 bid     4           Business.id     9       Using index
1   PRIMARY             br              ALL     -                   -       -           -               440     -
7   DEPENDENT SUBQUERY  BusinessReveal  ref     bid                 bid     4           func            5       Using where
6   DEPENDENT SUBQUERY  br2             ALL     -                   -       -           -               440     Using where
5   DEPENDENT SUBQUERY  click           ref     bid                 bid     4           func            22      Using where
4   DEPENDENT SUBQUERY  bc2             ref     bcid                bcid    4           func            7       Using index
3   DEPENDENT SUBQUERY  MessageUnique   ALL     fromto,uid,pmid     -       -           -               4958    Using where
2   DEPENDENT SUBQUERY  Message         ALL     fromto,uid          -       -           -               4958    Using where

【问题讨论】:

  • 请格式化您的查询,将带有一些示例数据的设置放在sqlfiddle.com 并在此处发布链接。
  • 您的这些表上有索引吗?只要不做不必要的连接,连接本身并不是坏事。
  • @mkross1983 似乎是这样。 OP 已经发布了完整的表定义。

标签: mysql sql join optimization subquery


【解决方案1】:

您的查询有 6 个相关子查询,一共返回 444 行。这些相关子查询中的每一个实际上都会针对返回的每一行执行一次。因此,您的这一条查询实际上会产生将近 3000 次查询(6 × 444 = 2664)。

我个人更倾向于避免相关子查询,改用连接或连接到子查询。不过,这取决于返回的行数。

此外,您还直接连接了多张表(都是左连接),这会产生大量重复行,然后再由 GROUP BY 去除这些重复行。由于您并没有直接从其中大多数表里取任何列,而 GROUP BY 的字段看起来又是唯一键,因此这些连接似乎没有必要。

如果你保留相关的子查询:-

SELECT Count(Message.id) FROM messages as Message 
WHERE (U.id = Message.from_to OR U.id = Message.user_id)  
AND Message.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'

此表上没有可供该子查询使用的有效索引。由于您要用 U.id 与 2 个不同的列比较,可做的优化不多,但在 created 列上建立索引会有所帮助。更好的做法可能是把这个子查询拆成两个,一个检查 from_to,一个检查 user_id,然后将结果相加,这样就可以分别在对应的 id 列和日期列上建立并使用索引。

此外,您计数的对象看起来是唯一键,它永远不会为 NULL,因此可以直接使用 COUNT(*)。

SELECT Count(DISTINCT(MessageUnique.user_id)) FROM messages as MessageUnique 
WHERE (U.id = MessageUnique.from_to OR U.id = MessageUnique.user_id) 
AND (MessageUnique.parent_message_id is null OR MessageUnique.parent_message_id = MessageUnique.id)  
AND MessageUnique.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'

与上一个子查询相同的问题。

SELECT Count(*) FROM business_counties as bc2 
WHERE Business.id = bc2.business_id

这在 business_id 上有一个键,应该没问题

SELECT Count(click.id) FROM business_clickthroughs as click 
WHERE Business.id = click.business_id  
AND click.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'

虽然根据企业 ID 编制索引,但没有涵盖企业 ID 和创建日期的索引,这可能会有所帮助。

SELECT Count(*) FROM business_regions as br2 
WHERE Business.id = br2.business_id

这需要业务区域表上的 business_id 索引

SELECT count(BusinessReveal.id) as reveal_no FROM business_reveals as BusinessReveal
WHERE 1=1  
AND BusinessReveal.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59' 
AND BusinessReveal.business_id = Business.id

这里的key不包括创建日期,只包括业务ID。

如果你想尝试对子查询进行连接(这可能更有效,尽管 MySQL 在连接子查询方面很差)然后像这样(未测试):-

-- Derived-table rewrite of the April 2014 business report: each aggregate is
-- computed once in its own subquery and LEFT JOINed back, instead of running a
-- correlated subquery per business row. Untested against real data.
--
-- NOTE(review): there is no GROUP BY Business.id here, so a business with
-- several users yields one row per (business, user) pair - confirm that this
-- is acceptable or aggregate over users.
-- NOTE(review): a message whose from_to and user_id both match the same user
-- is counted once in each of the paired message subqueries, unlike the
-- original OR predicate - confirm against real data.
SELECT Business.*,
       -- COALESCE keeps the totals at 0 (as the original scalar subqueries
       -- returned) when a LEFT JOIN finds no matching aggregate row.
       COALESCE(mess_1.mess_count, 0) + COALESCE(mess_2.mess_count, 0) AS message_no,
       COALESCE(mess_3.mess_count, 0) + COALESCE(mess_4.mess_count, 0) AS message_unique_no,
       COALESCE(business1.county_no, 0) AS county_no,
       COALESCE(click1.clicks, 0) AS clicks,
       COALESCE(business_regions.region_no, 0) AS region_no,
       COALESCE(business_reveals1.reveals_no, 0) AS reveals_no
FROM businesses AS Business
LEFT JOIN users AS U ON Business.id = U.business_id
-- Messages in the date range, keyed by recipient/sender string (from_to).
LEFT JOIN
(
    SELECT Message.from_to, COUNT(Message.id) AS mess_count
    FROM messages AS Message
    WHERE Message.created BETWEEN '2014-04-01 00:00:00' AND '2014-04-30 23:59:59'
    GROUP BY Message.from_to
) AS mess_1
ON U.id = mess_1.from_to
-- Messages in the date range, keyed by owning user_id.
LEFT JOIN
(
    SELECT Message.user_id, COUNT(Message.id) AS mess_count
    FROM messages AS Message
    WHERE Message.created BETWEEN '2014-04-01 00:00:00' AND '2014-04-30 23:59:59'
    GROUP BY Message.user_id
) AS mess_2
ON U.id = mess_2.user_id
-- Distinct user_ids among thread-starting messages, keyed by from_to.
LEFT JOIN
(
    SELECT MessageUnique.from_to, COUNT(DISTINCT MessageUnique.user_id) AS mess_count
    FROM messages AS MessageUnique
    WHERE (MessageUnique.parent_message_id IS NULL OR MessageUnique.parent_message_id = MessageUnique.id)
      AND MessageUnique.created BETWEEN '2014-04-01 00:00:00' AND '2014-04-30 23:59:59'
    GROUP BY MessageUnique.from_to
) AS mess_3
ON U.id = mess_3.from_to
-- Distinct user_ids among thread-starting messages, keyed by user_id.
LEFT JOIN
(
    SELECT MessageUnique.user_id, COUNT(DISTINCT MessageUnique.user_id) AS mess_count
    FROM messages AS MessageUnique
    WHERE (MessageUnique.parent_message_id IS NULL OR MessageUnique.parent_message_id = MessageUnique.id)
      AND MessageUnique.created BETWEEN '2014-04-01 00:00:00' AND '2014-04-30 23:59:59'
    GROUP BY MessageUnique.user_id
) AS mess_4
-- Fixed: this subquery exposes user_id, not from_to.
ON U.id = mess_4.user_id
-- County rows per business.
LEFT JOIN
(
    SELECT bc2.business_id, COUNT(*) AS county_no
    FROM business_counties AS bc2
    -- Fixed: the outer alias Business is not visible inside a derived table.
    GROUP BY bc2.business_id
) AS business1
ON Business.id = business1.business_id
-- Click-throughs per business within the date range.
LEFT JOIN
(
    SELECT click.business_id, COUNT(click.id) AS clicks
    FROM business_clickthroughs AS click
    WHERE click.created BETWEEN '2014-04-01 00:00:00' AND '2014-04-30 23:59:59'
    GROUP BY click.business_id
) AS click1
ON Business.id = click1.business_id
-- Region rows per business. Fixed: the correlated WHERE on Business.id was
-- removed; correlation happens in the ON clause below.
LEFT JOIN
(
    SELECT br2.business_id, COUNT(*) AS region_no
    FROM business_regions AS br2
    GROUP BY br2.business_id
) AS business_regions
ON Business.id = business_regions.business_id
-- Reveals per business within the date range. Fixed: the column is named
-- reveals_no so it matches the reference in the SELECT list.
LEFT JOIN
(
    SELECT BusinessReveal.business_id, COUNT(BusinessReveal.id) AS reveals_no
    FROM business_reveals AS BusinessReveal
    WHERE BusinessReveal.created BETWEEN '2014-04-01 00:00:00' AND '2014-04-30 23:59:59'
    GROUP BY BusinessReveal.business_id
) AS business_reveals1
ON business_reveals1.business_id = Business.id;

【讨论】:

  • kickstart 非常感谢。仅您的索引建议就可以在 11 秒内搜索 1200 万行,这是一个巨大的改进。接下来,我将研究您关于连接子查询的建议。再次感谢...
  • 祝你好运。连接子查询可能更高效,因为 MySQL 只需将每个子查询执行一次(而不是对返回的每一行各执行一次),但连接派生表时往往无法使用索引。在 MySQL 中,哪种方式更高效会随数据量而变化(例如,如果只有一条主记录,却要连接到一个返回大量记录的子查询结果上,那么由于连接缺少索引,速度会很慢)。
猜你喜欢
  • 2023-01-03
  • 1970-01-01
  • 2011-02-11
  • 2010-11-06
  • 2020-05-10
  • 2016-04-07
  • 2015-05-19
  • 1970-01-01
  • 1970-01-01
相关资源
最近更新 更多