在进行数据流转换之前,我们先介绍一下使用场景:以IISLOG为依据,进行网站点击率分析(IP & PV 分析),具体需求如下:
(1)分析一段时间内,网站点击率的变化趋势。同时还需要知道各个周末、各个节假日网站的流量情况。
(2)分析一天内,各时段(以小时为单位)网站的压力情况。
(3)了解网站客户群分别来自哪些国家,哪些地区。
为了实现这些需求,我们建立了如下的数据模型,请看:
-- Switch the session to the [IisLog] database so that the objects defined
-- below are created there. GO ends the batch (client-side batch separator).
USE [IisLog]
GO
-- Create the fact table.
-- Holds the IIS log data used for the click-rate (IP & PV) analysis;
-- presumably one row per parsed IIS log entry -- confirm against the loader.
--
-- The OBJECT_ID guard makes this statement idempotent: if [dbo].[IISLog]
-- already exists (script re-run, or the table is defined more than once in
-- the same script) the CREATE is skipped instead of failing with error 2714
-- ("There is already an object named ... in the database").
--
-- NOTE(review): no PRIMARY KEY or FOREIGN KEY is declared on this table; the
-- lng*ID columns look like keys into the dimension tables -- confirm.
IF OBJECT_ID(N'[dbo].[IISLog]', N'U') IS NULL
BEGIN
    CREATE TABLE [dbo].[IISLog]
    (
        [lngID]        [bigint]        NOT NULL,  -- row identifier (not declared as a key)
        [lngShopID]    [int]           NULL,
        [lngDateID]    [int]           NULL,      -- presumably -> dimDate.lngDateID (no FK declared)
        [lngTimeID]    [int]           NULL,      -- presumably -> dimTime.lngTimeID (no FK declared)
        [csDateTime]   [datetime]      NULL,
        -- NOTE(review): dimIP.[ID] is declared bigint in this script while this
        -- column is int -- confirm the intended join key and type.
        [lngIpID]      [int]           NULL,
        [cIP]          [varchar](30)   NULL,
        [csUriStem]    [varchar](1000) NULL,
        [csUriQuery]   [varchar](1000) NULL,
        [scStatus]     [varchar](30)   NULL,
        [UserAgent]    [varchar](255)  NULL,
        [lngReferer]   [int]           NULL,
        [csReferer]    [varchar](1000) NULL,
        [csRefererKPI] [varchar](1000) NULL,
        [lngFlag]      [int]           NULL
    ) ON [PRIMARY];
END
-- IP library (dimension): maps an IP segment to country / province / city.
--
-- The OBJECT_ID guard makes this statement idempotent: if [dbo].[dimIP]
-- already exists (script re-run, or the table is defined more than once in
-- the same script) the CREATE is skipped instead of failing with error 2714.
--
-- NOTE(review): the constraint name [PK_ID] does not follow the PK_<table>
-- pattern used by [PK_dimDate] / [PK_dimTime]; it is kept unchanged because
-- renaming it could break scripts that reference the constraint by name.
-- NOTE(review): [ipSegment] is nvarchar while the other text columns are
-- varchar -- confirm this is intentional.
IF OBJECT_ID(N'[dbo].[dimIP]', N'U') IS NULL
BEGIN
    CREATE TABLE [dbo].[dimIP]
    (
        [ID]          [bigint] IDENTITY(1,1) NOT NULL,  -- surrogate key, auto-incremented from 1
        [ipSegment]   [nvarchar](20)         NULL,
        [strCountry]  [varchar](20)          NULL,
        [strProvince] [varchar](20)          NULL,
        [strCity]     [varchar](50)          NULL,
        [strMemo]     [varchar](100)         NULL,
        CONSTRAINT [PK_ID] PRIMARY KEY CLUSTERED ([ID] ASC)
            WITH (
                PAD_INDEX = OFF,
                STATISTICS_NORECOMPUTE = OFF,
                IGNORE_DUP_KEY = OFF,
                ALLOW_ROW_LOCKS = ON,
                ALLOW_PAGE_LOCKS = ON
            ) ON [PRIMARY]
    ) ON [PRIMARY];
END
-- Date dimension.
--
-- The OBJECT_ID guard makes this statement idempotent: if [dbo].[dimDate]
-- already exists (script re-run, or the table is defined more than once in
-- the same script) the CREATE is skipped instead of failing with error 2714.
IF OBJECT_ID(N'[dbo].[dimDate]', N'U') IS NULL
BEGIN
    CREATE TABLE [dbo].[dimDate]
    (
        [lngDateID]   [int]         NOT NULL,  -- primary key; values are supplied by the loader (no IDENTITY)
        [lngYear]     [int]         NULL,
        [strMonth]    [varchar](10) NULL,
        [dtDateTime]  [datetime]    NULL,
        [strQuarter]  [varchar](10) NULL,
        [strDateAttr] [varchar](10) NULL,      -- presumably weekday / weekend / holiday marker -- TODO confirm
        [strMemo]     [varchar](50) NULL,
        CONSTRAINT [PK_dimDate] PRIMARY KEY CLUSTERED ([lngDateID] ASC)
            WITH (
                PAD_INDEX = OFF,
                STATISTICS_NORECOMPUTE = OFF,
                IGNORE_DUP_KEY = OFF,
                ALLOW_ROW_LOCKS = ON,
                ALLOW_PAGE_LOCKS = ON
            ) ON [PRIMARY]
    ) ON [PRIMARY];
END
-- Time-of-day dimension (the analysis is done per hour).
--
-- The OBJECT_ID guard makes this statement idempotent: if [dbo].[dimTime]
-- already exists (script re-run, or the table is defined more than once in
-- the same script) the CREATE is skipped instead of failing with error 2714.
IF OBJECT_ID(N'[dbo].[dimTime]', N'U') IS NULL
BEGIN
    CREATE TABLE [dbo].[dimTime]
    (
        [lngTimeID]   [int]         NOT NULL,  -- primary key; values are supplied by the loader (no IDENTITY)
        [lngHour]     [int]         NULL,
        [strHour]     [varchar](10) NULL,
        [strTimeAttr] [varchar](10) NULL,      -- presumably a label for the time slot -- TODO confirm
        [strMemo]     [varchar](50) NULL,
        CONSTRAINT [PK_dimTime] PRIMARY KEY CLUSTERED ([lngTimeID] ASC)
            WITH (
                PAD_INDEX = OFF,
                STATISTICS_NORECOMPUTE = OFF,
                IGNORE_DUP_KEY = OFF,
                ALLOW_ROW_LOCKS = ON,
                ALLOW_PAGE_LOCKS = ON
            ) ON [PRIMARY]
    ) ON [PRIMARY];
END
GO
-- Create the fact table (only when it does not exist yet).
-- Holds the IIS log data used for the click-rate (IP & PV) analysis;
-- presumably one row per parsed IIS log entry -- confirm against the loader.
--
-- [dbo].[IISLog] is also created earlier in this script, so an unconditional
-- CREATE TABLE here fails with error 2714 ("There is already an object
-- named ... in the database"). The OBJECT_ID guard turns this statement into
-- a no-op when the table is already present.
--
-- NOTE(review): no PRIMARY KEY or FOREIGN KEY is declared on this table; the
-- lng*ID columns look like keys into the dimension tables -- confirm.
IF OBJECT_ID(N'[dbo].[IISLog]', N'U') IS NULL
BEGIN
    CREATE TABLE [dbo].[IISLog]
    (
        [lngID]        [bigint]        NOT NULL,  -- row identifier (not declared as a key)
        [lngShopID]    [int]           NULL,
        [lngDateID]    [int]           NULL,      -- presumably -> dimDate.lngDateID (no FK declared)
        [lngTimeID]    [int]           NULL,      -- presumably -> dimTime.lngTimeID (no FK declared)
        [csDateTime]   [datetime]      NULL,
        -- NOTE(review): dimIP.[ID] is declared bigint in this script while this
        -- column is int -- confirm the intended join key and type.
        [lngIpID]      [int]           NULL,
        [cIP]          [varchar](30)   NULL,
        [csUriStem]    [varchar](1000) NULL,
        [csUriQuery]   [varchar](1000) NULL,
        [scStatus]     [varchar](30)   NULL,
        [UserAgent]    [varchar](255)  NULL,
        [lngReferer]   [int]           NULL,
        [csReferer]    [varchar](1000) NULL,
        [csRefererKPI] [varchar](1000) NULL,
        [lngFlag]      [int]           NULL
    ) ON [PRIMARY];
END
-- IP library (dimension): maps an IP segment to country / province / city.
-- Created only when it does not exist yet.
--
-- [dbo].[dimIP] is also created earlier in this script, so an unconditional
-- CREATE TABLE here fails with error 2714. The OBJECT_ID guard turns this
-- statement into a no-op when the table is already present.
--
-- NOTE(review): the constraint name [PK_ID] does not follow the PK_<table>
-- pattern used by [PK_dimDate] / [PK_dimTime]; it is kept unchanged because
-- renaming it could break scripts that reference the constraint by name.
-- NOTE(review): [ipSegment] is nvarchar while the other text columns are
-- varchar -- confirm this is intentional.
IF OBJECT_ID(N'[dbo].[dimIP]', N'U') IS NULL
BEGIN
    CREATE TABLE [dbo].[dimIP]
    (
        [ID]          [bigint] IDENTITY(1,1) NOT NULL,  -- surrogate key, auto-incremented from 1
        [ipSegment]   [nvarchar](20)         NULL,
        [strCountry]  [varchar](20)          NULL,
        [strProvince] [varchar](20)          NULL,
        [strCity]     [varchar](50)          NULL,
        [strMemo]     [varchar](100)         NULL,
        CONSTRAINT [PK_ID] PRIMARY KEY CLUSTERED ([ID] ASC)
            WITH (
                PAD_INDEX = OFF,
                STATISTICS_NORECOMPUTE = OFF,
                IGNORE_DUP_KEY = OFF,
                ALLOW_ROW_LOCKS = ON,
                ALLOW_PAGE_LOCKS = ON
            ) ON [PRIMARY]
    ) ON [PRIMARY];
END
-- Date dimension. Created only when it does not exist yet.
--
-- [dbo].[dimDate] is also created earlier in this script, so an unconditional
-- CREATE TABLE here fails with error 2714. The OBJECT_ID guard turns this
-- statement into a no-op when the table is already present.
IF OBJECT_ID(N'[dbo].[dimDate]', N'U') IS NULL
BEGIN
    CREATE TABLE [dbo].[dimDate]
    (
        [lngDateID]   [int]         NOT NULL,  -- primary key; values are supplied by the loader (no IDENTITY)
        [lngYear]     [int]         NULL,
        [strMonth]    [varchar](10) NULL,
        [dtDateTime]  [datetime]    NULL,
        [strQuarter]  [varchar](10) NULL,
        [strDateAttr] [varchar](10) NULL,      -- presumably weekday / weekend / holiday marker -- TODO confirm
        [strMemo]     [varchar](50) NULL,
        CONSTRAINT [PK_dimDate] PRIMARY KEY CLUSTERED ([lngDateID] ASC)
            WITH (
                PAD_INDEX = OFF,
                STATISTICS_NORECOMPUTE = OFF,
                IGNORE_DUP_KEY = OFF,
                ALLOW_ROW_LOCKS = ON,
                ALLOW_PAGE_LOCKS = ON
            ) ON [PRIMARY]
    ) ON [PRIMARY];
END
-- Time-of-day dimension (the analysis is done per hour).
-- Created only when it does not exist yet.
--
-- [dbo].[dimTime] is also created earlier in this script, so an unconditional
-- CREATE TABLE here fails with error 2714. The OBJECT_ID guard turns this
-- statement into a no-op when the table is already present.
IF OBJECT_ID(N'[dbo].[dimTime]', N'U') IS NULL
BEGIN
    CREATE TABLE [dbo].[dimTime]
    (
        [lngTimeID]   [int]         NOT NULL,  -- primary key; values are supplied by the loader (no IDENTITY)
        [lngHour]     [int]         NULL,
        [strHour]     [varchar](10) NULL,
        [strTimeAttr] [varchar](10) NULL,      -- presumably a label for the time slot -- TODO confirm
        [strMemo]     [varchar](50) NULL,
        CONSTRAINT [PK_dimTime] PRIMARY KEY CLUSTERED ([lngTimeID] ASC)
            WITH (
                PAD_INDEX = OFF,
                STATISTICS_NORECOMPUTE = OFF,
                IGNORE_DUP_KEY = OFF,
                ALLOW_ROW_LOCKS = ON,
                ALLOW_PAGE_LOCKS = ON
            ) ON [PRIMARY]
    ) ON [PRIMARY];
END