以下是 Snowflake(或 SQL)中的分层示例,基于以下内容:
https://en.wikipedia.org/wiki/Stratified_sampling
这可以作为固定数字或百分比返回。
这已被编译为单个查询,在我们的实现中,我们实际上预先创建了排序段列表 (W0) 作为临时表,而不是让相同的查询运行多次。
SELECT
W1.Id,
W1.EmploymentStatus,
W1.Gender
FROM
( SELECT
ID,
Row_Number() OVER ( PARTITION BY COALESCE(Gender, '') || COALESCE(EmploymentStatus,'') ORDER BY random() ) as iInternalRank,
COALESCE(Gender, '') || COALESCE(EmploymentStatus,'') as sInternalGroupVal
FROM STAFF ) W0,
STAFF W1 -- Linked back the original table (Inbound query)
WHERE (
SELECT
MAX(case when W2.sInternalGroupVal = W3.sInternalGroupVal then W3.iGroupSegmentVolume else 0 end ) -- This is where the magic happens...
FROM (
SELECT
ID,
Row_Number() OVER ( PARTITION BY COALESCE(Gender, '') || COALESCE(EmploymentStatus,'') ORDER BY random() ) as iInternalRank,
COALESCE(Gender, '') || COALESCE(EmploymentStatus,'') as sInternalGroupVal
FROM STAFF ) W2,
(SELECT
sInternalGroupVal,
COUNT(Id)*(40/iTotalPopulation::DOUBLE PRECISION) as iGroupSegmentVolume -- as a fixed volumne (40 Records)
--COUNT(Id)*((iTotalPopulation*(23/100.00))/iTotalPopulation::DOUBLE PRECISION) as iGroupSegmentVolume -- as a percentage (23% of overall population)
FROM (SELECT
ID,
Row_Number() OVER ( PARTITION BY COALESCE(Gender, '') || COALESCE(EmploymentStatus,'') ORDER BY random() ) as iInternalRank,
COALESCE(Gender, '') || COALESCE(EmploymentStatus,'') as sInternalGroupVal
FROM STAFF ),
(SELECT
COUNT(Id) as iTotalPopulation
FROM ( SELECT
ID,
Row_Number() OVER ( PARTITION BY COALESCE(Gender, '') || COALESCE(EmploymentStatus,'') ORDER BY random() ) as iInternalRank,
COALESCE(Gender, '') || COALESCE(EmploymentStatus,'') as sInternalGroupVal
FROM STAFF )
) W4
GROUP BY sInternalGroupVal, iTotalPopulation) W3
WHERE ((W2.sInternalGroupVal = W0.sInternalGroupVal)) AND ((W2.sInternalGroupVal = W3.sInternalGroupVal))
) >= iInternalRank
AND ((W1.Id = W0.Id));