SQL Severのランダムサンプリング、階層抽出とバランスサンプリング
1650 ワード
3.3
3.3.1
-- 10%
SELECT t.*
FROM t
WHERE RAND() < 0.1
SQL Sever , NEWID(), ,
, NEWID()
SELECT TOP PERCENT t.*
FROM t
ORDER BY NEWID()
10%
SELECT t.*
FROM t
WHERE RAND(CHECKSUM(NEWID())) < 0.1
, , 。
3.3.2
ID :
SELECT t.*
FROM t
WHERE RAND(id) < 0.1
ID , , , :
WHERE RAND(id + 1) < 0.1
ROW_NUMBER() 。
WITH t as (
SELECT t.*, ROW_NUMBER() OVER (ORDER BY col) as seqnum
FROM t
)
SELECT t.*
FROM t
WHERE (t * 17 + 57) % 101 <= 10;
3.3.3
,subscribers 47.18% 。
WITH a as (
SELECT s.*, ROW_NUMBER() OVER (ORDER BY isactive) as seqnum
FROM subscribers s
)
SELECT AVG(1.0 * isactive)
FROM a
WHERE seqnum % 100 = 1;
CTE , 99 100 , 100
, 100 。 47.18,
, ,
WITH s as (
SELECT s.*, ROW_NUMBER() OVER (ORDER BY market, isactive) as sequm
FROM subscribers s
)
SELECT AVG(isactive)
FROM s
WHERE sequm % 100 = 1;
3.3.4
WITH o as (
SELECT o.*,
ROW_NUMBER() OVER (PARTITION BY isae ORDER BY NEWID()) as seqnum
FROM (SELECT o.*,
(CASE WHEN paymenttype = 'AE' THEN 1 ELSE 0 END) as isae
FROM orders o
WHERE totalprice <= 200
) o
)
SELECT orderdate,
(CASE WHEN isae = 1 THEN totalprice END) as ae,
(CASE WHEN isae = 0 THEN totalprice END) as notae
FROM o
WHERE seqnum <= 100;