SQL Severのランダムサンプリング、階層抽出とバランスサンプリング

1650 ワード

3.3      

3.3.1     

--  10%       
SELECT t.*
FROM  t
WHERE RAND() < 0.1

      SQL Sever ,   NEWID(),                      ,
      ,       NEWID()

SELECT TOP PERCENT t.*
FROM  t
ORDER BY NEWID()
     10%

SELECT t.*
FROM  t
WHERE RAND(CHECKSUM(NEWID())) < 0.1
               ,           ,               。

3.3.2         
  ID           :

SELECT t.*
FROM  t
WHERE RAND(id) < 0.1

      ID ,          ,         ,      :
WHERE RAND(id + 1) < 0.1
         ROW_NUMBER()        。
WITH t as (
			 SELECT t.*, ROW_NUMBER() OVER (ORDER BY col) as seqnum
			 FROM  t
	)
SELECT t.*
FROM t
WHERE (t * 17 + 57) % 101 <= 10;

3.3.3       
                    ,subscribers  47.18%       。
WITH a as (
				SELECT s.*, ROW_NUMBER() OVER (ORDER BY isactive) as seqnum
				FROM subscribers s
					)
SELECT AVG(1.0 * isactive)
FROM a
WHERE seqnum % 100 = 1;

CTE                  ,    99      100   ,      100 
           , 100           。   47.18,       

                    ,    ,                        

WITH s as (
		SELECT s.*, ROW_NUMBER() OVER (ORDER BY market, isactive) as sequm
		FROM subscribers s
)
SELECT AVG(isactive)
FROM s
WHERE sequm % 100 = 1;

3.3.4      

WITH o as (
			SELECT o.*,
			ROW_NUMBER() OVER (PARTITION BY isae ORDER BY NEWID()) as seqnum
			FROM (SELECT o.*,
									 (CASE WHEN paymenttype = 'AE' THEN 1 ELSE 0 END) as isae
						FROM orders o
						WHERE totalprice <= 200
					) o
			)
SELECT orderdate,
			(CASE WHEN isae = 1 THEN totalprice END) as ae,
			(CASE WHEN isae = 0 THEN totalprice END) as notae
FROM o
WHERE seqnum <= 100;