HOME/📦 INBOX/

【🔎 SQL】繰り返し処理できるの? - BigQueryScripting

Article Outline
TOC
Collection Outline

|| LOOP処理

DECLARE x INT64 DEFAULT 0;
LOOP
  SET x = x + 1;
  IF x >= 10 THEN
    LEAVE;
  END IF;
END LOOP;
SELECT x;

|| BigQueryScriptingってなに?

-- Declare a variable to hold names as an array.
DECLARE top_names ARRAY<STRING>;
-- Build an array of the top 100 names from the year 2017.
SET top_names = (
  SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
  FROM `bigquery-public-data`.usa_names.usa_1910_current
  WHERE year = 2017
);
-- Which names appear as words in Shakespeare's plays?
SELECT
  name AS shakespeare_name
FROM UNNEST(top_names) AS name
WHERE name IN (
  SELECT word
  FROM `bigquery-public-data`.samples.shakespeare
);

Cf.

|| 早速やってみよう!

結論、まだできね (><;)

(ちょっとづつ完成に向けて試行錯誤するか...)

DECLARE x INT64 DEFAULT 1;
DECLARE column_name ARRAY<STRING>;
DECLARE result ARRAY<STRING>;

SET column_name = [
    'vendor_id', 'pickup_datetime','dropoff_datetime', 'passenger_count',
    'payment_type', 'trip_distance', 'pickup_longitude', 'pickup_latitude',
    'rate_code', 'store_and_fwd_flag', 'dropoff_longitude', 'dropoff_latitude',     
    'fare_amount', 'surcharge', 'mta_tax', 'tip_amount', 'tolls_amount', 'total_amount'];

SET result = [];

WHILE x < array_length(column_name) DO
    select
        count(column_name) as n,
        avg(column_name) as mean,
        stddev(column_name) as std,
        min(column_name) as min,
        (select q from(select percentile_cont(column_name, 0.25) over() as q from `pj.ds.tb`) group by q) as first_quartile,
        (select q from(select percentile_cont(column_name, 0.5) over() as q from `pj.ds.tb`) group by q) as median,
        (select q from(select percentile_cont(column_name, 0.75) over() as q from `pj.ds.tb`) group by q) as thrd_quartile,
        max(column_name) as max
    from `pj.ds.tb`
    where passenger_count is not null;

    SET x = x + 1;

END WHILE;
select x;