Article Outline
google_bigquery_8
TOC
Collection Outline
句(節)
演算子
関数
入門 @Udemy
DS100ノック
ML
アクセス解析
date : 2021-09-16 title : 🔍 分析入門 Section8: ER図、テーブル結合 excerpt : ---
tags : ["🔍", "BigQuery", "Udemy"]
|| Section8
| ER図
ER図(Entity-Relations): テーブル同士の関係性を表した図表
そもそも、DBが複数のテーブルを持っているのは?
∵ カスタマイズ性、メンテナンス性を高めることが重視されているため。
| キーの種類
🔑 主キー(PK:PrimaryKey) :
🗝 外部キー(FK:ForeignKey) : 外部テーブルと結合するためのキー
| JOIN - 結合
※ JOINの種類
1. INNER JOIN : 左右両方のテーブルに[FK=PK]が存在するレコードだけを結合(いずれにも該当しないレコードは弾かれる)
2. LEFT OUTER JOIN : 左右にあろうが、無かろうが、兎に角「左を優先して!」結合
3. RIGHT OUTER JOIN : 左右にあろうが、無かろうが、兎に角「右を優先して!」結合
4. FULL OUTER JOIN : 左右にあるだけのレコードを「全部まとめて出してくれ!」結合
5. CROSS JOIN :
e.g. 書式
SELECT
FROM テーブルA AS A
(INNER|LEFT[OUTER]|RIGHT[OUTER]|FULL[OUTER])JOIN テーブルB AS B
→ USING(FK-PKの両テーブルに共通するカラム)
→ ON A.FK = B.PK [AND 絞り込み条件]
※ デフォルトINNER JOIN (INNERや、OUTERは省略可能)。
※ JOIN後は、USINGで共通キーを紐づける。更に詳細に設定する場合にON句を使用。
※ ON句の[AND 絞り込み条件]は省略可能。
■ USINGでの結合
e.g.
SELECT
p.user_id,
p.product_id,
p.unit_price,
sm.category,
sm.prod_name
FROM
`prj-test3.bq_trial.pos` AS p
LEFT JOIN
`prj-test3.bq_trial.shohin_master` AS sm
USING(product_id)
ORDER BY p.product_id;
-- [key]
-- | |user_id|product_id|unit_price|category|prob_name|
-- | 1|ABC | 1| 120|くだもの|いちご |
-- | 2|XYZ | 2| 200|野菜 |白菜 |
-- | 3|STU | 2| 200|野菜 |白菜 |
-- | 4|STU | 3| 160|野菜 |人参 |
-- |15|www | 11| 210|null |null |
-- [shohin_master.csv]には無い値なので、nullが返る
■ ON句による条件指定の結合
e.g.
SELECT
p.user_id,
p.product_id,
p.unit_price,
p.quantity,
sm.category,
sm.prod_name
FROM
`prj-test3.bq_trial.pos` AS p
LEFT JOIN
`prj-test3.bq_trial.shohin_master` AS sm
ON p.product_id = sm.product_id AND p.user_id="ABC" -- ON句で詳細設定しての結合
ORDER BY p.product_id;
-- [条件] [key]
-- | |user_id|product_id|unit_price|quantity|category|prod_name|
-- |1|ABC | 1| 120| 10|くだもの |いちご |
-- |2|XYZ | 2| 200| 2|null |null |
-- |3|STU | 2| 200| 3|null |null |
-- LEFT JOIN での結合のため、左側の全て取得しnullが返ってしまう。
-- この場合は、INNER JOINを使用すると、意図する挙動になる。
INNER JOIN `prj-test3.bq_trial.shohin_master` AS sm
-- | |user_id|product_id|unit_price|quantity|category|prod_name|
-- |1|ABC | 1| 120| 10|くだもの |いちご |
-- |2|ABC | 4| 160| 12|魚 |アジ |
-- |3|ABC | 5| 100| 5|肉 |豚肉 |
-- 更にON句では AND で条件を追加もできる。
ON p.product_id = sm.product_id AND p.user_id="ABC" AND sm.category="肉"
--[条件]-- --[key]-- --[条件]--
-- | |user_id|product_id|unit_price|quantity|category|prod_name|
-- |1|ABC | 5| 100| 5|肉 |豚肉 |
-- |2|ABC | 10| 150| 8|肉 |豚肉 |
■結合例外処理(注意)
e.g. 主キー(PK)の重複により要件を満たさないテーブルの結合
SELECT
p.user_id,
p.product_id,
p.unit_price,
p.quantity,
smb.category,
smb.prod_name
FROM
`prj-test3.bq_trial.pos` AS p
LEFT JOIN
`prj-test3.bq_trial.shohin_master_bad` AS smb
USING(product_id)
WHERE p.product_id = 3
ORDER BY P.product_id;
-- | |user_id|product_id|unit_price|qunantity|category|prod_name |
-- |1|STU | 3| 160| 8|野菜 |人参 |
-- |2|STU | 3| 160| 8|野菜 |人参 |
-- |3|WWW | 3| 160| 5|野菜 |人参 |
-- |4|WWW | 3| 160| 5|野菜 |人参 |
-- |5|XYZ | 3| 160| 2|野菜 |人参 |
-- |6|XYZ | 3| 160| 2|野菜 |人参 |
/****************************************************************
* (問題点)
* pos.csvには、product_id=3番は3レコード存在している。
* shohin_master_bad.csvには本来PKは一意に存在しなくてはいけないところ、product_id3番が重複していた。
* その結果、結合後に3レコード取るはずが、倍の6レコード取得されてしまっていた。(重複分が招いた、問題点)
* 集計時に二重計上による誤差になりかねない💀
****************************************************************/
ex.【8.5 演習問題1(0:20)】
SELECT
-- sp.purchase_id, sp.date, sp.user_id,
-- c.gender,
CASE(c.gender)
WHEN 1 THEN "mele"
WHEN 2 THEN "female"
ELSE "unknow"
END AS gender,
-- CONCAT(c.last_name, " ", c.first_name) AS full_name,
COUNT(*) AS shop_count,
SUM(sp.quantity) AS shop_quantity,
SUM(sp.sales_amount) AS total_amount,
ROUND(SUM(sp.sales_amount)/SUM(sp.quantity)) AS avg_amount
FROM `prj-test3.bq_sample.shop_purchases` AS sp
LEFT JOIN `prj-test3.bq_sample.customers` AS c
USING(user_id)
GROUP BY gender
ORDER BY total_amount DESC;
-- | |gender|shop_count|shop_quantity|total_amount|avg_amount|
-- |1|female| 792| 1819| 11456055| 6298.0|
-- |2|male | 450| 984| 7969220| 8099.0|
-- |3|unknow| 40| 92| 693117| 7534.0|
-- #Q1. female
-- #Q2. female
-- #Q3. female
-- #Q4. male
ex.【8.5 演習問題2(3:50)】
#(miss_code)
-- SELECT
-- -- *,
-- -- sp.user_id,
-- -- ("2018-12-31"- c.birthday)/365 AS age,
-- ROUND(DATE_DIFF(date"2018-12-31", c.birthday, DAY)/365) AS age,
-- CASE(c.gender)
-- WHEN 1 THEN "mele"
-- WHEN 2 THEN "female"
-- ELSE "unknow"
-- END AS gender,
-- ROUND(AVG(quantity),1) AS avg_quantity
-- FROM `prj-test3.bq_sample.shop_purchases` AS sp
-- LEFT JOIN `prj-test3.bq_sample.customers` AS c
-- USING(user_id)
-- GROUP BY gender, age
-- HAVING gender != "unknow"
-- ORDER BY 3 DESC
-- LIMIT 3;
-- | |age |gender|avg_quantity|
-- |1|32.0|male | 3.7|
-- |2|44.0|male | 3.3|
-- |3|62.0|male | 3.0|
#(collect_code)
SELECT
DATE_DIFF("2018-12-31", cu.birthday, YEAR) AS nenrei,
CASE cu.gender
WHEN 1 THEN "男性"
WHEN 2 THEN "女性"
END AS seibetsu,
ROUND(AVG(sp.quantity), 1) AS avg_aty
FROM `prj-test3.bq_sample.shop_purchases` AS sp
LEFT JOIN `prj-test3.bq_sample.customers` AS cu
USING(user_id)
WHERE gender <> 3
GROUP BY nenrei, gender
ORDER BY avg_aty DESC
LIMIT 3;
-- | |nenrei |seibetsu|avg_qty|
-- |1| 31|男性 | 3.5|
-- |2| 66|女性 | 3.2|
-- |3| 62|男性 | 3.1|
ex.【8.5 演習問題3(7:30)】
SELECT
CONCAT(c.last_name, " ",c.first_name ) AS full_name,
SUM(sp.sales_amount) AS total_amount
FROM `prj-test3.bq_sample.shop_purchases` AS sp
LEFT JOIN `prj-test3.bq_sample.customers` AS c
USING(user_id)
WHERE
c.Is_premium = false --プレミアム会員以外
AND c.first_name IS NOT NULL
AND c.last_name IS NOT NULL
GROUP BY full_name
ORDER BY total_amount DESC
LIMIT 3;
-- | |full_name|total_amount|
-- |1|小杉 信貴 | 104500|
-- |2|宗村 良崇 | 60216|
-- |3|和栗 昇悟 | 59400|
| 複数テーブルの結合
e.g. 書式
SELECT
FROM [テーブルA] AS A
(INNER|LEFT[OUTER]|RIGHT[OUTER]|FULL[OUTER])JOIN [テーブルB] AS B
→ USING(FK-PKの両テーブルに共通するカラム)
→ ON A.FK = B.PK [AND 絞り込み条件]
(INNER|LEFT[OUTER]|RIGHT[OUTER]|FULL[OUTER])JOIN [テーブルC] AS C
→ USING(FK-PKの両テーブルに共通するカラム)
→ ON A.FK = C.PK [AND 絞り込み条件]
ex.【8.6 演習問題1(2:10)】
#(miss_code)
-- SELECT
-- s.shop_name,
-- s.chief_name,
-- SUM(sp.sales_amount) AS total_amount,
-- CASE c.gender
-- WHEN 1 THEN "male"
-- WHEN 2 THEN "female"
-- ELSE "unknow"
-- END AS gender
-- FROM `prj-test3.bq_sample.shop_purchases` AS sp
-- LEFT JOIN `prj-test3.bq_sample.customers` AS c USING(user_id)
-- LEFT JOIN `prj-test3.bq_sample.products_master` AS p USING(product_id)
-- LEFT JOIN `prj-test3.bq_sample.shops_master` AS s USING(shop_id)
-- GROUP BY s.shop_name, s.chief_name, gender
-- HAVING gender = female
-- ORDER BY 1, 3 DESC;
#(collect_code)
SELECT
sm.chief_name AS tencho,
sm.shop_name AS shop_name,
CASE cu.gender
WHEN 1 THEN "male"
WHEN 2 THEN "female"
END AS customer_gender,
COUNT(DISTINCT sp.user_id) AS kyakusuu,
SUM(sp.sales_amount) AS uriage
FROM
`prj-test3.bq_sample.shop_purchases` AS sp
LEFT JOIN
`prj-test3.bq_sample.customers` AS cu ON sp.user_id = cu.user_id
LEFT JOIN
`prj-test3.bq_sample.products_master` AS pm ON sp.product_id = pm.product_id
LEFT JOIN
`prj-test3.bq_sample.shops_master` AS sm ON sp.shop_id = sm.shop_id
WHERE cu.gender <> 3
GROUP BY
tencho,
sm.shop_name,
customer_gender
ORDER BY
customer_gender,
uriage DESC
;
-- | |tencho |shop_name |customer_gnder|kyakusuu|uriage |
-- |1|柳澤 華子 |自由が丘店 |female | 306|4721503|
-- |2|山下 唐三郎|下北沢店 |female | 258|4476825|
-- |5|柳澤 華子 |自由が丘店 |male | 172|4004558|
-- |6|山下 唐三郎|下北沢店 |male | 145|2713434|
ex.【8.6 演習問題2(7:30)】
SELECT
COUNT(*) AS sales_count,
SUM(sp.sales_amount) AS total_amount,
s.chief_name AS chief
FROM `prj-test3.bq_sample.shop_purchases` AS sp
LEFT JOIN `prj-test3.bq_sample.customers` AS c USING(user_id)
LEFT JOIN `prj-test3.bq_sample.products_master` AS p USING(product_id)
LEFT JOIN `prj-test3.bq_sample.shops_master` AS s USING(shop_id)
WHERE
sp.date BETWEEN "2018-03-01" AND "2018-03-31"
-- DATE_TRUNC(sp.date, month) = "2018-03-01" #別解
AND c.prefecture != "Tokyo"
AND c.gender IS NOT NULL
AND p.prod_gender = "f"
GROUP BY chief
ORDER BY 2 DESC;
-- | |sales_count|total_amount|chief |
-- |1| 3| 142276|大井谷 みすず|
-- |2| 4| 108905|山下 唐三郎 |
-- |3| 2| 80667|柳澤 華子 |
ex.【8.6 演習問題3(10:20)】
SELECT
-- s.shop_name AS shop_name,
p.prod_name AS product,
MAX(sp.sales_amount) AS max_amoount,
MIN(sp.sales_amount) AS min_amount,
MAX(sp.sales_amount)-MIN(sp.sales_amount) AS diff_amount
FROM `prj-test3.bq_sample.shop_purchases` AS sp
INNER JOIN `prj-test3.bq_sample.customers` AS c USING(user_id)
INNER JOIN `prj-test3.bq_sample.products_master` AS p USING(product_id)
INNER JOIN `prj-test3.bq_sample.shops_master` AS s ON sp.shop_id = s.shop_id AND chief_name = "大井谷 みすず"
WHERE c.Is_premium = TRUE
GROUP BY product
ORDER BY diff_amount DESC
limit 1;
-- | |product |max_amoount|min_amount|diff_amount|
-- |1|ブラウス 長袖| 73000| 12775| 60225|
#(other code)
FROM `prj-test3.bq_sample.shop_purchases` AS sp
LEFT JOIN `prj-test3.bq_sample.customers` AS cu ON sp.user_id = cu.shop_id
LEFT JOIN `prj-test3.bq_sample.products_master` AS pm ON sp.product_id = pm.product_id
LEFT JOIN `prj-test3.bq_sample.shops_master` AS sm ON sp.shop_id = sm.shop_id
WHERE
sm.chief_name LIKE "大井谷%"
AND cu.Is_premium IS TRUE
GROUP BY pm.prod_name
ORDER BY 4 DESC
LIMIT 1;
-- | |product |max_amoount|min_amount|diff_amount|
-- |1|ブラウス 長袖| 73000| 12775| 60225|