Lecture 2 – GA4 BigQuery Practice
CTEs, Arrays, Joins, Windows, and Approximate Functions
1. CTEs (ITH) - Total Users & New Users
SQL Code
-- 1) CTEs: Total users + new users
WITH UserInfo AS (
SELECT
user_pseudo_id,
MAX(IF(event_name IN ('first_visit', 'first_open'), 1, 0)) AS is_new_user
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE _TABLE_SUFFIX BETWEEN '20201101' AND '20201130'
GROUP BY user_pseudo_id
)
SELECT
COUNT(*) AS total_users,
SUM(is_new_user) AS new_users
FROM UserInfo;
```Explanation
This query creates a temporary table (UserInfo) with one row per user and a flag showing whether they triggered a first_visit or first_open event. The final SELECT counts total users and how many are new.
Screenshot Outcome
2. Arrays & UNNEST
2.1 Scalar subquery for page_location
SQL Code
-- 2A) Arrays: scalar subquery to get page_location
SELECT
TIMESTAMP_MICROS(event_timestamp) AS event_time,
(
SELECT value.string_value
FROM UNNEST(event_params)
WHERE key = 'page_location'
LIMIT 1
) AS page_location
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE event_name = 'page_view'
AND _TABLE_SUFFIX BETWEEN '20201201' AND '20201202'
LIMIT 50;
```Explanation
This query pulls the page_location parameter from the event_params array without changing the number of rows. Each row stays at the event level.
Screenshot Outcome
2.2 UNNEST items for item level purchases
SQL Code
-- 2B) Arrays: UNNEST items for item-level purchase analysis
SELECT
event_date,
item.item_name,
COUNT(*) AS item_rows
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*` e,
UNNEST(e.items) AS item
WHERE e.event_name = 'purchase'
AND _TABLE_SUFFIX BETWEEN '20201201' AND '20201231'
GROUP BY event_date, item.item_name
ORDER BY item_rows DESC
LIMIT 20;
```Explanation
UNNEST turns each item in a purchase into its own row. This lets you analyze which items appear most often in purchase events.
Screenshot Outcome
3. String_AGG & ARRAY_AGG
3.1 Events Seen Per Day (String_AGG)
SQL Code
-- 3A) STRING_AGG: events seen per day
SELECT
event_date,
STRING_AGG(DISTINCT event_name, ', ' ORDER BY event_name) AS events_seen
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE _TABLE_SUFFIX BETWEEN '20201201' AND '20201203'
GROUP BY event_date
ORDER BY event_date;
```Explanation
This query groups by date and creates a comma‑separated list of all event names seen on that day.
Screenshot Outcome
3.2 Items Added to Cart per User (ARRAY_AGG)
SQL Code
-- 3B) ARRAY_AGG: items added to cart per user
SELECT
user_pseudo_id,
ARRAY_AGG(item_name) AS items_added_to_cart
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`,
UNNEST(items) AS item
WHERE event_name = 'add_to_cart'
AND _TABLE_SUFFIX BETWEEN '20201201' AND '20201231'
GROUP BY user_pseudo_id
ORDER BY user_pseudo_id
LIMIT 10;
```Explanation
This collects all item names each user added to cart into an array, giving a “cart list” per user.
Screenshot Outcome
3.3 Session-Level Cart Summary (ARRAY_AGG with STRUCT)
SQL Code
-- 3C) ARRAY_AGG with STRUCT: session-level cart summary
WITH add_to_cart AS (
SELECT
user_pseudo_id,
(
SELECT value.int_value
FROM UNNEST(event_params)
WHERE key = 'ga_session_id'
) AS session_id,
TIMESTAMP_MICROS(event_timestamp) AS event_timestamp,
i.item_id,
i.item_name,
i.quantity,
i.price
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`,
UNNEST(items) AS i
WHERE event_name = 'add_to_cart'
AND _TABLE_SUFFIX BETWEEN '20210101' AND '20211231'
)
SELECT
user_pseudo_id,
session_id,
COUNT(*) AS total_add_to_cart_events,
ARRAY_AGG(
STRUCT(item_id, item_name, quantity, price, event_timestamp)
ORDER BY quantity DESC, event_timestamp ASC
LIMIT 10
) AS cart_items
FROM add_to_cart
WHERE session_id IS NOT NULL
GROUP BY user_pseudo_id, session_id;
```Explanation
The CTE builds a detailed table of add‑to‑cart events. The final query groups by session and creates an array of item structs, sorted by quantity and timestamp.
Screenshot Outcome
4. Joins
4.1 Daily Users + Daily Purchases + Conversion Rate
SQL Code
-- 4) Joins: daily users + daily purchases + conversion rate
WITH daily_users AS (
SELECT
event_date,
COUNT(DISTINCT user_pseudo_id) AS users
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE _TABLE_SUFFIX BETWEEN '20201201' AND '20201231'
GROUP BY event_date
),
daily_purchases AS (
SELECT
event_date,
COUNT(DISTINCT
(SELECT value.string_value
FROM UNNEST(event_params)
WHERE key = 'transaction_id')
) AS purchases
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE event_name = 'purchase'
AND _TABLE_SUFFIX BETWEEN '20201201' AND '20201231'
GROUP BY event_date
)
SELECT
u.event_date,
u.users,
IFNULL(p.purchases, 0) AS purchases,
ROUND(
IFNULL(p.purchases, 0) / NULLIF(u.users, 0) * 100, 2
) AS conversion_rate_pct
FROM daily_users u
LEFT JOIN daily_purchases p
ON u.event_date = p.event_date
ORDER BY u.event_date;
```Explanation
This joins daily user counts with daily purchase counts and calculates a daily conversion rate. LEFT JOIN ensures days with no purchases show 0 instead of NULL.
Screenshot Outcome
5. Window Functions & Qualify
5.1 Top 3 Event Types Per Day (RANK + QUALIFY)
SQL Code
-- 5A) Window + QUALIFY: top 3 event types per day
WITH daily_event_counts AS (
SELECT
event_date,
event_name,
COUNT(*) AS events
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE _TABLE_SUFFIX BETWEEN '20201201' AND '20201207'
GROUP BY event_date, event_name
)
SELECT
event_date,
event_name,
events,
RANK() OVER (PARTITION BY event_date ORDER BY events DESC) AS rnk
FROM daily_event_counts
QUALIFY rnk <= 3
ORDER BY event_date, rnk;
```Explanation
This ranks event types by volume for each day and keeps only the top 3 using QUALIFY.
Screenshot Outcome
5.2 Rolling 7-Day Average of Purchases
SQL Code
-- 5B) Window: 7-day rolling average of purchases
WITH daily_purchases AS (
SELECT
PARSE_DATE('%Y%m%d', event_date) AS dt,
COUNT(*) AS purchases
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE event_name = 'purchase'
AND _TABLE_SUFFIX BETWEEN '20201201' AND '20201231'
GROUP BY dt
)
SELECT
dt,
purchases,
AVG(purchases) OVER (
ORDER BY dt
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS purchases_7d_avg
FROM daily_purchases
ORDER BY dt;
```Explanation
This computes daily purchase counts and then applies a rolling 7‑day window to calculate a moving average.
Screenshot Outcome
6. Approximate Functions
6.1 Approximate Distinct Users Per Event Type
SQL Code
-- 6) Approximate distinct users per event type
SELECT
event_name,
APPROX_COUNT_DISTINCT(user_pseudo_id) AS approx_users
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE _TABLE_SUFFIX BETWEEN '20201201' AND '20201231'
GROUP BY event_name
ORDER BY approx_users DESC
LIMIT 15;
```Explanation
This uses an approximate algorithm to estimate distinct users per event type. It’s faster and cheaper than exact distinct counts.
Screenshot Outcome