Lecture 2 – GA4 BigQuery Practice

CTEs, Arrays, Joins, Windows, and Approximate Functions

Author

Juan Christian De La Cruz - Calderon

Published

March 1, 2026

1. CTEs (ITH) - Total Users & New Users

SQL Code

-- 1) CTEs: Total users + new users
WITH UserInfo AS (
  SELECT
    user_pseudo_id,
    MAX(IF(event_name IN ('first_visit', 'first_open'), 1, 0)) AS is_new_user
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
  WHERE _TABLE_SUFFIX BETWEEN '20201101' AND '20201130'
  GROUP BY user_pseudo_id
)
SELECT
  COUNT(*) AS total_users,
  SUM(is_new_user) AS new_users
FROM UserInfo;
```

Explanation

This query creates a temporary table (UserInfo) with one row per user and a flag showing whether they triggered a first_visit or first_open event. The final SELECT counts total users and how many are new.

Screenshot Outcome

2. Arrays & UNNEST

2.1 Scalar subquery for page_location

SQL Code

-- 2A) Arrays: scalar subquery to get page_location
SELECT
  TIMESTAMP_MICROS(event_timestamp) AS event_time,
  (
    SELECT value.string_value
    FROM UNNEST(event_params)
    WHERE key = 'page_location'
    LIMIT 1
  ) AS page_location
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE event_name = 'page_view'
  AND _TABLE_SUFFIX BETWEEN '20201201' AND '20201202'
LIMIT 50;
```

Explanation

This query pulls the page_location parameter from the event_params array without changing the number of rows. Each row stays at the event level.

Screenshot Outcome

2.2 UNNEST items for item level purchases

SQL Code

-- 2B) Arrays: UNNEST items for item-level purchase analysis
SELECT
  event_date,
  item.item_name,
  COUNT(*) AS item_rows
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*` e,
UNNEST(e.items) AS item
WHERE e.event_name = 'purchase'
  AND _TABLE_SUFFIX BETWEEN '20201201' AND '20201231'
GROUP BY event_date, item.item_name
ORDER BY item_rows DESC
LIMIT 20;
```

Explanation

UNNEST turns each item in a purchase into its own row. This lets you analyze which items appear most often in purchase events.

Screenshot Outcome

3. String_AGG & ARRAY_AGG

3.1 Events Seen Per Day (String_AGG)

SQL Code

-- 3A) STRING_AGG: events seen per day
SELECT
  event_date,
  STRING_AGG(DISTINCT event_name, ', ' ORDER BY event_name) AS events_seen
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE _TABLE_SUFFIX BETWEEN '20201201' AND '20201203'
GROUP BY event_date
ORDER BY event_date;
```

Explanation

This query groups by date and creates a comma‑separated list of all event names seen on that day.

Screenshot Outcome

3.2 Items Added to Cart per User (ARRAY_AGG)

SQL Code

-- 3B) ARRAY_AGG: items added to cart per user
SELECT
  user_pseudo_id,
  ARRAY_AGG(item_name) AS items_added_to_cart
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`,
UNNEST(items) AS item
WHERE event_name = 'add_to_cart'
  AND _TABLE_SUFFIX BETWEEN '20201201' AND '20201231'
GROUP BY user_pseudo_id
ORDER BY user_pseudo_id
LIMIT 10;
```

Explanation

This collects all item names each user added to cart into an array, giving a “cart list” per user.

Screenshot Outcome

3.3 Session-Level Cart Summary (ARRAY_AGG with STRUCT)

SQL Code

-- 3C) ARRAY_AGG with STRUCT: session-level cart summary
WITH add_to_cart AS (
  SELECT
    user_pseudo_id,
    (
      SELECT value.int_value 
      FROM UNNEST(event_params) 
      WHERE key = 'ga_session_id'
    ) AS session_id,
    TIMESTAMP_MICROS(event_timestamp) AS event_timestamp,
    i.item_id,
    i.item_name,
    i.quantity,
    i.price
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`,
  UNNEST(items) AS i
  WHERE event_name = 'add_to_cart'
    AND _TABLE_SUFFIX BETWEEN '20210101' AND '20211231'
)
SELECT
  user_pseudo_id,
  session_id,
  COUNT(*) AS total_add_to_cart_events,
  ARRAY_AGG(
    STRUCT(item_id, item_name, quantity, price, event_timestamp)
    ORDER BY quantity DESC, event_timestamp ASC
    LIMIT 10
  ) AS cart_items
FROM add_to_cart
WHERE session_id IS NOT NULL
GROUP BY user_pseudo_id, session_id;
```

Explanation

The CTE builds a detailed table of add‑to‑cart events. The final query groups by session and creates an array of item structs, sorted by quantity and timestamp.

Screenshot Outcome

4. Joins

4.1 Daily Users + Daily Purchases + Conversion Rate

SQL Code

-- 4) Joins: daily users + daily purchases + conversion rate
WITH daily_users AS (
  SELECT
    event_date,
    COUNT(DISTINCT user_pseudo_id) AS users
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
  WHERE _TABLE_SUFFIX BETWEEN '20201201' AND '20201231'
  GROUP BY event_date
),
daily_purchases AS (
  SELECT
    event_date,
    COUNT(DISTINCT
      (SELECT value.string_value
       FROM UNNEST(event_params)
       WHERE key = 'transaction_id')
    ) AS purchases
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
  WHERE event_name = 'purchase'
    AND _TABLE_SUFFIX BETWEEN '20201201' AND '20201231'
  GROUP BY event_date
)
SELECT
  u.event_date,
  u.users,
  IFNULL(p.purchases, 0) AS purchases,
  ROUND(
    IFNULL(p.purchases, 0) / NULLIF(u.users, 0) * 100, 2
  ) AS conversion_rate_pct
FROM daily_users u
LEFT JOIN daily_purchases p
  ON u.event_date = p.event_date
ORDER BY u.event_date;
```

Explanation

This joins daily user counts with daily purchase counts and calculates a daily conversion rate. LEFT JOIN ensures days with no purchases show 0 instead of NULL.

Screenshot Outcome

5. Window Functions & Qualify

5.1 Top 3 Event Types Per Day (RANK + QUALIFY)

SQL Code

-- 5A) Window + QUALIFY: top 3 event types per day
WITH daily_event_counts AS (
  SELECT
    event_date,
    event_name,
    COUNT(*) AS events
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
  WHERE _TABLE_SUFFIX BETWEEN '20201201' AND '20201207'
  GROUP BY event_date, event_name
)
SELECT
  event_date,
  event_name,
  events,
  RANK() OVER (PARTITION BY event_date ORDER BY events DESC) AS rnk
FROM daily_event_counts
QUALIFY rnk <= 3
ORDER BY event_date, rnk;
```

Explanation

This ranks event types by volume for each day and keeps only the top 3 using QUALIFY.

Screenshot Outcome

5.2 Rolling 7-Day Average of Purchases

SQL Code

-- 5B) Window: 7-day rolling average of purchases
WITH daily_purchases AS (
  SELECT
    PARSE_DATE('%Y%m%d', event_date) AS dt,
    COUNT(*) AS purchases
  FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
  WHERE event_name = 'purchase'
    AND _TABLE_SUFFIX BETWEEN '20201201' AND '20201231'
  GROUP BY dt
)
SELECT
  dt,
  purchases,
  AVG(purchases) OVER (
    ORDER BY dt
    ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
  ) AS purchases_7d_avg
FROM daily_purchases
ORDER BY dt;
```

Explanation

This computes daily purchase counts and then applies a rolling 7‑day window to calculate a moving average.

Screenshot Outcome

6. Approximate Functions

6.1 Approximate Distinct Users Per Event Type

SQL Code

-- 6) Approximate distinct users per event type
SELECT
  event_name,
  APPROX_COUNT_DISTINCT(user_pseudo_id) AS approx_users
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE _TABLE_SUFFIX BETWEEN '20201201' AND '20201231'
GROUP BY event_name
ORDER BY approx_users DESC
LIMIT 15;
```

Explanation

This uses an approximate algorithm to estimate distinct users per event type. It’s faster and cheaper than exact distinct counts.

Screenshot Outcome