OkBublewrap

SQL Funnel 분석 본문

개발/SQL

SQL Funnel 분석

옥뽁뽁 2025. 3. 16. 00:07

실습환경 세팅

더보기
import sqlite3
import pandas as pd

# SQLite 데이터베이스 연결
conn = sqlite3.connect(":memory:")
cursor = conn.cursor()
print("SQLite in-memory database connected!")

# 퍼널 분석을 위한 샘플 테이블 생성 (funnel_step 추가)
cursor.execute('''
CREATE TABLE event_logs (
    user_id INTEGER,
    event_name TEXT,
    funnel_step INTEGER,
    event_time TIMESTAMP
)
''')
conn.commit()
print("Event logs table with funnel_step created!")

# 샘플 데이터 삽입 (funnel_step 추가)
sample_data = [
    (1, 'signup', 1, '2024-03-01 10:00:00'),
    (1, 'add_to_cart', 2, '2024-03-01 10:05:00'),
    (1, 'purchase', 3, '2024-03-01 10:10:00'),
    (2, 'signup', 1, '2024-03-02 09:00:00'),
    (2, 'add_to_cart', 2, '2024-03-02 09:10:00'),
    (2, 'signup', 1, '2024-03-03 09:00:00'),
    (2, 'add_to_cart', 2, '2024-03-03 09:10:00'),
    (2, 'purchase', 3, '2024-03-03 11:15:00'),
    (3, 'signup', 1, '2024-03-02 11:00:00'),
    (3, 'purchase', 3, '2024-03-02 11:15:00'),
    (4, 'signup', 1, '2024-03-03 08:00:00'),
    (5, 'add_to_cart', 2, '2024-03-04 07:00:00'),
    (5, 'purchase', 3, '2024-03-04 07:10:00')
]

cursor.executemany("INSERT INTO event_logs (user_id, event_name, funnel_step, event_time) VALUES (?, ?, ?, ?)", sample_data)
conn.commit()
print("Sample data inserted with funnel_step!")

query_table = '''
SELECT *
FROM event_logs
'''

df_table = pd.read_sql(query_table, conn)
df_table



Open Funnel

  funnel_step 1 funnel_step 2 funnel_step 3
1 o o o
2 o o  
2 o o o
3 o   o
4 o    
5   o o
이벤트 Total 5 4 4
고유 사용자 Total 4 3 4
더보기
-- # 1 이벤트 발생 건수
SELECT 
    count(case when event_name = 'signup' then 1 end) as signup,
    count(case when event_name = 'add_to_cart' then 1 end) as add_to_cart,
    count(case when event_name = 'purchase' then 1 end) as purchase
from event_logs

-- # 2 이벤트 발생 건수
SELECT
event_name,
count(*) event_count
FROM event_logs
WHERE event_name IN ('signup','add_to_cart','purchase')
GROUP BY 1
ORDER BY 2 DESC

-- # 3 (고유)사용자 사용 건수
SELECT
   COUNT(DISTINCT CASE WHEN event_name = 'signup' THEN user_id ELSE NULL END) AS signup,
   COUNT(DISTINCT CASE WHEN event_name = 'add_to_cart' THEN user_id ELSE NULL END) AS add_to_carts,
   COUNT(DISTINCT CASE WHEN event_name = 'purchase' THEN user_id ELSE NULL END) AS purchase
FROM event_logs;

-- # 4 (고유)사용자 사용 건수
SELECT
event_name,
count(distinct user_id) users
 FROM event_logs
 WHERE event_name IN ('signup','add_to_cart','purchase')
GROUP BY 1
ORDER BY 2 DESC

 

 

Closed Funnel

  funnel_step 1 funnel_step 2 funnel_step 3
1 o o o
2 o o  
2 o o o
3 o   o
4 o    
5   o o
고유 사용자 Total 4 2 2

 

1. 로직 구현

1️⃣ window 구하기

  • funnel_step = 1인 기준으로 window 개수 정하기
SELECT *, rank () over (partition by user_id order by event_time) as window_num
FROM event_logs
WHERE funnel_step = 1
ORDER BY user_id, event_time

 

 

 

  • 각 아이디별, 윈도우 별 이전 윈도우 시작 날짜 가져오기, 없으면 현재 시간으로
WITH window as (
  SELECT user_id, 
	window_num, 
	event_time as window_st_ts, 
	lead(event_time,1,current_timestamp) over (partition by user_id order by window_num) as window_end_ts
  FROM(
      SELECT *, rank () over (partition by user_id order by event_time) as window_num
      FROM event_logs
      WHERE funnel_step = 1
      ORDER BY user_id, event_time
  ) 
)
SELECT *
from window

 

 

2️⃣ funnel_log 구하기

funnel_log AS (
    SELECT l.user_id, l.event_name, funnel_step, window_num
    , lead(funnel_step,1,null) over (partition by l.user_id, window_num order by funnel_step) as next_funnel_step
    FROM event_logs l 
      JOIN window w 
      on l.user_id = w.user_id 
      AND l.event_time >= window_st_ts AND l.event_time < window_end_ts 
)
SELECT *
from funnel_log

 

각 window_num에 해당하는 funnel_step 다음 단계 값을 가져옴, 없으면 Null 값을 반환

 

 

3️⃣ funnel_step 구하기

funnel_step AS (
  SELECT user_id, window_num, replace(GROUP_CONCAT(DISTINCT steps),',','') AS steps
  FROM (

  SELECT 
  user_id, window_num
  , GROUP_CONCAT(funnel_step) AS steps
  FROM funnel_log
  WHERE next_funnel_step IS NULL OR (next_funnel_step - funnel_step) = 1
  GROUP BY user_id, window_num

  UNION 

  SELECT 
  user_id, window_num
  , GROUP_CONCAT(next_funnel_step) AS steps
  FROM funnel_log
  WHERE next_funnel_step IS NULL OR (next_funnel_step - funnel_step) = 1
  GROUP BY user_id, window_num
  )
  WHERE steps like '1%'
  GROUP BY  user_id, window_num

)

 

4️⃣ 집계

SELECT count(distinct step_1) as step_1
, count(distinct step_12) as step_12
, count(distinct step_123) as step_123

FROM (
SELECT DISTINCT user_id, window_num, steps,
		case when steps like '1%' then user_id else null end as step_1,
		case when steps like '12%' then user_id else null end as step_12,
		case when steps like '123%' then user_id else null end as step_123

FROM funnel_step
)

 

 

2. 로직 구현

더보기
query_step_conversion = """
WITH su AS (
  SELECT
    event_time,
    user_id
  FROM event_logs
  WHERE event_name = 'signup'
),

add_to_cart AS ( 
  SELECT
    event_time,
    user_id
  FROM event_logs
  WHERE event_name = 'add_to_cart'
),

purchase AS ( 
  SELECT
    event_time,
    user_id
  FROM event_logs
  WHERE event_name = 'purchase'
),

funnel AS (
  SELECT
    su.user_id AS su_user,
    add_to_cart.user_id AS add_user,
    purchase.user_id AS bc_user
  FROM su
  LEFT JOIN add_to_cart  
    ON su.user_id = add_to_cart.user_id
    AND su.event_time <= add_to_cart.event_time
  LEFT JOIN purchase  
    ON add_to_cart.user_id = purchase.user_id
    AND add_to_cart.event_time <= purchase.event_time
)

SELECT
  COUNT(DISTINCT su_user) AS signup,
  COUNT(DISTINCT add_user) AS add_to_carts,
  COUNT(DISTINCT bc_user) AS purchase
FROM funnel;

"""

df_step_conversion = pd.read_sql(query_step_conversion, conn)
df_step_conversion

 

 

3. 로직 구현

더보기
query_step_conversion = """
WITH events AS (
  SELECT 
    user_id, 
    event_name, 
    MIN(event_time) AS min_event_time
  FROM event_logs
  WHERE event_name IN ('signup','add_to_cart','purchase')
  GROUP BY user_id, event_name
),

su AS (
  SELECT DISTINCT user_id
  FROM events
  WHERE event_name = 'signup'
),

atc AS ( 
  SELECT DISTINCT e.user_id
  FROM su
  INNER JOIN events e ON su.user_id = e.user_id
  WHERE e.event_name = 'add_to_cart'
),

purchases AS (  
  SELECT DISTINCT e.user_id
  FROM atc
  INNER JOIN events e ON atc.user_id = e.user_id
  WHERE e.event_name = 'purchase'
)

SELECT 'signup' AS step, COUNT(*) AS user_count FROM su
UNION ALL
SELECT 'add_to_cart' AS step, COUNT(*) FROM atc
UNION ALL
SELECT 'purchase' AS step, COUNT(*) FROM purchases;
"""

df_step_conversion = pd.read_sql(query_step_conversion, conn)
df_step_conversion

 

 

Direct Order Funnel

  funnel_step 1 funnel_step 2 funnel_step 3
1 o o o
2 o o  
2 o o o
3 o   o
4 o    
5   o o
Total(unique) 5 2 3

 

user 1: 1 - 2 - 3

user 2: 1 - 2

user 2: 1 - 2 - 3

user 3: 1 - 3

user 4: 1 

user 5: 2 - 3

더보기
query = """
WITH window AS (
    SELECT
        user_id, window_num, event_time AS window_st_ts,
        LEAD(event_time, 1, current_timestamp) OVER (PARTITION BY user_id ORDER BY window_num) AS window_end_ts
    FROM
    (
        SELECT *,
               RANK() OVER(PARTITION BY user_id ORDER BY event_time) AS window_num
        FROM event_logs
        WHERE funnel_step = 1
    )
), funnel_logs AS (
    SELECT l.user_id, window_num, event_time, funnel_step,
           LEAD(funnel_step, 1, NULL) OVER(PARTITION BY l.user_id, window_num ORDER BY event_time) AS next_funnel_step
    FROM event_logs l
    JOIN window w
        ON l.user_id = w.user_id
        AND event_time >= window_st_ts AND event_time < window_end_ts
), funnel_step AS (
    SELECT user_id, window_num,
           REPLACE(GROUP_CONCAT(DISTINCT funnel_step), ',', '') AS steps
    FROM funnel_logs
    GROUP BY user_id, window_num
)
SELECT
    COUNT(DISTINCT step_1) AS step_1,
    COUNT(DISTINCT step_12) AS step_12,
    COUNT(DISTINCT step_123) AS step_123
FROM
(
    SELECT user_id, window_num, steps,
           CASE WHEN steps LIKE '1%' THEN user_id END AS step_1,
           CASE WHEN steps LIKE '12%' THEN user_id END AS step_12,
           CASE WHEN steps LIKE '123%' THEN user_id END AS step_123
    FROM funnel_step
)
"""
table = pd.read_sql(query, conn)
table

 

 

공부 자료

 

Open and closed funnels based on GA4 raw data in BigQuery

Running a funnel analysis is a crucial part of data analysis, especially in the context of user behavior, marketing, and product analytics.

medium.com

 

 

 

'개발 > SQL' 카테고리의 다른 글

코호트 분석, 세그먼트 분석  (0) 2025.03.16
Histogram of Users and Purchases  (0) 2025.03.16
Card Launch Success  (0) 2025.03.15
Compressed Mode  (0) 2025.03.15
Department vs. Company Salary  (0) 2025.03.15