Notice
Recent Posts
Recent Comments
Link
일 | 월 | 화 | 수 | 목 | 금 | 토 |
---|---|---|---|---|---|---|
1 | 2 | 3 | ||||
4 | 5 | 6 | 7 | 8 | 9 | 10 |
11 | 12 | 13 | 14 | 15 | 16 | 17 |
18 | 19 | 20 | 21 | 22 | 23 | 24 |
25 | 26 | 27 | 28 | 29 | 30 | 31 |
Tags
- 미세먼지
- harkerrank
- 내일배움
- TiL
- 오블완
- 프로젝트
- 파이썬 철저 입문
- 스파르타코딩
- 스파르타
- MySQL
- 웹 스크랩핑
- 티스토리챌린지
- 회귀분석
- 내일배움카드
- 파이썬 머신러닝 완벽 가이드
- 파이썬 철저입문
- 프로그래머스
- 텍스트 분석
- 스파르타 코딩
- Cluster
- 파이썬 완벽 가이드
- 실전 데이터 분석 프로젝트
- 파이썬
- R
- SQL
- 파이썬 머신러닝 완벽가이드
- 중회귀모형
- hackerrank
- 내일배움캠프
- wil
Archives
- Today
- Total
OkBublewrap
SQL Funnel 분석 본문
실습환경 세팅
더보기
import sqlite3
import pandas as pd
# SQLite 데이터베이스 연결
conn = sqlite3.connect(":memory:")
cursor = conn.cursor()
print("SQLite in-memory database connected!")
# 퍼널 분석을 위한 샘플 테이블 생성 (funnel_step 추가)
cursor.execute('''
CREATE TABLE event_logs (
user_id INTEGER,
event_name TEXT,
funnel_step INTEGER,
event_time TIMESTAMP
)
''')
conn.commit()
print("Event logs table with funnel_step created!")
# 샘플 데이터 삽입 (funnel_step 추가)
sample_data = [
(1, 'signup', 1, '2024-03-01 10:00:00'),
(1, 'add_to_cart', 2, '2024-03-01 10:05:00'),
(1, 'purchase', 3, '2024-03-01 10:10:00'),
(2, 'signup', 1, '2024-03-02 09:00:00'),
(2, 'add_to_cart', 2, '2024-03-02 09:10:00'),
(2, 'signup', 1, '2024-03-03 09:00:00'),
(2, 'add_to_cart', 2, '2024-03-03 09:10:00'),
(2, 'purchase', 3, '2024-03-03 11:15:00'),
(3, 'signup', 1, '2024-03-02 11:00:00'),
(3, 'purchase', 3, '2024-03-02 11:15:00'),
(4, 'signup', 1, '2024-03-03 08:00:00'),
(5, 'add_to_cart', 2, '2024-03-04 07:00:00'),
(5, 'purchase', 3, '2024-03-04 07:10:00')
]
cursor.executemany("INSERT INTO event_logs (user_id, event_name, funnel_step, event_time) VALUES (?, ?, ?, ?)", sample_data)
conn.commit()
print("Sample data inserted with funnel_step!")
query_table = '''
SELECT *
FROM event_logs
'''
df_table = pd.read_sql(query_table, conn)
df_table
Open Funnel
funnel_step 1 | funnel_step 2 | funnel_step 3 | |
1 | o | o | o |
2 | o | o | |
2 | o | o | o |
3 | o | o | |
4 | o | ||
5 | o | o | |
이벤트 Total | 5 | 4 | 4 |
고유 사용자 Total | 4 | 3 | 4 |
더보기
-- # 1 이벤트 발생 건수
SELECT
count(case when event_name = 'signup' then 1 end) as signup,
count(case when event_name = 'add_to_cart' then 1 end) as add_to_cart,
count(case when event_name = 'purchase' then 1 end) as purchase
from event_logs
-- # 2 이벤트 발생 건수
SELECT
event_name,
count(*) event_count
FROM event_logs
WHERE event_name IN ('signup','add_to_cart','purchase')
GROUP BY 1
ORDER BY 2 DESC
-- # 3 (고유)사용자 사용 건수
SELECT
COUNT(DISTINCT CASE WHEN event_name = 'signup' THEN user_id ELSE NULL END) AS signup,
COUNT(DISTINCT CASE WHEN event_name = 'add_to_cart' THEN user_id ELSE NULL END) AS add_to_carts,
COUNT(DISTINCT CASE WHEN event_name = 'purchase' THEN user_id ELSE NULL END) AS purchase
FROM event_logs;
-- # 4 (고유)사용자 사용 건수
SELECT
event_name,
count(distinct user_id) users
FROM event_logs
WHERE event_name IN ('signup','add_to_cart','purchase')
GROUP BY 1
ORDER BY 2 DESC
Closed Funnel
funnel_step 1 | funnel_step 2 | funnel_step 3 | |
1 | o | o | o |
2 | o | o | |
2 | o | o | o |
3 | o | o | |
4 | o | ||
5 | o | o | |
고유 사용자 Total | 4 | 2 | 2 |
1. 로직 구현
1️⃣ window 구하기
- funnel_step = 1인 기준으로 window 개수 정하기
SELECT *, rank () over (partition by user_id order by event_time) as window_num
FROM event_logs
WHERE funnel_step = 1
ORDER BY user_id, event_time
- 각 아이디별, 윈도우 별 이전 윈도우 시작 날짜 가져오기, 없으면 현재 시간으로
WITH window as (
SELECT user_id,
window_num,
event_time as window_st_ts,
lead(event_time,1,current_timestamp) over (partition by user_id order by window_num) as window_end_ts
FROM(
SELECT *, rank () over (partition by user_id order by event_time) as window_num
FROM event_logs
WHERE funnel_step = 1
ORDER BY user_id, event_time
)
)
SELECT *
from window
2️⃣ funnel_log 구하기
funnel_log AS (
SELECT l.user_id, l.event_name, funnel_step, window_num
, lead(funnel_step,1,null) over (partition by l.user_id, window_num order by funnel_step) as next_funnel_step
FROM event_logs l
JOIN window w
on l.user_id = w.user_id
AND l.event_time >= window_st_ts AND l.event_time < window_end_ts
)
SELECT *
from funnel_log
각 window_num에 해당하는 funnel_step 다음 단계 값을 가져옴, 없으면 Null 값을 반환
3️⃣ funnel_step 구하기
funnel_step AS (
SELECT user_id, window_num, replace(GROUP_CONCAT(DISTINCT steps),',','') AS steps
FROM (
SELECT
user_id, window_num
, GROUP_CONCAT(funnel_step) AS steps
FROM funnel_log
WHERE next_funnel_step IS NULL OR (next_funnel_step - funnel_step) = 1
GROUP BY user_id, window_num
UNION
SELECT
user_id, window_num
, GROUP_CONCAT(next_funnel_step) AS steps
FROM funnel_log
WHERE next_funnel_step IS NULL OR (next_funnel_step - funnel_step) = 1
GROUP BY user_id, window_num
)
WHERE steps like '1%'
GROUP BY user_id, window_num
)
4️⃣ 집계
SELECT count(distinct step_1) as step_1
, count(distinct step_12) as step_12
, count(distinct step_123) as step_123
FROM (
SELECT DISTINCT user_id, window_num, steps,
case when steps like '1%' then user_id else null end as step_1,
case when steps like '12%' then user_id else null end as step_12,
case when steps like '123%' then user_id else null end as step_123
FROM funnel_step
)
2. 로직 구현
더보기
query_step_conversion = """
WITH su AS (
SELECT
event_time,
user_id
FROM event_logs
WHERE event_name = 'signup'
),
add_to_cart AS (
SELECT
event_time,
user_id
FROM event_logs
WHERE event_name = 'add_to_cart'
),
purchase AS (
SELECT
event_time,
user_id
FROM event_logs
WHERE event_name = 'purchase'
),
funnel AS (
SELECT
su.user_id AS su_user,
add_to_cart.user_id AS add_user,
purchase.user_id AS bc_user
FROM su
LEFT JOIN add_to_cart
ON su.user_id = add_to_cart.user_id
AND su.event_time <= add_to_cart.event_time
LEFT JOIN purchase
ON add_to_cart.user_id = purchase.user_id
AND add_to_cart.event_time <= purchase.event_time
)
SELECT
COUNT(DISTINCT su_user) AS signup,
COUNT(DISTINCT add_user) AS add_to_carts,
COUNT(DISTINCT bc_user) AS purchase
FROM funnel;
"""
df_step_conversion = pd.read_sql(query_step_conversion, conn)
df_step_conversion
3. 로직 구현
더보기
query_step_conversion = """
WITH events AS (
SELECT
user_id,
event_name,
MIN(event_time) AS min_event_time
FROM event_logs
WHERE event_name IN ('signup','add_to_cart','purchase')
GROUP BY user_id, event_name
),
su AS (
SELECT DISTINCT user_id
FROM events
WHERE event_name = 'signup'
),
atc AS (
SELECT DISTINCT e.user_id
FROM su
INNER JOIN events e ON su.user_id = e.user_id
WHERE e.event_name = 'add_to_cart'
),
purchases AS (
SELECT DISTINCT e.user_id
FROM atc
INNER JOIN events e ON atc.user_id = e.user_id
WHERE e.event_name = 'purchase'
)
SELECT 'signup' AS step, COUNT(*) AS user_count FROM su
UNION ALL
SELECT 'add_to_cart' AS step, COUNT(*) FROM atc
UNION ALL
SELECT 'purchase' AS step, COUNT(*) FROM purchases;
"""
df_step_conversion = pd.read_sql(query_step_conversion, conn)
df_step_conversion
Direct Order Funnel
funnel_step 1 | funnel_step 2 | funnel_step 3 | |
1 | o | o | o |
2 | o | o | |
2 | o | o | o |
3 | o | o | |
4 | o | ||
5 | o | o | |
Total(unique) | 5 | 2 | 3 |
user 1: 1 - 2 - 3
user 2: 1 - 2
user 2: 1 - 2 - 3
user 3: 1 - 3
user 4: 1
user 5: 2 - 3
더보기
query = """
WITH window AS (
SELECT
user_id, window_num, event_time AS window_st_ts,
LEAD(event_time, 1, current_timestamp) OVER (PARTITION BY user_id ORDER BY window_num) AS window_end_ts
FROM
(
SELECT *,
RANK() OVER(PARTITION BY user_id ORDER BY event_time) AS window_num
FROM event_logs
WHERE funnel_step = 1
)
), funnel_logs AS (
SELECT l.user_id, window_num, event_time, funnel_step,
LEAD(funnel_step, 1, NULL) OVER(PARTITION BY l.user_id, window_num ORDER BY event_time) AS next_funnel_step
FROM event_logs l
JOIN window w
ON l.user_id = w.user_id
AND event_time >= window_st_ts AND event_time < window_end_ts
), funnel_step AS (
SELECT user_id, window_num,
REPLACE(GROUP_CONCAT(DISTINCT funnel_step), ',', '') AS steps
FROM funnel_logs
GROUP BY user_id, window_num
)
SELECT
COUNT(DISTINCT step_1) AS step_1,
COUNT(DISTINCT step_12) AS step_12,
COUNT(DISTINCT step_123) AS step_123
FROM
(
SELECT user_id, window_num, steps,
CASE WHEN steps LIKE '1%' THEN user_id END AS step_1,
CASE WHEN steps LIKE '12%' THEN user_id END AS step_12,
CASE WHEN steps LIKE '123%' THEN user_id END AS step_123
FROM funnel_step
)
"""
table = pd.read_sql(query, conn)
table
공부 자료
Open and closed funnels based on GA4 raw data in BigQuery
Running a funnel analysis is a crucial part of data analysis, especially in the context of user behavior, marketing, and product analytics.
medium.com
'개발 > SQL' 카테고리의 다른 글
코호트 분석, 세그먼트 분석 (0) | 2025.03.16 |
---|---|
Histogram of Users and Purchases (0) | 2025.03.16 |
Card Launch Success (0) | 2025.03.15 |
Compressed Mode (0) | 2025.03.15 |
Department vs. Company Salary (0) | 2025.03.15 |