在数字化运营与数据分析领域,标签是连接原始数据与业务应用的核心载体,而标签的精准度、规范性,直接决定了数据价值的挖掘效率与应用效果。CDA(Certified Data Analyst)数据分析师作为专业的数据价值转化者,不仅要掌握标签体系的设计逻辑,更要精通各类标签加工方式——标签加工是将杂乱无章的原始数据,转化为标准化、可复用、有价值标签的核心环节,也是CDA分析师区别于普通数据从业者的核心技能之一。熟练掌握不同标签的加工方式,结合业务需求灵活运用,既能让标签更贴合业务、更具实用性,更能让数据精准赋能业务决策,实现数据价值的最大化。
-- 规则式加工:用户年龄标签(按业务规则分段) SELECT user_id, age, -- 业务规则:18岁以下未成年,18-25岁青年,26-35岁中青年,36-45岁中年,45岁以上中老年 CASE WHEN age < 18 THEN '未成年' WHEN age BETWEEN 18 AND 25 THEN '青年' WHEN age BETWEEN 26 AND 35 THEN '中青年' WHEN age BETWEEN 36 AND 45 THEN '中年' ELSE '中老年' END AS age_tag FROM user_table WHERE age IS NOT NULL; -- 排除空值,保障标签质量
-- 统计式加工:用户消费能力标签(基于近90天消费金额统计) SELECT user_id, SUM(order_amount) AS total_consume_90d, -- 近90天总消费金额(统计指标) -- 根据统计结果划分消费能力标签 CASE WHEN SUM(order_amount) >= 2000 THEN '高消费' WHEN SUM(order_amount) BETWEEN 500 AND 1999 THEN '中消费' WHEN SUM(order_amount) BETWEEN 100 AND 499 THEN '低消费' ELSE '零消费' END AS consume_ability_tag FROM order_table WHERE order_time > DATE_SUB(NOW(), INTERVAL 90 DAY) AND pay_status = '成功' -- 仅统计支付成功的订单 GROUP BY user_id;
-- 关联式加工:用户商品偏好标签(关联用户表、订单表、商品表) SELECT u.user_id, -- 统计用户近30天购买次数最多的商品品类,作为偏好标签 MAX(CASE WHEN rank_num = 1 THEN p.category END) AS product_prefer_tag FROM user_table u INNER JOIN order_table o ON u.user_id = o.user_id INNER JOIN product_table p ON o.product_id = p.product_id WHERE o.order_time > DATE_SUB(NOW(), INTERVAL 30 DAY) AND o.pay_status = '成功' GROUP BY u.user_id -- 子查询:给每个用户的商品品类按购买次数排序 JOIN ( SELECT user_id, p.category, COUNT(o.order_id) AS buy_count, ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY COUNT(o.order_id) DESC) AS rank_num FROM order_table o INNER JOIN product_table p ON o.product_id = p.product_id WHERE o.order_time > DATE_SUB(NOW(), INTERVAL 30 DAY) AND o.pay_status = '成功' GROUP BY user_id, p.category ) AS category_rank ON u.user_id = category_rank.user_id;