算法应用–携程客户流失预测
此次逻辑回归应用的数据依然为携程客户流失数据
library(tidyverse)
library(MASS)
library(pROC)
# Fix the RNG seed so that any random steps are reproducible
set.seed(seed = 1111)
# Load the training and test sets. load() restores the objects stored in each
# file under their saved names (presumably df_train and df_test, which are
# referenced further down -- confirm against the saved files)
load(file = "~/Documents/GitHub/customer_loss/data/df_train.RDs")
load(file = "~/Documents/GitHub/customer_loss/data/df_test.RDs")
建模过程
为方便起见,这里直接载入已经建好的模型。
# Load the previously fitted model (expected to provide `lr_aic_back`)
load("~/Documents/GitHub/customer_loss/data/lr_aic_back.model")
# Predicted probabilities on the training set
pred_train_LR_AIC <- predict(lr_aic_back, df_train, type = "response")
# Predicted probabilities on the test set
pred_test_LR_AIC <- predict(lr_aic_back, df_test, type = "response")
# Build the training ROC curve once and reuse it for both the AUC and the plot
# (previously roc() was evaluated twice on the same inputs)
roc_train_LR_AIC <- roc(df_train$label, pred_train_LR_AIC)
# Training-set AUC
auc(roc_train_LR_AIC)
## Area under the curve: 0.7007
# ROC curve (training set)
plot(roc_train_LR_AIC,
  col = "blue",
  ylab = "train_sensitivity"
)
# Build the test ROC curve once and reuse it as well
roc_test_LR_AIC <- roc(df_test$label, pred_test_LR_AIC)
# Test-set AUC
auc(roc_test_LR_AIC)
## Area under the curve: 0.7016
# ROC curve (test set)
plot(roc_test_LR_AIC,
  col = "blue",
  ylab = "test_sensitivity"
)
为了比较各变量的重要性,先将自变量标准化以消除量纲差异,再重新建模;此时各回归系数的绝对值可以直接比较。
# Standardise every column except the first (assumed to be `label` -- TODO
# confirm) and re-attach the untouched label as a named column.
# data.frame() is used instead of cbind(): binding the scaled matrix to a bare
# vector yields a matrix whose last column has no name (and a factor label
# would be coerced to its integer codes). This form always produces a data
# frame with a `label` column, whether df_train is a base data frame or a
# tibble.
df_train_scale <- data.frame(
  scale(df_train[, -1]),
  label = df_train[["label"]],
  check.names = FALSE
)
载入基于标准化数据建好的模型。
# Load the model fitted on the standardised data (expected to provide
# `lr_scale_aic_back`)
load("~/Documents/GitHub/customer_loss/data/lr_scale_aic_back.model")
# Variable importance: rank the coefficients by absolute value
coef_scale_aic_back <- coef(lr_scale_aic_back)
name_scale_aic_back <- names(coef_scale_aic_back)
# unname() keeps the coefficient names out of the data frame's row names
coef_scale_aic_back <- data.frame(
  vars = name_scale_aic_back,
  beta = unname(coef_scale_aic_back)
) %>%
  mutate(abs_coef = abs(beta)) %>%
  arrange(desc(abs_coef))
# NOTE(review): the table includes the "(Intercept)" row, which is not a
# predictor; ignore it when reading the ranking
coef_scale_aic_back
## vars beta abs_coef
## 1 (Intercept) -1.090877469 1.090877469
## 2 intervals -0.303154495 0.303154495
## 3 ordernum_oneyear 0.253100544 0.253100544
## 4 iforderpv_24h 0.243717294 0.243717294
## 5 visitnum_oneyear -0.239545030 0.239545030
## 6 cr 0.235518604 0.235518604
## 7 h -0.185350887 0.185350887
## 8 cityorders 0.089755866 0.089755866
## 9 cancelrate 0.088965702 0.088965702
## 10 lowestprice -0.086693042 0.086693042
## 11 delta_price2 0.084078964 0.084078964
## 12 hotelcr 0.081661081 0.081661081
## 13 hoteluv -0.076884207 0.076884207
## 14 sid -0.064459424 0.064459424
## 15 cr_pre 0.063210075 0.063210075
## 16 businessrate_pre2 0.063171709 0.063171709
## 17 avgprice -0.059925933 0.059925933
## 18 lastpvgap 0.055242920 0.055242920
## 19 lowestprice_pre2 -0.052549346 0.052549346
## 20 ctrip_profits 0.051192872 0.051192872
## 21 novoters_pre 0.043840721 0.043840721
## 22 uv_pre2 -0.041661459 0.041661459
## 23 historyvisit_avghotelnum -0.041369649 0.041369649
## 24 price_sensitive 0.036234687 0.036234687
## 25 landhalfhours 0.034746513 0.034746513
## 26 deltaprice_pre2_t1 -0.021384723 0.021384723
## 27 starprefer -0.013198428 0.013198428
## 28 ordercanceledprecent 0.011950176 0.011950176
## 29 historyvisit_visit_detailpagenum 0.010735176 0.010735176
## 30 ordercanncelednum -0.009666662 0.009666662