Sys.setlocale(category = "LC_ALL", locale = "zh_TW.UTF-8")
## [1] "zh_TW.UTF-8/zh_TW.UTF-8/zh_TW.UTF-8/C/zh_TW.UTF-8/zh_TW.UTF-8"
pacman::p_load(dplyr,tidyverse,pROC,caTools,ISLR)
load("UL_all7.rdata")
lm 年資 把有na欄位拿掉
UL_narm = UL %>% select(-c(ID,on,off,job_num,store,section))
UL_narm = UL_narm[,-c(25:77)]
UL_narm$not_off = as.factor(UL_narm$not_off)
UL_narm=mutate_if(UL_narm,is.character,as.factor)
UL_narm = na.omit(UL_narm)
str(UL_narm)
## tibble[,24] [3,053 × 24] (S3: tbl_df/tbl/data.frame)
## $ sex : Factor w/ 2 levels "男","女": 1 1 1 1 1 1 1 1 1 1 ...
## $ job : Factor w/ 7 levels "出納","服專",..: 6 6 6 6 6 6 6 6 6 6 ...
## $ position : Factor w/ 33 levels "辦事員","董事長",..: 33 33 33 13 33 19 19 13 15 19 ...
## $ distributor : Factor w/ 9 levels "EM","ES","HL",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ mean_car_18_20 : num [1:3053] 0 0 0 0 0 ...
## $ mean_price_18_20 : num [1:3053] 0 0 0 0 0 ...
## $ num_car_before : num [1:3053] 0 0 0 0 0 56 0 159 199 0 ...
## $ num_car2018 : num [1:3053] 0 0 0 0 0 23 0 0 57 0 ...
## $ num_car2019 : num [1:3053] 0 0 0 0 0 4 0 0 59 0 ...
## $ num_car2020 : num [1:3053] 0 0 0 0 0 0 25 0 51 7 ...
## $ totalprice_before : num [1:3053] 0 0 0 0 0 ...
## $ totalprice_2018 : num [1:3053] 0 0 0 0 0 ...
## $ totalprice_2019 : num [1:3053] 0 0 0 0 0 ...
## $ totalprice_2020 : num [1:3053] 0 0 0 0 0 ...
## $ lecturer_training : num [1:3053] 0 0 0 0 0 0 0 0 0 0 ...
## $ performance : num [1:3053] 0 0 0 0 0 0 0 0 4 0 ...
## $ internet_marketing: num [1:3053] 0 0 0 0 0 0 0 0 0 0 ...
## $ management : num [1:3053] 0 0 0 0 0 0 0 28 0 0 ...
## $ newproduct : num [1:3053] 0 0 0 0 0 7 7 39 26 0 ...
## $ fresh : num [1:3053] 0 0 0 0 0 0 28 0 0 0 ...
## $ interview_skill : num [1:3053] 0 0 0 0 0 0 0 0 0 0 ...
## $ not_off : Factor w/ 2 levels "0","1": 2 2 2 2 2 1 2 2 2 2 ...
## $ seniority : num [1:3053] 5.42 5.42 5.42 2.92 5.42 ...
## $ reward : num [1:3053] 0 0 0 0 0 0 0 2 0 0 ...
## - attr(*, "na.action")= 'omit' Named int [1:5] 770 963 1339 2358 2425
## ..- attr(*, "names")= chr [1:5] "770" "963" "1339" "2358" ...
#colnames(UL_narm)
# Split the data into training and test set
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
set.seed(123)
training.samples <- UL_narm$position %>%
createDataPartition(p = 0.7, list = FALSE)
## Warning in createDataPartition(., p = 0.7, list = FALSE): Some classes have no
## records ( 董事長 ) and these will be ignored
## Warning in createDataPartition(., p = 0.7, list = FALSE): Some classes have a
## single record ( 副主任, 顧客服務專員, 領牌專員, 引擎技師, 主任, 助理 ) and these
## will be selected for the sample
train.data <- UL_narm[training.samples, ]
test.data <- UL_narm[-training.samples, ]
lm.fit = lm(seniority ~num_car_before+mean_price_18_20+num_car2018+num_car2019+num_car2020+totalprice_before+totalprice_2018+totalprice_2019+lecturer_training+internet_marketing+performance+management+interview_skill+reward+newproduct+fresh,data=train.data)
summary(lm.fit)
##
## Call:
## lm(formula = seniority ~ num_car_before + mean_price_18_20 +
## num_car2018 + num_car2019 + num_car2020 + totalprice_before +
## totalprice_2018 + totalprice_2019 + lecturer_training + internet_marketing +
## performance + management + interview_skill + reward + newproduct +
## fresh, data = train.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.5936 -1.4881 -1.0896 0.1737 30.2011
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.380e+00 1.333e-01 10.351 < 2e-16 ***
## num_car_before 3.206e-03 5.227e-03 0.613 0.539766
## mean_price_18_20 3.118e-04 1.907e-03 0.164 0.870135
## num_car2018 1.442e-01 3.991e-02 3.612 0.000311 ***
## num_car2019 -1.068e-01 5.364e-02 -1.992 0.046517 *
## num_car2020 -2.693e-02 5.324e-02 -0.506 0.612993
## totalprice_before 2.264e-04 7.149e-05 3.166 0.001566 **
## totalprice_2018 -1.667e-03 7.899e-04 -2.111 0.034914 *
## totalprice_2019 1.271e-03 1.155e-03 1.101 0.271213
## lecturer_training -2.360e-01 6.224e-02 -3.792 0.000153 ***
## internet_marketing -1.157e-01 3.202e-01 -0.361 0.718014
## performance 4.731e-01 4.903e-01 0.965 0.334746
## management 2.587e-01 1.982e-02 13.050 < 2e-16 ***
## interview_skill -2.236e-01 3.013e-01 -0.742 0.458124
## reward -5.941e-01 2.972e-01 -1.999 0.045754 *
## newproduct 2.113e-01 1.461e-02 14.466 < 2e-16 ***
## fresh -5.781e-02 1.048e-02 -5.519 3.82e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.538 on 2136 degrees of freedom
## Multiple R-squared: 0.5485, Adjusted R-squared: 0.5451
## F-statistic: 162.2 on 16 and 2136 DF, p-value: < 2.2e-16
#預測2018~2020平均業績
lm.fit = lm(mean_price_18_20 ~ position+num_car_before+totalprice_before+lecturer_training+internet_marketing+performance+management+interview_skill+reward+newproduct+fresh+seniority,data=train.data)
summary(lm.fit)
##
## Call:
## lm(formula = mean_price_18_20 ~ position + num_car_before + totalprice_before +
## lecturer_training + internet_marketing + performance + management +
## interview_skill + reward + newproduct + fresh + seniority,
## data = train.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4211.2 -287.7 -41.9 134.0 15456.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -83.91700 575.94844 -0.146 0.88417
## position副理 -435.99759 694.31852 -0.628 0.53010
## position副主任 -191.61046 1131.59816 -0.169 0.86556
## position副總經理 132.15959 894.05213 0.148 0.88250
## position高級顧問 234.13054 620.37780 0.377 0.70591
## position顧客服務代表 169.52868 912.38357 0.186 0.85261
## position顧客服務專員 84.06798 1136.10918 0.074 0.94102
## position管理課長 -295.07692 590.12834 -0.500 0.61711
## position經理 201.42794 634.03527 0.318 0.75075
## position課長 91.83932 699.83793 0.131 0.89561
## position領牌專員 2840.73835 1134.31028 2.504 0.01234 *
## position所長 -499.26550 593.53444 -0.841 0.40035
## position銷售副理 810.31707 583.50060 1.389 0.16507
## position銷售高級專員 372.28115 578.88717 0.643 0.52023
## position銷售經理 1058.22650 584.40313 1.811 0.07032 .
## position銷售課長 329.54626 580.82437 0.567 0.57052
## position銷售主任 1799.89217 656.06996 2.743 0.00613 **
## position銷售專員 126.09203 576.08549 0.219 0.82677
## position協理 653.45660 682.47101 0.957 0.33843
## position行政會計 -299.89175 805.87085 -0.372 0.70983
## position業務代表 181.59220 601.46394 0.302 0.76275
## position業務助理 62.76859 619.66852 0.101 0.91933
## position引擎技師 -36.27138 1133.40391 -0.032 0.97447
## position職員 274.08439 806.65502 0.340 0.73406
## position主任 93.50753 1134.43559 0.082 0.93432
## position助理 26.32417 1131.04155 0.023 0.98143
## position專員 473.64136 752.54438 0.629 0.52916
## position組長 443.02594 800.27053 0.554 0.57991
## positionNULL 76.92702 605.37448 0.127 0.89889
## positionSA級服務專員 -123.83674 899.94739 -0.138 0.89057
## positionSSA級服務專員 -30.47985 804.18836 -0.038 0.96977
## positionTEST 95.83974 720.30701 0.133 0.89416
## num_car_before -9.06826 0.78550 -11.545 < 2e-16 ***
## totalprice_before 0.17511 0.01047 16.727 < 2e-16 ***
## lecturer_training -6.03708 13.70531 -0.440 0.65963
## internet_marketing -15.16987 69.03347 -0.220 0.82609
## performance 206.56177 106.04164 1.948 0.05156 .
## management -58.63034 4.96213 -11.816 < 2e-16 ***
## interview_skill -82.78536 65.45381 -1.265 0.20609
## reward 340.62055 65.01272 5.239 1.77e-07 ***
## newproduct 64.49504 3.10448 20.775 < 2e-16 ***
## fresh 4.44924 2.28298 1.949 0.05144 .
## seniority -2.62410 5.30576 -0.495 0.62095
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 979.3 on 2110 degrees of freedom
## Multiple R-squared: 0.5961, Adjusted R-squared: 0.5881
## F-statistic: 74.16 on 42 and 2110 DF, p-value: < 2.2e-16
# Make predictions
predictions <- lm.fit %>% predict(test.data)
# Model performance
# (a) Prediction error, RMSE
RMSE(predictions, test.data$mean_price_18_20)
## [1] 912.4191
# (b) R-square
R2(predictions, test.data$mean_price_18_20)
## [1] 0.6208327
與銷售額最有關的有(越多星號代表越顯著):
+ position: 銷售主任>>領牌專員 + totalprice_before,num_car_before + reward, newproduct,management
負相關: + num_car_before + management
(a) Prediction error, RMSE
908.6958 (b) R-square
0.6164879
plot(lm.fit)
## Warning: not plotting observations with leverage one:
## 102, 1808
## Warning in sqrt(crit * p * (1 - hh)/hh): 產生了 NaNs
## Warning in sqrt(crit * p * (1 - hh)/hh): 產生了 NaNs
lm.fit = lm(totalprice_2020 ~ num_car_before+totalprice_before+lecturer_training+internet_marketing+performance+management+interview_skill+reward+newproduct+fresh+seniority,data=train.data)
summary(lm.fit)
##
## Call:
## lm(formula = totalprice_2020 ~ num_car_before + totalprice_before +
## lecturer_training + internet_marketing + performance + management +
## interview_skill + reward + newproduct + fresh + seniority,
## data = train.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5120.0 -461.8 -34.1 86.1 17031.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.89040 37.42495 0.932 0.351299
## num_car_before -5.24994 0.94477 -5.557 3.09e-08 ***
## totalprice_before 0.11212 0.01247 8.988 < 2e-16 ***
## lecturer_training -43.66007 17.16166 -2.544 0.011027 *
## internet_marketing 78.29300 88.10894 0.889 0.374322
## performance 244.77600 134.90852 1.814 0.069758 .
## management -72.69246 5.15550 -14.100 < 2e-16 ***
## interview_skill -164.73718 82.90166 -1.987 0.047034 *
## reward 345.68807 81.10880 4.262 2.11e-05 ***
## newproduct 82.47605 3.64411 22.633 < 2e-16 ***
## fresh 11.02923 2.88995 3.816 0.000139 ***
## seniority -6.96507 5.92404 -1.176 0.239834
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1250 on 2141 degrees of freedom
## Multiple R-squared: 0.4198, Adjusted R-squared: 0.4168
## F-statistic: 140.8 on 11 and 2141 DF, p-value: < 2.2e-16
# Make predictions
predictions <- lm.fit %>% predict(test.data)
# Model performance
# (a) Prediction error, RMSE
RMSE(predictions, test.data$mean_price_18_20)
## [1] 942.6994
# (b) R-square
R2(predictions, test.data$mean_price_18_20)
## [1] 0.5947843
與2020銷售額正相關的有(越多星號代表越顯著):
+ totalprice_before,reward,reward,fresh + totalprice_before,num_car_before + reward, newproduct,management
(a) Prediction error, RMSE
940.7542 (b) R-square
0.5901853
lm.fit = lm(seniority ~ num_car_before+totalprice_before+lecturer_training+internet_marketing+performance+management+interview_skill+reward+newproduct+fresh,data=train.data)
summary(lm.fit)
##
## Call:
## lm(formula = seniority ~ num_car_before + totalprice_before +
## lecturer_training + internet_marketing + performance + management +
## interview_skill + reward + newproduct + fresh, data = train.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.4618 -1.4644 -1.0934 0.1612 30.0652
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.4211406 0.1330016 10.685 < 2e-16 ***
## num_car_before 0.0067263 0.0034428 1.954 0.050862 .
## totalprice_before 0.0001933 0.0000453 4.268 2.06e-05 ***
## lecturer_training -0.2301006 0.0623961 -3.688 0.000232 ***
## internet_marketing -0.1509863 0.3213437 -0.470 0.638503
## performance 0.4629422 0.4919510 0.941 0.346794
## management 0.2489040 0.0180182 13.814 < 2e-16 ***
## interview_skill -0.2085602 0.3023341 -0.690 0.490374
## reward -0.5694691 0.2955726 -1.927 0.054154 .
## newproduct 0.2103619 0.0124899 16.843 < 2e-16 ***
## fresh -0.0613739 0.0104568 -5.869 5.06e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.56 on 2142 degrees of freedom
## Multiple R-squared: 0.5429, Adjusted R-squared: 0.5408
## F-statistic: 254.4 on 10 and 2142 DF, p-value: < 2.2e-16
# Make predictions
predictions <- lm.fit %>% predict(test.data)
# Model performance
# (a) Prediction error, RMSE
RMSE(predictions, test.data$mean_price_18_20)
## [1] 1707.083
# (b) R-square
R2(predictions, test.data$mean_price_18_20)
## [1] 0.2644379