Number of clients with purchase/visit/address information (clients with only 1 visit and athletic clients are excluded)
nrow(client_of_interest)
[1] 981
Number of visits (at least 2 visits)
summary(client_of_interest$number_of_visit)
Min. 1st Qu. Median Mean 3rd Qu. Max.
2.00 4.00 8.00 22.13 18.00 618.00
Days spent in the gym
summary(client_of_interest$length)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 41 119 303 406 2438
Purchase count
summary(client_of_interest$purchase_count)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1 3 6 18 14 569
Purchase amount
summary(client_of_interest$purchase_amount)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.5 217.2 518.0 1514.3 1180.6 64012.0
Visits by item
visit_by_item
Visits by payment
visit_by_payment
nrow(client_PT)
[1] 217
summary(client_PT$number_of_visit)
Min. 1st Qu. Median Mean 3rd Qu. Max.
2.00 5.00 14.00 46.23 46.00 618.00
summary(client_PT$purchase_amount)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.5 481.4 1067.6 3644.3 3493.5 64012.0
sales_PT_item_summary
first_purchase_PT_item_summary
client_PT %>% group_by(first_Item.name) %>% summarise(count=n(),avg_price=mean(sum_first_purchase_amount),gym_days = mean(length)) %>% arrange(-gym_days)
Regression model (PT clients): length of stay in the gym ~ driving time + first purchase amount
summary(PT_retention_regression)
Call:
lm(formula = length ~ driving_time_minute + sum_first_purchase_amount,
data = client_PT)
Residuals:
Min 1Q Median 3Q Max
-484.8 -378.0 -220.7 216.5 2018.8
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 408.21247 56.91156 7.173 1.18e-11 ***
driving_time_minute -0.35922 0.26081 -1.377 0.170
sum_first_purchase_amount 0.08706 0.10236 0.851 0.396
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 540.4 on 214 degrees of freedom
Multiple R-squared: 0.01168, Adjusted R-squared: 0.002442
F-statistic: 1.264 on 2 and 214 DF, p-value: 0.2845
client_PT %>% group_by(first_Item.name) %>% summarise(n_visit=mean(number_of_visit_before_second_purchase,na.rm=TRUE)) %>% arrange(-n_visit)
cor(client_PT$sum_second_purchase_amount,client_PT$number_of_visit_before_second_purchase,use="complete.obs")
[1] 0.2542691
nrow(client_pilate)
[1] 90
summary(client_pilate$number_of_visit)
Min. 1st Qu. Median Mean 3rd Qu. Max.
2.00 5.00 11.00 36.54 43.00 471.00
summary(client_pilate$purchase_amount)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.5 219.2 749.6 3049.0 2269.6 64012.0
sales_pilate_item_summary
client_pilate %>% group_by(first_Item.name) %>% summarise(count=n(),avg_price=mean(sum_first_purchase_amount),gym_days = mean(length)) %>% arrange(-gym_days)
Regression model (Pilates clients): length of stay in the gym ~ driving time + first purchase amount
summary(pilate_retention_regression)
Call:
lm(formula = length ~ driving_time_minute + sum_first_purchase_amount,
data = client_pilate)
Residuals:
Min 1Q Median 3Q Max
-366.7 -327.6 -123.3 105.3 2081.9
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 352.60009 63.78183 5.528 3.34e-07 ***
driving_time_minute 0.02144 0.18757 0.114 0.909
sum_first_purchase_amount 0.06122 0.13874 0.441 0.660
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 464.3 on 87 degrees of freedom
Multiple R-squared: 0.002321, Adjusted R-squared: -0.02061
F-statistic: 0.1012 on 2 and 87 DF, p-value: 0.9039