library(dplyr)
library(tidyr)
library(magrittr)
library(ggplot2)
#attach(dating)

# 大大大綱
#
# 如何增加媒合度?
# 哪些條件使然(外在因素):金錢、出身領域、種族、宗教等
# 個人契合度:興趣等等
# 是否跟預期的差很多?
# 某些人的媒合度特別高,原因是?
# 場地因素、人的因素,哪一個?
# 回歸到死吧哈哈哈哈

# Data loading, train/test split and first round of nested-model tests ----

# Load the speed-dating data; the file is read with Big5 encoding.
dating <- read.csv(
  "~/Desktop/Report/Speed Dating Data.csv",
  fileEncoding = "big5"
)

# Reproducible 70/30 split. The row count is taken from the data instead of
# being hard-coded, so the split stays valid if the file changes.
set.seed(69)
n_obs <- nrow(dating)
number <- sample(n_obs, floor(0.7 * n_obs))

# Keep the response (dec_o), the interest columns sports..yoga, and int_corr.
ddat <- dating %>%
  select(dec_o, sports:yoga, int_corr)

# ddat already holds exactly the modelling columns, so plain row indexing is
# enough for the two partitions.
train <- ddat[number, ]
test <- ddat[-number, ]

# Full logistic regression of dec_o on every other column in train.
model0 <- glm(dec_o ~ ., family = binomial(link = "logit"), data = train)

# Reduced candidates: each one removes a different subset of predictors
# from model0.
model1.1 <- update(
  model0,
  . ~ . - museums - art - reading - theater - movies - concerts - music - yoga
)
model1.2 <- update(
  model0,
  . ~ . - museums - art - reading - tv - movies - concerts - music - yoga
)
model1.3 <- update(
  model0,
  . ~ . - museums - art - reading - tv - theater - concerts - music - yoga
)
model1.4 <- update(
  model0,
  . ~ . - museums - art - reading - tv - theater - movies - music - yoga
)
model1.5 <- update(
  model0,
  . ~ . - museums - art - reading - tv - theater - movies - music
)

# Chi-square (analysis of deviance) test of each reduced model against model0.
anova(model1.1, model0, test = "Chisq")
anova(model1.2, model0, test = "Chisq")
anova(model1.3, model0, test = "Chisq")
anova(model1.4, model0, test = "Chisq")
anova(model1.5, model0, test = "Chisq") # Reject

# model1.3 has the lowest AIC among the candidate models.

# AIC of model1.3 = 7776.680

# AIC comparison of the candidates, then try dropping hiking and movies ----

AIC(model1.1, model1.2, model1.3, model1.4, model0)

# Sequential analysis-of-deviance table for model1.3.
anova(model1.3, test = "Chisq")

# model2: model1.3 without hiking and movies, tested against model1.3.
model2 <- update(model1.3, . ~ . - hiking - movies)
anova(model2, model1.3, test = "Chisq")
AIC(model2) # 7781.577

# Drop movies and hiking one at a time from model1.3 and compare AIC.
model2.1 <- update(model1.3, . ~ . - movies)
model2.2 <- update(model1.3, . ~ . - hiking)
AIC(model2.1, model2.2) # 7780.901, 7777.826
# Not good
# Final model used: model1.3

# Single-term additions to model1.3: try each candidate term from the
# two-way-interaction scope (~ .^2) with a chi-square test, and store the
# resulting table in `a`.
a <- add1(model1.3, ~ .^2, test = "Chisq")

# model3: model1.3 plus the hiking:clubbing interaction.
model3 <- update(model1.3, . ~ . + hiking:clubbing)
anova(model3, model1.3, test = "Chisq")
AIC(model3, model1.3) # 7766.892

# Adding pairwise interaction terms: some of them do lower the AIC.
# Stepwise search starting from model1.3 with scope ~ .^2 (two-way
# interactions of its terms).
model4 <- step(model1.3, ~ .^2)
model4$aic
AIC(model4)

# model5: logistic regression with nine main effects and a hand-written set
# of two-way interactions.
model5 <- glm(
  formula = dec_o ~ sports + tvsports + exercise + dining + hiking + gaming +
    clubbing + movies + shopping +
    hiking:clubbing + dining:gaming + sports:tvsports + dining:shopping +
    dining:clubbing + tvsports:clubbing + gaming:clubbing + exercise:movies +
    exercise:clubbing + clubbing:movies + movies:shopping + dining:hiking +
    tvsports:movies + exercise:dining + gaming:movies,
  family = binomial(link = "logit"),
  data = train
)

# NOTE(review): model1.3 is built from `dec_o ~ .` and never removes
# int_corr, while model5 has no int_corr term, so the two models do not look
# nested -- confirm that this chi-square comparison is meaningful.
anova(model5, model1.3, test = "Chisq")

# model6: logistic regression with eight main effects (no gaming), two-way
# interactions and three three-way interactions.
model6 <- glm(
  formula = dec_o ~ sports + tvsports + exercise + dining + hiking +
    clubbing + movies + shopping +
    hiking:movies + dining:clubbing + exercise:dining + exercise:movies +
    clubbing:movies + tvsports:exercise + movies:shopping + dining:shopping +
    dining:movies + sports:movies + sports:dining + tvsports:dining +
    dining:movies:shopping + exercise:dining:movies + sports:dining:movies,
  family = binomial(link = "logit"),
  data = train
)

# NOTE(review): model6 has no gaming terms and model5 has no three-way
# terms, so neither model is nested in the other -- confirm that the
# chi-square comparison is meaningful; the AIC comparison does not need
# nesting.
anova(model6, model5, test = "Chisq")
AIC(model6, model5) # 7766.892

# model7: main effects plus int_corr, two-way interactions and the
# exercise:dining:hiking three-way interaction.
# The original call omitted `family`, which makes glm() fall back to its
# gaussian default; every other model of dec_o in this script is a binomial
# logit, so the family is now stated explicitly.
model7 <- glm(
  formula = dec_o ~ sports + tvsports + exercise + dining + hiking + gaming +
    clubbing + movies + shopping + int_corr +
    hiking:clubbing + exercise:int_corr + dining:gaming + dining:clubbing +
    clubbing:movies + dining:shopping + sports:tvsports + tvsports:clubbing +
    gaming:clubbing + exercise:dining + tvsports:movies + movies:shopping +
    dining:hiking + exercise:hiking + movies:int_corr + sports:gaming +
    tvsports:hiking + sports:shopping + exercise:dining:hiking,
  family = binomial(link = "logit"),
  data = train
)

# model8: logistic regression with main effects plus int_corr and two-way
# interactions only. This is the model used for prediction on the test set.
model8 <- glm(
  formula = dec_o ~ sports + tvsports + exercise + dining + hiking + gaming +
    clubbing + movies + shopping + int_corr +
    hiking:clubbing + exercise:int_corr + dining:gaming + dining:clubbing +
    clubbing:movies + dining:shopping + sports:tvsports + tvsports:clubbing +
    gaming:clubbing + exercise:dining + tvsports:movies + movies:shopping +
    dining:hiking + exercise:hiking + movies:int_corr + sports:gaming,
  family = binomial(link = "logit"),
  data = train
)

# Hold-out evaluation of model8 ----

# Predicted probabilities on the test set; rows whose prediction is NA are
# dropped before scoring.
pred <- predict(model8, newdata = test, type = "response")
pred.1 <- pred[!is.na(pred)]

# Classification accuracy of model8 on the test set at cut-off `p`.
#
# Reads the script-level objects `pred`, `pred.1` and `test`.
# p: probability threshold; predictions strictly above it are labelled 1.
# Prints the accuracy and returns it invisibly.
#
# The original built a 2x2 table and summed tab[1, 1] + tab[2, 2], which
# errors (or counts the wrong cell) whenever a threshold puts every
# prediction in a single class. Counting matches directly gives the same
# number when both classes are present and stays correct otherwise.
# Assumes dec_o is coded 0/1 -- TODO confirm against the data.
ff <- function(p) {
  pred.2 <- ifelse(pred.1 > p, 1, 0)
  actual <- test[!is.na(pred), "dec_o"]
  # Rows with a missing dec_o count as misses, as in the original table-based
  # version (they were absent from the table but present in the denominator).
  accuracy <- sum(pred.2 == actual, na.rm = TRUE) / length(pred.2)
  print(accuracy)
  invisible(accuracy)
}

range(pred.1)

# Thresholds 0.1, 0.2, ..., 0.8. The variable name `seq` is kept so any later
# code that refers to it keeps working; it only masks base::seq as a variable,
# calls to seq() still resolve to the function.
seq <- seq(0.1, 0.8, 0.1)

# Accuracy at each threshold, computed without growing a vector in a loop.
k <- vapply(seq, ff, numeric(1))

plot(k)
hist(pred)