# Metrics for evaluating linear regression models ----

# Read the obesity data set from its CSV file and preview the first rows.
ob <- read.csv(file = "C:\\Users\\Thu Bo\\Desktop\\obesity data.csv")
head(x = ob)
# Model m1: pcfat explained by age ----
# Coefficient of determination (R^2): the larger it is, the more of the
# variation in y is accounted for by x.
# The figures below are the ones recorded in the original notes, read from
# the anova table of this model:
#   R^2 = SS_reg / (SS_reg + SS_error) = SS_reg / SS_total
#       = 5917 / (5917 + 56821) = 0.094
#   -> differences in age explain 9.4% of the differences in pcfat.
#   MSE = SS_error / residual df = 56821 / 1215 = 46.8
#   NOTE: the original notes called this value "RMSE". SS_error / residual df
#   is the residual mean square (MSE); the RMSE is its square root (about 6.8).
#   Variance of pcfat: 51.6.
#   Variance of pcfat after adjusting for age: 46.8 (the MSE above).
var(ob[["pcfat"]])
m1 <- lm(formula = pcfat ~ age, data = ob)
anova(m1)
# Model m2: pcfat explained by gender ----
# Coefficient of determination (R^2), figures as recorded in the original
# notes from the anova table of this model:
#   R^2 = SS_reg / (SS_reg + SS_error) = SS_reg / SS_total
#       = 27808 / (27808 + 34929) = 0.4432
#   -> differences in gender explain 44.3% of the differences in pcfat.
#   MSE = SS_error / residual df = 34929 / 1215 = 28.7
#   NOTE: the original notes called this value "RMSE". SS_error / residual df
#   is the residual mean square (MSE); the RMSE is its square root (about 5.4).
#   Variance of pcfat: 51.6.
#   Variance of pcfat after adjusting for gender: 28.7 (the MSE above).
# The smaller the residual error (MSE / RMSE), the more meaningful the model.
var(ob[["pcfat"]])
m2 <- lm(formula = pcfat ~ gender, data = ob)
anova(m2)
# Comparing two linear regression models ----
# Goal: decide which of the two models is better by choosing the one with
# the lowest residual error and the highest R^2.
# Results recorded in the original notes (labelled "RMSE" there; the values
# are on the same scale as the residual mean squares, i.e. MSE):
#   m3 (gender + age): residual mean square 26.2, R^2 49%
#   m4 (gender + bmi): residual mean square 16.4, R^2 68%
#   -> model m4 is better than m3.
# Choosing the final model (order 2, 3): rely on whether the p-value is
# statistically significant. If an added parameter is not statistically
# significant, or changes "RR" (presumably R^2 - confirm) only very little,
# it should not be added to the model.
# OPEN QUESTION from the notes: how large a change is worth adding a term?
m3 <- lm(formula = pcfat ~ gender + age, data = ob)
m4 <- lm(formula = pcfat ~ gender + bmi, data = ob)

# Sequential ANOVA tables first, then the coefficient summaries.
anova(m3)
anova(m4)
summary(m3)
summary(m4)
# Model m5: waist-to-hip ratio (WHR) explained by Height ----
# Read the data set stored in "Hoa hau Vietnam.csv" and derive
# WHR = Waist / Hip as a new column.
hh <- read.csv("C:\\Users\\Thu Bo\\Desktop\\Hoa hau Vietnam.csv")
hh$WHR <- hh$Waist / hh$Hip
head(hh)

# Simple linear regression of WHR on Height.
# Result recorded in the original notes: R^2 = 8.7%, p > 0.05
# -> Height explains 8.7% of the variation in WHR, but the relationship is
#    not statistically significant.
m5 <- lm(WHR ~ Height, data = hh)
summary(m5)

# Scatter plot with the fitted regression line. Using the formula + `data`
# interface (as in the lm() call) labels the axes "Height" and "WHR" instead
# of the raw expressions "hh$Height" and "hh$WHR".
plot(WHR ~ Height, data = hh, pch = 16, col = "blue")
abline(m5)