透過Mutinomial Logistic迴歸驗證颱風緯度與強度間的關係

library(pacman)
pacman::p_load("tidyverse", "magrittr", "mlogit","sabreR") 
## Installing package into 'C:/Users/user/Documents/R/win-library/3.4'
## (as 'lib' is unspecified)
## Warning: package 'sabreR' is not available (for R version 3.4.3)
## Bioconductor version 3.6 (BiocInstaller 1.28.0), ?biocLite for help
## Warning in p_install(package, character.only = TRUE, ...):
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'sabreR'
## Warning in pacman::p_load("tidyverse", "magrittr", "mlogit", "sabreR"): Failed to install/load:
## sabreR
##透過Mutinomial Logistic迴歸驗證颱風緯度與強度間的關係
dta<-read.csv("https://sites.google.com/site/rlearningsite/data/typhoon.csv?attredirects=0",  header=T, sep=",")
names(dta)
## [1] "Typhoon"               "Date"                  "Longitude"            
## [4] "Latitude"              "Wind.Velocity.near.TW"
##dta %<>% mutate(Latcat = ifelse(Latitude<=10, "L", ifelse(Latitude>=20, "H", "M")))
dta$Latcat[dta$Latitude<=10]<- "L" #緯度10以下為低緯度
dta$Latcat[dta$Latitude>10 & dta$Latitude<20]<- "M" #緯度11至19為中緯度
dta$Latcat[dta$Latitude>=20]<- "H" #緯度20以上為高緯度
dta$Latcat = factor(dta$Latcat, levels = c("L", "M", "H"))#將Latcat設定為類別尺度
table(dta$Latcat) #檢視交叉表,確定是否完成分類
## 
##  L  M  H 
## 12 45 20
ggplot(dta, aes(x = Latcat , y =Wind.Velocity.near.TW , color = Latcat))+
  stat_summary(fun.data = mean_se, position = position_dodge(.5))+
  theme_bw()+
  labs(title = "颱風生成緯度與颱風強弱")

library(mlogit) 
# 使用的是最大概似法
# 以mlogit.data()設定資料供分析用,choice不一定只能是factor
typhoondata<-mlogit.data(dta, varying=NULL, choice="Latcat", shape="wide") #以mlogit.data()設定資料
typhoondata[1:5,] #確定資料是否設定完成
##     Typhoon     Date Longitude Latitude Wind.Velocity.near.TW Latcat chid
## 1.H  南瑪都 20110821       128       12                    53  FALSE    1
## 1.L  南瑪都 20110821       128       12                    53  FALSE    1
## 1.M  南瑪都 20110821       128       12                    53   TRUE    1
## 2.H    梅花 20110726       146        8                    43  FALSE    2
## 2.L    梅花 20110726       146        8                    43   TRUE    2
##     alt
## 1.H   H
## 1.L   L
## 1.M   M
## 2.H   H
## 2.L   L
# reflevel 設定參照組
summary(m1<-mlogit(formula=Latcat ~ 0|Wind.Velocity.near.TW, data=typhoondata, reflevel="H"))
## 
## Call:
## mlogit(formula = Latcat ~ 0 | Wind.Velocity.near.TW, data = typhoondata, 
##     reflevel = "H", method = "nr", print.level = 0)
## 
## Frequencies of alternatives:
##       H       L       M 
## 0.25974 0.15584 0.58442 
## 
## nr method
## 5 iterations, 0h:0m:0s 
## g'(-H)^-1g = 1.28E-05 
## successive function values within tolerance limits 
## 
## Coefficients :
##                          Estimate Std. Error t-value Pr(>|t|)   
## L:(intercept)           -3.842754   1.467145 -2.6192 0.008814 **
## M:(intercept)           -1.707722   0.968680 -1.7629 0.077911 . 
## L:Wind.Velocity.near.TW  0.094382   0.038675  2.4404 0.014671 * 
## M:Wind.Velocity.near.TW  0.073644   0.028258  2.6061 0.009157 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log-Likelihood: -68.701
## McFadden R^2:  0.064522 
## Likelihood ratio test : chisq = 9.477 (p.value = 0.0087518)
#截距代表三個是0時的機率(H, L ,M)
c = c( 0 , -3.85 , -1.70)
exp(c)/sum(exp(c))
## [1] 0.83059013 0.01767474 0.15173513
# x=0時,颱風生成於高緯度的機率是0.83059  生成於低緯度是0.01767  生成於中緯度是0.15174
# 氣象局說 :緯度5度以上、25度以下的地區,利於形成旋轉運動,赤道上幾乎不可能形成颱風

#斜率代表log-odd機率,是比較下的機率
cc=c(0, 0.0944, 0.0736)
exp(cc)/sum(exp(cc))
## [1] 0.3149234 0.3461006 0.3389760
# 當颱風強度增加1 (即x上升1),生成於中緯度有比高緯度高0.3461的機率

#log(LL/LH)= -3.8428 + 0.09X
#log(LM/LH)= -1.7077 + 0.07X