# Load the data: sheet 2 of each Excel workbook.
IntentionVerb <- read_excel(path = "IntentionVerb.xlsx", sheet = 2)
NonIntentionVerb <- read_excel(path = "NonIntentionVerb.xlsx", sheet = 2)
ContextVerbLog <- read_excel(path = "ContextVerb.xlsx", sheet = 2)

# Take the natural log of the frequency columns.
IntenLog <- log(IntentionVerb$AllPerMillionWord)
# NOTE(review): NonIntentionVerbMean is not created in the visible part of this
# script (only NonIntentionVerb is loaded above) - confirm it is defined
# elsewhere before this line runs.
NonIntenLog <- log(NonIntentionVerbMean$NIVVDmean)
# Normality checks and group comparison on the log-transformed frequencies.
# Lines starting with `##` appear to be recorded console output from a previous run.
# NOTE(review): IntenLogFrame and NonIntenLogFrame are not constructed in the
# visible part of this script - presumably they hold the log values together
# with a Word column; confirm where they are defined.

# Shapiro-Wilk normality test on IntenLogFrame$IntenLog (coerced to numeric).
# Recorded p-value is 0.588, so normality is not rejected at the 5% level.
shapiro.test(as.numeric(IntenLogFrame$IntenLog)) 
## 
##  Shapiro-Wilk normality test
## 
## data:  as.numeric(IntenLogFrame$IntenLog)
## W = 0.97481, p-value = 0.588
# Same test on NonIntenLogFrame$NonIntenLog.
# Recorded p-value is 0.2723, so normality is not rejected at the 5% level.
shapiro.test(as.numeric(NonIntenLogFrame$NonIntenLog))
## 
##  Shapiro-Wilk normality test
## 
## data:  as.numeric(NonIntenLogFrame$NonIntenLog)
## W = 0.9485, p-value = 0.2723
# Two-sample t-test comparing the two sets of log values; with the default
# arguments used here the recorded output is a Welch (unequal-variance) test.
# Recorded p-value is 0.3906: no significant difference in means at the 5% level.
t.test(IntenLogFrame$IntenLog, NonIntenLogFrame$NonIntenLog)
## 
##  Welch Two Sample t-test
## 
## data:  IntenLogFrame$IntenLog and NonIntenLogFrame$NonIntenLog
## t = 0.86878, df = 36.953, p-value = 0.3906
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.7550011  1.8883527
## sample estimates:
## mean of x mean of y 
##  1.937916  1.371240
# Register the Microsoft JhengHei font under the alias "A" so that plot text
# using family = "A" can render the Chinese labels.
# windowsFonts() is only available on Windows builds of R.
windowsFonts(A = windowsFont("微軟正黑體"))

# Bar chart of the log-transformed frequency of each volitional (intention)
# word; words are ordered along the x axis by IntenLog via reorder().
ggplot(
  data = IntenLogFrame,
  mapping = aes(x = reorder(Word, IntenLog), y = IntenLog)
) +
  # geom_col() draws bar heights straight from the y values.
  geom_col(width = 0.7) +
  labs(x = "詞彙", y = "詞頻的log值") +
  theme(
    axis.title.y = element_text(
      hjust = 0.5, color = "firebrick4", angle = 90,
      face = "bold", size = 12, family = "A"
    ),
    axis.title.x = element_text(
      hjust = 0.5, color = "firebrick4", angle = 360,
      face = "bold", size = 12, family = "A"
    ),
    axis.text.y = element_text(
      face = "bold", size = 10, color = "#333333", family = "A"
    ),
    axis.text.x = element_text(
      face = "bold", size = 7, angle = 360, color = "#333333", family = "A"
    )
  )

# Bar chart of the log-transformed frequency of each non-volitional
# (non-intention) word; words are ordered along the x axis by NonIntenLog
# via reorder().
ggplot(
  data = NonIntenLogFrame,
  mapping = aes(x = reorder(Word, NonIntenLog), y = NonIntenLog)
) +
  # geom_col() draws bar heights straight from the y values.
  geom_col(width = 0.7) +
  labs(x = "詞彙", y = "詞頻的log值") +
  theme(
    axis.title.y = element_text(
      hjust = 0.5, color = "firebrick4", angle = 90,
      face = "bold", size = 15, family = "A"
    ),
    axis.title.x = element_text(
      hjust = 0.5, color = "firebrick4", angle = 360,
      face = "bold", size = 15, family = "A"
    ),
    axis.text.y = element_text(
      face = "bold", size = 12, color = "#333333", family = "A"
    ),
    axis.text.x = element_text(
      face = "bold", size = 9, angle = 360, color = "#333333", family = "A"
    )
  )

# Mean log frequency per verb type, drawn as bars, with error bars computed
# by mean_se(). Missing values are dropped in both summaries (na.rm = TRUE).
ggplot(
  ContextVerbLog,
  aes(x = VerbType, y = AllPerMillionLog, fill = VerbType)
) +
  stat_summary(
    fun = mean,
    geom = "bar",
    na.rm = TRUE,
    width = 0.8,
    position = position_dodge(width = 0.6)
  ) +
  stat_summary(
    fun.data = mean_se,
    na.rm = TRUE,
    geom = "errorbar",
    width = 0.6
  ) +
  labs(x = "詞類", y = "詞頻的log值") +
  theme(
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 14),
    axis.title.y = element_text(
      hjust = 0.5, angle = 90, size = 16, family = "A"
    ),
    axis.title.x = element_text(
      hjust = 0.5, angle = 360, size = 16, family = "A"
    ),
    axis.text.y = element_text(
      face = "bold", size = 12, color = "#333333", family = "A"
    ),
    axis.text.x = element_text(
      face = "bold", size = 12, color = "#333333", family = "A"
    )
  )

# Distribution of the log frequencies, drawn with base graphics.
# Argument reminder: hist(v, main, xlab, xlim, ylim, breaks, col, border)
hist(
  ContextVerbLog$AllPerMillionLog,
  main = "各詞彙log值的次數分配",
  xlab = "各詞彙的log值",
  family = "A"
)

# Collect the log frequency and verb type of every word into a two-column
# data frame used by the histograms.
AllVerbLog <- data.frame(
  Log = ContextVerbLog$AllPerMillionLog,
  VerbType = ContextVerbLog$VerbType
)

# Histogram of the log frequencies, filled and outlined by verb type.
# bins = 30 is stated explicitly: it is the value stat_bin() otherwise falls
# back to while emitting a "Pick better value" message, so the plot is
# unchanged and the run stays free of that message.
ggplot(AllVerbLog, aes(x = Log, fill = VerbType, colour = VerbType)) +
  geom_histogram(bins = 30) +
  labs(x = "各詞彙的log值", y = "次數") +
  theme(
    axis.title.y = element_text(
      hjust = 0.5, angle = 90, size = 14, family = "A"
    ),
    axis.title.x = element_text(
      hjust = 0.5, angle = 360, size = 14, family = "A"
    ),
    axis.text.y = element_text(
      face = "bold", size = 10, color = "#333333", family = "A"
    ),
    axis.text.x = element_text(
      face = "bold", size = 10, color = "#333333", family = "A"
    )
  )

# Histogram of the log frequencies with one panel per verb type.
# bins = 30 is stated explicitly: it is the value stat_bin() otherwise falls
# back to while emitting a "Pick better value" message, so the plot is
# unchanged and the run stays free of that message.
ggplot(AllVerbLog, aes(x = Log, fill = VerbType, colour = VerbType)) +
  geom_histogram(bins = 30) +
  facet_grid(~ VerbType) +
  labs(x = "各詞彙的log值", y = "次數") +
  theme(
    axis.title.y = element_text(
      hjust = 0.5, angle = 90, size = 14, family = "A"
    ),
    axis.title.x = element_text(
      hjust = 0.5, angle = 360, size = 14, family = "A"
    ),
    axis.text.y = element_text(
      face = "bold", size = 10, color = "#333333", family = "A"
    ),
    axis.text.x = element_text(
      face = "bold", size = 10, color = "#333333", family = "A"
    )
  )