# Load the second worksheet of each verb workbook.
IntentionVerb <- read_excel("IntentionVerb.xlsx", sheet = 2)
NonIntentionVerb <- read_excel("NonIntentionVerb.xlsx", sheet = 2)
ContextVerbLog <- read_excel("ContextVerb.xlsx", sheet = 2)

# Natural log of the AllPerMillionWord and NIVVDmean columns.
# NOTE(review): NonIntentionVerbMean is not created in the visible part of
# this script (only NonIntentionVerb is loaded) -- confirm where it is built.
IntenLog <- log(IntentionVerb$AllPerMillionWord)
NonIntenLog <- log(NonIntentionVerbMean$NIVVDmean)
# Shapiro-Wilk normality test on the IntenLog column of IntenLogFrame.
# NOTE(review): IntenLogFrame is not created in the visible part of this
# script (only the IntenLog vector is) -- confirm where it is built.
shapiro.test(as.numeric(IntenLogFrame$IntenLog))
# Console output recorded from a run of the call above:
##
## Shapiro-Wilk normality test
##
## data: as.numeric(IntenLogFrame$IntenLog)
## W = 0.97481, p-value = 0.588
# Shapiro-Wilk normality test on the NonIntenLog column of NonIntenLogFrame.
# NOTE(review): NonIntenLogFrame is not created in the visible part of this
# script -- confirm where it is built.
shapiro.test(as.numeric(NonIntenLogFrame$NonIntenLog))
# Console output recorded from a run of the call above:
##
## Shapiro-Wilk normality test
##
## data: as.numeric(NonIntenLogFrame$NonIntenLog)
## W = 0.9485, p-value = 0.2723
# Two-sample t-test comparing the log values of the two verb groups.
# Called with default arguments; the recorded output below shows this ran as
# a Welch two-sample test with a two-sided alternative.
t.test(IntenLogFrame$IntenLog, NonIntenLogFrame$NonIntenLog)
# Console output recorded from a run of the call above:
##
## Welch Two Sample t-test
##
## data: IntenLogFrame$IntenLog and NonIntenLogFrame$NonIntenLog
## t = 0.86878, df = 36.953, p-value = 0.3906
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.7550011 1.8883527
## sample estimates:
## mean of x mean of y
## 1.937916 1.371240
# Register the font family "A" (Microsoft JhengHei) used by the plots below.
# windowsFonts() only exists on Windows, so the call is guarded to keep the
# script from aborting with "could not find function" on other platforms.
# NOTE(review): on non-Windows systems family "A" stays unregistered, so the
# plots will fall back to a default font -- confirm that is acceptable.
if (.Platform$OS.type == "windows") {
  windowsFonts(A = windowsFont("微軟正黑體"))
}
# Bar chart of the log values for the intention verbs, with the words on the
# x axis ordered by ascending IntenLog.
ggplot(IntenLogFrame, aes(x = reorder(Word, IntenLog), y = IntenLog)) +
  geom_col(width = 0.7) +
  labs(x = "詞彙", y = "詞頻的log值") +
  theme(
    axis.title.y = element_text(
      hjust = 0.5, color = "firebrick4", angle = 90,
      face = "bold", size = 12, family = "A"
    ),
    axis.title.x = element_text(
      hjust = 0.5, color = "firebrick4", angle = 360,
      face = "bold", size = 12, family = "A"
    ),
    axis.text.y = element_text(
      face = "bold", size = 10, color = "#333333", family = "A"
    ),
    axis.text.x = element_text(
      face = "bold", size = 7, angle = 360, color = "#333333", family = "A"
    )
  )

# Bar chart of the log values for the non-intention verbs, with the words on
# the x axis ordered by ascending NonIntenLog.
ggplot(
  NonIntenLogFrame,
  aes(x = reorder(Word, NonIntenLog), y = NonIntenLog)
) +
  geom_col(width = 0.7) +
  labs(x = "詞彙", y = "詞頻的log值") +
  theme(
    axis.title.y = element_text(
      hjust = 0.5, color = "firebrick4", angle = 90,
      face = "bold", size = 15, family = "A"
    ),
    axis.title.x = element_text(
      hjust = 0.5, color = "firebrick4", angle = 360,
      face = "bold", size = 15, family = "A"
    ),
    axis.text.y = element_text(
      face = "bold", size = 12, color = "#333333", family = "A"
    ),
    axis.text.x = element_text(
      face = "bold", size = 9, angle = 360, color = "#333333", family = "A"
    )
  )

# Mean of AllPerMillionLog per VerbType drawn as bars, with mean_se error
# bars layered on top; bars are filled by VerbType.
ggplot(
  ContextVerbLog,
  aes(x = VerbType, y = AllPerMillionLog, fill = VerbType)
) +
  stat_summary(
    fun = mean, geom = "bar", na.rm = TRUE,
    width = 0.8, position = position_dodge(width = 0.6)
  ) +
  stat_summary(
    fun.data = mean_se, geom = "errorbar", na.rm = TRUE, width = 0.6
  ) +
  labs(x = "詞類", y = "詞頻的log值") +
  theme(
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 14),
    axis.title.y = element_text(
      hjust = 0.5, angle = 90, size = 16, family = "A"
    ),
    axis.title.x = element_text(
      hjust = 0.5, angle = 360, size = 16, family = "A"
    ),
    axis.text.y = element_text(
      face = "bold", size = 12, color = "#333333", family = "A"
    ),
    axis.text.x = element_text(
      face = "bold", size = 12, color = "#333333", family = "A"
    )
  )

# Base-graphics histogram of the AllPerMillionLog column across all verbs,
# with the title and x-axis label drawn in font family "A".
hist(
  x = ContextVerbLog$AllPerMillionLog,
  main = "各詞彙log值的次數分配",
  xlab = "各詞彙的log值",
  family = "A"
)

# Two-column frame (Log, VerbType) taken from ContextVerbLog; it is reused by
# the faceted histogram further down.
AllVerbLog <- data.frame(
  Log = ContextVerbLog$AllPerMillionLog,
  VerbType = ContextVerbLog$VerbType
)

# Histogram of the log values, filled and outlined by VerbType.
ggplot(AllVerbLog, aes(x = Log, fill = VerbType, colour = VerbType)) +
  # bins = 30 is the value stat_bin() falls back to when none is given;
  # stating it keeps the same bins and silences the
  # "Pick better value with `binwidth`" message on every run.
  geom_histogram(bins = 30) +
  labs(x = "各詞彙的log值", y = "次數") +
  theme(
    axis.title.y = element_text(
      hjust = 0.5, angle = 90, size = 14, family = "A"
    ),
    axis.title.x = element_text(
      hjust = 0.5, angle = 360, size = 14, family = "A"
    ),
    axis.text.y = element_text(
      face = "bold", size = 10, color = "#333333", family = "A"
    ),
    axis.text.x = element_text(
      face = "bold", size = 10, color = "#333333", family = "A"
    )
  )

# Histogram of the log values with one facet panel per VerbType, filled and
# outlined by VerbType.
ggplot(AllVerbLog, aes(x = Log, fill = VerbType, colour = VerbType)) +
  # bins = 30 is the value stat_bin() falls back to when none is given;
  # stating it keeps the same bins and silences the
  # "Pick better value with `binwidth`" message on every run.
  geom_histogram(bins = 30) +
  facet_grid(~ VerbType) +
  labs(x = "各詞彙的log值", y = "次數") +
  theme(
    axis.title.y = element_text(
      hjust = 0.5, angle = 90, size = 14, family = "A"
    ),
    axis.title.x = element_text(
      hjust = 0.5, angle = 360, size = 14, family = "A"
    ),
    axis.text.y = element_text(
      face = "bold", size = 10, color = "#333333", family = "A"
    ),
    axis.text.x = element_text(
      face = "bold", size = 10, color = "#333333", family = "A"
    )
  )
