# Pick the CSV file interactively and load it into `fmh`.
# The path is stored under a descriptive name instead of `t`, which would
# shadow base::t().
fmh_path <- file.choose()
fmh <- read.csv(fmh_path)
head(fmh)

library(ggplot2)

# Scatter plot of `sysbp` against `bmi` with the default axis titles.
bp_scatter <- ggplot(data = fmh, aes(x = bmi, y = sysbp)) +
  geom_point()
bp_scatter

# Same plot with descriptive axis titles. The labels must use plain ASCII
# double quotes: typographic quotes are not valid R string delimiters and
# make the line fail to parse.
bp_scatter +
  ylab("Systolic Blood Pressure") +
  xlab("Body Mass Index")
# Pick the CSV file interactively and load it into `arr`.
arr_path <- file.choose()
arr <- read.csv(arr_path)
head(arr)

library(compareGroups)

# Descriptive table of age, race, prior and parole, split by `finance`.
createTable(compareGroups(finance ~ age + race + prior + parole, data = arr))

# Distribution of `race` within each `arrest` group.
createTable(compareGroups(arrest ~ race, data = arr))

# Chi-squared test of association between `arrest` and `race`.
# Testing `race` against itself (as before) only cross-tabulates a variable
# with itself and says nothing about the arrest/race relationship.
chisq.test(arr$arrest, arr$race)
library(magrittr)
library(dplyr)
library(ggplot2)

# Percentage of each `race` value within every `arrest` group.
race_share <- arr %>%
  count(arrest, race) %>%
  group_by(arrest) %>%
  mutate(percent = n / sum(n) * 100)

# Stacked bar chart of those percentages; each segment is labelled with its
# share (one decimal place) centred inside the segment. The legend is hidden.
ggplot(race_share, aes(x = arrest, y = percent, fill = race)) +
  geom_bar(stat = "identity") +
  geom_text(
    aes(label = paste0(sprintf("%1.1f", percent), "%")),
    position = position_stack(vjust = 0.5)
  ) +
  theme(legend.position = "none") +
  labs(x = "Tái phạm", y = "Phần trăm (%)")
# --- arrest vs finance ------------------------------------------------------
createTable(compareGroups(arrest ~ finance, data = arr))
# Chi-squared test of association (run once; the repeated call was redundant).
chisq.test(arr$finance, arr$arrest)

# --- arrest vs education ----------------------------------------------------
# Treat `educ` as a categorical variable for the table, test and plot.
arr$edu <- as.factor(arr$educ)
createTable(compareGroups(arrest ~ edu, data = arr))
chisq.test(arr$arrest, arr$edu)

# Stacked bar chart: percentage of each `edu` level within every `arrest`
# group, each segment labelled with its share. The legend is hidden.
arr %>%
  count(arrest, edu) %>%
  group_by(arrest) %>%
  mutate(percent = n / sum(n) * 100) %>%
  ggplot(aes(x = arrest, y = percent, fill = edu)) +
  geom_bar(stat = "identity") +
  geom_text(
    aes(label = paste0(sprintf("%1.1f", percent), "%")),
    position = position_stack(vjust = 0.5)
  ) +
  theme(legend.position = "none") +
  labs(x = "Tái phạm", y = "Phần trăm (%)")

# --- arrest vs age ----------------------------------------------------------
createTable(compareGroups(arrest ~ age, data = arr))
# Two-sample t-test of `age` between the `arrest` groups. The formula form
# compares the groups; t.test(arr$arrest, arr$age) instead compared the mean
# of the group indicator with the mean age, which is not meaningful.
# NOTE(review): assumes `arrest` has exactly two distinct values - confirm.
t.test(age ~ arrest, data = arr)

# --- arrest vs prior --------------------------------------------------------
createTable(compareGroups(arrest ~ prior, data = arr))
# Same correction as for age: compare `prior` between the `arrest` groups.
t.test(prior ~ arrest, data = arr)

# Box plot of `age` per `arrest` group with the raw points jittered on top.
ggplot(
  arr,
  aes(group = arrest, x = arrest, y = age, fill = arrest, color = arrest)
) +
  geom_boxplot(color = "black") +
  geom_jitter(aes(color = arrest), alpha = 0.5) +
  theme(legend.position = "none") +
  labs(x = "Tái phạm", y = "Tuổi (năm)")
# Packages are loaded up front. Installing a package is a one-off interactive
# step (install.packages("ggplot2")) and does not belong in the analysis
# script, where it would re-download the package on every run.
library(readxl)
library(ggplot2)

# Pick the Excel file interactively and load it into `ins`.
ins_path <- file.choose()
ins <- read_excel(ins_path)

# Numeric code for `sex`: 1 = "male", 2 = "female", NA for anything else.
# Built in one step so the column is never indexed before it exists.
ins$gender <- as.numeric(match(ins$sex, c("male", "female")))
head(ins)

# Two-sample t-test of `charge` between the two gender groups. The formula
# form compares the groups; t.test(ins$gender, ins$charge) instead compared
# the mean of the 1/2 codes with the mean charge, which is not meaningful.
t.test(charge ~ gender, data = ins)

# Box plot of `charge` per gender code with the raw points jittered on top.
# (The earlier copy of this plot had a t.test() call fused onto the same line,
# which does not parse; only the valid plot is kept.)
ggplot(
  ins,
  aes(group = gender, x = gender, y = charge, fill = gender, color = gender)
) +
  geom_boxplot(colour = "black") +
  geom_jitter(aes(color = gender), alpha = 0.5) +
  theme(legend.position = "none") +
  labs(x = "Giới tính", y = "Tiền bảo hiểm")

# Density-scaled histogram of `charge` with a kernel density curve on top.
# after_stat(density) replaces the deprecated `..density..` notation, and
# bins = 30 states the default explicitly, which silences the bin message.
# NOTE(review): the y axis shows a density, but its label reads as an amount -
# confirm the intended label text.
ggplot(ins, aes(x = charge)) +
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 30, fill = "blue", col = "white"
  ) +
  geom_density(col = "purple") +
  labs(
    x = "Tiền bảo hiểm",
    y = "Số tiền bảo hiểm",
    title = "Phân bố của tiền bảo hiểm"
  )
# Bootstrap the difference in mean `charge` between rows whose `sex` is
# "male" and rows whose `sex` is "female".
men <- ins$charge[ins$sex == "male"]
women <- ins$charge[ins$sex == "female"]
n <- length(men)
m <- length(women)

# Number of bootstrap replicates; results are preallocated.
# No seed is set here, so the output varies from run to run; call set.seed()
# beforehand if reproducible numbers are needed.
B <- 1000
difference <- numeric(B)

# The index is named `b` so it does not overwrite `i`, which the script also
# uses for the chosen file path. seq_len() is safe even if B were 0, and
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
for (b in seq_len(B)) {
  # Resample each group with replacement at its original size.
  bs.men <- sample(men, n, replace = TRUE)
  bs.women <- sample(women, m, replace = TRUE)
  difference[b] <- mean(bs.men, na.rm = TRUE) - mean(bs.women, na.rm = TRUE)
}

# Distribution of the bootstrap differences, then the median and the
# 2.5% / 97.5% percentiles (a 95% percentile interval).
hist(difference, breaks = 20)
quantile(difference, probs = c(0.025, 0.50, 0.975))
# One-way ANOVA of `charge` by `region`. The model is fitted once and reused
# for both the summary table and the Tukey HSD pairwise comparisons.
region_fit <- aov(charge ~ region, data = ins)
summary(region_fit)
TukeyHSD(region_fit)

# Box plot of `charge` per `region` with the raw points jittered on top.
ggplot(
  ins,
  aes(group = region, x = region, y = charge, fill = region, color = region)
) +
  geom_boxplot(colour = "black") +
  geom_jitter(aes(color = region)) +
  theme(legend.position = "none") +
  labs(x = "Khu vực", y = "Tiền bảo hiểm")

# Density-scaled histogram of `charge` with a kernel density curve on top.
# after_stat(density) replaces the deprecated `..density..` notation, and
# bins = 30 states the default explicitly, which silences the bin message.
ggplot(ins, aes(x = charge)) +
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 30, fill = "blue", col = "white"
  ) +
  geom_density(col = "purple") +
  labs(
    x = "Tiền bảo hiểm",
    y = "Số tiền bảo hiểm",
    title = "Phân bố của tiền bảo hiểm"
  )