plot(women, type='n')
points(women[1,]) #顯示women第一列資料
##以lattice畫出Y軸為Weight x軸為height 點出所切割處理women第一列資料
lattice::xyplot(weight ~ height,
data=women,
subset=row.names(women)==1, type='p')
###以ggplot 點出women中Y軸為Weight x軸為height women第一列資料
library(ggplot2)
ggplot(data=women[1,], aes(height, weight))+ geom_point()
#資料匯入、檢視資料
dta <- read.table("/Users/User/Desktop/DM_R/hk0420/langMathDutch.txt", header=T , stringsAsFactor=F, fill=T )
dim(dta)
## [1] 2287 6
str(dta)
## 'data.frame': 2287 obs. of 6 variables:
## $ school: int 1 1 1 1 1 1 1 1 1 1 ...
## $ pupil : int 17001 17002 17003 17004 17005 17006 17007 17008 17009 17010 ...
## $ IQV : num 15 14.5 9.5 11 8 9.5 9.5 13 9.5 11 ...
## $ size : int 29 29 29 29 29 29 29 29 29 29 ...
## $ lang : int 46 45 33 46 20 30 30 57 36 36 ...
## $ arith : int 24 19 24 26 9 13 13 30 23 22 ...
head(dta)
## school pupil IQV size lang arith
## 1 1 17001 15.0 29 46 24
## 2 1 17002 14.5 29 45 19
## 3 1 17003 9.5 29 33 24
## 4 1 17004 11.0 29 46 26
## 5 1 17005 8.0 29 20 9
## 6 1 17006 9.5 29 30 13
##classified IQ
quantile(dta$IQV,seq(0, 1, 1/3))
## 0% 33.33333% 66.66667% 100%
## 4.0 11.0 12.5 18.0
classIQ <- with(dta, cut(IQV, ordered=T, breaks=c(0, 11, 12.5, 18), labels=c("Low", "Middle","High")))
newdta <- cbind(dta,classIQ)
head(newdta)
## school pupil IQV size lang arith classIQ
## 1 1 17001 15.0 29 46 24 High
## 2 1 17002 14.5 29 45 19 High
## 3 1 17003 9.5 29 33 24 Low
## 4 1 17004 11.0 29 46 26 Low
## 5 1 17005 8.0 29 20 9 Low
## 6 1 17006 9.5 29 30 13 Low
##classified size
quantile(dta$size,seq(0, 1, 1/3))
## 0% 33.33333% 66.66667% 100%
## 5 20 27 37
classsize <- with(dta, cut(size, ordered=T, breaks=c(0, 20, 27, 37), labels=c("Small", "Medium", "Large")))
ndta <- cbind(newdta,classsize)
head(ndta)
## school pupil IQV size lang arith classIQ classsize
## 1 1 17001 15.0 29 46 24 High Large
## 2 1 17002 14.5 29 45 19 High Large
## 3 1 17003 9.5 29 33 24 Low Large
## 4 1 17004 11.0 29 46 26 Low Large
## 5 1 17005 8.0 29 20 9 Low Large
## 6 1 17006 9.5 29 30 13 Low Large
##cbind classsize&classIQ (use , split)
IQsize <- with(ndta,paste(classsize,classIQ,sep = ","))
head(IQsize)
## [1] "Large,High" "Large,High" "Large,Low" "Large,Low" "Large,Low"
## [6] "Large,Low"
resultdata <- cbind(ndta,IQsize)
head(resultdata)
## school pupil IQV size lang arith classIQ classsize IQsize
## 1 1 17001 15.0 29 46 24 High Large Large,High
## 2 1 17002 14.5 29 45 19 High Large Large,High
## 3 1 17003 9.5 29 33 24 Low Large Large,Low
## 4 1 17004 11.0 29 46 26 Low Large Large,Low
## 5 1 17005 8.0 29 20 9 Low Large Large,Low
## 6 1 17006 9.5 29 30 13 Low Large Large,Low
#geom_point#點#透明度,method=“lm”#線性,se=T##陰影部分,facet_wrap(~ IQsize)依照IQsize排列
p1 <- ggplot(resultdata,
aes(lang, arith)) +
geom_point(size=rel(1),
alpha=1) +
stat_smooth(method="lm",
formula=y ~ x,
se=T,
col='blue') +
facet_wrap(~ IQsize) +
labs(x="Language score", y="Arithmetic score") +
theme_bw()
p1
#資料匯入、檢視資料
dta <- datasets::USPersonalExpenditure
dim(dta)
## [1] 5 5
str(dta)
## num [1:5, 1:5] 22.2 10.5 3.53 1.04 0.341 44.5 15.5 5.76 1.98 0.974 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:5] "Food and Tobacco" "Household Operation" "Medical and Health" "Personal Care" ...
## ..$ : chr [1:5] "1940" "1945" "1950" "1955" ...
head(dta)
## 1940 1945 1950 1955 1960
## Food and Tobacco 22.200 44.500 59.60 73.2 86.80
## Household Operation 10.500 15.500 29.00 36.5 46.20
## Medical and Health 3.530 5.760 9.71 14.0 21.10
## Personal Care 1.040 1.980 2.45 3.4 5.40
## Private Education 0.341 0.974 1.80 2.6 3.64
# transform to long form
library(reshape2)
mdta <- melt(dta)
head(mdta)
## Var1 Var2 value
## 1 Food and Tobacco 1940 22.200
## 2 Household Operation 1940 10.500
## 3 Medical and Health 1940 3.530
## 4 Personal Care 1940 1.040
## 5 Private Education 1940 0.341
## 6 Food and Tobacco 1945 44.500
#Def. colname
colnames(mdta) <- c("category", "year", "expenditure")
head(mdta)
## category year expenditure
## 1 Food and Tobacco 1940 22.200
## 2 Household Operation 1940 10.500
## 3 Medical and Health 1940 3.530
## 4 Personal Care 1940 1.040
## 5 Private Education 1940 0.341
## 6 Food and Tobacco 1945 44.500
##transform the dollar amounts to log base 10
logexp <- round(log10(mdta$expenditure),3)
head(logexp)
## [1] 1.346 1.021 0.548 0.017 -0.467 1.648
#參考李唐榮同學
resultdata <- cbind(mdta,logexp)
##draw ggplot
library(ggplot2)
p0 <- ggplot(resultdata) +
aes(x=logexp, y=category,color = logexp>mean(logexp)) +
geom_point(size=rel(3)) +
facet_grid(. ~ year) +
geom_segment(aes(xend=logexp, yend=category, x=mean(logexp))) +
geom_vline(aes(xintercept=mean(logexp)))+
theme_minimal()
p0
library(ggplot2)
p0 <- ggplot(mdta) +
aes(x=logexp, y=category) +
labs(x="log of Expenditures", y="Category")
p0
p1 <- p0 +
geom_point()
p1
p2 <- p1 +
facet_grid(.~ year)
p2
p3 <- p2 +
geom_segment(aes(xend = mean(logexp),
yend = category)) +
geom_vline(aes(xintercept=mean(logexp))) +
theme_minimal()
p3
#資料匯入、檢視資料
library(WWGbook)
dta <- na.omit(autism) #畫圖顯示有missing data,回頭移除NA
dim(dta)
## [1] 610 4
str(dta)
## 'data.frame': 610 obs. of 4 variables:
## $ age : int 2 3 5 9 13 2 3 5 9 13 ...
## $ vsae : int 6 7 18 25 27 17 18 12 18 24 ...
## $ sicdegp: int 3 3 3 3 3 3 3 3 3 3 ...
## $ childid: int 1 1 1 1 1 3 3 3 3 3 ...
## - attr(*, "na.action")= 'omit' Named int 507 553
## ..- attr(*, "names")= chr "507" "553"
head(dta)
## age vsae sicdegp childid
## 1 2 6 3 1
## 2 3 7 3 1
## 3 5 18 3 1
## 4 9 25 3 1
## 5 13 27 3 1
## 6 2 17 3 3
#Convert 123 to LMH.
SICLMH <- with(dta, cut(sicdegp, ordered=T, breaks=c(0, 1, 2, 3), labels=c("L", "M", "H")))
#cbind
newdta <- cbind(dta, SICLMH)
head(newdta)
## age vsae sicdegp childid SICLMH
## 1 2 6 3 1 H
## 2 3 7 3 1 H
## 3 5 18 3 1 H
## 4 9 25 3 1 H
## 5 13 27 3 1 H
## 6 2 17 3 3 H
#Def. centered age
centerage <- round((newdta$age-mean(newdta$age)),3)
ndta <- cbind(newdta, centerage)
head(ndta)
## age vsae sicdegp childid SICLMH centerage
## 1 2 6 3 1 H -3.77
## 2 3 7 3 1 H -2.77
## 3 5 18 3 1 H -0.77
## 4 9 25 3 1 H 3.23
## 5 13 27 3 1 H 7.23
## 6 2 17 3 3 H -3.77
library(ggplot2)
p0 <- ggplot(ndta) +
aes(x=centerage, y=vsae) +
geom_point() +
labs(x="Age (in years, centered)", y="VSAE score")
p0
p1 <- p0 +
geom_line(aes(group=childid), color="gray", alpha=.5)
p1
p2 <- p1 +
stat_smooth(data=ndta,
formula = y ~ x,
method = "lm",
se=TRUE)
p2
p3 <- p2 +
facet_wrap(.~ SICLMH)
p3
p4 <- p3 +
scale_x_continuous(breaks=seq(-2.5, 7.5, 2.5))+
scale_y_continuous(breaks=seq(0, 200, 50)) #調整xyscale
p4
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
nNAdta <- na.omit(ndta)
newdata<-nNAdta %>% mutate(age2 = age - 2) %>%
# group data
group_by(SICLMH, age2) %>%
# compute mean and standard error for vase
summarize(vsaemean = mean(vsae),
vsaese = sd(vsae) / sqrt(n()))
head(newdata)
## # A tibble: 6 x 4
## # Groups: SICLMH [2]
## SICLMH age2 vsaemean vsaese
## <ord> <dbl> <dbl> <dbl>
## 1 L 0 7 0.387
## 2 L 1 12.0 0.914
## 3 L 3 15.0 1.47
## 4 L 7 25.6 4.74
## 5 L 11 37.1 6.72
## 6 M 0 8.67 0.435
##draw ggplot
gg3 <- ggplot(data=newdata) +
aes(age2, vsaemean, group=SICLMH, shape=SICLMH) +
geom_errorbar(aes(ymin=vsaemean - vsaese,
ymax=vsaemean + vsaese),
width=.2, size=.3,
position=position_dodge(.5)) +
geom_line(position=position_dodge(.5),
aes(linetype=SICLMH))+
geom_point(position=position_dodge(.5),
size=rel(2))+
scale_shape_manual(values = c(1, 2, 19))+
labs(x="Age (in year -2)", y="VSAE score")+
theme(legend.position= c(0.07,0.85),)
gg3
#資料匯入、檢視資料
dta <-read.csv("/Users/User/Desktop/DM_R/hk0420/diabetes_mell.csv", header = T)
dim(dta)
## [1] 8706 9
str(dta)
## 'data.frame': 8706 obs. of 9 variables:
## $ SEQN : int 51624 51626 51627 51628 51629 51630 51632 51633 51634 51635 ...
## $ RIAGENDR: int 1 1 1 2 1 2 1 1 1 1 ...
## $ RIDRETH1: int 3 4 4 4 1 3 2 3 1 3 ...
## $ DIQ010 : int 2 2 2 1 2 2 2 2 2 1 ...
## $ BMXBMI : num 32.2 22 18.2 42.4 32.6 ...
## $ gender : Factor w/ 2 levels "Females","Males": 2 2 2 1 2 1 2 2 2 2 ...
## $ race : Factor w/ 3 levels "Black","Hispanic",..: 3 1 1 1 2 3 2 3 2 3 ...
## $ diabetes: Factor w/ 2 levels "No","Yes": 1 1 1 2 1 1 1 1 1 2 ...
## $ BMI : Factor w/ 2 levels "Normal weight",..: 2 1 1 2 2 2 1 2 1 2 ...
dta <- data.frame(with(dta[,c("race" ,"gender", "diabetes", "BMI")], xtabs(~ race + gender + diabetes + BMI)))
#用xtabs將 race + gender + diabetes + BMI合為data frame資料類型,算frequency
head(dta)
## race gender diabetes BMI Freq
## 1 Black Females No Normal weight 347
## 2 Hispanic Females No Normal weight 712
## 3 White Females No Normal weight 998
## 4 Black Males No Normal weight 429
## 5 Hispanic Males No Normal weight 706
## 6 White Males No Normal weight 873
library(ggplot2)
library(ggalluvial)
p0 <- ggplot(dta) +
aes(axis1=race, axis2=gender, axis3=diabetes,
y=Freq) +
scale_x_discrete(limits=c("race", "gender", "diabetes"), expand=c(.1, .05)) +
labs(x="", y="NO. individuals") +
geom_alluvium(aes(fill=BMI)) +
geom_stratum() + #變項框
geom_text(stat="stratum", infer.label=TRUE) +
scale_fill_manual(values=c("gray","orange")) +
theme_minimal()
p0
###尚未轉換方向
install.packages(“gapminder”)
library(ggplot2)
library(gapminder)
data(gapminder) #下載gapminder
str(gapminder) #查看gapminder資料
## Classes 'tbl_df', 'tbl' and 'data.frame': 1704 obs. of 6 variables:
## $ country : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ year : int 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
## $ lifeExp : num 28.8 30.3 32 34 36.1 ...
## $ pop : int 8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
## $ gdpPercap: num 779 821 853 836 740 ...
gap <- gapminder #指定資料為gap
ggplot(data = gap, aes(x = lifeExp)) #使用ggplot畫圖,資料為gap,並以lifeEXP資料為x
ggplot(data = gap, aes(x = lifeExp)) +
geom_histogram() #添上直方圖
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(data = gap, aes(x = lifeExp)) +
geom_histogram(fill = "blue", color = "black", bins = 10) +
ggtitle("Life expectancy for the gap dataset") +
xlab("Life expectancy (years)") +
ylab("Frequency") +
theme_classic()
#每長條寬度為10
ggplot(data = gap, aes(x = continent, y = lifeExp, fill = continent)) +
geom_boxplot() +
ggtitle("Boxplots for lifeExp by continent") +
xlab("Continent") +
ylab("Life expectancy (years)") +
theme_minimal() + #繪製以continebt為x lifeExp為y的盒狀圖增加title及xy標題
guides(fill = FALSE) #圖例不見
#以x為lifeExp y為 gdpPercap 將contenent以不同顏色及型態呈現
ggplot(data = gap, aes(x = lifeExp, y = gdpPercap, color = continent, shape = continent)) +
geom_point(size = 5, alpha = 0.5) + #加上scatter plot (點的大小及透明度)
theme_classic() + #套用主題
ggtitle("Scatterplot of life expectancy by gdpPercap") +
xlab("Life expectancy (years)") +
ylab("gdpPercap (USD)") +
theme(legend.position = "top",
plot.title = element_text(hjust = 0.5, size = 20),
legend.title = element_text(size = 10),
legend.text = element_text(size = 5),
axis.text.x = element_text(angle = 45, hjust = 1))