# Exercises-02 ----
# Read the tab-separated P005 file straight from the web (first row holds the
# column names) and preview the first rows.
# `header = TRUE` is spelled out in full: the abbreviated `h = T` depends on
# partial argument matching and on `T`, which can be reassigned.
dta <- read.table(
  "http://www1.aucegypt.edu/faculty/hadi/RABE5/Data5/P005.txt",
  header = TRUE,
  sep = "\t"
)
head(dta)
## City COL PD URate Pop Taxes Income RTWL
## 1 Atlanta 169 414 13.6 1790128 5128 2961 1
## 2 Austin 143 239 11.0 396891 4303 1711 1
## 3 Bakersfield 339 43 23.7 349874 4166 2122 0
## 4 Baltimore 173 951 21.0 2147850 5001 4654 0
## 5 Baton Rouge 99 255 16.0 411725 3965 1620 1
## 6 Boston 363 1257 24.4 3914071 4928 5634 0
# Exercises-03 ----
# Build the URL of the junior-schools data file, read it (first row holds the
# column names) and preview the first rows.
# NOTE(review): `IDPW` is not defined in this part of the file. It is pasted
# between "http://" and the host, so it presumably holds a "user:password@"
# prefix -- confirm where it is set before running.
fL <- paste0(
  "http://",
  IDPW,
  "140.116.183.121/~sheu/dataM/Data/juniorSchools.txt"
)
# `TRUE` instead of `T`: `T` is an ordinary variable that can be overwritten.
dta <- read.table(fL, header = TRUE)
head(dta)
## school class sex soc ravens pupil english math year
## 1 S1 C1 G 9 23 P1 72 23 0
## 2 S1 C1 G 9 23 P1 80 24 1
## 3 S1 C1 G 9 23 P1 39 23 2
## 4 S1 C1 B 2 15 P2 7 14 0
## 5 S1 C1 B 2 15 P2 17 11 1
## 6 S1 C1 B 2 22 P3 88 36 0
# 03-1 ----
# Give the third column the name "Gender", then show the first rows to
# check the new header.
colnames(dta)[3L] <- "Gender"
head(x = dta)
## school class Gender soc ravens pupil english math year
## 1 S1 C1 G 9 23 P1 72 23 0
## 2 S1 C1 G 9 23 P1 80 24 1
## 3 S1 C1 G 9 23 P1 39 23 2
## 4 S1 C1 B 2 15 P2 7 14 0
## 5 S1 C1 B 2 15 P2 17 11 1
## 6 S1 C1 B 2 22 P3 88 36 0
# 03-2 ----
# Recode the social-class codes 1-9 in `soc` as a labelled factor stored in
# the new column `N_soc`, then list the resulting levels.
dta[["N_soc"]] <- factor(
  dta[["soc"]],
  levels = 1:9,
  labels = c(
    "I", "II", "III_0man", "III_man", "IV",
    "V", "VI_Unemp_L", "VII_emp_NC", "VIII_Miss_Dad"
  )
)
levels(dta[["N_soc"]])
## [1] "I" "II" "III_0man" "III_man"
## [5] "IV" "V" "VI_Unemp_L" "VII_emp_NC"
## [9] "VIII_Miss_Dad"
# Plot the mathematics score against the labelled social-class factor.
# With a factor on the x axis, plot() draws one box per class.
# cex.axis shrinks the axis annotation to 70% of the default size.
plot(
  x = dta[["N_soc"]],
  y = dta[["math"]],
  xlab = "Social Class",
  ylab = "Mathematics test score",
  cex.axis = 0.7
)

# 03-3 ----
# Save the modified data frame as CSV in the working directory.
# `row.names = FALSE` keeps the automatic row numbers out of the file, so
# reading it back does not produce an extra unnamed index column.
write.csv(dta, "DMHWW3-3.csv", row.names = FALSE)
# Exercises-04 ----
library(readr)
# URL of the fixed-width aaup2 data file.
fL <- "https://ww2.amstat.org/publications/jse/datasets/aaup2.dat.txt"
# Let readr guess the column boundaries from the empty columns and show only
# the first two elements of the result (the begin and end positions).
fwf_empty(file = fL)[c(1L, 2L)]
## $begin
## [1] 0 6 40 45 49 53 57 61 66 70 74 79 83 87 92 95
##
## $end
## [1] 5 39 43 48 52 56 60 65 69 73 78 82 86 90 94 NA
# Read the fixed-width file using explicit column widths (one name = width
# pair per column); "*" and "NA" are both treated as missing values.
dta <- read_fwf(
  file = fL,
  col_positions = fwf_cols(
    ID = 5,
    University = 32,
    var3 = 3,
    var4 = 4,
    var5 = 4,
    var6 = 4,
    var7 = 4,
    var8 = 5,
    var9 = 4,
    var10 = 4,
    var11 = 5,
    var12 = 4,
    var13 = 4,
    var14 = 4,
    var15 = 4,
    var16 = 4,
    var17 = 5
  ),
  na = c("*", "NA")
)
# Preview the first rows of the imported table.
head(dta)
## # A tibble: 6 x 17
## ID University var3 var4 var5 var6 var7 var8 var9 var10 var11
## <int> <chr> <chr> <chr> <int> <int> <int> <int> <int> <int> <int>
## 1 1061 Alaska Paci~ AK IIB 454 382 362 382 567 485 471
## 2 1063 Univ.Alaska~ AK I 686 560 432 508 914 753 572
## 3 1065 Univ.Alaska~ AK IIA 533 494 329 415 716 663 442
## 4 11462 Univ.Alaska~ AK IIA 612 507 414 498 825 681 557
## 5 1002 Alabama Agr~ AL IIA 442 369 310 350 530 444 376
## 6 1004 University ~ AL IIA 441 385 310 388 542 473 383
## # ... with 6 more variables: var12 <int>, var13 <int>, var14 <int>,
## # var15 <int>, var16 <int>, var17 <int>
# Exercises-05 ----
pacman::p_load(ggplot2, data.table, dplyr, magrittr, tidyr )
# Assemble the archive URL, fetch it in binary mode ("wb") so the zip file is
# written byte-for-byte, and unpack it into the working directory.
# NOTE(review): `IDPW` is not defined in this part of the file; it presumably
# holds a credentials prefix for the host -- confirm before running.
fL <- paste0(
  "http://",
  IDPW,
  "140.116.183.121/~sheu/dataM/Data/Subject1.zip"
)
download.file(url = fL, destfile = "Subject1.zip", mode = "wb")
unzip(zipfile = "Subject1.zip")
# Paths of the four data files inside the unpacked "Subject1" folder:
# "Subject1/1w.dat" ... "Subject1/4w.dat".
# paste0() has no `sep` argument; the earlier `sep = "w.dat"` only worked
# because it was silently treated as one more piece to concatenate. The
# suffix is now passed as an ordinary piece.
fLs <- file.path("Subject1", paste0(1:4, "w.dat"))
# Read every file in `fLs` (tab-separated, header row, first line skipped),
# stack the tables into one, and drop the column named `X`.
# NOTE(review): `X` is presumably an empty column created by a trailing tab
# in the files -- confirm against the raw data.
dta <- fLs %>%
  lapply(read.table, header = TRUE, skip = 1, sep = "\t") %>%
  rbindlist() %>%
  select(-X)
# Tidy the column names in two passes: first remove every "X" followed by any
# six characters (the dots in the pattern are regex wildcards), then remove
# all remaining literal dots.
names(dta) <- names(dta) %>%
  gsub("X......", "", .) %>%
  gsub("[.]", "", .)
# Reshape the wide table to long format: one row per (ID, Attribute) pair with
# the measured value in `score`, plus a factor copy of the attribute name.
# The row identifier is generated from the actual number of rows instead of
# the hard-coded 1:1804, which fails as soon as the row count differs.
# NOTE(review): columns 1:30 are gathered by position; this assumes the first
# 30 columns are the measurement columns -- confirm against the data.
# gather() is kept (rather than pivot_longer()) so the row order of the
# result stays the same as before.
new_dta <- dta %>%
  mutate(ID = seq_len(n())) %>%
  gather(key = "Attribute", value = "score", 1:30) %>%
  mutate(Attribute_f = as.factor(Attribute))
# Box plot of `score` for each attribute.
# Axis labels are in Chinese: x = "brain location", y = "milliseconds".
ggplot(data = new_dta, mapping = aes(x = Attribute_f, y = score)) +
  geom_boxplot() +
  labs(x = "腦位置", y = "毫秒")
