Exercises-02

# Read the tab-separated P005 data set directly from its URL, treating the
# first line as column names, then preview the first rows.
dta <- read.table(
  "http://www1.aucegypt.edu/faculty/hadi/RABE5/Data5/P005.txt",
  header = TRUE,  # spelled out: no partial matching (`h`), no reassignable `T`
  sep = "\t"
)
head(dta)
##          City COL   PD URate     Pop Taxes Income RTWL
## 1     Atlanta 169  414  13.6 1790128  5128   2961    1
## 2      Austin 143  239  11.0  396891  4303   1711    1
## 3 Bakersfield 339   43  23.7  349874  4166   2122    0
## 4   Baltimore 173  951  21.0 2147850  5001   4654    0
## 5 Baton Rouge  99  255  16.0  411725  3965   1620    1
## 6      Boston 363 1257  24.4 3914071  4928   5634    0

Exercises-03

# Build the URL of the junior-schools data file and read it with a header row.
# NOTE(review): IDPW is not defined in this excerpt; it is pasted between
# "http://" and the host, so presumably it holds a "user:password@" prefix --
# confirm where it is set.
fL <- paste0(
  "http://", IDPW,
  "140.116.183.121/~sheu/dataM/Data/juniorSchools.txt"
)
dta <- read.table(fL, header = TRUE)  # TRUE, not the reassignable `T`
head(dta)
##   school class sex soc ravens pupil english math year
## 1     S1    C1   G   9     23    P1      72   23    0
## 2     S1    C1   G   9     23    P1      80   24    1
## 3     S1    C1   G   9     23    P1      39   23    2
## 4     S1    C1   B   2     15    P2       7   14    0
## 5     S1    C1   B   2     15    P2      17   11    1
## 6     S1    C1   B   2     22    P3      88   36    0

03-1

# Rename the `sex` column to `Gender`.
# Match on the column name instead of a hard-coded position so the rename
# still hits the right column if the column order ever changes.
names(dta)[names(dta) == "sex"] <- "Gender"
head(dta)
##   school class Gender soc ravens pupil english math year
## 1     S1    C1      G   9     23    P1      72   23    0
## 2     S1    C1      G   9     23    P1      80   24    1
## 3     S1    C1      G   9     23    P1      39   23    2
## 4     S1    C1      B   2     15    P2       7   14    0
## 5     S1    C1      B   2     15    P2      17   11    1
## 6     S1    C1      B   2     22    P3      88   36    0

03-2

# Recode the numeric social-class codes 1-9 into a labelled factor `N_soc`.
soc_labels <- c(
  "I", "II", "III_0man", "III_man", "IV", "V",
  "VI_Unemp_L", "VII_emp_NC", "VIII_Miss_Dad"
)
dta$N_soc <- factor(dta$soc, levels = 1:9, labels = soc_labels)
levels(dta$N_soc)
## [1] "I"             "II"            "III_0man"      "III_man"      
## [5] "IV"            "V"             "VI_Unemp_L"    "VII_emp_NC"   
## [9] "VIII_Miss_Dad"
# Plot the math score against the social-class factor; with a factor on x and
# a numeric y, plot() draws one box per factor level.
plot(
  x = dta$N_soc,
  y = dta$math,
  xlab = "Social Class",
  ylab = "Mathematics test score",
  cex.axis = 0.7  # shrink axis annotation so the long level labels fit
)

03-3

# Save the data frame (including the recoded columns added above) as CSV in
# the working directory. Row names are written too, as write.csv() does by
# default.
write.csv(x = dta, file = "DMHWW3-3.csv")

Exercises-04

library(readr)
# URL of the fixed-width aaup2 data file.
fL <- "https://ww2.amstat.org/publications/jse/datasets/aaup2.dat.txt"
# Show the column start/end offsets that readr guesses from blank columns.
fwf_empty(file = fL)[c("begin", "end")]
## $begin
##  [1]  0  6 40 45 49 53 57 61 66 70 74 79 83 87 92 95
## 
## $end
##  [1]  5 39 43 48 52 56 60 65 69 73 78 82 86 90 94 NA
# Column widths in file order; the vector names become the column names.
aaup_widths <- c(
  ID = 5L, University = 32L, var3 = 3L, var4 = 4L, var5 = 4L,
  var6 = 4L, var7 = 4L, var8 = 5L, var9 = 4L, var10 = 4L,
  var11 = 5L, var12 = 4L, var13 = 4L, var14 = 4L, var15 = 4L,
  var16 = 4L, var17 = 5L
)

# Read the fixed-width file, treating "*" and "NA" as missing values.
dta <- read_fwf(
  file = fL,
  col_positions = fwf_widths(
    unname(aaup_widths),
    col_names = names(aaup_widths)
  ),
  na = c("*", "NA")
)
head(dta)
## # A tibble: 6 x 17
##      ID University   var3  var4   var5  var6  var7  var8  var9 var10 var11
##   <int> <chr>        <chr> <chr> <int> <int> <int> <int> <int> <int> <int>
## 1  1061 Alaska Paci~ AK    IIB     454   382   362   382   567   485   471
## 2  1063 Univ.Alaska~ AK    I       686   560   432   508   914   753   572
## 3  1065 Univ.Alaska~ AK    IIA     533   494   329   415   716   663   442
## 4 11462 Univ.Alaska~ AK    IIA     612   507   414   498   825   681   557
## 5  1002 Alabama Agr~ AL    IIA     442   369   310   350   530   444   376
## 6  1004 University ~ AL    IIA     441   385   310   388   542   473   383
## # ... with 6 more variables: var12 <int>, var13 <int>, var14 <int>,
## #   var15 <int>, var16 <int>, var17 <int>

Exercises-05

pacman::p_load(ggplot2, data.table, dplyr, magrittr, tidyr )

# Download the Subject1 archive in binary mode and unpack it into the
# working directory.
# NOTE(review): IDPW is defined outside this excerpt -- confirm it is set
# before this runs.
fL <- paste0(
  "http://", IDPW,
  "140.116.183.121/~sheu/dataM/Data/Subject1.zip"
)
download.file(url = fL, destfile = "Subject1.zip", mode = "wb")
unzip(zipfile = "Subject1.zip")

# Paths of the four per-file data sets: Subject1/1w.dat ... Subject1/4w.dat.
# paste0() has no `sep` argument; the former `sep = "w.dat"` was silently
# pasted as just another element, so pass the suffix positionally instead.
fLs <- paste0("Subject1/", 1:4, "w.dat")

# Read every file (tab-separated, first line skipped, next line as header).
subject_tables <- lapply(
  fLs,
  function(path) read.table(path, header = TRUE, skip = 1, sep = "\t")
)

# Stack the tables into one and drop the column named `X`.
dta <- select(rbindlist(subject_tables), -X)

# Tidy the column names: first strip every "X" plus the six characters that
# follow it, then remove any remaining literal dots.
names(dta) <- gsub("[.]", "", gsub("X......", "", names(dta)))

# Reshape to long format: one row per (ID, Attribute) pair with its score.
new_dta <- dta %>%
  # Number the rows from the data itself instead of a hard-coded 1:1804, so
  # the step keeps working if the number of rows changes.
  mutate(ID = seq_len(n())) %>%
  # Columns 1:30 are gathered by position.
  # NOTE(review): presumably these are all measurement columns -- confirm.
  # gather() is kept (rather than pivot_longer()) so the row order of the
  # result is unchanged.
  gather(key = "Attribute", value = "score", 1:30) %>%
  mutate(Attribute_f = as.factor(Attribute))

# Box plot of score for each attribute. The axis labels are Chinese
# (roughly "brain location" and "milliseconds") and are kept verbatim.
ggplot(data = new_dta, mapping = aes(x = Attribute_f, y = score)) +
  geom_boxplot() +
  xlab("腦位置") +
  ylab("毫秒")