Hw1

Chatterjee and Hadi (Regression by Examples, 2006) provided a link to the right to work data set on their web page. Display the relationship between Income and Taxes.

dta1<-read.csv("P005.txt", header = T, sep = '\t')[,c("City","Taxes","Income")]
str(dta1)

## 'data.frame':    38 obs. of  3 variables:
##  $ City  : chr  "Atlanta" "Austin" "Bakersfield" "Baltimore" ...
##  $ Taxes : int  5128 4303 4166 5001 3965 4928 4471 4813 4839 5408 ...
##  $ Income: int  2961 1711 2122 4654 1620 5634 7213 5535 7224 6113 ...

summary(dta1)

##      City               Taxes          Income    
##  Length:38          Min.   :3965   Min.   : 782  
##  Class :character   1st Qu.:4620   1st Qu.:3110  
##  Mode  :character   Median :4858   Median :4865  
##                     Mean   :4903   Mean   :4709  
##                     3rd Qu.:5166   3rd Qu.:6082  
##                     Max.   :6404   Max.   :8392

# plot
with(dta1, plot(Income,Taxes, xlab="Income", ylab="Taxes", type='n', bty='n')) #bty外框限
grid()
abline(lm(Taxes~Income, data= dta1))
with(dta1, text(Income, Taxes,
                labels=City, cex=.5))

#correlation
with(dta1,cor(Income ,Taxes))

## [1] 0.0560718

HW2

The pdf version of the paper by Hsu, et al (2020). Health inequality: a longitudinal study on geographic variations in lung cancer incidence and mortality in Taiwan is available on-line. Extract the values in Table 1 of the articlc to replicate Figure 2 in the paper.

pacman::p_load(tabulizer)
fL <- "C:/Users/user/Desktop/data management/1025 R input and output/Table1-s12889-020-09044-2.pdf"
# extract table from pdf
tbl1 <- extract_tables(fL, encoding="UTF-8")
# 僅保留我們要的表[[1]]
dta2<- tbl1[[1]] |> as.data.frame()
# to see the full column name
namelist<-paste(unlist(dta2[c(1:3),]), collapse =" ")
# rename column names
colnames(dta2) <- c("Areas", "Incidence","IncidenceGR", "Mortality", "MortalityGR")
# remove some useless row
dta2 <-dta2[-c(1:3,7,12,16,18,25,27,29),]
dplyr::glimpse(dta2)

## Rows: 19
## Columns: 5
## $ Areas       <chr> "Taipei City", "Keelung City", "New Taipei", "Yilan County~
## $ Incidence   <chr> "33.36", "35.33", "39.29", "39.63", "34.83", "34.64", "30.~
## $ IncidenceGR <chr> "10.83", "<U+2212>11.08", "12.68", "2.60", "12.22", "11.21", "23.~
## $ Mortality   <chr> "20.3", "27.6", "25.2", "30.2", "24.8", "21", "20.7", "23"~
## $ MortalityGR <chr> "<U+2212>14.31", "<U+2212>25.63", "<U+2212>9.60", "<U+2212>9.16", "<U+2212>10.21", "17.38", "~

# add back the remove text
dta2[3,1] <- paste(dta2[3,1], "City", sep=" ")
dta2[c(7, 10, 11, 17, 18, 19),1] <- paste(dta2[c(7, 10, 11, 17, 18, 19), 1], "County", sep=" ")

# 使用gsub替換"-"，成為R看得懂的 minus sign
dta2v<-lapply(dta2[,2:5], function(x) as.numeric(gsub("−", "\\-", x)))|> as.data.frame()
dta2v$Areas<-dta2$Areas
dta2v<- dta2v[,c(5,1:4)]
write.csv(dta2v, file="dta2v.csv", row.names = FALSE)
dplyr::glimpse(dta2v)|>knitr::kable()

如果沒有透過gsub轉換，在as.numeric的時候就會有error.

雖然透過gsub轉換後，已可以轉換為數字型態，但在KNIT的時候依然會有錯誤

只好轉存成csv再重新讀入

dta2v <- read.csv("dta2v.csv")

Figure 1-Incidence and Mortality

# plot
with(dta2v, plot(Incidence,Mortality, 
                 xlab="Incidence (per 100,000)", 
                 ylab="Mortality (per 100,000)",
                 pch=20, bty="n", col="lightblue",
                 main="Age-standardized incidence and mortality rates (2014)", axes = FALSE))
axis(1, at = seq(25,45, by=1))
axis(2, at = seq(15,35, by=2))
grid()

# 畫垂直與水平的直平均線 
abline(h=mean(dta2v$Mortality), v=mean(dta2v$Incidence), col="gray")

# change the point to city name
with(dta2v, text(Incidence, Mortality, 
                labels = Areas, 
                adj = c(1,0), 
                cex=0.6))

pch = modify the symbol of the points in the plot cex = change the size bty = changing the type of box of the R graphs adj = change the plot title position with a value between 0 (left) and 1 (right)

useful plot function introduce Plot in R

Figure 2-growth rate

with(dta2v, plot(IncidenceGR, MortalityGR, 
                xlab="Incidence growth rate (%)",
                ylab="Mortality growth rate (%)",
                pch=20, bty="n", col="lightblue",
                main="Age-standardized incidence and mortality growth rates (2005–2014)", axes = FALSE))
axis(1, at = seq(-12,28, by=5)) #row
axis(2, at = seq(-30,20, by=5)) #column
grid()

# 畫垂直與水平的直平均線
abline(h=mean(dta2v$MortalityGR), v=mean(dta2v$IncidenceGR), col="gray")

# change the point to city name
with(dta2v, text(IncidenceGR, MortalityGR, 
                labels = Areas, 
                adj = c(1,0), 
                cex=0.6))

HW3

Reproducible data and code are available for Leongomez, J.D. et al.(2020). Self-reported Health is Related to Body Height and Waist Circumference in Rural Indigenous and Urbanised Latin-American Populations. Scientific Report, 10, 4391. Summarize the mean of height, waist, and weight in each study sample by gender.

# load data
dta3 <- read.csv("Full_data.csv")
dplyr::glimpse(dta3)

## Rows: 477
## Columns: 15
## $ ID          <chr> "F001", "F003", "F004", "F005", "F009", "F010", "F011", "F~
## $ Country     <chr> "Colombia", "Colombia", "Colombia", "Colombia", "Colombia"~
## $ Population  <chr> "Urban", "Urban", "Urban", "Urban", "Urban", "Urban", "Urb~
## $ Sex         <chr> "Female", "Female", "Female", "Female", "Female", "Female"~
## $ Age         <int> 23, 24, 19, 19, 18, 18, 21, 22, 19, 18, 24, 18, 19, 21, 19~
## $ Waist       <dbl> 67.33333, 97.50000, 81.13333, 70.30000, 66.73333, 82.53333~
## $ Hip         <dbl> 90.43333, 107.50000, 106.06667, 96.06667, 91.46667, 102.06~
## $ Height      <dbl> 157.8000, 164.6000, 164.7667, 161.0667, 162.1667, 160.9667~
## $ Weight      <dbl> 48.80000, 71.43333, 73.90000, 56.16667, 54.80000, 72.73333~
## $ Fat         <dbl> 29.96667, 42.56667, 43.50000, 34.23333, 32.36667, 45.96667~
## $ VisceralFat <dbl> 3.000000, 5.000000, 5.000000, 3.666667, 3.000000, 5.000000~
## $ BMI         <dbl> 19.70000, 26.23333, 27.10000, 21.66667, 20.90000, 28.10000~
## $ Muscle      <dbl> 24.66667, 26.36667, 23.73333, 25.66667, 26.23333, 22.23333~
## $ Health      <dbl> 75.00, 50.00, 43.75, 50.00, 68.75, 62.50, 75.00, 87.50, 93~
## $ Sample      <chr> "Colombia-Urban", "Colombia-Urban", "Colombia-Urban", "Col~

str(dta3)

## 'data.frame':    477 obs. of  15 variables:
##  $ ID         : chr  "F001" "F003" "F004" "F005" ...
##  $ Country    : chr  "Colombia" "Colombia" "Colombia" "Colombia" ...
##  $ Population : chr  "Urban" "Urban" "Urban" "Urban" ...
##  $ Sex        : chr  "Female" "Female" "Female" "Female" ...
##  $ Age        : int  23 24 19 19 18 18 21 22 19 18 ...
##  $ Waist      : num  67.3 97.5 81.1 70.3 66.7 ...
##  $ Hip        : num  90.4 107.5 106.1 96.1 91.5 ...
##  $ Height     : num  158 165 165 161 162 ...
##  $ Weight     : num  48.8 71.4 73.9 56.2 54.8 ...
##  $ Fat        : num  30 42.6 43.5 34.2 32.4 ...
##  $ VisceralFat: num  3 5 5 3.67 3 ...
##  $ BMI        : num  19.7 26.2 27.1 21.7 20.9 ...
##  $ Muscle     : num  24.7 26.4 23.7 25.7 26.2 ...
##  $ Health     : num  75 50 43.8 50 68.8 ...
##  $ Sample     : chr  "Colombia-Urban" "Colombia-Urban" "Colombia-Urban" "Colombia-Urban" ...

approach 1-aggregate

aggregate(cbind(Height,Weight,Waist)~Sample+Sex, data=dta3, FUN=mean)|>knitr::kable()

Sample	Sex	Height	Weight	Waist
Colombia-Urban	Female	158.8906	57.84819	71.79928
Mexico-Indigenous	Female	146.1667	54.24583	87.00417
Mexico-Urban	Female	157.6667	65.48000	87.79000
Colombia-Urban	Male	172.2061	68.17627	78.16925
Mexico-Indigenous	Male	159.9077	65.91282	88.61026
Mexico-Urban	Male	172.0933	71.03000	84.51000

approach 2- Leongomez, J.D. et al.(2020) Supplementary Material file p10

pacman::p_load(psych, radiant.data)
descVarNames <- c("Waist circumference (cm)", "Height (cm)","Weight (kg)")

# Subset of women participants
datF <- subset(dta3, dta3$Sex == "Female")
# Descriptives by Country and Population
descF <- describeBy(datF[,c('Height','Weight','Waist')], datF$Sample, mat = TRUE, digits = 1)
# change rownames to column named "Measured characteristic"
descF <- descF[, c(2, 4:7, 10:11)] %>% rownames_to_column("Measured characteristic")
# specify measured anthropometric characteristic
varnames <- descVarNames
descF$`Measured characteristic` <- rep(varnames, each = 3)

# Subset of men participants
datM <- subset(dta3, dta3$Sex == "Male")
# Descriptives by Country and Population
descM <- describeBy(datM[,c('Height','Weight','Waist')], datM$Sample, mat = TRUE, digits = 1)
# change rownames to column named "Measured characteristic"
descM <- descM[, c(2, 4:7, 10:11)] %>% rownames_to_column("Measured characteristic")
# specify measured anthropometric characteristic
varnames <- descVarNames
descM$`Measured characteristic` <- rep(varnames, each = 3)

# Merge Tables S2 and S3 by measured characteristic and sample
tab <- merge(descF, descM, by = c("Measured characteristic", "group1"), all = TRUE)
tab

##    Measured characteristic            group1 n.x mean.x sd.x median.x min.x
## 1              Height (cm)    Colombia-Urban 184   57.8 10.2     55.8  39.3
## 2              Height (cm) Mexico-Indigenous  24   54.2  7.7     54.4  43.7
## 3              Height (cm)      Mexico-Urban  30   65.5 12.5     65.0  41.8
## 4 Waist circumference (cm)    Colombia-Urban 184  158.9  6.0    159.1 141.9
## 5 Waist circumference (cm) Mexico-Indigenous  24  146.2  5.5    144.0 136.0
## 6 Waist circumference (cm)      Mexico-Urban  30  157.7  5.9    158.0 145.0
## 7              Weight (kg)    Colombia-Urban 184   71.8  8.4     70.3  55.3
## 8              Weight (kg) Mexico-Indigenous  24   87.0  8.2     86.7  73.0
## 9              Weight (kg)      Mexico-Urban  30   87.8 10.9     87.4  66.5
##   max.x n.y mean.y sd.y median.y min.y max.y
## 1  93.9 170   68.2 10.5     67.4  46.5 106.6
## 2  67.2  39   65.9 14.5     61.9  43.4 101.7
## 3 100.3  30   71.0 11.5     69.0  48.7 114.1
## 4 178.9 170  172.2  6.4    171.7 155.5 188.1
## 5 157.0  39  159.9  6.8    161.0 143.0 173.5
## 6 168.0  30  172.1  6.8    171.8 159.9 184.1
## 7 103.9 170   78.2  7.9     77.6  62.1 103.6
## 8 106.0  39   88.6 11.9     86.4  70.5 118.0
## 9 113.9  30   84.5  8.4     84.3  69.0 106.6

pacman::p_load(kableExtra) 
descColNames <- c("Measured characteristic","Sample","$n$","Mean","$SD$","Median","Min","Max")

# Final fromated table
Tab1 <- kable(
tab,
digits = 2,
booktabs = TRUE,
align = c("l", "l", rep("c", 12)),
caption = "\\textbf{Table 1.}
Descriptive statistics of measured variables for all participants",
col.names = c("Measured characteristic", "Sample",
rep(descColNames[3:8], 2)),
escape = FALSE) %>%
add_header_above(c(" " = 2,
"Women" = 6,
"Men" = 6)) %>%
kable_styling(latex_options = c("scale_down", "HOLD_position")) %>%
collapse_rows(columns = 1, valign = "middle")
Tab1

Descriptive statistics of measured variables for all participants
		Women						Men
Measured characteristic	Sample	\(n\)	Mean	\(SD\)	Median	Min	Max	\(n\)	Mean	\(SD\)	Median	Min	Max
Height (cm)	Colombia-Urban	184	57.8	10.2	55.8	39.3	93.9	170	68.2	10.5	67.4	46.5	106.6
Height (cm)	Mexico-Indigenous	24	54.2	7.7	54.4	43.7	67.2	39	65.9	14.5	61.9	43.4	101.7
Height (cm)	Mexico-Urban	30	65.5	12.5	65.0	41.8	100.3	30	71.0	11.5	69.0	48.7	114.1
Waist circumference (cm)	Colombia-Urban	184	158.9	6.0	159.1	141.9	178.9	170	172.2	6.4	171.7	155.5	188.1
Waist circumference (cm)	Mexico-Indigenous	24	146.2	5.5	144.0	136.0	157.0	39	159.9	6.8	161.0	143.0	173.5
Waist circumference (cm)	Mexico-Urban	30	157.7	5.9	158.0	145.0	168.0	30	172.1	6.8	171.8	159.9	184.1
Weight (kg)	Colombia-Urban	184	71.8	8.4	70.3	55.3	103.9	170	78.2	7.9	77.6	62.1	103.6
Weight (kg)	Mexico-Indigenous	24	87.0	8.2	86.7	73.0	106.0	39	88.6	11.9	86.4	70.5	118.0
Weight (kg)	Mexico-Urban	30	87.8	10.9	87.4	66.5	113.9	30	84.5	8.4	84.3	69.0	106.6

collapse_rows: Collapse repeated rows to multirow cell

HW4

The following zip file contains one subject’s laser-event potentials (LEP) data for 4 separate conditions (different level of stimulus intensity), each in a plain text file (1w.dat, 2w.dat, 3w.dat and 4w.dat). The rows are time points from -100 to 800 ms sampled at 2 ms per record. The columns are channel IDs. Input all the files into R for graphical exploration.

透過code載入與解壓縮zip–失敗

# 創造一個叫tmp_data的資料夾
dir.create(file.path(getwd(), "tmp_data"), showWarnings=FALSE)

tmp <- "C:/Users/user/Desktop/temptry/file.zip"
fL <- "https://drive.google.com/file/d/1xigKQNt5AmHOHTpqzmdfgEdK4fLUV02f/view?usp=sharing/Subject1.zip"
download.file(fL, tmp, mode="wb")

unzip(tmp, exdir = "C:/Users/user/Desktop/temptry/zipfile.zip")

得到以下訊息，尚未找到方法處理 Warning in unzip(tmp, exdir = “C:/Users/user/Desktop/temptry/zipfile.zip”) : error 1 in extracting from zip file

嘗試直接把載下來的檔案手動解壓縮，卻得到”這個檔壓所檔不是未知的格式就是損壞”

手動解壓縮

#Get multiple files simutaneously
list.files("./Subject1/")

## [1] "1w.dat" "2w.dat" "3w.dat" "4w.dat"

# Collect the file names.
fls <- list.files(path = "./Subject1")
fls

## [1] "1w.dat" "2w.dat" "3w.dat" "4w.dat"

# Give files the full path to their location.
fL <- paste0("./Subject1/", fls)
fL

## [1] "./Subject1/1w.dat" "./Subject1/2w.dat" "./Subject1/3w.dat"
## [4] "./Subject1/4w.dat"

# Input multiple files
ff <- lapply(fL, read.table,skip = 1, sep = "\t",  header=T)
str(ff)

## List of 4
##  $ :'data.frame':    451 obs. of  31 variables:
##   ..$ X.......F7.: num [1:451] -0.9733 -0.7079 -0.3732 -0.0225 0.3523 ...
##   ..$ X......FT7.: num [1:451] -1.007 -1.022 -0.981 -0.878 -0.71 ...
##   ..$ X.......T7.: num [1:451] -0.1834 -0.1705 -0.1544 -0.1239 -0.0611 ...
##   ..$ X......TP7.: num [1:451] -1.05 -1.14 -1.18 -1.09 -0.88 ...
##   ..$ X.......P7.: num [1:451] -0.705 -0.791 -0.821 -0.817 -0.697 ...
##   ..$ X......Fp1.: num [1:451] -1.15 -1.084 -1.007 -0.922 -0.817 ...
##   ..$ X.......F3.: num [1:451] -1.042 -1.017 -0.956 -0.864 -0.748 ...
##   ..$ X......FC3.: num [1:451] -0.521 -0.52 -0.499 -0.457 -0.36 ...
##   ..$ X.......C3.: num [1:451] -0.248 -0.264 -0.249 -0.204 -0.14 ...
##   ..$ X......CP3.: num [1:451] -0.0064 0.0064 0.0595 0.1351 0.2059 ...
##   ..$ X.......P3.: num [1:451] 0.235 0.244 0.304 0.385 0.492 ...
##   ..$ X.......O1.: num [1:451] 0.623 0.484 0.389 0.31 0.269 ...
##   ..$ X.......Fz.: num [1:451] -0.362 -0.301 -0.236 -0.177 -0.138 ...
##   ..$ X......FCz.: num [1:451] 0.203 0.224 0.249 0.285 0.293 ...
##   ..$ X.......Cz.: num [1:451] 0.322 0.275 0.22 0.195 0.167 ...
##   ..$ X......CPz.: num [1:451] 0.529 0.462 0.441 0.433 0.404 ...
##   ..$ X.......Pz.: num [1:451] 0.0692 -0.0322 -0.0949 -0.1078 -0.1046 ...
##   ..$ X.......Oz.: num [1:451] 0.922 0.821 0.758 0.724 0.726 ...
##   ..$ X......Fp2.: num [1:451] -1.234 -1.121 -0.931 -0.706 -0.516 ...
##   ..$ X.......F4.: num [1:451] -0.689 -0.643 -0.579 -0.532 -0.52 ...
##   ..$ X......FC4.: num [1:451] 0.756 0.753 0.75 0.735 0.677 ...
##   ..$ X.......C4.: num [1:451] 0.793 0.714 0.656 0.602 0.536 ...
##   ..$ X......CP4.: num [1:451] 0.571 0.631 0.66 0.648 0.59 ...
##   ..$ X.......P4.: num [1:451] 0.956 0.933 0.917 0.896 0.821 ...
##   ..$ X.......O2.: num [1:451] 0.909 0.763 0.645 0.537 0.47 ...
##   ..$ X.......F8.: num [1:451] 0.0611 0.1673 0.2461 0.2687 0.2059 ...
##   ..$ X......FT8.: num [1:451] -0.3459 -0.2864 -0.1657 -0.0322 0.0676 ...
##   ..$ X.......T8.: num [1:451] 0.507 0.809 1.097 1.35 1.511 ...
##   ..$ X......TP8.: num [1:451] 1.44 1.82 2.16 2.41 2.53 ...
##   ..$ X.......P8.: num [1:451] 1.45 1.47 1.47 1.42 1.34 ...
##   ..$ X          : logi [1:451] NA NA NA NA NA NA ...
##  $ :'data.frame':    451 obs. of  31 variables:
##   ..$ X.......F7.: num [1:451] -1.5 -1.47 -1.4 -1.29 -1.23 ...
##   ..$ X......FT7.: num [1:451] -2.39 -2.37 -2.27 -2.13 -2.05 ...
##   ..$ X.......T7.: num [1:451] -2.14 -2.07 -1.98 -1.91 -1.89 ...
##   ..$ X......TP7.: num [1:451] -0.822 -0.953 -1.161 -1.413 -1.679 ...
##   ..$ X.......P7.: num [1:451] 0.0446 -0.1981 -0.464 -0.7083 -0.9065 ...
##   ..$ X......Fp1.: num [1:451] -2.21 -2.18 -2.13 -2.05 -1.99 ...
##   ..$ X.......F3.: num [1:451] -1.89 -1.8 -1.72 -1.66 -1.62 ...
##   ..$ X......FC3.: num [1:451] -1.74 -1.67 -1.62 -1.57 -1.53 ...
##   ..$ X.......C3.: num [1:451] -1.07 -1.17 -1.29 -1.43 -1.59 ...
##   ..$ X......CP3.: num [1:451] -0.821 -1.042 -1.331 -1.621 -1.943 ...
##   ..$ X.......P3.: num [1:451] -0.385 -0.698 -1.058 -1.431 -1.821 ...
##   ..$ X.......O1.: num [1:451] 0.3533 0.0297 -0.3715 -0.8206 -1.2746 ...
##   ..$ X.......Fz.: num [1:451] -1.37 -1.26 -1.18 -1.13 -1.15 ...
##   ..$ X......FCz.: num [1:451] -1.51 -1.45 -1.38 -1.34 -1.35 ...
##   ..$ X.......Cz.: num [1:451] -0.368 -0.401 -0.467 -0.542 -0.659 ...
##   ..$ X......CPz.: num [1:451] -0.248 -0.442 -0.613 -0.789 -1.01 ...
##   ..$ X.......Pz.: num [1:451] 0.2526 -0.0231 -0.355 -0.7397 -1.1426 ...
##   ..$ X.......Oz.: num [1:451] 0.893 0.65 0.297 -0.117 -0.517 ...
##   ..$ X......Fp2.: num [1:451] -1.63 -1.53 -1.46 -1.45 -1.49 ...
##   ..$ X.......F4.: num [1:451] -1.265 -1.053 -0.89 -0.797 -0.847 ...
##   ..$ X......FC4.: num [1:451] -0.708 -0.604 -0.513 -0.451 -0.497 ...
##   ..$ X.......C4.: num [1:451] -0.1585 -0.1337 -0.0875 -0.0627 -0.0842 ...
##   ..$ X......CP4.: num [1:451] 0.215 0.258 0.31 0.363 0.335 ...
##   ..$ X.......P4.: num [1:451] 0.2279 0.0809 -0.0677 -0.2295 -0.4408 ...
##   ..$ X.......O2.: num [1:451] 1.344 1.258 1.077 0.809 0.485 ...
##   ..$ X.......F8.: num [1:451] -1.006 -0.969 -0.888 -0.819 -0.816 ...
##   ..$ X......FT8.: num [1:451] 0.3517 0.3286 0.2708 0.1981 0.0578 ...
##   ..$ X.......T8.: num [1:451] -0.158 -0.216 -0.352 -0.566 -0.812 ...
##   ..$ X......TP8.: num [1:451] 0.403 0.459 0.58 0.697 0.731 ...
##   ..$ X.......P8.: num [1:451] 0.492 0.4194 0.2823 0.1139 -0.0925 ...
##   ..$ X          : logi [1:451] NA NA NA NA NA NA ...
##  $ :'data.frame':    451 obs. of  31 variables:
##   ..$ X.......F7.: num [1:451] 1.63 1.89 2.22 2.53 2.82 ...
##   ..$ X......FT7.: num [1:451] 1.75 1.99 2.24 2.46 2.61 ...
##   ..$ X.......T7.: num [1:451] 1.37 1.4 1.36 1.28 1.21 ...
##   ..$ X......TP7.: num [1:451] 0.843 0.949 0.998 0.968 0.903 ...
##   ..$ X.......P7.: num [1:451] 0.0709 0.1146 0.1009 0.0682 0.0491 ...
##   ..$ X......Fp1.: num [1:451] 0.551 0.737 0.958 1.167 1.345 ...
##   ..$ X.......F3.: num [1:451] 1.88 2.12 2.37 2.56 2.71 ...
##   ..$ X......FC3.: num [1:451] 2.28 2.55 2.81 3.04 3.23 ...
##   ..$ X.......C3.: num [1:451] 1.92 2.15 2.35 2.53 2.66 ...
##   ..$ X......CP3.: num [1:451] 1.47 1.6 1.72 1.79 1.85 ...
##   ..$ X.......P3.: num [1:451] 1.07 1.2 1.31 1.37 1.38 ...
##   ..$ X.......O1.: num [1:451] 1.54 1.56 1.45 1.25 1.03 ...
##   ..$ X.......Fz.: num [1:451] 1.54 1.67 1.84 1.99 2.1 ...
##   ..$ X......FCz.: num [1:451] 1.66 1.75 1.85 1.93 1.99 ...
##   ..$ X.......Cz.: num [1:451] 1.01 1.05 1.08 1.13 1.15 ...
##   ..$ X......CPz.: num [1:451] 1.57 1.69 1.79 1.87 1.92 ...
##   ..$ X.......Pz.: num [1:451] 1.06 1.12 1.14 1.1 1.05 ...
##   ..$ X.......Oz.: num [1:451] 1.389 1.435 1.348 1.184 0.985 ...
##   ..$ X......Fp2.: num [1:451] -0.1991 0.0655 0.3464 0.6192 0.8566 ...
##   ..$ X.......F4.: num [1:451] -0.5783 -0.3928 -0.2319 -0.0955 0 ...
##   ..$ X......FC4.: num [1:451] 0.685 0.676 0.641 0.578 0.54 ...
##   ..$ X.......C4.: num [1:451] 0.723 0.698 0.633 0.556 0.488 ...
##   ..$ X......CP4.: num [1:451] 2.06 2.08 2.08 2.11 2.13 ...
##   ..$ X.......P4.: num [1:451] -0.12 -0.09 -0.0927 -0.1228 -0.1719 ...
##   ..$ X.......O2.: num [1:451] 0.881 0.764 0.666 0.562 0.461 ...
##   ..$ X.......F8.: num [1:451] -1.083 -0.998 -0.892 -0.829 -0.78 ...
##   ..$ X......FT8.: num [1:451] -0.723 -0.726 -0.676 -0.608 -0.551 ...
##   ..$ X.......T8.: num [1:451] -0.652 -0.674 -0.674 -0.617 -0.546 ...
##   ..$ X......TP8.: num [1:451] -1.282 -1.208 -1.045 -0.854 -0.701 ...
##   ..$ X.......P8.: num [1:451] 0.2428 0.1391 0.0464 -0.0491 -0.1609 ...
##   ..$ X          : logi [1:451] NA NA NA NA NA NA ...
##  $ :'data.frame':    451 obs. of  31 variables:
##   ..$ X.......F7.: num [1:451] -0.0298 0.08 0.1412 0.1176 0.058 ...
##   ..$ X......FT7.: num [1:451] 0.555 0.827 1.092 1.314 1.468 ...
##   ..$ X.......T7.: num [1:451] -0.022 -0.11 -0.195 -0.24 -0.309 ...
##   ..$ X......TP7.: num [1:451] 0.6933 0.469 0.2839 0.1223 0.0063 ...
##   ..$ X.......P7.: num [1:451] -0.353 -0.412 -0.383 -0.314 -0.257 ...
##   ..$ X......Fp1.: num [1:451] -1.63 -1.69 -1.72 -1.76 -1.78 ...
##   ..$ X.......F3.: num [1:451] -0.0533 -0.2259 -0.3561 -0.4486 -0.527 ...
##   ..$ X......FC3.: num [1:451] 0.298 0.1396 -0.0016 -0.1067 -0.1867 ...
##   ..$ X.......C3.: num [1:451] 0.2933 0.1772 0.0988 0.0659 0.0737 ...
##   ..$ X......CP3.: num [1:451] -0.455 -0.446 -0.359 -0.238 -0.111 ...
##   ..$ X.......P3.: num [1:451] -0.182 -0.2118 -0.1741 -0.0925 0.0204 ...
##   ..$ X.......O1.: num [1:451] -0.486 -0.486 -0.427 -0.365 -0.35 ...
##   ..$ X.......Fz.: num [1:451] -0.331 -0.383 -0.403 -0.4 -0.364 ...
##   ..$ X......FCz.: num [1:451] -0.519 -0.574 -0.565 -0.527 -0.442 ...
##   ..$ X.......Cz.: num [1:451] -0.1255 -0.1239 -0.0831 -0.0204 0.0643 ...
##   ..$ X......CPz.: num [1:451] -0.2259 -0.2416 -0.2086 -0.1427 -0.0361 ...
##   ..$ X.......Pz.: num [1:451] -0.2886 -0.2541 -0.1506 -0.0361 0.091 ...
##   ..$ X.......Oz.: num [1:451] -0.883 -0.913 -0.845 -0.784 -0.739 ...
##   ..$ X......Fp2.: num [1:451] -2.54 -2.58 -2.57 -2.48 -2.39 ...
##   ..$ X.......F4.: num [1:451] -2.1 -2.03 -1.89 -1.72 -1.52 ...
##   ..$ X......FC4.: num [1:451] -1.64 -1.53 -1.39 -1.22 -1.03 ...
##   ..$ X.......C4.: num [1:451] -1.252 -1.169 -1.059 -0.938 -0.797 ...
##   ..$ X......CP4.: num [1:451] -1.446 -1.316 -1.139 -0.973 -0.817 ...
##   ..$ X.......P4.: num [1:451] -0.309 -0.259 -0.138 0.011 0.174 ...
##   ..$ X.......O2.: num [1:451] -0.471 -0.577 -0.615 -0.602 -0.571 ...
##   ..$ X.......F8.: num [1:451] -2.07 -2.14 -2.15 -2.12 -2.06 ...
##   ..$ X......FT8.: num [1:451] -0.949 -0.943 -0.897 -0.833 -0.734 ...
##   ..$ X.......T8.: num [1:451] -1.72 -1.57 -1.42 -1.24 -1.04 ...
##   ..$ X......TP8.: num [1:451] -1.69 -1.73 -1.7 -1.56 -1.31 ...
##   ..$ X.......P8.: num [1:451] -1.114 -1.063 -0.913 -0.706 -0.464 ...
##   ..$ X          : logi [1:451] NA NA NA NA NA NA ...

head(ff[[1]])

##   X.......F7. X......FT7. X.......T7. X......TP7. X.......P7. X......Fp1.
## 1     -0.9733     -1.0071     -0.1834     -1.0521     -0.7046     -1.1503
## 2     -0.7079     -1.0216     -0.1705     -1.1422     -0.7915     -1.0843
## 3     -0.3732     -0.9813     -0.1544     -1.1760     -0.8205     -1.0071
## 4     -0.0225     -0.8784     -0.1239     -1.0891     -0.8173     -0.9218
## 5      0.3523     -0.7095     -0.0611     -0.8800     -0.6966     -0.8173
## 6      0.6998     -0.5116      0.0483     -0.5711     -0.5277     -0.7095
##   X.......F3. X......FC3. X.......C3. X......CP3. X.......P3. X.......O1.
## 1     -1.0425     -0.5212     -0.2477     -0.0064      0.2349      0.6226
## 2     -1.0167     -0.5196     -0.2638      0.0064      0.2445      0.4842
## 3     -0.9556     -0.4987     -0.2494      0.0595      0.3041      0.3893
## 4     -0.8639     -0.4569     -0.2043      0.1351      0.3845      0.3105
## 5     -0.7481     -0.3604     -0.1400      0.2059      0.4923      0.2687
## 6     -0.6274     -0.2397     -0.0547      0.2703      0.5936      0.3089
##   X.......Fz. X......FCz. X.......Cz. X......CPz. X.......Pz. X.......Oz.
## 1     -0.3620      0.2027      0.3218      0.5293      0.0692      0.9218
## 2     -0.3008      0.2236      0.2751      0.4617     -0.0322      0.8205
## 3     -0.2365      0.2494      0.2204      0.4408     -0.0949      0.7577
## 4     -0.1770      0.2848      0.1947      0.4328     -0.1078      0.7239
## 5     -0.1384      0.2928      0.1673      0.4038     -0.1046      0.7256
## 6     -0.0901      0.2767      0.1512      0.3362     -0.0724      0.7658
##   X......Fp2. X.......F4. X......FC4. X.......C4. X......CP4. X.......P4.
## 1     -1.2339     -0.6886      0.7561      0.7931      0.5711      0.9556
## 2     -1.1213     -0.6435      0.7529      0.7143      0.6306      0.9331
## 3     -0.9315     -0.5792      0.7497      0.6564      0.6596      0.9170
## 4     -0.7062     -0.5325      0.7352      0.6017      0.6483      0.8961
## 5     -0.5164     -0.5196      0.6773      0.5357      0.5904      0.8205
## 6     -0.3781     -0.5052      0.6033      0.4102      0.4617      0.6757
##   X.......O2. X.......F8. X......FT8. X.......T8. X......TP8. X.......P8.  X
## 1      0.9090      0.0611     -0.3459      0.5068      1.4415      1.4479 NA
## 2      0.7626      0.1673     -0.2864      0.8092      1.8243      1.4704 NA
## 3      0.6451      0.2461     -0.1657      1.0972      2.1622      1.4672 NA
## 4      0.5373      0.2687     -0.0322      1.3498      2.4083      1.4173 NA
## 5      0.4698      0.2059      0.0676      1.5106      2.5258      1.3385 NA
## 6      0.4488      0.0627      0.1593      1.5412      2.4389      1.1937 NA

# grap the column name
dta4colname <- scan("./Subject1/1w.dat", skip=1, sep="\t", nlines = 1, what = "character")
dta4colname <- gsub(" ", "", dta4colname)

df <-lapply(ff, function(x){
  names(x)<-dta4colname #change header name in 4 list
  x[,c(31)]<-NULL;x #delete na column in 4 list
  x$time <-rep(seq(-100, 800, 2));x #create time in 4 list
  })

# rbind four list than cbind condition varable
# add a column to indicate condition 
dtaL <- cbind(Reduce(rbind, df), 
              condition=rep(paste0(1:4, "w"), each=nrow(df[[1]])))|>  as.data.frame()

pacman::p_load(reshape)

# wide to long format
dat4_melt <- melt(dtaL, id = c("time", "condition"))

str(dat4_melt)

## 'data.frame':    54120 obs. of  4 variables:
##  $ time     : num  -100 -98 -96 -94 -92 -90 -88 -86 -84 -82 ...
##  $ condition: chr  "1w" "1w" "1w" "1w" ...
##  $ variable : Factor w/ 30 levels "[F7]","[FT7]",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ value    : num  -0.9733 -0.7079 -0.3732 -0.0225 0.3523 ...

pacman::p_load(lattice)

xyplot(value ~ time | variable, 
       groups = condition,
       data = dat4_melt, cex=.5,
       type = c("p","g", "r"),
       pch = 20,
       xlab = "Time",
       ylab = "LFP",
       layout = c(6,5),
       auto.key = list(space = "right"))

HW5

The ASCII (plain text) file schiz.asc contains response times (in milliseconds) for 11 non-schizophrenics and 6 schizophrenics (30 measurements for each person). Summarize and compare descriptive statistics of the measurements from the two groups. Source: Belin, T., & Rubin, D. (1995). The analysis of repeated-measures data on schizophrenic reaction times using mixture models. Statistics in Medicine 14(8), 747-768.

fL <- "http://www.stat.columbia.edu/~gelman/book/data/schiz.asc"
dat5<- read.table(fL, skip = 4, col.names = paste0("T", 101:130))

# Create Group and ID 
dat5$Group <- c(rep("Non-schiz", 11), rep("Schiz", 6))
dat5$ID <- 1:nrow(dat5)
dat5 <- dat5[,c(32,31,1:30)]
head(dat5)

##   ID     Group T101 T102 T103 T104 T105 T106 T107 T108 T109 T110 T111 T112 T113
## 1  1 Non-schiz  312  272  350  286  268  328  298  356  292  308  296  372  396
## 2  2 Non-schiz  354  346  384  342  302  312  322  376  306  402  320  298  308
## 3  3 Non-schiz  256  284  320  274  324  268  370  430  314  312  362  256  342
## 4  4 Non-schiz  260  294  306  292  264  290  272  268  344  362  330  280  354
## 5  5 Non-schiz  204  272  250  260  314  308  246  236  208  268  272  264  308
## 6  6 Non-schiz  590  312  286  310  778  364  318  316  316  298  344  262  274
##   T114 T115 T116 T117 T118 T119 T120 T121 T122 T123 T124 T125 T126 T127 T128
## 1  402  280  330  254  282  350  328  332  308  292  258  340  242  306  328
## 2  414  304  422  388  422  426  338  332  426  478  372  392  374  430  388
## 3  388  302  366  298  396  274  226  328  274  258  220  236  272  322  284
## 4  320  334  276  418  288  338  350  350  324  286  322  280  256  218  256
## 5  236  238  350  272  252  252  236  306  238  350  206  260  280  274  318
## 6  330  312  310  376  326  346  334  282  292  282  300  290  302  300  306
##   T129 T130
## 1  294  272
## 2  354  368
## 3  274  356
## 4  220  356
## 5  268  210
## 6  294  444

wide to long format

dat5L <- reshape(dat5, varying=list(3:32), idvar="ID", 
                 v.names="RT", timevar="Test", direction="long")

describeBy function

pacman::p_load(psych, radiant.data)
# Descriptives by Group
descGroup <- describeBy(dat5L$RT, dat5L$Group, mat = TRUE, digits = 1)
# Descriptives by ID
descID <- describeBy(dat5L$RT, dat5L$ID, mat = TRUE, digits = 1)
# change rownames to column named "Measured characteristic"
descGroup <- descGroup[, c(2, 4:7, 10:11)] %>% rownames_to_column("Measured characteristic")
# 
descID <- descID[, c(2, 4:7, 10:11)] %>% rownames_to_column("Measured characteristic")
# specify measured anthropometric characteristic
descGroup$`Measured characteristic` <- rep("Group",2)
descID$`Measured characteristic` <- rep("ID",17)

#rbind Tables
tab1 <- rbind(descGroup, descID)
tab1

##    Measured characteristic    group1   n  mean    sd median min  max
## 1                    Group Non-schiz 330 310.2  64.9    303 204  778
## 2                    Group     Schiz 180 506.9 262.8    432 226 1714
## 3                       ID         1  30 311.1  40.0    307 242  402
## 4                       ID         2  30 366.7  47.6    370 298  478
## 5                       ID         3  30 306.2  52.5    300 220  430
## 6                       ID         4  30 303.6  45.1    293 218  418
## 7                       ID         5  30 265.2  38.9    262 204  350
## 8                       ID         6  30 339.8 102.9    311 262  778
## 9                       ID         7  30 358.1  63.3    350 250  542
## 10                      ID         8  30 268.5  45.1    261 208  418
## 11                      ID         9  30 257.7  39.3    261 206  384
## 12                      ID        10  30 325.7  44.6    321 256  480
## 13                      ID        11  30 309.3  61.6    298 234  434
## 14                      ID        12  30 303.5  34.3    301 258  380
## 15                      ID        13  30 419.1  56.2    400 354  550
## 16                      ID        14  30 635.9 437.1    439 256 1700
## 17                      ID        15  30 599.9 274.6    537 296 1714
## 18                      ID        16  30 594.3 202.1    568 300 1032
## 19                      ID        17  30 488.5 172.2    452 226 1154

with(dat5L, boxplot(RT ~ Group,
                    horizontal = TRUE,
                    frame = FALSE,
                    ylab = "Reaction time",
                    col = "aliceblue",
                    varwidth = TRUE
                    ))

# ANOVA
aov(RT ~ Group + as.integer(Test), data = dat5L) |> anova()

## Analysis of Variance Table
## 
## Response: RT
##                   Df   Sum Sq Mean Sq  F value Pr(>F)    
## Group              1  4506212 4506212 166.2613 <2e-16 ***
## as.integer(Test)   1    10482   10482   0.3867 0.5343    
## Residuals        507 13741318   27103                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# regression
lm(RT ~ Group + as.integer(Test), data = dat5L)

## 
## Call:
## lm(formula = RT ~ Group + as.integer(Test), data = dat5L)
## 
## Coefficients:
##      (Intercept)        GroupSchiz  as.integer(Test)  
##         318.2882          196.6970           -0.5238

Google scholar

由於我尚未有citation的紀錄，所以畫不出圖，先試試我的指導教授哈哈

pacman::p_load(scholar)
dat6 <- get_citation_history("Dhd-F1EAAAAJ")
plot(dat6, 
     xlab="Year", ylab="Citations", 
     type='h', lwd=2, 
     xaxt="n", bty='n', 
     xlim=c(2007, 2021),
     ylim=c(0,300))
axis(side=1, 
     at=seq(2007, 2021, by=1), 
     cex.axis=0.7)
abline(h=seq(0, 400, by=100), 
       lty=2, col="darkgray")

還沒有引用數

Wan Chen Hsu

1025HW- input and output

Jenny Hsu

2021/10/31

Hw1

HW2