Task: Explain what does this statement do: lapply(lapply(search(), ls), length)
# the code means apply the length function on the result which apply ls function on the search()
# the result provide the number of function in each attached packages
## search(): gives a list of attached packages
lapply(lapply(search(), ls), length)
## [[1]]
## [1] 0
##
## [[2]]
## [1] 449
##
## [[3]]
## [1] 87
##
## [[4]]
## [1] 113
##
## [[5]]
## [1] 247
##
## [[6]]
## [1] 104
##
## [[7]]
## [1] 203
##
## [[8]]
## [1] 0
##
## [[9]]
## [1] 1255
Task: Convert the R script in the NZ schools example into a rmarkdown file and provide comments to each code chunk indicated by ‘##’. Give alternative code to perform the same calculation where appropriate.
The New Zealand Ministry of Education provides basic information for all primary and secondary schools in the country.
## keep the school names with white spaces
<- read.csv("nzSchools.csv", as.is=2)
dta
## show display the structure of dta
str(dta)
## 'data.frame': 2571 obs. of 6 variables:
## $ ID : int 1015 1052 1062 1092 1130 1018 1029 1030 1588 1154 ...
## $ Name: chr "Hora Hora School" "Morningside School" "Onerahi School" "Raurimu Avenue School" ...
## $ City: Factor w/ 541 levels "Ahaura","Ahipara",..: 533 533 533 533 533 533 533 533 533 533 ...
## $ Auth: Factor w/ 4 levels "Other","Private",..: 3 3 3 3 3 3 3 3 4 3 ...
## $ Dec : int 2 3 4 2 4 8 5 5 6 1 ...
## $ Roll: int 318 200 455 86 577 329 637 395 438 201 ...
## Dimension of dta (row, column)
dim(dta)
## [1] 2571 6
## binning
## create "Size" variable which based on "Roll" variable. If the value higher than the median will classed into "Large", the other is "Small"
$Size <- ifelse(dta$Roll > median(dta$Roll), "Large", "Small")
dta
## Remove "Size" column
$Size <- NULL
dta
## show the first 6 row of dta
head(dta)
## ID Name City Auth Dec Roll
## 1 1015 Hora Hora School Whangarei State 2 318
## 2 1052 Morningside School Whangarei State 3 200
## 3 1062 Onerahi School Whangarei State 4 455
## 4 1092 Raurimu Avenue School Whangarei State 2 86
## 5 1130 Whangarei School Whangarei State 4 577
## 6 1018 Hurupaki School Whangarei State 8 329
## create "Size" variable which based on "Roll" that divides the range of "Roll" into 3 intervals and codes the "Small", "Mediam", "Large" in "Roll" according to which interval they fall.
$Size <- cut(dta$Roll, 3, labels=c("Small", "Medium", "Large"))
dta
## create cross tabulation show each number of level in the "Size" variable.
table(dta$Size)
##
## Small Medium Large
## 2555 15 1
## create "RollOrd" to show the position of Roll values with descending order
$RollOrd <- order(dta$Roll, decreasing=T)
dta
## Top 6 Roll value
## similar to the result: head(sort(dta$Roll, decreasing=T))
head(dta[dta$RollOrd, ])
## ID Name City Auth Dec Roll Size RollOrd
## 1726 498 Correspondence School Wellington State NA 5546 Large 753
## 301 28 Rangitoto College Auckland State 10 3022 Medium 353
## 376 78 Avondale College Auckland State 4 2613 Medium 712
## 2307 319 Burnside High School Christchurch State 8 2588 Medium 709
## 615 41 Macleans College Auckland State 10 2476 Medium 1915
## 199 43 Massey High School Auckland State 5 2452 Medium 1683
## Last 6 Roll value
tail(dta[dta$RollOrd, ])
## ID Name City Auth Dec Roll Size
## 2401 1641 Amana Christian School Dunedin Private 9 7 Small
## 1590 2461 Tangimoana School Manawatu State 4 6 Small
## 1996 3598 Woodbank School Kaikoura State 4 6 Small
## 2112 3386 Jacobs River School Jacobs River State 5 6 Small
## 1514 2407 Ngamatapouri School Sth Taranaki District State 9 5 Small
## 1575 2420 Papanui Junction School Taihape State 5 5 Small
## RollOrd
## 2401 2562
## 1590 266
## 1996 2478
## 2112 1501
## 1514 2377
## 1575 1542
## 先按城市名字再依Roll值,由大排到小 然後呈現前六列
## head(sort(dta$City, dta$Roll, decreasing=T))
head(dta[order(dta$City, dta$Roll, decreasing=T), ])
## ID Name City Auth Dec Roll Size RollOrd
## 2548 401 Menzies College Wyndham State 4 356 Small 859
## 2549 4054 Wyndham School Wyndham State 5 94 Small 1163
## 1611 2742 Woodville School Woodville State 3 147 Small 726
## 1630 2640 Papatawa School Woodville State 7 27 Small 2273
## 2041 3600 Woodend School Woodend State 9 375 Small 1401
## 1601 399 Central Southland College Winton State 7 549 Small 450
## 先按城市名字再依Roll值,由大排到小 然後呈現最後六列
tail(dta[order(dta$City, dta$Roll, decreasing=T), ])
## ID Name City Auth Dec Roll Size RollOrd
## 2169 3273 Albury School Albury State 8 30 Small 1010
## 2018 350 Akaroa Area School Akaroa State 8 125 Small 1051
## 2023 3332 Duvauchelle School Akaroa State 9 41 Small 749
## 335 1200 Ahuroa School Ahuroa State 7 22 Small 193
## 99 1000 Ahipara School Ahipara State 3 241 Small 1963
## 2117 2105 Awahono School - Grey Valley Ahaura State 4 119 Small 364
## create cross tabulation show each number of level in the "Auth" variable.
table(dta$Auth)
##
## Other Private State State Integrated
## 1 99 2144 327
## save as "authtbl" object and show authtbl
<- table(dta$Auth); authtbl authtbl
##
## Other Private State State Integrated
## 1 99 2144 327
## show the class of "authtbl" object
class(authtbl)
## [1] "table"
## only show the data with the "Auth" column equal to the value "Other"
$Auth == "Other", ] dta[dta
## ID Name City Auth Dec Roll Size RollOrd
## 2315 518 Kingslea School Christchurch Other 1 51 Small 1579
## Create a contingency table from cross-classifying factors by Decile and authority types.
## Decile: A schools decile indicated the extent to which the school draws its students from the low socio-economic communities.
## Decile 1 schools are the 10% of schools with the highest proportion of students from the low socio-economic communities, whereas decile 10 schools are the 10% of schools with the lowest proportion of these students.
## A schools decile does not indicate the overall socio-economic mix of the school.
xtabs(~ Auth + Dec, data=dta)
## Dec
## Auth 1 2 3 4 5 6 7 8 9 10
## Other 1 0 0 0 0 0 0 0 0 0
## Private 0 0 2 6 2 2 6 11 12 38
## State 259 230 208 219 214 215 188 200 205 205
## State Integrated 12 22 35 28 38 34 45 45 37 31
## calculate the mean of "Roll"
mean(dta$Roll)
## [1] 295.4737
## calculate the mean "Roll" of the "Private" authority types
mean(dta$Roll[dta$Auth == "Private"])
## [1] 308.798
## calculate the mean "Roll" of the each types of authority
aggregate(dta["Roll"], by=list(dta$Auth), FUN=mean)
## Group.1 Roll
## 1 Other 51.0000
## 2 Private 308.7980
## 3 State 300.6301
## 4 State Integrated 258.3792
## Create "Rich" variable which based on those decile higher than 5 will be TRUE, the other will be False
$Rich <- dta$Dec > 5 ; # dta$Rich
dta
## calculate the mean "Roll" of the each types of authority and Rich group
aggregate(dta["Roll"], by=list(dta$Auth, dta$Rich), FUN=mean)
## Group.1 Group.2 Roll
## 1 Other FALSE 51.0000
## 2 Private FALSE 151.4000
## 3 State FALSE 261.7487
## 4 State Integrated FALSE 183.2370
## 5 Private TRUE 402.5362
## 6 State TRUE 338.8243
## 7 State Integrated TRUE 311.2135
## show the range of Roll in each types of authority
## by() tapply()
by(dta["Roll"], INDICES=list(dta$Auth), FUN=range)
## : Other
## [1] 51 51
## ------------------------------------------------------------
## : Private
## [1] 7 1663
## ------------------------------------------------------------
## : State
## [1] 5 5546
## ------------------------------------------------------------
## : State Integrated
## [1] 18 1475
Task: Split the ChickWeight{datasets} data by individual chicks to extract separate slope estimates of regressing weight onto Time for each chick.
::p_load(dplyr, readr, tidyr, tidyverse, ggplot2)
pacman
<- ChickWeight
dat
head(dat)
## weight Time Chick Diet
## 1 42 0 1 1
## 2 51 2 1 1
## 3 59 4 1 1
## 4 64 6 1 1
## 5 76 8 1 1
## 6 93 10 1 1
str(dat)
## Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame': 578 obs. of 4 variables:
## $ weight: num 42 51 59 64 76 93 106 125 149 171 ...
## $ Time : num 0 2 4 6 8 10 12 14 16 18 ...
## $ Chick : Ord.factor w/ 50 levels "18"<"16"<"15"<..: 15 15 15 15 15 15 15 15 15 15 ...
## $ Diet : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
## - attr(*, "formula")=Class 'formula' language weight ~ Time | Chick
## .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
## - attr(*, "outer")=Class 'formula' language ~Diet
## .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
## - attr(*, "labels")=List of 2
## ..$ x: chr "Time"
## ..$ y: chr "Body weight"
## - attr(*, "units")=List of 2
## ..$ x: chr "(days)"
## ..$ y: chr "(gm)"
with(dat, by(dat, Chick, function(x) lm(weight ~ Time, data=x)))|> sapply(coef)
## 18 16 15 13 9 20 10
## (Intercept) 39 43.392857 46.83333 43.384359 52.094086 37.667826 38.695054
## Time -2 1.053571 1.89881 2.239601 2.663137 3.732718 4.066102
## 8 17 19 4 6 11 3
## (Intercept) 43.727273 43.030706 31.21222 32.86568 44.123431 47.921948 23.17955
## Time 4.827273 4.531538 5.08743 6.08864 6.378006 7.510967 8.48737
## 1 12 2 5 14 7 24
## (Intercept) 24.465436 21.939797 24.724853 16.89563 20.52488 5.842535 53.067766
## Time 7.987899 8.440629 8.719861 10.05536 11.98245 13.205264 1.207533
## 30 22 23 27 28 26 25
## (Intercept) 39.109666 40.082590 38.428074 29.858569 23.984874 20.70715 19.65119
## Time 5.898351 5.877931 6.685978 7.379368 9.703676 10.10316 11.30676
## 29 21 33 37 36 31 39
## (Intercept) 5.882771 15.56330 45.830283 29.608834 25.85403 19.13099 17.03661
## Time 12.453487 15.47512 5.855241 6.677053 9.99047 10.02617 10.73710
## 38 32 40 34 35 44 45
## (Intercept) 10.67282 13.69173 10.83830 5.081682 4.757979 44.909091 35.673121
## Time 12.06051 13.18091 13.44229 15.000151 17.258811 6.354545 7.686432
## 43 41 47 49 46 50 42
## (Intercept) 52.185751 39.337922 36.489790 31.662986 27.771744 23.78218 19.86507
## Time 8.318863 8.159885 8.374981 9.717894 9.738466 11.33293 11.83679
## 48
## (Intercept) 7.947663
## Time 13.714718
revision based on lecture code
<- dat |>
slopetidy group_by(Chick)|>
do({res <- lm(weight ~ Time, data=.)
::tidy(coef(res))
broom
}|>
) subset(names=="Time")|>
arrange(x)
::datatable(slopetidy, rownames=FALSE, options=list(pageLength=5)) DT
<-with(dat, split(dat, Chick))|>
slopesplit lapply(function(x) lm(weight ~ Time, data=x))|>
sapply(coef)|>
::melt()|>
reshape2reshape(idvar = "Var2", timevar = "Var1", direction = "wide")|>
arrange(value.Time)
colnames(slopesplit)<-c("Chick", "Intercept", "Time")
::datatable(slopesplit, rownames=FALSE, options=list(pageLength=5)) DT
<- dat[, -c(3,4)]
datshade %>%
datmutate(Chick=factor(Chick, levels=slopesplit$Chick))%>%
ggplot(., aes(x=Time, y=weight))+
geom_point()+
geom_smooth(method = lm, formula=y ~ x, se = FALSE)+
geom_line(data=datshade, aes(x=Time, y=weight),
stat="smooth",method = "lm", formula = y ~ x,
se = FALSE, color="firebrick", alpha=0.5)+
facet_wrap(.~Chick, ncol=10, nrow=5)+
theme_minimal()
Task: Convert the script in the NCEA 2007 example into a rmarkdown file and provide comments to each code chunk indicated by ‘##’. Give alternative code to perform the same calculation where appropriate.
## read data
<- read.table("NCEA2007.txt", sep=":", quote="", h=T, as.is=T)
dta2
## Dimension of dta2 (row, column)
dim(dta2)
## [1] 88 4
## show display the structure of dta2
str(dta2)
## 'data.frame': 88 obs. of 4 variables:
## $ Name : chr "Al-Madinah School" "Alfriston College" "Ambury Park Centre for Riding Therapy" "Aorere College" ...
## $ Level1: num 61.5 53.9 33.3 39.5 71.2 22.1 50.8 57.3 89.3 59.8 ...
## $ Level2: num 75 44.1 20 50.2 78.9 30.8 34.8 49.8 89.7 65.7 ...
## $ Level3: num 0 0 0 30.6 55.5 26.3 48.9 44.6 88.6 50.4 ...
## show first 6 row of dta2
head(dta2)
## Name Level1 Level2 Level3
## 1 Al-Madinah School 61.5 75.0 0.0
## 2 Alfriston College 53.9 44.1 0.0
## 3 Ambury Park Centre for Riding Therapy 33.3 20.0 0.0
## 4 Aorere College 39.5 50.2 30.6
## 5 Auckland Girls' Grammar School 71.2 78.9 55.5
## 6 Auckland Grammar 22.1 30.8 26.3
## calculate the mean of each level
# output is numeric object
apply(dta2[, -1], MARGIN=2, FUN=mean)
## Level1 Level2 Level3
## 62.26705 61.06818 47.97614
## list apply
## output is list object
lapply(dta2[, -1], FUN=mean)
## $Level1
## [1] 62.26705
##
## $Level2
## [1] 61.06818
##
## $Level3
## [1] 47.97614
## simplify the list apply
## output is numeric object
sapply(dta2[, -1], FUN=mean)
## Level1 Level2 Level3
## 62.26705 61.06818 47.97614
Range of each level
## output is "matrix" "array" object
apply(dta2[, -1], MARGIN=2, FUN=range)
## Level1 Level2 Level3
## [1,] 2.8 0.0 0.0
## [2,] 97.4 95.7 95.7
## output is list object
lapply(dta2[, -1], FUN=range)
## $Level1
## [1] 2.8 97.4
##
## $Level2
## [1] 0.0 95.7
##
## $Level3
## [1] 0.0 95.7
## output is "matrix" "array" object
sapply(dta2[, -1], FUN=range)
## Level1 Level2 Level3
## [1,] 2.8 0.0 0.0
## [2,] 97.4 95.7 95.7
##
<- split(dta$Roll, dta$Auth)
rollsByAuth
##
str(rollsByAuth)
## List of 4
## $ Other : int 51
## $ Private : int [1:99] 255 39 154 73 83 25 95 85 94 729 ...
## $ State : int [1:2144] 318 200 455 86 577 329 637 395 201 267 ...
## $ State Integrated: int [1:327] 438 26 191 560 151 114 126 171 211 57 ...
##
class(rollsByAuth)
## [1] "list"
##
lapply(split(dta$Roll, dta$Auth), mean)
## $Other
## [1] 51
##
## $Private
## [1] 308.798
##
## $State
## [1] 300.6301
##
## $`State Integrated`
## [1] 258.3792
##
sapply(split(dta$Roll, dta$Auth), mean)
## Other Private State State Integrated
## 51.0000 308.7980 300.6301 258.3792
## colMeans
colMeans(dta2[,-1])
## Level1 Level2 Level3
## 62.26705 61.06818 47.97614
## range
range(dta2$Level1)
## [1] 2.8 97.4
## min and max
data.frame(min=sapply(dta2,min), max=sapply(dta2,max))
## min max
## Name Al-Madinah School Zayed College for Girls
## Level1 2.8 97.4
## Level2 0 95.7
## Level3 0 95.7
## aggregate
aggregate(dta[,"Roll"], by=list(dta$Auth), FUN=mean)
## Group.1 x
## 1 Other 51.0000
## 2 Private 308.7980
## 3 State 300.6301
## 4 State Integrated 258.3792
## dplyr
|>
dtagroup_by(Auth)|>
summarise_at(vars(Roll), list(mean = mean))
## # A tibble: 4 x 2
## Auth mean
## <fct> <dbl>
## 1 Other 51
## 2 Private 309.
## 3 State 301.
## 4 State Integrated 258.
The following R script uses Cushings{MASS} to demonstrates several ways to achieve the same objective in R.
Task: Explain the advantages or disadvantages of each method.
# Cushings example
library(pacman)
::p_load(MASS, tidyverse) pacman
class: “data.frame”
+: 比較符合自己習慣的寫法?
-: 無法看到轉換前的資料樣態
aggregate( . ~ Type, data = Cushings, mean)
## Type Tetrahydrocortisone Pregnanetriol
## 1 a 2.966667 2.44
## 2 b 8.180000 1.12
## 3 c 19.720000 5.50
## 4 u 14.016667 1.20
class: “matrix” “array”
similar to method 3
+: 資料量大時,演算比較有效率?
-: 不太容易上手的語法
sapply(split(Cushings[,-3], Cushings$Type), function(x) apply(x, 2, mean))
## a b c u
## Tetrahydrocortisone 2.966667 8.18 19.72 14.01667
## Pregnanetriol 2.440000 1.12 5.50 1.20000
class: “matrix” “array”
+: 資料量大時,演算比較有效率?
-: 不太容易上手的語法
do.call("rbind", as.list(
by(Cushings, list(Cushings$Type), function(x) {
<- subset(x, select = -Type)
y apply(y, 2, mean)
} )))
## Tetrahydrocortisone Pregnanetriol
## a 2.966667 2.44
## b 8.180000 1.12
## c 19.720000 5.50
## u 14.016667 1.20
class: “tbl_df” “tbl” “data.frame”
+: 比較符合目前自己習慣的寫法?
-: 無法看到轉換前的資料樣態
## dplyr
%>%
Cushings group_by(Type) %>%
summarize(t_m = mean(Tetrahydrocortisone), p_m = mean(Pregnanetriol))
## # A tibble: 4 x 3
## Type t_m p_m
## <fct> <dbl> <dbl>
## 1 a 2.97 2.44
## 2 b 8.18 1.12
## 3 c 19.7 5.5
## 4 u 14.0 1.2
class: “tbl_df” “tbl” “data.frame”
+: 包含原本資料的樣態
-: 占用的記憶體較多, 語法較複雜
%>%
Cushings nest(-Type) %>% # unnest?
mutate(avg = map(data, ~ apply(., 2, mean)),
res_1 = map_dbl(avg, "Tetrahydrocortisone"),
res_2 = map_dbl(avg, "Pregnanetriol"))
## Warning: All elements of `...` must be named.
## Did you want `data = c(Tetrahydrocortisone, Pregnanetriol)`?
## # A tibble: 4 x 5
## Type data avg res_1 res_2
## <fct> <list> <list> <dbl> <dbl>
## 1 a <tibble [6 x 2]> <dbl [2]> 2.97 2.44
## 2 b <tibble [10 x 2]> <dbl [2]> 8.18 1.12
## 3 c <tibble [5 x 2]> <dbl [2]> 19.7 5.5
## 4 u <tibble [6 x 2]> <dbl [2]> 14.0 1.2
Task: Use the data in the high schools example to solve the following problems:
<- read.table("hs0.txt", sep="\t", quote="", header=T, as.is=T)
dat
str(dat)
## 'data.frame': 200 obs. of 11 variables:
## $ id : int 70 121 86 141 172 113 50 11 84 48 ...
## $ female : chr "male" "female" "male" "male" ...
## $ race : chr "white" "white" "white" "white" ...
## $ ses : chr "low" "middle" "high" "high" ...
## $ schtyp : chr "public" "public" "public" "public" ...
## $ prog : chr "general" "vocation" "general" "vocation" ...
## $ read : int 57 68 44 63 47 44 50 34 63 57 ...
## $ write : int 52 59 33 44 52 52 59 46 57 55 ...
## $ math : int 41 53 54 47 57 51 42 45 54 52 ...
## $ science: int 47 63 58 53 53 63 53 39 58 NA ...
## $ socst : int 57 61 31 56 61 61 61 36 51 51 ...
<- dat %>% reshape2::melt(., measure.vars = colnames(.)[7:11])
datl
<- dat %>%
res select_if(is.numeric) %>%
::melt(., id='id')
reshape2
pairwise.t.test(res$value, res$variable, p.adjust = "none")
##
## Pairwise comparisons using t tests with pooled SD
##
## data: res$value and res$variable
##
## read write math science
## write 0.58 - - -
## math 0.68 0.90 - -
## science 0.76 0.39 0.47 -
## socst 0.86 0.71 0.81 0.63
##
## P value adjustment method: none
%>%
datl group_by(variable) %>%
summarise(group_MEAN = mean(value, na.rm = TRUE),
group_SE = sd(value, na.rm = TRUE) / sqrt(sum(!is.na(value)))) %>%
ggplot(aes(x = variable, y = group_MEAN)) +
geom_point(aes(shape = variable), size = 4) +
geom_errorbar(aes(ymax = group_MEAN + 1.96*group_SE,
ymin = group_MEAN - 1.96*group_SE), width=.3, size=.4) +
labs(x="Variable", y="Mean score")+
theme_minimal() +
theme(legend.position = 'none')
<-dat[ ,c(3, 7, 8 ,9 , 10, 11)]
dat2
<- dat2%>%
res2 ::melt(., id='race')
reshape2
lapply(split(res2, res2$variable), function(x) anova(lm(x$value~factor(x$race))))
## $read
## Analysis of Variance Table
##
## Response: x$value
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(x$race) 3 1749.8 583.27 5.9637 0.0006539 ***
## Residuals 196 19169.6 97.80
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $write
## Analysis of Variance Table
##
## Response: x$value
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(x$race) 3 1914.2 638.05 7.8334 5.785e-05 ***
## Residuals 196 15964.7 81.45
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $math
## Analysis of Variance Table
##
## Response: x$value
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(x$race) 3 1842.1 614.05 7.7033 6.84e-05 ***
## Residuals 196 15623.7 79.71
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $science
## Analysis of Variance Table
##
## Response: x$value
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(x$race) 3 3169.5 1056.51 13.063 8.505e-08 ***
## Residuals 191 15447.2 80.88
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $socst
## Analysis of Variance Table
##
## Response: x$value
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(x$race) 3 943.9 314.63 2.804 0.04098 *
## Residuals 196 21992.3 112.21
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
%>%
datl group_by(race, variable) %>%
summarise(group_MEAN = mean(value, na.rm = TRUE),
group_SE = sd(value, na.rm = TRUE) / sqrt(sum(!is.na(value)))) %>%
ggplot(aes(x = race, y = group_MEAN)) +
geom_point(aes(shape = race), size = 4) +
geom_errorbar(aes(ymax = group_MEAN + 1.96*group_SE,
ymin = group_MEAN - 1.96*group_SE), width=.3, size=.4) +
labs(x="Variable", y="Score")+
facet_wrap(. ~ variable, nrow = 1) +
theme_minimal() +
theme(legend.position = "none",
axis.text.x = element_text(angle = 90))
## `summarise()` has grouped output by 'race'. You can override using the `.groups` argument.
<-dat[ ,c(7:11)]
dat3
lapply(dat3, function(x){
coefficients( lm(cbind(dat3$read,
$write,
dat3$math,
dat3$science,
dat3$socst)~x, data=dat3))
dat3 } )
## $read
## [,1] [,2] [,3] [,4] [,5]
## (Intercept) -8.141284e-15 24.4095462 21.2425264 20.6602830 18.5561875
## x 1.000000e+00 0.5425249 0.6009356 0.5998076 0.6476622
##
## $write
## [,1] [,2] [,3] [,4] [,5]
## (Intercept) 18.7309118 -8.955412e-14 20.6572869 21.1309429 16.5279046
## x 0.6336486 1.000000e+00 0.6055514 0.5843927 0.6791647
##
## $math
## [,1] [,2] [,3] [,4] [,5]
## (Intercept) 14.5396491 20.2651022 0 17.49497 19.9529454
## x 0.7148764 0.6167729 1 0.65494 0.6155894
##
## $science
## [,1] [,2] [,3] [,4] [,5]
## (Intercept) 18.1571108 24.3565993 21.3916641 -8.141284e-15 26.1686196
## x 0.6540264 0.5455811 0.6003186 1.000000e+00 0.5034689
##
## $socst
## [,1] [,2] [,3] [,4] [,5]
## (Intercept) 21.5368195 25.229771 28.1291585 30.1197077 1.628257e-14
## x 0.5845412 0.524823 0.4670406 0.4167311 1.000000e+00
The formula
\[P = L (\frac{r}{1-(1+r)^{-M}})\] describes the payment you have to make per month for M number of months if you take out a loan of L amount today at a monthly interest rate of r.
Task: Compute how much you will have to pay per month for 10, 15, 20, 25, or 30 years if you borrow NT$5,000,000, 10,000,000, or 15,000,000 from a bank that charges you 2%, 5%, or 7% for the monthly interest rate.
<- function(L, r, M) {
interest <- L * (r / (1 - (1 + r)^(-M)))
P return(P)}
data.frame(L = rep(c(5*10^6, 10*10^6, 15*10^6), each = 15),
r = rep(c(2, 5, 7) / 100, each = 5, 3),
M = rep(c(10, 15, 20, 25, 30)*12, 9)) |>
mutate(P = round(mapply(interest, L, r, M), digits = 2))|>
::datatable(rownames=FALSE, options=list(pageLength=5)) DT
Task: Modify this R script to create a function to compute the c-statistic illustrated with the data set in the article: Tryon, W.W. (1984). A simplified time-series analysis for evaluating treatment interventions. Journal of Applied Behavioral Analysis, 34(4), 230-233.
# c-stat example
# read in data
<- read.table("cstat.txt", header=T)
dat
str(dat)
## 'data.frame': 42 obs. of 1 variable:
## $ nc: int 28 46 39 45 24 20 35 37 36 40 ...
head(dat)
## nc
## 1 28
## 2 46
## 3 39
## 4 45
## 5 24
## 6 20
# plot figure 1
plot(1:42, dat[,1], xlab="Observations", ylab="Number of Children")
lines(1:42, dat[,1])
abline(v=10, lty=2)
abline(v=32, lty=2)
segments(1, mean(dat[1:10,1]),10, mean(dat[1:10,1]),col="red")
segments(11, mean(dat[11:32,1]),32, mean(dat[11:32,1]),col="red")
segments(33, mean(dat[33:42,1]),42, mean(dat[33:42,1]),col="red")
# calculate c-stat for first baseline phase
<- 1-(sum(diff(dat[1:10,1])^2)/(2*(10-1)*var(dat[1:10,1])))
cden
<- sqrt((10-2)/((10-1)*(10+1)))
sc
<- 1-pnorm(cden/sc)
pval pval
## [1] 0.2866238
<- function(x) {
f_cstat <- length(x)
n <- 1 - (sum(diff(x)^2)/(2*(n-1)*var(x)))
cden return(cden)
}
<- function(x) {
f_sc <- length(x)
n <- sqrt((n-2) / ((n-1) * (n+1)))
sc return(sc)
}
<- function(x) {
f_pval <- f_cstat(x)
cden <- f_sc(x)
sc return(1 - pnorm(cden / sc))
}
f_cstat(dat[1:10, 1])
## [1] 0.1601208
f_sc(dat[1:10, 1])
## [1] 0.2842676
f_pval(dat[1:10, 1])
## [1] 0.2866238
# calculate c-stat for first baseline plus group tokens
<- 32
n <- 1-(sum(diff(dat[1:n,1])^2)/(2*(n-1)*var(dat[1:n,1])))
cden <- sqrt((n-2)/((n-1)*(n+1)))
sc <- 1-pnorm(cden/sc)
pval list(z=cden/sc,pvalue=pval)
## $z
## [1] 3.879054
##
## $pvalue
## [1] 5.243167e-05
<- function(x) {
fgroup_cstat <- length(x)
n <- 1 - (sum(diff(x)^2)/(2*(n-1)*var(x)))
cden <- sqrt((n-2)/((n-1)*(n+1)))
sc <- 1-pnorm(cden/sc)
pval return(list(c.stat = cden, sc = sc, z = cden/sc, p.value = pval))
}
fgroup_cstat(dat[1:32, 1])
## $c.stat
## [1] 0.6642762
##
## $sc
## [1] 0.1712469
##
## $z
## [1] 3.879054
##
## $p.value
## [1] 5.243167e-05