Refer to the questions here.
library(MASS)
# count the sum of female
dta <- minn38
sum_sex <- aggregate(dta$f, list(dta$sex), sum)
sum_sex[1,2]
## [1] 7861
# count the sum of female to college
f.dta <- minn38[85:168,]
female_phs <- aggregate(f.dta$f, list(f.dta$phs), sum)
female_phs[1,2]
## [1] 2027
library("bitops")
library("RCurl")
library("psych")
#read in data
dta <- read.table(textConnection(getURL("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/verbalIQ.txt")), header = TRUE)
mydta <- dta[,-c(1,5)]
describe(mydta)
## vars n mean sd median trimmed mad min
## pupil 1 2287 1337225.18 705458.10 1417007 1341135.43 904383.03 17001
## viq 2 2287 11.83 2.07 12 11.88 1.48 4
## language 3 2287 40.93 9.00 42 41.51 10.38 9
## ses 4 2287 27.81 10.91 27 27.26 10.38 10
## max range skew kurtosis se
## pupil 2587010 2570009 -0.06 -1.18 14751.57
## viq 18 14 -0.23 0.67 0.04
## language 58 49 -0.52 -0.32 0.19
## ses 50 40 0.40 -0.75 0.23
#do correlation
lm(language ~ viq+ses, data = mydta)
##
## Call:
## lm(formula = language ~ viq + ses, data = mydta)
##
## Coefficients:
## (Intercept) viq ses
## 8.3313 2.4054 0.1488
dta <- nlschools
dta_mean <- aggregate(dta[,1:2], by = list(dta$class), mean)
head(dta_mean)
## Group.1 lang IQ
## 1 180 36.40000 10.320000
## 2 280 23.71429 9.000000
## 3 1082 30.40000 10.500000
## 4 1280 30.86667 9.433333
## 5 1580 30.87500 11.500000
## 6 1680 41.50000 11.687500
data("Gcsemv", package = "mlmRev")
dta <-Gcsemv
complete_dta <- dta[complete.cases(dta), ] #remove rows with nas
# correlation between written and course / or class in gender first
Gcs_m <- aggregate(cbind(written, course) ~ gender, data = complete_dta,
FUN = mean)
with(complete_dta, plot(written, course, col = "gray"))
abline(lm(course ~ written, data = complete_dta), col = "gray")
with(Gcs_m, points(course, written))
abline(lm(written ~ course, data = Gcs_m))
library(magrittr)
library(plyr)
library(lattice)
library(ggplot2)
data("Gcsemv", package = "mlmRev")
dta <-Gcsemv
complete_dta <- dta[complete.cases(dta), ]
# all correlation
dtaall_cor <- cor(complete_dta$written, complete_dta$course)
# find each school's correlation
each_cor <- ddply(complete_dta, "school", summarise, corr=cor(written, course))
each_cor %<>% na.omit(each_cor)
# draw histogram
ggplot(data=each_cor, aes(each_cor$corr)) +
geom_histogram(fill = "skyblue") +
labs(x = "Correlation coefficients", y = "count") +
geom_vline(xintercept = dtaall_cor) +
geom_vline(xintercept = mean(each_cor[,2]), linetype="dotted", col="red")
library(pacman)
## Warning: package 'pacman' was built under R version 3.4.2
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 3.4.2
# packages in use in this script
pacman::p_load(tidyverse, car, HSAUR3)
# make sure data is invoked
data(BtheB)
# data structure
str(BtheB)
## 'data.frame': 100 obs. of 8 variables:
## $ drug : Factor w/ 2 levels "No","Yes": 1 2 2 1 2 2 2 1 2 2 ...
## $ length : Factor w/ 2 levels "<6m",">6m": 2 2 1 2 2 1 1 2 1 2 ...
## $ treatment: Factor w/ 2 levels "TAU","BtheB": 1 2 1 2 2 2 1 1 2 2 ...
## $ bdi.pre : num 29 32 25 21 26 7 17 20 18 20 ...
## $ bdi.2m : num 2 16 20 17 23 0 7 20 13 5 ...
## $ bdi.3m : num 2 24 NA 16 NA 0 7 21 14 5 ...
## $ bdi.5m : num NA 17 NA 10 NA 0 3 19 20 8 ...
## $ bdi.8m : num NA 20 NA 9 NA 0 7 13 11 12 ...
# first 6 lines
head(BtheB)
## drug length treatment bdi.pre bdi.2m bdi.3m bdi.5m bdi.8m
## 1 No >6m TAU 29 2 2 NA NA
## 2 Yes >6m BtheB 32 16 24 17 20
## 3 Yes <6m TAU 25 20 NA NA NA
## 4 No >6m BtheB 21 17 16 10 9
## 5 Yes >6m BtheB 26 23 NA NA NA
## 6 Yes <6m BtheB 7 0 0 0 0
# add ID to the original data and save a copy to dta
dta <- data.frame(ID = rownames(BtheB), BtheB)
# rename variables 3-7 to different names
names(dta)[5:9] <- paste0("month_", c(0, 2, 3, 5, 8))
# convert wide to long format
# add time variable and remove useless column
dtaL <- dta %>% gather(Test, Score, 5:9) %>%
separate(Test, c("Var", "Month")) %>%
mutate(Month = as.numeric(Month) , Group = treatment) %>%
dplyr::select( -Var)
#
ot <- theme_set(theme_bw())
# panel plot by group
ggplot(data = dtaL , aes(x = Month, y = Score, group = ID)) +
geom_point() +
geom_line() +
facet_wrap(~ Group)
## Warning: Removed 120 rows containing missing values (geom_point).
## Warning: Removed 120 rows containing missing values (geom_path).
# means plus-minus CIs
ggplot(data = dtaL , aes(x = Month, y = Score, color = Group)) +
stat_summary(fun.data = "mean_cl_boot", geom = "pointrange",
position = position_dodge(.3))
## Warning: Removed 120 rows containing non-finite values (stat_summary).
library(WWGbook)
# load in data
dta <- autism
#turn into factor
sicdegp <- factor(dta$sicdegp)
#plot
g1 <- ggplot(data = dta , aes(x = age, y = vsae, group = childid)) +
geom_point() +
geom_line() +
facet_wrap(~ sicdegp)
g2 <- g1 + labs(x = "Age (years) ", y= "VSAE score")
g2
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_path).