Pull in the dataset:
# Install RCurl only when it is not already available, so re-running the
# script does not download the package again each time. The CRAN mirror is
# reached over HTTPS rather than plain HTTP.
if (!requireNamespace("RCurl", quietly = TRUE)) {
  install.packages("RCurl", repos = "https://cran.us.r-project.org")
}
##
## The downloaded binary packages are in
## /var/folders/_x/1p72wwgj3zd_c9z4dkzzc7s80000gn/T//Rtmpn3RhQ8/downloaded_packages
# Load RCurl, download the CSV from GitHub as text, and parse that text
# into the `crusio1` data frame.
library(RCurl)
crusio1_csv_address <-
  "https://raw.githubusercontent.com/fredwillie/R_PhDProg_NYU_2020/master/Crusio1.csv"
# `url` holds what getURL() returned; it is handed to read.csv() via `text =`.
url <- getURL(crusio1_csv_address)
crusio1 <- read.csv(text = url)
- Look up the arguments for read.csv. Phrased another way: what is the command to view the help information for a function in R?
# Open the documentation page for read.csv (its arguments are listed there).
# Typing `?read.csv` at the console is the shorthand for the same request.
help(topic = "read.csv")
- Use apply() to find the median number of arms explored on each of days one through five (hint: variables num_arms_d1, etc.).
# List every column name in the data frame. The bracketed number at the start
# of each printed row is the position of the first name on that row, which
# makes it easy to look up a column's index.
names(crusio1)
## [1] "strain" "sex" "id"
## [4] "bw" "center_time" "center_dist"
## [7] "periphery_time" "periphery_dist" "periphery_dist_pct"
## [10] "activity" "lean" "rear"
## [13] "jump" "defec" "groom_freq"
## [16] "groom_dur" "groom_bout" "task_time_d1"
## [19] "task_time_d2" "task_time_d3" "task_time_d4"
## [22] "task_time_d5" "num_arms_d1" "num_arms_d2"
## [25] "num_arms_d3" "num_arms_d4" "num_arms_d5"
## [28] "num_arms_adj_d3" "num_arms_adj_d4" "num_arms_adj_d5"
## [31] "num_arms_adj_d3_d5" "errors_d1" "errors_d2"
## [34] "errors_d3" "errors_d4" "errors_d5"
## [37] "errors_d3_d5" "visit_time_d3" "visit_time_d4"
## [40] "visit_time_d5" "visit_time_d3_d5" "latency_d1"
## [43] "latency_d2" "attack_d1" "attack_d2"
## [46] "attack_combine" "brain_wt" "brain_wt_pct"
## [49] "hippocampus_L" "hippocampus_R" "iipmf_L"
## [52] "iipmf_R" "iipmf_pct_L" "iipmf_pct_R"
## [55] "iipmf_pct_mean" "hilus_L" "hilus_R"
## [58] "hilus_pct_L" "hilus_pct_R" "hilus_pct_mean"
## [61] "supra_L" "supra_R" "supra_pct_L"
## [64] "supra_pct_R" "supra_pct_mean" "pyr_L"
## [67] "pyr_R" "pyr_pct_L" "pyr_pct_R"
## [70] "pyr_pct_mean" "oriens_L" "oriens_R"
## [73] "oriens_pct_L" "oriens_pct_R" "oriens_pct_mean"
## [76] "rad_L" "rad_R" "rad_pct_L"
## [79] "rad_pct_R" "rad_pct_mean" "lacun_L"
## [82] "lacun_R" "lacun_pct_L" "lacun_pct_R"
## [85] "lacun_pct_mean"
# apply() runs a function over one margin of a table; MARGIN = 2 means
# "once per column". Columns 23 to 27 are num_arms_d1 ... num_arms_d5.
# The row index is left empty so that every row contributes to each median:
# `crusio1[1:5, 23:27]` would keep only the first five rows, which is not the
# same thing as days one to five.
# NOTE: output printed beneath this call was produced with the five-row subset
# and has to be regenerated.
apply(crusio1[, 23:27], 2, median, na.rm = TRUE)
## num_arms_d1 num_arms_d2 num_arms_d3 num_arms_d4 num_arms_d5
## 5 6 5 6 6
- Do the same using sapply()
# sapply() calls the function once per column of a data frame, so unlike
# apply() it needs no MARGIN argument. Columns 23 to 27 are
# num_arms_d1 ... num_arms_d5; the empty row index keeps every row
# (`1:5` in the row position would restrict the medians to five rows).
# NOTE: output printed beneath this call was produced with the five-row subset
# and has to be regenerated.
sapply(crusio1[, 23:27], median, na.rm = TRUE)
## num_arms_d1 num_arms_d2 num_arms_d3 num_arms_d4 num_arms_d5
## 5 6 5 6 6
- Use a for loop to find the median value of columns 18:22
# Print the column names again to locate the task_time_d1 ... task_time_d5
# columns (positions 18 to 22).
colnames(crusio1)
## [1] "strain" "sex" "id"
## [4] "bw" "center_time" "center_dist"
## [7] "periphery_time" "periphery_dist" "periphery_dist_pct"
## [10] "activity" "lean" "rear"
## [13] "jump" "defec" "groom_freq"
## [16] "groom_dur" "groom_bout" "task_time_d1"
## [19] "task_time_d2" "task_time_d3" "task_time_d4"
## [22] "task_time_d5" "num_arms_d1" "num_arms_d2"
## [25] "num_arms_d3" "num_arms_d4" "num_arms_d5"
## [28] "num_arms_adj_d3" "num_arms_adj_d4" "num_arms_adj_d5"
## [31] "num_arms_adj_d3_d5" "errors_d1" "errors_d2"
## [34] "errors_d3" "errors_d4" "errors_d5"
## [37] "errors_d3_d5" "visit_time_d3" "visit_time_d4"
## [40] "visit_time_d5" "visit_time_d3_d5" "latency_d1"
## [43] "latency_d2" "attack_d1" "attack_d2"
## [46] "attack_combine" "brain_wt" "brain_wt_pct"
## [49] "hippocampus_L" "hippocampus_R" "iipmf_L"
## [52] "iipmf_R" "iipmf_pct_L" "iipmf_pct_R"
## [55] "iipmf_pct_mean" "hilus_L" "hilus_R"
## [58] "hilus_pct_L" "hilus_pct_R" "hilus_pct_mean"
## [61] "supra_L" "supra_R" "supra_pct_L"
## [64] "supra_pct_R" "supra_pct_mean" "pyr_L"
## [67] "pyr_R" "pyr_pct_L" "pyr_pct_R"
## [70] "pyr_pct_mean" "oriens_L" "oriens_R"
## [73] "oriens_pct_L" "oriens_pct_R" "oriens_pct_mean"
## [76] "rad_L" "rad_R" "rad_pct_L"
## [79] "rad_pct_R" "rad_pct_mean" "lacun_L"
## [82] "lacun_R" "lacun_pct_L" "lacun_pct_R"
## [85] "lacun_pct_mean"
# Loop over the task-time columns (positions 18 to 22). For each one, compute
# the median with missing values ignored and print it next to the column name.
for (i in 18:22) {
  # Median of column i; `total` holds the result for this iteration.
  total <- median(crusio1[[i]], na.rm = TRUE)
  # c() combines a name and a number into one character vector, so the median
  # is printed as text alongside its column name.
  print(c(colnames(crusio1)[i], total))
}
## [1] "task_time_d1" "466"
## [1] "task_time_d2" "323"
## [1] "task_time_d3" "268"
## [1] "task_time_d4" "225"
## [1] "task_time_d5" "174"
- Make a for loop to find the standard deviation (hint sd() ) of errors on each day
# Print the column names again to locate the errors_d1 ... errors_d5 columns
# (positions 32 to 36).
colnames(crusio1)
## [1] "strain" "sex" "id"
## [4] "bw" "center_time" "center_dist"
## [7] "periphery_time" "periphery_dist" "periphery_dist_pct"
## [10] "activity" "lean" "rear"
## [13] "jump" "defec" "groom_freq"
## [16] "groom_dur" "groom_bout" "task_time_d1"
## [19] "task_time_d2" "task_time_d3" "task_time_d4"
## [22] "task_time_d5" "num_arms_d1" "num_arms_d2"
## [25] "num_arms_d3" "num_arms_d4" "num_arms_d5"
## [28] "num_arms_adj_d3" "num_arms_adj_d4" "num_arms_adj_d5"
## [31] "num_arms_adj_d3_d5" "errors_d1" "errors_d2"
## [34] "errors_d3" "errors_d4" "errors_d5"
## [37] "errors_d3_d5" "visit_time_d3" "visit_time_d4"
## [40] "visit_time_d5" "visit_time_d3_d5" "latency_d1"
## [43] "latency_d2" "attack_d1" "attack_d2"
## [46] "attack_combine" "brain_wt" "brain_wt_pct"
## [49] "hippocampus_L" "hippocampus_R" "iipmf_L"
## [52] "iipmf_R" "iipmf_pct_L" "iipmf_pct_R"
## [55] "iipmf_pct_mean" "hilus_L" "hilus_R"
## [58] "hilus_pct_L" "hilus_pct_R" "hilus_pct_mean"
## [61] "supra_L" "supra_R" "supra_pct_L"
## [64] "supra_pct_R" "supra_pct_mean" "pyr_L"
## [67] "pyr_R" "pyr_pct_L" "pyr_pct_R"
## [70] "pyr_pct_mean" "oriens_L" "oriens_R"
## [73] "oriens_pct_L" "oriens_pct_R" "oriens_pct_mean"
## [76] "rad_L" "rad_R" "rad_pct_L"
## [79] "rad_pct_R" "rad_pct_mean" "lacun_L"
## [82] "lacun_R" "lacun_pct_L" "lacun_pct_R"
## [85] "lacun_pct_mean"
# Same pattern as the median loop, with sd() in place of median(): for each
# error column (positions 32 to 36) compute the standard deviation with
# missing values ignored and print it next to the column name.
for (i in 32:36) {
  errore <- sd(crusio1[[i]], na.rm = TRUE)
  # c() turns the number into text so it can sit beside the column name.
  print(c(colnames(crusio1)[i], errore))
}
## [1] "errors_d1" "9.77762088513169"
## [1] "errors_d2" "6.76161734556014"
## [1] "errors_d3" "6.26035585362257"
## [1] "errors_d4" "5.31552765418239"
## [1] "errors_d5" "4.82222973762832"
- Challenge: Create a vector in which to put the standard deviations
# Collect the standard deviation of each error column in one named vector.
# The column positions are written once so that the vector length, the loop,
# and the names cannot drift apart.
error_cols <- 32:36  # errors_d1 ... errors_d5
# Pre-allocate one numeric slot per error column.
vc <- numeric(length(error_cols))
for (i in seq_along(error_cols)) {
  # `i` is the slot in `vc`; error_cols[i] is the matching column position,
  # which replaces a hand-computed `i - 31` offset.
  vc[i] <- sd(crusio1[[error_cols[i]]], na.rm = TRUE)
}
# Label each entry with the name of the column it was computed from.
names(vc) <- names(crusio1)[error_cols]
# Print the named vector.
vc
## errors_d1 errors_d2 errors_d3 errors_d4 errors_d5
## 9.777621 6.761617 6.260356 5.315528 4.822230
- Use ifelse() to create a new variable that classifies day-one task times as high or normal, using the upper quartile (the 75th percentile) as the threshold for "high".
# Add a placeholder column named V86 (the 86th column) with NA in every row.
crusio1[["V86"]] <- NA
# Confirm that the new column now appears at the end of the column list.
names(crusio1)
## [1] "strain" "sex" "id"
## [4] "bw" "center_time" "center_dist"
## [7] "periphery_time" "periphery_dist" "periphery_dist_pct"
## [10] "activity" "lean" "rear"
## [13] "jump" "defec" "groom_freq"
## [16] "groom_dur" "groom_bout" "task_time_d1"
## [19] "task_time_d2" "task_time_d3" "task_time_d4"
## [22] "task_time_d5" "num_arms_d1" "num_arms_d2"
## [25] "num_arms_d3" "num_arms_d4" "num_arms_d5"
## [28] "num_arms_adj_d3" "num_arms_adj_d4" "num_arms_adj_d5"
## [31] "num_arms_adj_d3_d5" "errors_d1" "errors_d2"
## [34] "errors_d3" "errors_d4" "errors_d5"
## [37] "errors_d3_d5" "visit_time_d3" "visit_time_d4"
## [40] "visit_time_d5" "visit_time_d3_d5" "latency_d1"
## [43] "latency_d2" "attack_d1" "attack_d2"
## [46] "attack_combine" "brain_wt" "brain_wt_pct"
## [49] "hippocampus_L" "hippocampus_R" "iipmf_L"
## [52] "iipmf_R" "iipmf_pct_L" "iipmf_pct_R"
## [55] "iipmf_pct_mean" "hilus_L" "hilus_R"
## [58] "hilus_pct_L" "hilus_pct_R" "hilus_pct_mean"
## [61] "supra_L" "supra_R" "supra_pct_L"
## [64] "supra_pct_R" "supra_pct_mean" "pyr_L"
## [67] "pyr_R" "pyr_pct_L" "pyr_pct_R"
## [70] "pyr_pct_mean" "oriens_L" "oriens_R"
## [73] "oriens_pct_L" "oriens_pct_R" "oriens_pct_mean"
## [76] "rad_L" "rad_R" "rad_pct_L"
## [79] "rad_pct_R" "rad_pct_mean" "lacun_L"
## [82] "lacun_R" "lacun_pct_L" "lacun_pct_R"
## [85] "lacun_pct_mean" "V86"
# Flag day-one task times that fall in the upper quartile: 1 = high,
# 0 = normal. The cutoff is the 75th-percentile *value* of task_time_d1,
# computed from the data; comparing against the literal number 75 marks
# every observed time as high. Rows with a missing task time stay NA.
# NOTE: output printed beneath this chunk was produced with the literal 75
# cutoff and has to be regenerated.
q3_task_time_d1 <- quantile(
  crusio1$task_time_d1,
  probs = 0.75,
  na.rm = TRUE,
  names = FALSE
)
# ifelse() already returns the full vector, so its yes/no arguments are plain
# values rather than assignments.
crusio1$V86 <- ifelse(crusio1$task_time_d1 > q3_task_time_d1, 1, 0)
crusio1$V86
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [26] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [51] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [76] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [101] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [126] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [151] 1 1 1 1 1 1 1 1 NA 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [176] 1 NA 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [201] 1 1 1 NA NA 1 1 NA NA 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [226] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [251] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [276] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [301] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 NA 1 1 1 1 1 1 1 1 1
## [326] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [351] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [376] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [401] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [426] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [451] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [476] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [501] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [526] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [551] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [576] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [601] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [626] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [651] NA 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [676] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [701] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 NA 1
## [726] 1 1 1 1 1 NA 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [751] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [776] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [801] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [826] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [851] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [876] 1 1 1 1 1 1 1 1 1 1 NA NA NA 1 1 NA NA 1 1 1 1 1 1 1 1
## [901] 1 1 1 1 1 1 1 1 NA 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# Inspect the distribution of day-one task times: quantiles from the 10th to
# the 90th percentile in steps of 5 percentage points, ignoring missing values.
quantile(crusio1$task_time_d1, probs = seq(0.1, 0.9, 0.05), na.rm = TRUE)
## 10% 15% 20% 25% 30% 35% 40% 45% 50% 55% 60%
## 236.0 267.0 298.0 323.5 343.0 364.0 397.4 432.0 466.0 493.0 530.6
## 65% 70% 75% 80% 85% 90%
## 566.9 608.0 660.0 730.0 859.3 1056.4