Week 2 Homework

Pull in the dataset:

# Install RCurl only when it is not already available, so that re-running this
# document does not download the package again each time. CRAN is contacted
# over HTTPS rather than plain HTTP.
if (!requireNamespace("RCurl", quietly = TRUE)) {
  install.packages("RCurl", repos = "https://cran.us.r-project.org")
}
## 
## The downloaded binary packages are in
##  /var/folders/_x/1p72wwgj3zd_c9z4dkzzc7s80000gn/T//Rtmpn3RhQ8/downloaded_packages
# Attach RCurl; it supplies getURL(), used below to fetch the raw CSV text.
library(RCurl)

# Download the file contents as one character string. Despite its name, `url`
# holds the downloaded text, not the address.
url <- getURL(
  url = "https://raw.githubusercontent.com/fredwillie/R_PhDProg_NYU_2020/master/Crusio1.csv"
)

# Parse the downloaded text as CSV into the data frame used by the exercises
# below.
crusio1 <- read.csv(text = url, header = TRUE)
  • Look up the arguments for read.csv. (Phrased another way: what is the command to view the help information for a function in R?)
# Open the help page for read.csv(); `?topic` is shorthand for help("topic").
?read.csv
  • Use apply() to find the median number of arms explored on each of days one through five (hint: variables num_arms_d1, num_arms_d2, etc.).
# List every column name. The bracketed number at the start of each printed
# row is the position of the first name on that row, which makes it easy to
# find a column's index in the data frame.
names(crusio1)
##  [1] "strain"             "sex"                "id"                
##  [4] "bw"                 "center_time"        "center_dist"       
##  [7] "periphery_time"     "periphery_dist"     "periphery_dist_pct"
## [10] "activity"           "lean"               "rear"              
## [13] "jump"               "defec"              "groom_freq"        
## [16] "groom_dur"          "groom_bout"         "task_time_d1"      
## [19] "task_time_d2"       "task_time_d3"       "task_time_d4"      
## [22] "task_time_d5"       "num_arms_d1"        "num_arms_d2"       
## [25] "num_arms_d3"        "num_arms_d4"        "num_arms_d5"       
## [28] "num_arms_adj_d3"    "num_arms_adj_d4"    "num_arms_adj_d5"   
## [31] "num_arms_adj_d3_d5" "errors_d1"          "errors_d2"         
## [34] "errors_d3"          "errors_d4"          "errors_d5"         
## [37] "errors_d3_d5"       "visit_time_d3"      "visit_time_d4"     
## [40] "visit_time_d5"      "visit_time_d3_d5"   "latency_d1"        
## [43] "latency_d2"         "attack_d1"          "attack_d2"         
## [46] "attack_combine"     "brain_wt"           "brain_wt_pct"      
## [49] "hippocampus_L"      "hippocampus_R"      "iipmf_L"           
## [52] "iipmf_R"            "iipmf_pct_L"        "iipmf_pct_R"       
## [55] "iipmf_pct_mean"     "hilus_L"            "hilus_R"           
## [58] "hilus_pct_L"        "hilus_pct_R"        "hilus_pct_mean"    
## [61] "supra_L"            "supra_R"            "supra_pct_L"       
## [64] "supra_pct_R"        "supra_pct_mean"     "pyr_L"             
## [67] "pyr_R"              "pyr_pct_L"          "pyr_pct_R"         
## [70] "pyr_pct_mean"       "oriens_L"           "oriens_R"          
## [73] "oriens_pct_L"       "oriens_pct_R"       "oriens_pct_mean"   
## [76] "rad_L"              "rad_R"              "rad_pct_L"         
## [79] "rad_pct_R"          "rad_pct_mean"       "lacun_L"           
## [82] "lacun_R"            "lacun_pct_L"        "lacun_pct_R"       
## [85] "lacun_pct_mean"
# apply() runs a function over the rows (MARGIN = 1) or the columns
# (MARGIN = 2) of a matrix-like object. Here it takes the median of each of
# the columns num_arms_d1 .. num_arms_d5, ignoring missing values.
# The row index is left empty so that every row of the data set is included;
# a row index of 1:5 would summarise only the first five rows.
# NOTE: printed results that were generated with the 1:5 row subset need to be
# regenerated by re-running this chunk.
apply(crusio1[, paste0("num_arms_d", 1:5)], 2, median, na.rm = TRUE)
## num_arms_d1 num_arms_d2 num_arms_d3 num_arms_d4 num_arms_d5 
##           5           6           5           6           6
  • Do the same using sapply()
# sapply() treats a data frame as a list of columns and applies the function
# to each one, so unlike apply() it needs no MARGIN argument.
# The row index is left empty so that every row of the data set is included;
# a row index of 1:5 would summarise only the first five rows.
# NOTE: printed results that were generated with the 1:5 row subset need to be
# regenerated by re-running this chunk.
sapply(crusio1[, paste0("num_arms_d", 1:5)], median, na.rm = TRUE)
## num_arms_d1 num_arms_d2 num_arms_d3 num_arms_d4 num_arms_d5 
##           5           6           5           6           6
  • Use a for loop to find the median value of columns 18:22
# Print the column names again to check which columns sit at positions 18:22.
colnames(crusio1)
##  [1] "strain"             "sex"                "id"                
##  [4] "bw"                 "center_time"        "center_dist"       
##  [7] "periphery_time"     "periphery_dist"     "periphery_dist_pct"
## [10] "activity"           "lean"               "rear"              
## [13] "jump"               "defec"              "groom_freq"        
## [16] "groom_dur"          "groom_bout"         "task_time_d1"      
## [19] "task_time_d2"       "task_time_d3"       "task_time_d4"      
## [22] "task_time_d5"       "num_arms_d1"        "num_arms_d2"       
## [25] "num_arms_d3"        "num_arms_d4"        "num_arms_d5"       
## [28] "num_arms_adj_d3"    "num_arms_adj_d4"    "num_arms_adj_d5"   
## [31] "num_arms_adj_d3_d5" "errors_d1"          "errors_d2"         
## [34] "errors_d3"          "errors_d4"          "errors_d5"         
## [37] "errors_d3_d5"       "visit_time_d3"      "visit_time_d4"     
## [40] "visit_time_d5"      "visit_time_d3_d5"   "latency_d1"        
## [43] "latency_d2"         "attack_d1"          "attack_d2"         
## [46] "attack_combine"     "brain_wt"           "brain_wt_pct"      
## [49] "hippocampus_L"      "hippocampus_R"      "iipmf_L"           
## [52] "iipmf_R"            "iipmf_pct_L"        "iipmf_pct_R"       
## [55] "iipmf_pct_mean"     "hilus_L"            "hilus_R"           
## [58] "hilus_pct_L"        "hilus_pct_R"        "hilus_pct_mean"    
## [61] "supra_L"            "supra_R"            "supra_pct_L"       
## [64] "supra_pct_R"        "supra_pct_mean"     "pyr_L"             
## [67] "pyr_R"              "pyr_pct_L"          "pyr_pct_R"         
## [70] "pyr_pct_mean"       "oriens_L"           "oriens_R"          
## [73] "oriens_pct_L"       "oriens_pct_R"       "oriens_pct_mean"   
## [76] "rad_L"              "rad_R"              "rad_pct_L"         
## [79] "rad_pct_R"          "rad_pct_mean"       "lacun_L"           
## [82] "lacun_R"            "lacun_pct_L"        "lacun_pct_R"       
## [85] "lacun_pct_mean"
# Loop over columns 18 to 22 and print each column's name together with its
# median.
for (i in 18:22) {
  # Median of the current column; na.rm = TRUE drops missing values first.
  total <- median(crusio1[, i], na.rm = TRUE)
  # c() combines the name and the number into one character vector, so the
  # median is printed as a quoted string next to its column name.
  print(c(colnames(crusio1)[i], total))
}
## [1] "task_time_d1" "466"         
## [1] "task_time_d2" "323"         
## [1] "task_time_d3" "268"         
## [1] "task_time_d4" "225"         
## [1] "task_time_d5" "174"
  • Make a for loop to find the standard deviation (hint: sd()) of errors on each day
# Print the column names again to locate the positions of the errors columns.
colnames(crusio1)
##  [1] "strain"             "sex"                "id"                
##  [4] "bw"                 "center_time"        "center_dist"       
##  [7] "periphery_time"     "periphery_dist"     "periphery_dist_pct"
## [10] "activity"           "lean"               "rear"              
## [13] "jump"               "defec"              "groom_freq"        
## [16] "groom_dur"          "groom_bout"         "task_time_d1"      
## [19] "task_time_d2"       "task_time_d3"       "task_time_d4"      
## [22] "task_time_d5"       "num_arms_d1"        "num_arms_d2"       
## [25] "num_arms_d3"        "num_arms_d4"        "num_arms_d5"       
## [28] "num_arms_adj_d3"    "num_arms_adj_d4"    "num_arms_adj_d5"   
## [31] "num_arms_adj_d3_d5" "errors_d1"          "errors_d2"         
## [34] "errors_d3"          "errors_d4"          "errors_d5"         
## [37] "errors_d3_d5"       "visit_time_d3"      "visit_time_d4"     
## [40] "visit_time_d5"      "visit_time_d3_d5"   "latency_d1"        
## [43] "latency_d2"         "attack_d1"          "attack_d2"         
## [46] "attack_combine"     "brain_wt"           "brain_wt_pct"      
## [49] "hippocampus_L"      "hippocampus_R"      "iipmf_L"           
## [52] "iipmf_R"            "iipmf_pct_L"        "iipmf_pct_R"       
## [55] "iipmf_pct_mean"     "hilus_L"            "hilus_R"           
## [58] "hilus_pct_L"        "hilus_pct_R"        "hilus_pct_mean"    
## [61] "supra_L"            "supra_R"            "supra_pct_L"       
## [64] "supra_pct_R"        "supra_pct_mean"     "pyr_L"             
## [67] "pyr_R"              "pyr_pct_L"          "pyr_pct_R"         
## [70] "pyr_pct_mean"       "oriens_L"           "oriens_R"          
## [73] "oriens_pct_L"       "oriens_pct_R"       "oriens_pct_mean"   
## [76] "rad_L"              "rad_R"              "rad_pct_L"         
## [79] "rad_pct_R"          "rad_pct_mean"       "lacun_L"           
## [82] "lacun_R"            "lacun_pct_L"        "lacun_pct_R"       
## [85] "lacun_pct_mean"
# Same approach as the median loop, with sd() in place of median(): loop over
# columns 32 to 36 and print each column's name with its standard deviation.
for (i in 32:36) {
  # Standard deviation of the current column; na.rm = TRUE drops missing
  # values first.
  errore <- sd(crusio1[, i], na.rm = TRUE)
  # c() combines the name and the number into one character vector, so the
  # value is printed as a quoted string next to its column name.
  print(c(colnames(crusio1)[i], errore))
}
## [1] "errors_d1"        "9.77762088513169"
## [1] "errors_d2"        "6.76161734556014"
## [1] "errors_d3"        "6.26035585362257"
## [1] "errors_d4"        "5.31552765418239"
## [1] "errors_d5"        "4.82222973762832"
  • Challenge: Create a vector in which to put the standard deviations
# Positions of the columns whose standard deviations are collected.
error_cols <- 32:36

# Pre-allocate a numeric vector with one slot per column, so the loop fills
# existing positions instead of growing the vector.
vc <- numeric(length(error_cols))

# seq_along() indexes the result vector directly, so no manual offset between
# the column position and the slot in vc is needed.
for (k in seq_along(error_cols)) {
  vc[k] <- sd(crusio1[, error_cols[k]], na.rm = TRUE)
}

# Label each standard deviation with the name of the column it came from.
names(vc) <- names(crusio1)[error_cols]

# Print the named vector.
vc
## errors_d1 errors_d2 errors_d3 errors_d4 errors_d5 
##  9.777621  6.761617  6.260356  5.315528  4.822230
  • Use ifelse() to create a new variable that dichotomizes day-one task times into high and normal, using the upper quartile as the threshold for high levels
# Append a new column named V86 in which every value is NA; it is filled in
# by the ifelse() step further down.
crusio1$V86 <- NA
# Verify that the data frame now ends with the newly created column.
names(crusio1)
##  [1] "strain"             "sex"                "id"                
##  [4] "bw"                 "center_time"        "center_dist"       
##  [7] "periphery_time"     "periphery_dist"     "periphery_dist_pct"
## [10] "activity"           "lean"               "rear"              
## [13] "jump"               "defec"              "groom_freq"        
## [16] "groom_dur"          "groom_bout"         "task_time_d1"      
## [19] "task_time_d2"       "task_time_d3"       "task_time_d4"      
## [22] "task_time_d5"       "num_arms_d1"        "num_arms_d2"       
## [25] "num_arms_d3"        "num_arms_d4"        "num_arms_d5"       
## [28] "num_arms_adj_d3"    "num_arms_adj_d4"    "num_arms_adj_d5"   
## [31] "num_arms_adj_d3_d5" "errors_d1"          "errors_d2"         
## [34] "errors_d3"          "errors_d4"          "errors_d5"         
## [37] "errors_d3_d5"       "visit_time_d3"      "visit_time_d4"     
## [40] "visit_time_d5"      "visit_time_d3_d5"   "latency_d1"        
## [43] "latency_d2"         "attack_d1"          "attack_d2"         
## [46] "attack_combine"     "brain_wt"           "brain_wt_pct"      
## [49] "hippocampus_L"      "hippocampus_R"      "iipmf_L"           
## [52] "iipmf_R"            "iipmf_pct_L"        "iipmf_pct_R"       
## [55] "iipmf_pct_mean"     "hilus_L"            "hilus_R"           
## [58] "hilus_pct_L"        "hilus_pct_R"        "hilus_pct_mean"    
## [61] "supra_L"            "supra_R"            "supra_pct_L"       
## [64] "supra_pct_R"        "supra_pct_mean"     "pyr_L"             
## [67] "pyr_R"              "pyr_pct_L"          "pyr_pct_R"         
## [70] "pyr_pct_mean"       "oriens_L"           "oriens_R"          
## [73] "oriens_pct_L"       "oriens_pct_R"       "oriens_pct_mean"   
## [76] "rad_L"              "rad_R"              "rad_pct_L"         
## [79] "rad_pct_R"          "rad_pct_mean"       "lacun_L"           
## [82] "lacun_R"            "lacun_pct_L"        "lacun_pct_R"       
## [85] "lacun_pct_mean"     "V86"
# Threshold for a "high" day-one task time: the upper quartile (75th
# percentile) of task_time_d1, computed with missing values removed.
# The literal 75.00 is a task-time value, not a percentile, and it sits below
# even the 10th percentile of this column, so it labels almost every row high.
q3_task_time_d1 <- quantile(
  crusio1$task_time_d1,
  probs = 0.75,
  na.rm = TRUE,
  names = FALSE
)

# 1 = high (strictly above the upper quartile), 0 = normal; rows with a
# missing task time stay NA. The yes/no arguments are plain values: assigning
# to crusio1$V86 inside ifelse() is unnecessary.
crusio1$V86 <- ifelse(crusio1$task_time_d1 > q3_task_time_d1, 1, 0)

# NOTE: printed output that was generated with the 75.00 cut-off needs to be
# regenerated by re-running this chunk.
crusio1$V86
##   [1]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
##  [26]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
##  [51]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
##  [76]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [101]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [126]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [151]  1  1  1  1  1  1  1  1 NA  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [176]  1 NA  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [201]  1  1  1 NA NA  1  1 NA NA  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [226]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [251]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [276]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [301]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 NA  1  1  1  1  1  1  1  1  1
## [326]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [351]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [376]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [401]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [426]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [451]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [476]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [501]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [526]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [551]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [576]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [601]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [626]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [651] NA  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [676]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [701]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 NA  1
## [726]  1  1  1  1  1 NA  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [751]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [776]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [801]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [826]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [851]  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
## [876]  1  1  1  1  1  1  1  1  1  1 NA NA NA  1  1 NA NA  1  1  1  1  1  1  1  1
## [901]  1  1  1  1  1  1  1  1 NA  1  1  1  1  1  1  1  1  1  1  1  1  1  1
# Inspect the distribution of day-one task time: percentiles from 10% to 90%
# in steps of 5 percentage points, with missing values removed.
quantile(crusio1$task_time_d1, probs = seq(0.1, 0.9, by = 0.05), na.rm = TRUE)
##    10%    15%    20%    25%    30%    35%    40%    45%    50%    55%    60% 
##  236.0  267.0  298.0  323.5  343.0  364.0  397.4  432.0  466.0  493.0  530.6 
##    65%    70%    75%    80%    85%    90% 
##  566.9  608.0  660.0  730.0  859.3 1056.4