Exercise-1

base R approach

# read data from library HSAUR3
dta_backpain <- HSAUR3::backpain

# cheack the structure
str(dta_backpain)
## 'data.frame':    434 obs. of  4 variables:
##  $ ID      : Factor w/ 217 levels "1","2","3","4",..: 1 1 2 2 3 3 4 4 5 5 ...
##  $ status  : Factor w/ 2 levels "case","control": 1 2 1 2 1 2 1 2 1 2 ...
##  $ driver  : Factor w/ 2 levels "no","yes": 2 2 2 2 2 2 1 1 2 2 ...
##  $ suburban: Factor w/ 2 levels "no","yes": 2 1 2 2 1 2 1 1 1 2 ...
# print the first 6 lines
head(dta_backpain)
##   ID  status driver suburban
## 1  1    case    yes      yes
## 2  1 control    yes       no
## 3  2    case    yes      yes
## 4  2 control    yes      yes
## 5  3    case    yes       no
## 6  3 control    yes      yes
# create a summary table and make it a data frame
dta_backpain_table <- as.data.frame(with(dta_backpain, table(status, driver, suburban)))

# print it we can see there are 8 categories and the numbers of people in each category
dta_backpain_table
##    status driver suburban Freq
## 1    case     no       no   26
## 2 control     no       no   47
## 3    case    yes       no   64
## 4 control    yes       no   63
## 5    case     no      yes    6
## 6 control     no      yes    7
## 7    case    yes      yes  121
## 8 control    yes      yes  100
# make it wide shape 
dta_backpain_table_wide <- reshape(dta_backpain_table,
        # the columns we don't want to expand
        idvar=c("driver", "suburban"),
        # the column we want to expand
        timevar="status",
        # reshape to wide format
        direction="wide")

# rename the 3rd and 4th columns
colnames(dta_backpain_table_wide)[3:4] <- c("case", "control")

# compute the total number of each row
dta_backpain_table_wide$total <- dta_backpain_table_wide$case + dta_backpain_table_wide$control
# print it to check the format
dta_backpain_table_wide
##   driver suburban case control total
## 1     no       no   26      47    73
## 3    yes       no   64      63   127
## 5     no      yes    6       7    13
## 7    yes      yes  121     100   221
# sort the table in the provided order
dta_backpain_table_wide[order(dta_backpain_table_wide$driver), ]
##   driver suburban case control total
## 1     no       no   26      47    73
## 5     no      yes    6       7    13
## 3    yes       no   64      63   127
## 7    yes      yes  121     100   221

Tidyverse approach

# load tidyverse
library(tidyverse)

dta_backpain %>% 
  # group these 3 variables
  group_by(driver, suburban, status) %>% 
  # compute the number of subjects in each category
  summarise(count = length(status)) %>%
  # make it wide shape, col names from status and value from count
  pivot_wider(names_from = status, values_from = count) %>% 
  # compute the total number of each row and store it in the new column named total
  mutate(total = case + control)
## # A tibble: 4 x 5
## # Groups:   driver, suburban [4]
##   driver suburban  case control total
##   <fct>  <fct>    <int>   <int> <int>
## 1 no     no          26      47    73
## 2 no     yes          6       7    13
## 3 yes    no          64      63   127
## 4 yes    yes        121     100   221

Exercise-2

dta_state <- as.data.frame(datasets::state.x77)
dta_arrest <- as.data.frame(datasets::USArrests)

# create a column for row names
dta_state$state <-  row.names(dta_state)
dta_arrest$state <- row.names(dta_arrest)

# merge two data object by ID "state"
dta_state_arrest_merge <- merge(dta_state, dta_arrest, by = "state", all = T)
head(dta_state_arrest_merge)
##        state Population Income Illiteracy Life Exp Murder.x HS Grad Frost
## 1    Alabama       3615   3624        2.1    69.05     15.1    41.3    20
## 2     Alaska        365   6315        1.5    69.31     11.3    66.7   152
## 3    Arizona       2212   4530        1.8    70.55      7.8    58.1    15
## 4   Arkansas       2110   3378        1.9    70.66     10.1    39.9    65
## 5 California      21198   5114        1.1    71.71     10.3    62.6    20
## 6   Colorado       2541   4884        0.7    72.06      6.8    63.9   166
##     Area Murder.y Assault UrbanPop Rape
## 1  50708     13.2     236       58 21.2
## 2 566432     10.0     263       48 44.5
## 3 113417      8.1     294       80 31.0
## 4  51945      8.8     190       50 19.5
## 5 156361      9.0     276       91 40.6
## 6 103766      7.9     204       78 38.7
# compute all pair-wise correlations for numerical variables, in other words remove the first column
cor(dta_state_arrest_merge[, -1])
##             Population      Income  Illiteracy    Life Exp    Murder.x
## Population  1.00000000  0.20822756  0.10762237 -0.06805195  0.34364275
## Income      0.20822756  1.00000000 -0.43707519  0.34025534 -0.23007761
## Illiteracy  0.10762237 -0.43707519  1.00000000 -0.58847793  0.70297520
## Life Exp   -0.06805195  0.34025534 -0.58847793  1.00000000 -0.78084575
## Murder.x    0.34364275 -0.23007761  0.70297520 -0.78084575  1.00000000
## HS Grad    -0.09848975  0.61993232 -0.65718861  0.58221620 -0.48797102
## Frost      -0.33215245  0.22628218 -0.67194697  0.26206801 -0.53888344
## Area        0.02254384  0.36331544  0.07726113 -0.10733194  0.22839021
## Murder.y    0.32024487 -0.21520501  0.70677564 -0.77849850  0.93369089
## Assault     0.31702281  0.04093255  0.51101299 -0.62665800  0.73976479
## UrbanPop    0.51260491  0.48053302 -0.06219936  0.27146824  0.01638255
## Rape        0.30523361  0.35738678  0.15459686 -0.26956828  0.57996132
##                HS Grad      Frost        Area    Murder.y     Assault
## Population -0.09848975 -0.3321525  0.02254384  0.32024487  0.31702281
## Income      0.61993232  0.2262822  0.36331544 -0.21520501  0.04093255
## Illiteracy -0.65718861 -0.6719470  0.07726113  0.70677564  0.51101299
## Life Exp    0.58221620  0.2620680 -0.10733194 -0.77849850 -0.62665800
## Murder.x   -0.48797102 -0.5388834  0.22839021  0.93369089  0.73976479
## HS Grad     1.00000000  0.3667797  0.33354187 -0.52159126 -0.23030510
## Frost       0.36677970  1.0000000  0.05922910 -0.54139702 -0.46823989
## Area        0.33354187  0.0592291  1.00000000  0.14808597  0.23120879
## Murder.y   -0.52159126 -0.5413970  0.14808597  1.00000000  0.80187331
## Assault    -0.23030510 -0.4682399  0.23120879  0.80187331  1.00000000
## UrbanPop    0.35868123 -0.2461862 -0.06154747  0.06957262  0.25887170
## Rape        0.27072504 -0.2792054  0.52495510  0.56357883  0.66524123
##               UrbanPop       Rape
## Population  0.51260491  0.3052336
## Income      0.48053302  0.3573868
## Illiteracy -0.06219936  0.1545969
## Life Exp    0.27146824 -0.2695683
## Murder.x    0.01638255  0.5799613
## HS Grad     0.35868123  0.2707250
## Frost      -0.24618618 -0.2792054
## Area       -0.06154747  0.5249551
## Murder.y    0.06957262  0.5635788
## Assault     0.25887170  0.6652412
## UrbanPop    1.00000000  0.4113412
## Rape        0.41134124  1.0000000

Exercise-3

The data set concerns species and weight of animals caught in plots in a study area in Arizona over time.

Each row holds information for a single animal, and the columns represent:

  • record_id: Unique id for the observation
  • month: month of observation
  • day: day of observation
  • year: year of observation
  • plot_id: ID of a particular plot
  • species_id: 2-letter code
  • sex: sex of animal (“M”, “F”)
  • hindfoot_length: length of the hindfoot in mm
  • weight: weight of the animal in grams
  • genus: genus of animal
  • species: species of animal
  • taxa: e.g. Rodent, Reptile, Bird, Rabbit
  • plot_type: type of plot
# load package
pacman::p_load(tidyverse)
# read data from url
dta <- read_csv("http://kbroman.org/datacarp/portal_data_joined.csv")
# print the number of rows
# print the number of cols
# print the col names
# print the data type of each col
# print some example entries of each col
glimpse(dta)
## Observations: 34,786
## Variables: 13
## $ record_id       <dbl> 1, 72, 224, 266, 349, 363, 435, 506, 588, 661, 748,...
## $ month           <dbl> 7, 8, 9, 10, 11, 11, 12, 1, 2, 3, 4, 5, 6, 8, 9, 10...
## $ day             <dbl> 16, 19, 13, 16, 12, 12, 10, 8, 18, 11, 8, 6, 9, 5, ...
## $ year            <dbl> 1977, 1977, 1977, 1977, 1977, 1977, 1977, 1978, 197...
## $ plot_id         <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...
## $ species_id      <chr> "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL...
## $ sex             <chr> "M", "M", NA, NA, NA, NA, NA, NA, "M", NA, NA, "M",...
## $ hindfoot_length <dbl> 32, 31, NA, NA, NA, NA, NA, NA, NA, NA, NA, 32, NA,...
## $ weight          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, 218, NA, NA, 204, 2...
## $ genus           <chr> "Neotoma", "Neotoma", "Neotoma", "Neotoma", "Neotom...
## $ species         <chr> "albigula", "albigula", "albigula", "albigula", "al...
## $ taxa            <chr> "Rodent", "Rodent", "Rodent", "Rodent", "Rodent", "...
## $ plot_type       <chr> "Control", "Control", "Control", "Control", "Contro...
# check dimensions of dta
dim(dta)
## [1] 34786    13
# print the first 6 lines of three varibales, plot_id, species_id, weight.
dplyr::select(dta, plot_id, species_id, weight) %>% head()
## # A tibble: 6 x 3
##   plot_id species_id weight
##     <dbl> <chr>       <dbl>
## 1       2 NL             NA
## 2       2 NL             NA
## 3       2 NL             NA
## 4       2 NL             NA
## 5       2 NL             NA
## 6       2 NL             NA
# print the first 6 lines of dta besides two variables, record_id, species_id
dplyr::select(dta, -record_id, -species_id) %>% head()
## # A tibble: 6 x 11
##   month   day  year plot_id sex   hindfoot_length weight genus species taxa 
##   <dbl> <dbl> <dbl>   <dbl> <chr>           <dbl>  <dbl> <chr> <chr>   <chr>
## 1     7    16  1977       2 M                  32     NA Neot~ albigu~ Rode~
## 2     8    19  1977       2 M                  31     NA Neot~ albigu~ Rode~
## 3     9    13  1977       2 <NA>               NA     NA Neot~ albigu~ Rode~
## 4    10    16  1977       2 <NA>               NA     NA Neot~ albigu~ Rode~
## 5    11    12  1977       2 <NA>               NA     NA Neot~ albigu~ Rode~
## 6    11    12  1977       2 <NA>               NA     NA Neot~ albigu~ Rode~
## # ... with 1 more variable: plot_type <chr>
# the first 6 lines of dta from year 1995
dplyr::filter(dta, year == 1995) %>% head()
## # A tibble: 6 x 13
##   record_id month   day  year plot_id species_id sex   hindfoot_length weight
##       <dbl> <dbl> <dbl> <dbl>   <dbl> <chr>      <chr>           <dbl>  <dbl>
## 1     22314     6     7  1995       2 NL         M                  34     NA
## 2     22728     9    23  1995       2 NL         F                  32    165
## 3     22899    10    28  1995       2 NL         F                  32    171
## 4     23032    12     2  1995       2 NL         F                  33     NA
## 5     22003     1    11  1995       2 DM         M                  37     41
## 6     22042     2     4  1995       2 DM         F                  36     45
## # ... with 4 more variables: genus <chr>, species <chr>, taxa <chr>,
## #   plot_type <chr>
# remove all the entries that have more than 5 in weight
# select 3 column to display, weight, species_id, sex
# print the first 6 lines
head(dplyr::select(dplyr::filter(dta, weight <= 5), species_id, sex, weight))
## # A tibble: 6 x 3
##   species_id sex   weight
##   <chr>      <chr>  <dbl>
## 1 PF         M          5
## 2 PF         F          5
## 3 PF         F          5
## 4 PF         F          4
## 5 PF         F          5
## 6 PF         F          4
dta %>% 
  # remove the rows where weight is larger than 5
  dplyr::filter(weight <= 5) %>% 
  # select 3 variables, species_id, sex and weight to display
  dplyr::select(species_id, sex, weight) %>% 
  # print the first 6 lines
  head
## # A tibble: 6 x 3
##   species_id sex   weight
##   <chr>      <chr>  <dbl>
## 1 PF         M          5
## 2 PF         F          5
## 3 PF         F          5
## 4 PF         F          4
## 5 PF         F          5
## 6 PF         F          4
dta %>% 
  # create two new columns
  # weight_kg =  weight / 100
  # weight_lb =  weight_kg * 2.2
  mutate(weight_kg = weight / 1000,
         weight_lb = weight_kg * 2.2) %>% 
  # print the first 6 lines
  head()
## # A tibble: 6 x 15
##   record_id month   day  year plot_id species_id sex   hindfoot_length weight
##       <dbl> <dbl> <dbl> <dbl>   <dbl> <chr>      <chr>           <dbl>  <dbl>
## 1         1     7    16  1977       2 NL         M                  32     NA
## 2        72     8    19  1977       2 NL         M                  31     NA
## 3       224     9    13  1977       2 NL         <NA>               NA     NA
## 4       266    10    16  1977       2 NL         <NA>               NA     NA
## 5       349    11    12  1977       2 NL         <NA>               NA     NA
## 6       363    11    12  1977       2 NL         <NA>               NA     NA
## # ... with 6 more variables: genus <chr>, species <chr>, taxa <chr>,
## #   plot_type <chr>, weight_kg <dbl>, weight_lb <dbl>
dta %>% 
  # remove all the entries that have na value in column weight
  filter(!is.na(weight)) %>%
  # group the data by sex and species_id
  group_by(sex, species_id) %>%
  # compute the mean of column weight within the group which was specified in the above line
  summarize(mean_weight = mean(weight)) %>%
  # sort the data by column mean_weight in an descending order
  arrange(desc(mean_weight)) %>% 
  # print the first 6 lines
  head()
## # A tibble: 6 x 3
## # Groups:   sex [3]
##   sex   species_id mean_weight
##   <chr> <chr>            <dbl>
## 1 <NA>  NL                168.
## 2 M     NL                166.
## 3 F     NL                154.
## 4 M     SS                130 
## 5 <NA>  SH                130 
## 6 M     DS                122.
dta %>%
  # group the data by the variable sex
  group_by(sex) %>%
  # compute the freq within the group
  tally
## # A tibble: 3 x 2
##   sex       n
##   <chr> <int>
## 1 F     15690
## 2 M     17348
## 3 <NA>   1748
dta %>%
  # compute the freq of the variable sex
  count(sex)
## # A tibble: 3 x 2
##   sex       n
##   <chr> <int>
## 1 F     15690
## 2 M     17348
## 3 <NA>   1748
dta %>%
  group_by(sex) %>%
  summarize(count = n())
## # A tibble: 3 x 2
##   sex   count
##   <chr> <int>
## 1 F     15690
## 2 M     17348
## 3 <NA>   1748
dta %>%
  group_by(sex) %>%
  # compute the freq of sex using varible year
  summarize(count = sum(!is.na(year)))
## # A tibble: 3 x 2
##   sex   count
##   <chr> <int>
## 1 F     15690
## 2 M     17348
## 3 <NA>   1748
  # !is.na(year) return a logical vector within which one FALSE represents a missing value
  # when you compute a logical vector all the TRUEs would become 1 and all the FALSEs would become 0
  # therefore, when you perform a summation for a logical vector, it will return an integer which means how many TRUEs in it
  # in this case, it returns the number of valid entries.
dta_gw <- dta %>% 
  # remove all the rows with na value in column weight
  filter(!is.na(weight)) %>%
  # group the data by variables genus and plot_id
  group_by(genus, plot_id) %>%
  # compute the mean of column weight within the group
  summarize(mean_weight = mean(weight))
# print the number of rows and cols
# print the data type of each col
# print some example entries of each col
glimpse(dta_gw)
## Observations: 196
## Variables: 3
## Groups: genus [10]
## $ genus       <chr> "Baiomys", "Baiomys", "Baiomys", "Baiomys", "Baiomys", ...
## $ plot_id     <dbl> 1, 2, 3, 5, 18, 19, 20, 21, 1, 2, 3, 4, 5, 6, 7, 8, 9, ...
## $ mean_weight <dbl> 7.000000, 6.000000, 8.611111, 7.750000, 9.500000, 9.533...
dta_w <- dta_gw %>%
  # transform the data into a wide form
  # which means using the value of gunus as columns
  # and put the value of mean_weight in the correspoding entries
  spread(key = genus, value = mean_weight)
# rows drops from 196 to 24
# cols increases from 3 to 11
glimpse(dta_w)
## Observations: 24
## Variables: 11
## $ plot_id         <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ...
## $ Baiomys         <dbl> 7.000000, 6.000000, 8.611111, NA, 7.750000, NA, NA,...
## $ Chaetodipus     <dbl> 22.19939, 25.11014, 24.63636, 23.02381, 17.98276, 2...
## $ Dipodomys       <dbl> 60.23214, 55.68259, 52.04688, 57.52454, 51.11356, 5...
## $ Neotoma         <dbl> 156.2222, 169.1436, 158.2414, 164.1667, 190.0370, 1...
## $ Onychomys       <dbl> 27.67550, 26.87302, 26.03241, 28.09375, 27.01695, 2...
## $ Perognathus     <dbl> 9.625000, 6.947368, 7.507812, 7.824427, 8.658537, 7...
## $ Peromyscus      <dbl> 22.22222, 22.26966, 21.37037, 22.60000, 21.23171, 2...
## $ Reithrodontomys <dbl> 11.375000, 10.680556, 10.516588, 10.263158, 11.1545...
## $ Sigmodon        <dbl> NA, 70.85714, 65.61404, 82.00000, 82.66667, 68.7777...
## $ Spermophilus    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
dta_gw %>%
  # make a wide form
  # replace missing values by 0
  spread(genus, mean_weight, fill = 0) %>%
  # print the first 6 lines
  head()
## # A tibble: 6 x 11
##   plot_id Baiomys Chaetodipus Dipodomys Neotoma Onychomys Perognathus Peromyscus
##     <dbl>   <dbl>       <dbl>     <dbl>   <dbl>     <dbl>       <dbl>      <dbl>
## 1       1    7           22.2      60.2    156.      27.7        9.62       22.2
## 2       2    6           25.1      55.7    169.      26.9        6.95       22.3
## 3       3    8.61        24.6      52.0    158.      26.0        7.51       21.4
## 4       4    0           23.0      57.5    164.      28.1        7.82       22.6
## 5       5    7.75        18.0      51.1    190.      27.0        8.66       21.2
## 6       6    0           24.9      58.6    180.      25.9        7.81       21.8
## # ... with 3 more variables: Reithrodontomys <dbl>, Sigmodon <dbl>,
## #   Spermophilus <dbl>
dta_l <- dta_w %>%
  # make a long form
  # stored all cols besides plot_id into a new col named genus
  # stored all values besides those in col plot_id into a new col named mean_weight
  gather(key = genus, value = mean_weight, -plot_id)
# rows increases from 24 to 240
# cols drops form 11 to 3
glimpse(dta_l)
## Observations: 240
## Variables: 3
## $ plot_id     <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ...
## $ genus       <chr> "Baiomys", "Baiomys", "Baiomys", "Baiomys", "Baiomys", ...
## $ mean_weight <dbl> 7.000000, 6.000000, 8.611111, NA, 7.750000, NA, NA, NA,...

I have no idea why the number of rows in the long form is 240. I expected that the number should have been equal to dta_gw that is 196.

dta_w %>%
  # make a long form
  # creat a new col named genus
  # store col from Baiomys to Spermophilus into genus
  # put the corresponding values into a new colunmn mean_weight 
  gather(key = genus, value = mean_weight, Baiomys:Spermophilus) %>%
  head()
## # A tibble: 6 x 3
##   plot_id genus   mean_weight
##     <dbl> <chr>         <dbl>
## 1       1 Baiomys        7   
## 2       2 Baiomys        6   
## 3       3 Baiomys        8.61
## 4       4 Baiomys       NA   
## 5       5 Baiomys        7.75
## 6       6 Baiomys       NA
dta_complete <- dta %>%
  # remove all entries with na value in column weight
  filter(!is.na(weight),
  # remove all entries with na value in column hindfoot_length
         !is.na(hindfoot_length),  
  # remove all entries with na value in column sex
         !is.na(sex))                
species_counts <- dta_complete %>%
  # compute freq for each category in column species_id
    count(species_id) %>% 
  # remove all the entries with value less than 50 in column n
    filter(n >= 50)
dta_complete <- dta_complete %>%
  # remove the entries in data object dta_complete where the column species_id has the same value in the column species_id in data object species_counts
  filter(species_id %in% species_counts$species_id)

Exercise-4

library(car)
data(Vocab)
dta_vocab <- Vocab

base R approach

head(aggregate(data = dta_vocab, .~year + sex, mean))
##   year    sex education vocabulary
## 1 1974 Female  11.82429   6.077519
## 2 1976 Female  11.60710   6.140684
## 3 1978 Female  11.78978   6.019744
## 4 1982 Female  12.10911   5.765766
## 5 1984 Female  12.38200   6.070560
## 6 1987 Female  12.45627   5.752371

tidyverse approach

dta_vocab %>% group_by(year, sex) %>% 
  summarise(mean_edu = mean(education), mean_vocab = mean(vocabulary)) %>% head()
## # A tibble: 6 x 4
## # Groups:   year [3]
##    year sex    mean_edu mean_vocab
##   <dbl> <fct>     <dbl>      <dbl>
## 1  1974 Female     11.8       6.08
## 2  1974 Male       11.9       5.96
## 3  1976 Female     11.6       6.14
## 4  1976 Male       12.1       5.95
## 5  1978 Female     11.8       6.02
## 6  1978 Male       12.4       5.89

Exercise-5

# read data
dta_animal <- MASS::Animals
dta_mammal <- MASS::mammals
# make row names a column
dta_animal$names <- row.names(dta_animal)
dta_mammal$names <- row.names(dta_mammal)

# combind two data objects by rows 
dta <- rbind(dta_animal, dta_mammal)
# see how many duplicated observations in dta
sum(duplicated(dta$names))
## [1] 23
# remove duplicated observations
dta <- dta[-which(duplicated(dta$names)), ]
# check data structure, observations drops from 90 to 67
str(dta)
## 'data.frame':    67 obs. of  3 variables:
##  $ body : num  1.35 465 36.33 27.66 1.04 ...
##  $ brain: num  8.1 423 119.5 115 5.5 ...
##  $ names: chr  "Mountain beaver" "Cow" "Grey wolf" "Goat" ...

Exercise-6

## read data
dta_probe <- read.table("probeL.txt", header = T)

base R approach

dta_probe_w <- reshape(dta_probe, 
                       idvar = "ID",
                       # the variabel we want to expand
                       timevar = "Position",
                       # the value comes from column Response_Time
                       v.names = "Response_Time",
                       # direction from long to wide
                       direction = "wide")
# rename cols
colnames(dta_probe_w)[-1] <- paste("Pos_", 1:5, sep = "")
# print the first 11 lines
head(dta_probe_w, 11)
##     ID Pos_1 Pos_2 Pos_3 Pos_4 Pos_5
## 1  S01    51    36    50    35    42
## 6  S02    27    20    26    17    27
## 11 S03    37    22    41    37    30
## 16 S04    42    36    32    34    27
## 21 S05    27    18    33    14    29
## 26 S06    43    32    43    35    40
## 31 S07    41    22    36    25    38
## 36 S08    38    21    31    20    16
## 41 S09    36    23    27    25    28
## 46 S10    26    31    31    32    36
## 51 S11    29    20    25    26    25

tidyverse approach

# make it a wide form
dta_probe_w <- dta_probe %>% spread(key = Position, value = Response_Time)
# rename cols
colnames(dta_probe_w)[-1] <- paste("Pos_", 1:5, sep = "")
# print the first 11 lines
head(dta_probe_w, 11)
##     ID Pos_1 Pos_2 Pos_3 Pos_4 Pos_5
## 1  S01    51    36    50    35    42
## 2  S02    27    20    26    17    27
## 3  S03    37    22    41    37    30
## 4  S04    42    36    32    34    27
## 5  S05    27    18    33    14    29
## 6  S06    43    32    43    35    40
## 7  S07    41    22    36    25    38
## 8  S08    38    21    31    20    16
## 9  S09    36    23    27    25    28
## 10 S10    26    31    31    32    36
## 11 S11    29    20    25    26    25