Importing data

library(readr)
# Read the GDP table into a tibble; readr reports the column types it parsed.
gdp <- readr::read_csv(file = "gdp.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Country Name` = col_character(),
##   `Country Code` = col_character()
## )
## See spec(...) for full column specifications.

Accessing columns and rows

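Use square brackets to index the data: the row index goes before the comma and the column index after it, as in data[rows, columns]. Leaving one side empty selects everything along that dimension.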

gdp[1,] # First row with all variables (nothing after the comma keeps every column)
## # A tibble: 1 x 60
##   `Country Name` `Country Code` `1960` `1961` `1962` `1963` `1964` `1965` `1966`
##   <chr>          <chr>           <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
## 1 Aruba          ABW                NA     NA     NA     NA     NA     NA     NA
## # … with 51 more variables: `1967` <dbl>, `1968` <dbl>, `1969` <dbl>,
## #   `1970` <dbl>, `1971` <dbl>, `1972` <dbl>, `1973` <dbl>, `1974` <dbl>,
## #   `1975` <dbl>, `1976` <dbl>, `1977` <dbl>, `1978` <dbl>, `1979` <dbl>,
## #   `1980` <dbl>, `1981` <dbl>, `1982` <dbl>, `1983` <dbl>, `1984` <dbl>,
## #   `1985` <dbl>, `1986` <dbl>, `1987` <dbl>, `1988` <dbl>, `1989` <dbl>,
## #   `1990` <dbl>, `1991` <dbl>, `1992` <dbl>, `1993` <dbl>, `1994` <dbl>,
## #   `1995` <dbl>, `1996` <dbl>, `1997` <dbl>, `1998` <dbl>, `1999` <dbl>,
## #   `2000` <dbl>, `2001` <dbl>, `2002` <dbl>, `2003` <dbl>, `2004` <dbl>,
## #   `2005` <dbl>, `2006` <dbl>, `2007` <dbl>, `2008` <dbl>, `2009` <dbl>,
## #   `2010` <dbl>, `2011` <dbl>, `2012` <dbl>, `2013` <dbl>, `2014` <dbl>,
## #   `2015` <dbl>, `2016` <dbl>, `2017` <dbl>
gdp[,1] # Country Name variable for all rows (nothing before the comma keeps every row)
## # A tibble: 264 x 1
##    `Country Name`      
##    <chr>               
##  1 Aruba               
##  2 Afghanistan         
##  3 Angola              
##  4 Albania             
##  5 Andorra             
##  6 Arab World          
##  7 United Arab Emirates
##  8 Argentina           
##  9 Armenia             
## 10 American Samoa      
## # … with 254 more rows

To select several rows or columns that are not all next to each other, combine their indices with c()

# Rows 1 to 6; columns 1, 2, and 57 through 60 inclusive
gdp[1:6, c(1:2, 57:60)]
## # A tibble: 6 x 6
##   `Country Name` `Country Code`   `2014`   `2015`   `2016`   `2017`
##   <chr>          <chr>             <dbl>    <dbl>    <dbl>    <dbl>
## 1 Aruba          ABW            NA       NA       NA       NA      
## 2 Afghanistan    AFG             2.06e10  1.92e10  1.95e10  2.08e10
## 3 Angola         AGO             1.27e11  1.03e11  9.53e10  1.24e11
## 4 Albania        ALB             1.32e10  1.14e10  1.19e10  1.30e10
## 5 Andorra        AND             3.35e 9  2.81e 9  2.88e 9  3.01e 9
## 6 Arab World     ARB             2.91e12  2.55e12  2.50e12  2.59e12

Adding a new column way 1

# Assigning through `$` to a name that does not exist yet creates that column;
# the single NA is used for every row.
gdp$newcolumn <- NA
print(gdp$newcolumn)
##   [1] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
##  [26] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
##  [51] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
##  [76] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [101] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [126] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [151] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [176] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [201] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [226] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [251] NA NA NA NA NA NA NA NA NA NA NA NA NA NA

Check to see that a new variable was added

dim(gdp) # number of rows, then number of columns
## [1] 264  61

Adding a new column way 2

# Single-bracket assignment with a new column name also creates the column,
# with NA in every row.
gdp["anothercolumn"] <- NA
print(gdp$anothercolumn)
##   [1] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
##  [26] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
##  [51] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
##  [76] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [101] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [126] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [151] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [176] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [201] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [226] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [251] NA NA NA NA NA NA NA NA NA NA NA NA NA NA
dim(gdp) # the column count has gone up by one more
## [1] 264  62

Reordering data frame columns

http://www.sthda.com/english/wiki/reordering-data-frame-columns-in-r

# Columns are reordered by position: columns 61 and 62 (the two columns added
# above) come first, followed by the original columns 1 to 60 in order.
gdp2 <- gdp[, c(61, 62, 1:60)]
gdp2
## # A tibble: 264 x 62
##    newcolumn anothercolumn `Country Name` `Country Code`  `1960`  `1961`
##    <lgl>     <lgl>         <chr>          <chr>            <dbl>   <dbl>
##  1 NA        NA            Aruba          ABW            NA      NA     
##  2 NA        NA            Afghanistan    AFG             5.38e8  5.49e8
##  3 NA        NA            Angola         AGO            NA      NA     
##  4 NA        NA            Albania        ALB            NA      NA     
##  5 NA        NA            Andorra        AND            NA      NA     
##  6 NA        NA            Arab World     ARB            NA      NA     
##  7 NA        NA            United Arab E… ARE            NA      NA     
##  8 NA        NA            Argentina      ARG            NA      NA     
##  9 NA        NA            Armenia        ARM            NA      NA     
## 10 NA        NA            American Samoa ASM            NA      NA     
## # … with 254 more rows, and 56 more variables: `1962` <dbl>, `1963` <dbl>,
## #   `1964` <dbl>, `1965` <dbl>, `1966` <dbl>, `1967` <dbl>, `1968` <dbl>,
## #   `1969` <dbl>, `1970` <dbl>, `1971` <dbl>, `1972` <dbl>, `1973` <dbl>,
## #   `1974` <dbl>, `1975` <dbl>, `1976` <dbl>, `1977` <dbl>, `1978` <dbl>,
## #   `1979` <dbl>, `1980` <dbl>, `1981` <dbl>, `1982` <dbl>, `1983` <dbl>,
## #   `1984` <dbl>, `1985` <dbl>, `1986` <dbl>, `1987` <dbl>, `1988` <dbl>,
## #   `1989` <dbl>, `1990` <dbl>, `1991` <dbl>, `1992` <dbl>, `1993` <dbl>,
## #   `1994` <dbl>, `1995` <dbl>, `1996` <dbl>, `1997` <dbl>, `1998` <dbl>,
## #   `1999` <dbl>, `2000` <dbl>, `2001` <dbl>, `2002` <dbl>, `2003` <dbl>,
## #   `2004` <dbl>, `2005` <dbl>, `2006` <dbl>, `2007` <dbl>, `2008` <dbl>,
## #   `2009` <dbl>, `2010` <dbl>, `2011` <dbl>, `2012` <dbl>, `2013` <dbl>,
## #   `2014` <dbl>, `2015` <dbl>, `2016` <dbl>, `2017` <dbl>

Removing a column

library(dplyr )
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# A minus sign inside select() drops the named column and keeps the others.
gdp2 <- gdp2 %>% select(-anothercolumn)
dim(gdp2)
## [1] 264  61

Learning more about our data

str(gdp2) # compact overview: the type and first few values of every column
## tibble [264 × 61] (S3: tbl_df/tbl/data.frame)
##  $ newcolumn   : logi [1:264] NA NA NA NA NA NA ...
##  $ Country Name: chr [1:264] "Aruba" "Afghanistan" "Angola" "Albania" ...
##  $ Country Code: chr [1:264] "ABW" "AFG" "AGO" "ALB" ...
##  $ 1960        : num [1:264] NA 5.38e+08 NA NA NA ...
##  $ 1961        : num [1:264] NA 5.49e+08 NA NA NA ...
##  $ 1962        : num [1:264] NA 5.47e+08 NA NA NA ...
##  $ 1963        : num [1:264] NA 7.51e+08 NA NA NA ...
##  $ 1964        : num [1:264] NA 8e+08 NA NA NA ...
##  $ 1965        : num [1:264] NA 1.01e+09 NA NA NA ...
##  $ 1966        : num [1:264] NA 1.4e+09 NA NA NA ...
##  $ 1967        : num [1:264] NA 1.67e+09 NA NA NA ...
##  $ 1968        : num [1:264] NA 1.37e+09 NA NA NA ...
##  $ 1969        : num [1:264] NA 1.41e+09 NA NA NA ...
##  $ 1970        : num [1:264] NA 1.75e+09 NA NA 7.86e+07 ...
##  $ 1971        : num [1:264] NA 1.83e+09 NA NA 8.94e+07 ...
##  $ 1972        : num [1:264] NA 1.60e+09 NA NA 1.13e+08 ...
##  $ 1973        : num [1:264] NA 1.73e+09 NA NA 1.51e+08 ...
##  $ 1974        : num [1:264] NA 2.16e+09 NA NA 1.87e+08 ...
##  $ 1975        : num [1:264] NA 2.37e+09 NA NA 2.20e+08 ...
##  $ 1976        : num [1:264] NA 2.56e+09 NA NA 2.27e+08 ...
##  $ 1977        : num [1:264] NA 2.95e+09 NA NA 2.54e+08 ...
##  $ 1978        : num [1:264] NA 3.30e+09 NA NA 3.08e+08 ...
##  $ 1979        : num [1:264] NA 3.70e+09 NA NA 4.12e+08 ...
##  $ 1980        : num [1:264] NA 3.64e+09 5.93e+09 NA 4.46e+08 ...
##  $ 1981        : num [1:264] NA 3.48e+09 5.55e+09 NA 3.89e+08 ...
##  $ 1982        : num [1:264] NA NA 5.55e+09 NA 3.76e+08 ...
##  $ 1983        : num [1:264] NA NA 5.78e+09 NA 3.28e+08 ...
##  $ 1984        : num [1:264] NA NA 6.13e+09 1.92e+09 3.30e+08 ...
##  $ 1985        : num [1:264] NA NA 7.55e+09 1.97e+09 3.47e+08 ...
##  $ 1986        : num [1:264] NA NA 7.07e+09 2.17e+09 4.82e+08 ...
##  $ 1987        : num [1:264] NA NA 8.08e+09 2.16e+09 6.11e+08 ...
##  $ 1988        : num [1:264] NA NA 8.77e+09 2.13e+09 7.21e+08 ...
##  $ 1989        : num [1:264] NA NA 1.02e+10 2.34e+09 7.95e+08 ...
##  $ 1990        : num [1:264] NA NA 1.12e+10 2.10e+09 1.03e+09 ...
##  $ 1991        : num [1:264] NA NA 1.06e+10 1.14e+09 1.11e+09 ...
##  $ 1992        : num [1:264] NA NA 8.31e+09 7.09e+08 1.21e+09 ...
##  $ 1993        : num [1:264] NA NA 5.77e+09 1.23e+09 1.01e+09 ...
##  $ 1994        : num [1:264] 1.33e+09 NA 4.44e+09 1.99e+09 1.02e+09 ...
##  $ 1995        : num [1:264] 1.32e+09 NA 5.54e+09 2.42e+09 1.18e+09 ...
##  $ 1996        : num [1:264] 1.38e+09 NA 7.53e+09 3.31e+09 1.22e+09 ...
##  $ 1997        : num [1:264] 1.53e+09 NA 7.65e+09 2.36e+09 1.18e+09 ...
##  $ 1998        : num [1:264] 1.67e+09 NA 6.51e+09 2.71e+09 1.21e+09 ...
##  $ 1999        : num [1:264] 1.72e+09 NA 6.15e+09 3.41e+09 1.24e+09 ...
##  $ 2000        : num [1:264] 1.87e+09 NA 9.13e+09 3.63e+09 1.43e+09 ...
##  $ 2001        : num [1:264] 1.92e+09 2.46e+09 8.94e+09 4.06e+09 1.50e+09 ...
##  $ 2002        : num [1:264] 1.94e+09 4.13e+09 1.25e+10 4.44e+09 1.73e+09 ...
##  $ 2003        : num [1:264] 2.02e+09 4.58e+09 1.42e+10 5.75e+09 2.40e+09 ...
##  $ 2004        : num [1:264] 2.23e+09 5.29e+09 1.96e+10 7.31e+09 2.94e+09 ...
##  $ 2005        : num [1:264] 2.33e+09 6.28e+09 2.82e+10 8.16e+09 3.26e+09 ...
##  $ 2006        : num [1:264] 2.42e+09 7.06e+09 4.18e+10 8.99e+09 3.54e+09 ...
##  $ 2007        : num [1:264] 2.62e+09 9.84e+09 6.04e+10 1.07e+10 4.02e+09 ...
##  $ 2008        : num [1:264] 2.79e+09 1.02e+10 8.42e+10 1.29e+10 4.01e+09 ...
##  $ 2009        : num [1:264] 2.50e+09 1.25e+10 7.55e+10 1.20e+10 3.66e+09 ...
##  $ 2010        : num [1:264] 2.47e+09 1.59e+10 8.25e+10 1.19e+10 3.36e+09 ...
##  $ 2011        : num [1:264] 2.58e+09 1.79e+10 1.04e+11 1.29e+10 3.44e+09 ...
##  $ 2012        : num [1:264] NA 2.05e+10 1.14e+11 1.23e+10 3.16e+09 ...
##  $ 2013        : num [1:264] NA 2.03e+10 1.25e+11 1.28e+10 3.28e+09 ...
##  $ 2014        : num [1:264] NA 2.06e+10 1.27e+11 1.32e+10 3.35e+09 ...
##  $ 2015        : num [1:264] NA 1.92e+10 1.03e+11 1.14e+10 2.81e+09 ...
##  $ 2016        : num [1:264] NA 1.95e+10 9.53e+10 1.19e+10 2.88e+09 ...
##  $ 2017        : num [1:264] NA 2.08e+10 1.24e+11 1.30e+10 3.01e+09 ...

Summary

summary(gdp2) # per-column summary: quartiles, mean and NA count for the numeric columns
##  newcolumn      Country Name       Country Code            1960          
##  Mode:logical   Length:264         Length:264         Min.   :1.201e+07  
##  NA's:264       Class :character   Class :character   1st Qu.:5.357e+08  
##                 Mode  :character   Mode  :character   Median :2.761e+09  
##                                                       Mean   :7.419e+10  
##                                                       3rd Qu.:2.992e+10  
##                                                       Max.   :1.366e+12  
##                                                       NA's   :131        
##       1961                1962                1963          
##  Min.   :1.159e+07   Min.   :9.123e+06   Min.   :1.084e+07  
##  1st Qu.:5.489e+08   1st Qu.:5.604e+08   1st Qu.:6.323e+08  
##  Median :3.034e+09   Median :3.287e+09   Median :3.601e+09  
##  Mean   :7.658e+10   Mean   :8.110e+10   Mean   :8.768e+10  
##  3rd Qu.:3.108e+10   3rd Qu.:3.368e+10   3rd Qu.:3.862e+10  
##  Max.   :1.421e+12   Max.   :1.526e+12   Max.   :1.643e+12  
##  NA's   :131         NA's   :129         NA's   :129        
##       1964                1965                1966          
##  Min.   :1.271e+07   Min.   :1.359e+07   Min.   :1.447e+07  
##  1st Qu.:6.505e+08   1st Qu.:5.998e+08   1st Qu.:6.500e+08  
##  Median :3.459e+09   Median :3.120e+09   Median :3.157e+09  
##  Mean   :9.616e+10   Mean   :9.872e+10   Mean   :1.059e+11  
##  3rd Qu.:3.768e+10   3rd Qu.:3.724e+10   3rd Qu.:3.748e+10  
##  Max.   :1.800e+12   Max.   :1.961e+12   Max.   :2.127e+12  
##  NA's   :129         NA's   :119         NA's   :118        
##       1967                1968                1969          
##  Min.   :1.584e+07   Min.   :1.460e+07   Min.   :1.585e+07  
##  1st Qu.:6.212e+08   1st Qu.:6.412e+08   1st Qu.:6.680e+08  
##  Median :3.371e+09   Median :3.910e+09   Median :4.461e+09  
##  Mean   :1.101e+11   Mean   :1.155e+11   Mean   :1.274e+11  
##  3rd Qu.:3.165e+10   3rd Qu.:3.266e+10   3rd Qu.:3.662e+10  
##  Max.   :2.263e+12   Max.   :2.442e+12   Max.   :2.689e+12  
##  NA's   :115         NA's   :111         NA's   :111        
##       1970                1971                1972          
##  Min.   :1.430e+07   Min.   :1.528e+07   Min.   :1.894e+07  
##  1st Qu.:5.238e+08   1st Qu.:5.376e+08   1st Qu.:5.820e+08  
##  Median :4.179e+09   Median :4.476e+09   Median :5.710e+09  
##  Mean   :1.339e+11   Mean   :1.469e+11   Mean   :1.693e+11  
##  3rd Qu.:4.114e+10   3rd Qu.:4.582e+10   3rd Qu.:5.333e+10  
##  Max.   :2.957e+12   Max.   :3.267e+12   Max.   :3.768e+12  
##  NA's   :102         NA's   :101         NA's   :101        
##       1973                1974                1975          
##  Min.   :2.420e+07   Min.   :3.151e+07   Min.   :3.251e+07  
##  1st Qu.:7.128e+08   1st Qu.:1.032e+09   1st Qu.:1.105e+09  
##  Median :7.002e+09   Median :8.894e+09   Median :9.397e+09  
##  Mean   :2.079e+11   Mean   :2.408e+11   Mean   :2.630e+11  
##  3rd Qu.:6.732e+10   3rd Qu.:9.086e+10   3rd Qu.:9.761e+10  
##  Max.   :4.591e+12   Max.   :5.296e+12   Max.   :5.896e+12  
##  NA's   :101         NA's   :100         NA's   :96         
##       1976                1977                1978          
##  Min.   :3.004e+07   Min.   :3.414e+07   Min.   :4.157e+07  
##  1st Qu.:1.065e+09   1st Qu.:9.354e+08   1st Qu.:1.196e+09  
##  Median :9.649e+09   Median :1.103e+10   Median :1.287e+10  
##  Mean   :2.831e+11   Mean   :3.126e+11   Mean   :3.673e+11  
##  3rd Qu.:1.013e+11   3rd Qu.:1.049e+11   3rd Qu.:1.194e+11  
##  Max.   :6.415e+12   Max.   :7.257e+12   Max.   :8.543e+12  
##  NA's   :95          NA's   :91          NA's   :92         
##       1979                1980                1981          
##  Min.   :4.262e+07   Min.   :3.872e+07   Min.   :3.102e+07  
##  1st Qu.:1.289e+09   1st Qu.:1.378e+09   1st Qu.:1.390e+09  
##  Median :1.507e+10   Median :1.346e+10   Median :1.335e+10  
##  Mean   :4.274e+11   Mean   :4.540e+11   Mean   :4.561e+11  
##  3rd Qu.:1.347e+11   3rd Qu.:1.401e+11   3rd Qu.:1.298e+11  
##  Max.   :9.925e+12   Max.   :1.117e+13   Max.   :1.146e+13  
##  NA's   :91          NA's   :79          NA's   :76         
##       1982                1983                1984          
##  Min.   :3.492e+07   Min.   :3.784e+07   Min.   :4.125e+07  
##  1st Qu.:1.295e+09   1st Qu.:1.256e+09   1st Qu.:1.367e+09  
##  Median :1.365e+10   Median :1.003e+10   Median :9.701e+09  
##  Mean   :4.480e+11   Mean   :4.502e+11   Mean   :4.615e+11  
##  3rd Qu.:1.259e+11   3rd Qu.:1.107e+11   3rd Qu.:1.079e+11  
##  Max.   :1.136e+13   Max.   :1.163e+13   Max.   :1.207e+13  
##  NA's   :75          NA's   :74          NA's   :73         
##       1985                1986                1987          
##  Min.   :3.213e+07   Min.   :3.209e+07   Min.   :3.361e+07  
##  1st Qu.:1.418e+09   1st Qu.:1.719e+09   1st Qu.:2.003e+09  
##  Median :1.001e+10   Median :1.062e+10   Median :1.136e+10  
##  Mean   :4.795e+11   Mean   :5.588e+11   Mean   :6.215e+11  
##  3rd Qu.:1.078e+11   3rd Qu.:1.251e+11   3rd Qu.:1.371e+11  
##  Max.   :1.268e+13   Max.   :1.502e+13   Max.   :1.710e+13  
##  NA's   :71          NA's   :69          NA's   :65         
##       1988                1989                1990          
##  Min.   :4.297e+07   Min.   :4.112e+07   Min.   :8.824e+06  
##  1st Qu.:2.153e+09   1st Qu.:2.248e+09   1st Qu.:2.561e+09  
##  Median :1.058e+10   Median :1.039e+10   Median :1.229e+10  
##  Mean   :6.865e+11   Mean   :7.179e+11   Mean   :7.554e+11  
##  3rd Qu.:1.333e+11   3rd Qu.:1.651e+11   3rd Qu.:1.665e+11  
##  Max.   :1.915e+13   Max.   :2.008e+13   Max.   :2.257e+13  
##  NA's   :63          NA's   :59          NA's   :43         
##       1991                1992                1993          
##  Min.   :9.365e+06   Min.   :9.743e+06   Min.   :9.631e+06  
##  1st Qu.:2.653e+09   1st Qu.:2.317e+09   1st Qu.:2.348e+09  
##  Median :1.134e+10   Median :1.138e+10   Median :1.314e+10  
##  Mean   :7.981e+11   Mean   :8.388e+11   Mean   :8.380e+11  
##  3rd Qu.:1.646e+11   3rd Qu.:1.574e+11   3rd Qu.:1.801e+11  
##  Max.   :2.392e+13   Max.   :2.541e+13   Max.   :2.582e+13  
##  NA's   :43          NA's   :42          NA's   :38         
##       1994                1995                1996          
##  Min.   :1.089e+07   Min.   :1.103e+07   Min.   :1.233e+07  
##  1st Qu.:2.163e+09   1st Qu.:2.512e+09   1st Qu.:2.786e+09  
##  Median :1.291e+10   Median :1.358e+10   Median :1.390e+10  
##  Mean   :8.917e+11   Mean   :9.619e+11   Mean   :9.868e+11  
##  3rd Qu.:1.688e+11   3rd Qu.:1.734e+11   3rd Qu.:1.830e+11  
##  Max.   :2.775e+13   Max.   :3.085e+13   Max.   :3.154e+13  
##  NA's   :36          NA's   :28          NA's   :27         
##       1997                1998                1999          
##  Min.   :1.270e+07   Min.   :1.276e+07   Min.   :1.369e+07  
##  1st Qu.:2.910e+09   1st Qu.:2.981e+09   1st Qu.:3.035e+09  
##  Median :1.492e+10   Median :1.509e+10   Median :1.566e+10  
##  Mean   :9.901e+11   Mean   :9.793e+11   Mean   :1.001e+12  
##  3rd Qu.:1.829e+11   3rd Qu.:1.775e+11   3rd Qu.:1.788e+11  
##  Max.   :3.143e+13   Max.   :3.135e+13   Max.   :3.251e+13  
##  NA's   :28          NA's   :26          NA's   :25         
##       2000                2001                2002          
##  Min.   :1.374e+07   Min.   :1.320e+07   Min.   :1.545e+07  
##  1st Qu.:2.905e+09   1st Qu.:2.794e+09   1st Qu.:3.020e+09  
##  Median :1.376e+10   Median :1.318e+10   Median :1.428e+10  
##  Mean   :1.009e+12   Mean   :1.007e+12   Mean   :1.032e+12  
##  3rd Qu.:1.895e+11   3rd Qu.:1.905e+11   3rd Qu.:1.915e+11  
##  Max.   :3.357e+13   Max.   :3.337e+13   Max.   :3.464e+13  
##  NA's   :19          NA's   :19          NA's   :15         
##       2003                2004                2005          
##  Min.   :1.823e+07   Min.   :2.153e+07   Min.   :2.184e+07  
##  1st Qu.:3.446e+09   1st Qu.:3.875e+09   1st Qu.:4.530e+09  
##  Median :1.720e+10   Median :2.015e+10   Median :2.285e+10  
##  Mean   :1.165e+12   Mean   :1.318e+12   Mean   :1.443e+12  
##  3rd Qu.:2.176e+11   3rd Qu.:2.550e+11   3rd Qu.:3.047e+11  
##  Max.   :3.888e+13   Max.   :4.379e+13   Max.   :4.741e+13  
##  NA's   :15          NA's   :14          NA's   :14         
##       2006                2007                2008          
##  Min.   :2.290e+07   Min.   :2.043e+07   Min.   :3.029e+07  
##  1st Qu.:4.710e+09   1st Qu.:5.761e+09   1st Qu.:6.110e+09  
##  Median :2.583e+10   Median :3.235e+10   Median :3.914e+10  
##  Mean   :1.577e+12   Mean   :1.807e+12   Mean   :2.030e+12  
##  3rd Qu.:3.451e+11   3rd Qu.:4.085e+11   3rd Qu.:5.102e+11  
##  Max.   :5.134e+13   Max.   :5.783e+13   Max.   :6.343e+13  
##  NA's   :13          NA's   :13          NA's   :15         
##       2009                2010                2011          
##  Min.   :2.710e+07   Min.   :3.182e+07   Min.   :3.871e+07  
##  1st Qu.:5.833e+09   1st Qu.:6.960e+09   1st Qu.:7.674e+09  
##  Median :3.744e+10   Median :4.028e+10   Median :4.586e+10  
##  Mean   :1.925e+12   Mean   :2.141e+12   Mean   :2.395e+12  
##  3rd Qu.:4.291e+11   3rd Qu.:4.835e+11   3rd Qu.:5.298e+11  
##  Max.   :6.014e+13   Max.   :6.596e+13   Max.   :7.330e+13  
##  NA's   :15          NA's   :15          NA's   :14         
##       2012                2013                2014          
##  Min.   :3.767e+07   Min.   :3.751e+07   Min.   :3.729e+07  
##  1st Qu.:8.681e+09   1st Qu.:8.995e+09   1st Qu.:1.019e+10  
##  Median :5.039e+10   Median :5.447e+10   Median :5.673e+10  
##  Mean   :2.490e+12   Mean   :2.569e+12   Mean   :2.655e+12  
##  3rd Qu.:5.552e+11   3rd Qu.:5.587e+11   3rd Qu.:5.712e+11  
##  Max.   :7.497e+13   Max.   :7.705e+13   Max.   :7.913e+13  
##  NA's   :17          NA's   :16          NA's   :17         
##       2015                2016                2017          
##  Min.   :3.556e+07   Min.   :3.657e+07   Min.   :3.973e+07  
##  1st Qu.:8.758e+09   1st Qu.:9.412e+09   1st Qu.:1.151e+10  
##  Median :5.317e+10   Median :5.324e+10   Median :5.945e+10  
##  Mean   :2.506e+12   Mean   :2.586e+12   Mean   :2.848e+12  
##  3rd Qu.:5.786e+11   3rd Qu.:6.449e+11   3rd Qu.:7.175e+11  
##  Max.   :7.484e+13   Max.   :7.594e+13   Max.   :8.068e+13  
##  NA's   :18          NA's   :23          NA's   :30

Converting to log scale

This is the basic logarithm function, with 9 as the value and 3 as the base. The result is 2 because 9 is the square of 3.

# log(x, base): the logarithm of 9 to base 3
log(9, base = 3)
## [1] 2

Here the second parameter has been omitted, so the base defaults to e and the result is the natural logarithm of 5.

log(5) # no base supplied, so this is the natural logarithm (base e)
## [1] 1.609438

Inverse

exp(1.609438) # exp() undoes log(): this gives back 5, to the printed precision
## [1] 5

Log transformation

A log transformation applies a logarithm to data in order to reduce its skew. It is typically used when the values are highly skewed, so that the data become easier to understand. In R, a log transformation is done by applying the log() function to a vector, data frame, or other data set. Before the logarithm is applied, 1 is added to each value so that the logarithm is never taken of 0 (log(0) is -Inf). The transformed data are less skewed than the original, which makes them easier to understand.

# Example values spanning several orders of magnitude, from 100 down to 0.0001.
myvector <- c(100, 10, 5, 2, 1, 0.5, 0.1, 0.05, 0.01, 0.001, 0.0001)
# log1p(x) computes log(1 + x) but stays accurate when x is close to 0, where
# forming 1 + x first would round away digits of x.
transformedvector <- log1p(myvector)

# Raw values, plotted against their position in the vector.
plot(myvector)

# The same values after the log(x + 1) transformation.
plot(transformedvector)

data frame

Log transforming a data frame in R is a little trickier because you first have to pick out the data you want to transform. Taking the log of the entire data set gets you the log of every data point. However, you usually need the log of only one column of data.

 # Store the natural log of each weight in a new column, logweight.
 ChickWeight$logweight <- with(ChickWeight, log(weight))
 # head(ChickWeight)

# Time against log weight for the first few observations returned by head().
plot(head(ChickWeight$Time),head(ChickWeight$logweight))

# The same observations on the original weight scale, for comparison.
plot(head(ChickWeight$Time),head(ChickWeight$weight))

Visualize it

# Box plot of the 2017 values for the first ten rows only.
boxplot(gdp$`2017`[1:10])

# Box plot of the 2017 column across all rows (left disabled).
#boxplot(gdp$`2017`)

Let's apply this to the yearly columns of the GDP data (columns 3 to 60)

# Columns 3 to 60 are the yearly columns (1960 to 2017). log1p(x) computes
# log(1 + x) accurately and, like log(), is applied to every cell of the
# selected columns.
gdp[, 3:60] <- log1p(gdp[, 3:60])

Learn more about using the natural logarithm with economic data: https://econbrowser.com/archives/2014/02/use-of-logarithms-in-economics

Subsetting

# Read the file again so gdp holds the values exactly as stored in the CSV.
gdp <- readr::read_csv(file = "gdp.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Country Name` = col_character(),
##   `Country Code` = col_character()
## )
## See spec(...) for full column specifications.
# Base R: keep the rows whose country code is "ARE".
uae <- subset(gdp, subset = `Country Code` == "ARE")
library(dplyr)
# dplyr: the same row selection written with filter().
uae2 <- gdp %>% filter(`Country Code` == "ARE")

Single column - Multiple values

# Base R: %in% tests the country code against a set of values.
gdp_multiple <- subset(gdp, subset = `Country Code` %in% c("ARE", "CHN", "GBR"))
# dplyr: the same three codes written as equality tests joined with |.
gdp_multiple2 <- gdp %>%
  filter(
    `Country Code` == "ARE" |
      `Country Code` == "CHN" |
      `Country Code` == "GBR"
  )