Clear everything.
# Clear the work space
rm(list = ls()) # Clear environment
gc() # Clear unused memory
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 525253 28.1 1166077 62.3 NA 669291 35.8
## Vcells 964700 7.4 8388608 64.0 32768 1840369 14.1
cat("\f") # Clear the console
#dev.off() # Clear the charts
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.32 R6_2.5.1 jsonlite_1.8.7 evaluate_0.21
## [5] cachem_1.0.8 rlang_1.1.1 cli_3.6.1 rstudioapi_0.14
## [9] jquerylib_0.1.4 bslib_0.5.0 rmarkdown_2.21 tools_4.2.1
## [13] xfun_0.39 yaml_2.3.7 fastmap_1.1.1 compiler_4.2.1
## [17] htmltools_0.5.5 knitr_1.42 sass_0.4.6
library(visdat)
library(vtable)
## Loading required package: kableExtra
library(gtools)
df1 <- read.csv("~/Library/CloudStorage/Dropbox/WCAS/Summer/Data Analysis/share/Day 9/mydata_1960_1980.csv")
vis_dat(df1)
??st
st(df1)
| Variable | N | Mean | Std. Dev. | Min | Pctl. 25 | Pctl. 75 | Max |
|---|---|---|---|---|---|---|---|
| year | 2850 | 1974 | 8.7 | 1960 | 1967 | 1982 | 1989 |
| gdppc | 2065 | 9256 | 10856 | 179 | 1695 | 15298 | 81947 |
| unemp | 448 | 7.9 | 4.8 | 0.6 | 4.1 | 10 | 28 |
| export | 1740 | 31925198701 | 60411559532 | 126529127 | 2789683321 | 30484781664 | 556004000000 |
| import | 1750 | 32304559329 | 64205761728 | 198918483 | 3778882830 | 29240323123 | 653068000000 |
table(df1$year)
##
## 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975
## 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95
## 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989
## 95 95 95 95 95 95 95 95 95 95 95 95 95 95
df2 <- read.csv("~/Library/CloudStorage/Dropbox/WCAS/Summer/Data Analysis/share/Day 9/mydata_1990_2020.csv")
vis_dat(df2)
??st
st(df2)
| Variable | N | Mean | Std. Dev. | Min | Pctl. 25 | Pctl. 75 | Max |
|---|---|---|---|---|---|---|---|
| year | 2280 | 2002 | 6.9 | 1990 | 1996 | 2007 | 2013 |
| Gdppc | 2254 | 13746 | 15846 | 198 | 2486 | 22950 | 87717 |
| unemp | 2121 | 9.1 | 6.2 | 0.6 | 4.8 | 11 | 52 |
| export | 2209 | 102121255849 | 203883942176 | 223329024 | 5575464298 | 104000000000 | 1910000000000 |
| Import | 2209 | 101990170968 | 223469497481 | 312413409 | 6518361074 | 90823243497 | 2280000000000 |
table(df2$year)
##
## 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005
## 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95 95
## 2006 2007 2008 2009 2010 2011 2012 2013
## 95 95 95 95 95 95 95 95
Put one data on top of the other.
?rbind # gives an error
# rbind(df1, df2)
?smartbind # brute force append
appended_dataset <- smartbind(df1, df2)
vis_dat(appended_dataset[,c(1:2, 3,6:8)])
names(df2)[names(df2) == 'Gdppc'] <- 'gdppc'
names(df2)[names(df2) == 'Import'] <- 'import'
appended_dataset <- rbind(df1, df2)
vis_dat(appended_dataset)
df3 <- read.csv("~/Library/CloudStorage/Dropbox/WCAS/Summer/Data Analysis/share/Day 9/mydata_pol.csv")
vis_dat(df3)
appended_data is a panel data
df3 is a panel data.
country, year together uniquely identify
each row.
names(appended_dataset)
## [1] "year" "country" "gdppc" "unemp" "export" "import"
names(df3)
## [1] "year" "country" "politics"
# remove(merged_data)
?merge
merged_data <-
merge(x = appended_dataset,
y = df3,
by.x = c("country", "year"), # unique identifier name in x data
by.y = c("country", "year"), # unique identifier name in y data
all.x = TRUE
)
vis_dat(merged_data)
7 columns make sense.
country and year together uniquely
identify each row.
gdpcc, unemp, export and
import from master data, while politics
(polictical score from using data
?write.csv
getwd() # print my working directory
## [1] "/Users/arvindsharma/Library/CloudStorage/Dropbox/WCAS/Summer/Data Analysis/share/Day 9"
write.csv(x = merged_data, # data frame
file = "exported_data.csv",
row.names = FALSE # row numbers not printed out as a column
)