Package: dplyr. All operations in points 3-10 should be done with dplyr functions.
# First, naive import of the CPDS table using read.csv() defaults only.
dat <- read.csv(file = "http://math-info.hse.ru/f/2018-19/pep/hw/CPDS.csv")
# Uncomment to inspect the column types produced by the default import.
# str(dat)
Add the following arguments to read.csv(): dec = "," so that R can regard numbers with commas as numbers, not strings; stringsAsFactors = FALSE so that R can regard text values as character ones, not factors.
Make sure that now everything is correct.
# Re-read the CPDS table: dec = "," makes read.csv() parse comma decimals as
# numbers, and stringsAsFactors = FALSE keeps text columns as character
# vectors instead of converting them to factors.
dat <- read.csv(
  "http://math-info.hse.ru/f/2018-19/pep/hw/CPDS.csv",
  dec = ",",
  stringsAsFactors = FALSE
)
Select the columns year, country, iso, poco, eu, gov_right1, gov_cent1, gov_left1, gov_party, gov_type, womenpar, pop and save them to the data frame small.
# Attach dplyr for the pipe and the data-manipulation verbs used below.
library(dplyr)

# Keep only the twelve columns needed for the exercises and store them
# in small.
small <- dat %>%
  select(
    year, country, iso, poco, eu,
    gov_right1, gov_cent1, gov_left1,
    gov_party, gov_type, womenpar, pop
  )
Create a variable log_pop with values of the natural logarithm of population and add it to small.
# Append log_pop, the natural logarithm of pop, to small itself (starting
# from dat would bring back every column dropped by select()).
small <- small %>%
  mutate(log_pop = log(pop))
# How many rows in small correspond to post-communist and
# not post-communist states?
# Counts the rows of small within each value of poco.
small %>%
  group_by(poco) %>%
  tally()
## # A tibble: 2 x 2
## poco n
## <int> <int>
## 1 0 1371
## 2 1 279
# Task (the statement is truncated in this copy; it ends "... small?"):
# count the distinct countries within each value of poco.
# Hint: n_distinct() in dplyr combined with summarise() might be helpful.
small %>%
  group_by(poco) %>%
  summarise(n = n_distinct(country))
## # A tibble: 2 x 2
## poco n
## <int> <int>
## 1 0 25
## 2 1 11
# Means of gov_left1, gov_cent1 and gov_right1 over all rows of small;
# missing values are removed before averaging.
small %>%
  summarise(
    left = mean(gov_left1, na.rm = TRUE),
    center = mean(gov_cent1, na.rm = TRUE),
    right = mean(gov_right1, na.rm = TRUE)
  )
## left center right
## 1 32.37808 23.45062 39.4144
# The same three means, computed separately for each value of eu;
# missing values are removed before averaging.
small %>%
  group_by(eu) %>%
  summarise(
    left = mean(gov_left1, na.rm = TRUE),
    center = mean(gov_cent1, na.rm = TRUE),
    right = mean(gov_right1, na.rm = TRUE)
  )
## # A tibble: 2 x 4
## eu left center right
## <int> <dbl> <dbl> <dbl>
## 1 0 30.4 21.0 44.3
## 2 1 34.5 26.0 34.2
# Open the rows with poco == 1 and gov_right1 above 50 in the data viewer.
small %>%
  filter(poco == 1, gov_right1 > 50) %>%
  View()
# Number of rows (observations) with poco == 1 and gov_right1 above 50.
small %>%
  filter(poco == 1, gov_right1 > 50) %>%
  tally()
## n
## 1 126
# Number of distinct countries among the rows with poco == 1 and
# gov_right1 above 50.
small %>%
  filter(poco == 1, gov_right1 > 50) %>%
  summarise(n = n_distinct(country))
## n
## 1 10
# Pearson's correlation between womenpar and gov_right1 together with its
# significance test; the result is stored in test for reuse below.
test <- cor.test(small$womenpar, small$gov_right1)
# show the test summary
test
# inspect the structure of the stored result
str(test)
Now you can choose any element from test, for example, the correlation coefficient itself or the corresponding p-value.
# correlation coefficient stored in the test result
test[["estimate"]]
# p-value stored in the test result
test[["p.value"]]
# Process small in the following way: calculate the correlation coefficient
# between womenpar and gov_right1 for post-communist and not post-communist
# states separately, and report the coefficient and the corresponding
# p-value for each group.
# cor.test() is evaluated within each poco group; its estimate and p.value
# elements become the corr and pvalue columns.
small %>%
  group_by(poco) %>%
  summarise(
    corr = cor.test(womenpar, gov_right1)$estimate,
    pvalue = cor.test(womenpar, gov_right1)$p.value
  )
## # A tibble: 2 x 3
## poco corr pvalue
## <int> <dbl> <dbl>
## 1 0 -0.121 0.00000703
## 2 1 0.154 0.0108