# Print a greeting to the console.
print("Hello world")
## [1] "Hello world"
# The exercise's four statements, one per line. (Written back-to-back on a
# single line with no `;` or newline between them, they do not parse as R.)

# z: four starting values, including pi and one negative number.
z <- c(pi, 205, 149, -2)
# y: z, then 555, then z again -- nine values in total.
y <- c(z, 555, z)
# Rescale every element of y at once (vectorised arithmetic).
y <- 2 * y + 760
# The smallest element of y is 2 * (-2) + 760 = 756, so y - 1 is positive
# throughout and sqrt() produces no NaN.
my_sqrt <- sqrt(y - 1)
# Dividing zero by zero has no defined value; R reports NaN.
0 / 0
## [1] NaN
# NaN carries through further arithmetic: adding 5 still gives NaN.
0 / 0 + 5
## [1] NaN
# Example age vector: 12 entries, two of which are missing (NA).
age <- c(
  12, 28, 35, 27, NA, 25,
  32, 45, 31, 23, NA, 34
)
If we were interested in getting the vector without the missing values, which of the following lines of code would be useful to achieve this purpose? (Select all that apply)
library(tidyverse)
papers <- as_tibble(read_csv("[YOURFILEPATH]/CitesforSara.csv"))
Great! Let’s create a simplified dataset that keeps only the following variables from the papers dataset, in this order: journal, year, cites, title, and au1. Use the select() function to accomplish this. Assign the output to the variable papers_select. Drag and drop to create the code. Note that there may be more than one way to do this, but only one of the following drag-and-drop options is correct.
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 4.0.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the citations CSV and store it as a tibble named `papers`.
# NOTE(review): the path is absolute and machine-specific, so this line only
# runs where the file exists at exactly this location.
papers <-
  read_csv("/Users/trangan/Desktop/Prepare for PhD in Econ at Curtin/MIT_Data Analysis for Social Scientists/CitesforSara.csv") |>
  as_tibble()
## Rows: 4182 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): journal, title, au1, au2, au3
## dbl (17): year, cites, female1, female2, female3, page, order, nauthor, past...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only the five requested columns, in the requested order.
papers_select <- papers |>
  select(journal, year, cites, title, au1)

# Show the first 20 rows of the reduced table.
head(papers_select, n = 20)
## # A tibble: 20 × 5
## journal year cites title au1
## <chr> <dbl> <dbl> <chr> <chr>
## 1 American-Economic-Review 1993 31 Jeux Sans Frontieres: Tax Compet… Kanb…
## 2 American-Economic-Review 1993 4 Changes in Economic Instability i… Jame…
## 3 American-Economic-Review 1993 28 Factor Shares and Savings in Endo… Bert…
## 4 American-Economic-Review 1993 10 Strategic Discipline in Monetary … Garf…
## 5 American-Economic-Review 1993 5 Will Affirmative-Action Policies … Coat…
## 6 American-Economic-Review 1993 21 Mergers and Market Power: Eviden… Kim,…
## 7 American-Economic-Review 1993 45 A Search-Theoretic Approach to Mo… Kiyo…
## 8 American-Economic-Review 1993 13 An Experimental Test of the Publi… Andr…
## 9 American-Economic-Review 1993 5 A General Experiment on Bargainin… Kahn…
## 10 American-Economic-Review 1993 2 Nominal-Contracting Theories of U… Kean…
## 11 American-Economic-Review 1993 6 Is There a Peso Problem? Evidenc… Kami…
## 12 American-Economic-Review 1993 5 Stock Options and the Strategic U… Reit…
## 13 American-Economic-Review 1993 51 What Do We Learn About Consumer D… Blun…
## 14 American-Economic-Review 1993 15 Economic Policy, Economic Perform… Harr…
## 15 American-Economic-Review 1993 16 Large-Scale Privatization in Tran… Laba…
## 16 American-Economic-Review 1993 8 The Development of Nominal Wage R… Hane…
## 17 American-Economic-Review 1993 7 Low Investment and Large LDC Debt… Cohe…
## 18 American-Economic-Review 1993 4 Entry, Dumping, and Shakeout Clar…
## 19 American-Economic-Review 1993 1 The Relative Pricing of High-Yiel… Damm…
## 20 American-Economic-Review 1993 5 The Economics of Rotating Savings… Besl…
# Rows of `papers` whose citation count is at least 100.
papers |>
  filter(cites >= 100)
## # A tibble: 205 × 22
## journal year cites title au1 au2 au3 female1 female2 female3 page
## <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 American-E… 1994 117 Is I… Pers… Tabe… <NA> 0 0 NA 22
## 2 Econometri… 1971 149 Furt… Nerl… <NA> <NA> 0 NA NA 24
## 3 Econometri… 1971 170 The … Madd… <NA> <NA> NA NA NA 18
## 4 Econometri… 1971 155 Inve… Luca… Pres… <NA> 0 0 NA 23
## 5 Econometri… 1971 139 Some… Crag… <NA> <NA> 0 NA NA 16
## 6 Econometri… 1971 108 Iden… Roth… <NA> <NA> 0 NA NA 15
## 7 Econometri… 1972 164 Meth… Fair… Jaff… <NA> 0 0 NA 18
## 8 Econometri… 1972 150 Exis… Radn… <NA> <NA> 0 NA NA 15
## 9 Econometri… 1973 361 Mani… Gibb… <NA> <NA> 0 NA NA 15
## 10 Econometri… 1973 107 On a… Kram… <NA> <NA> 0 NA NA 13
## # ℹ 195 more rows
## # ℹ 11 more variables: order <dbl>, nauthor <dbl>, past5 <dbl>, aflpn90 <dbl>,
## # spage <dbl>, field <dbl>, subfld <dbl>, aulpn90 <dbl>, aulpn80 <dbl>,
## # aulpn70 <dbl>, lcites <dbl>
# Open the help page for group_by().
?group_by

# Total citations for Econometrica, written as a pipeline: keep that
# journal's rows, group by journal, then sum `cites` while ignoring NAs.
papers |>
  filter(journal == "Econometrica") |>
  group_by(journal) |>
  summarise(total_cites = sum(cites, na.rm = TRUE))
## # A tibble: 1 × 2
## journal total_cites
## <chr> <dbl>
## 1 Econometrica 75789
# Same total as a sequence of named steps.
# Step 1: rows published in Econometrica.
eco_papers <- papers |> filter(journal == "Econometrica")
# Step 2: group those rows by journal.
eco_groups <- eco_papers |> group_by(journal)
# Step 3: sum citations within each group, skipping missing values.
eco_groups |>
  summarise(total_cites = sum(cites, na.rm = TRUE))
## # A tibble: 1 × 2
## journal total_cites
## <chr> <dbl>
## 1 Econometrica 75789
# Every column of `papers` whose name contains "female".
papers |>
  select(contains("female"))
## # A tibble: 4,182 × 3
## female1 female2 female3
## <dbl> <dbl> <dbl>
## 1 0 0 NA
## 2 0 NA NA
## 3 0 NA NA
## 4 1 0 NA
## 5 0 0 NA
## 6 0 0 NA
## 7 0 0 NA
## 8 0 NA NA
## 9 0 0 NA
## 10 0 NA NA
## # ℹ 4,172 more rows