Source file ⇒ Assignment_9.Rmd

# Download the Wikipedia list of nuclear reactors and parse every table found
# directly under the article's main content container.
page <- "http://en.wikipedia.org/wiki/List_of_nuclear_reactors"
xpath <- '//*[@id="mw-content-text"]/table'
table_list <- html_table(
  html_nodes(read_html(page), xpath = xpath),
  fill = TRUE
)

# The table of interest is picked by position in the page (index 23).
Japan_list <- table_list[[23]]

# Replace the scraped headers with descriptive column names.
new_names <- c(
  "Name",
  "Reactor No.",
  "Reactor Type",
  "Reactor Model",
  "Status",
  "Net Capacity in MW",
  "Gross Capacity in MW",
  "Construction Start Date",
  "Commercial Operation Date",
  "Closure"
)
names(Japan_list) <- new_names

# Discard the first row and preview the result.
Japan_list <- filter(Japan_list, row_number() > 1)
head(Japan_list)
##                Name Reactor No. Reactor Type Reactor Model
## 1 Fukushima Daiichi           1          BWR         BWR-3
## 2 Fukushima Daiichi           2          BWR         BWR-4
## 3 Fukushima Daiichi           3          BWR         BWR-4
## 4 Fukushima Daiichi           4          BWR         BWR-4
## 5 Fukushima Daiichi           5          BWR         BWR-4
## 6 Fukushima Daiichi           6          BWR         BWR-5
##                  Status Net Capacity in MW Gross Capacity in MW
## 1            Inoperable                439                  460
## 2            Inoperable                760                  784
## 3            Inoperable                760                  784
## 4 Shut down/ Inoperable                760                  784
## 5             Shut down                760                  784
## 6             Shut down               1067                 1100
##   Construction Start Date Commercial Operation Date          Closure
## 1            25 July 1967             26 March 1971      19 May 2011
## 2             9 June 1969              18 July 1974      19 May 2011
## 3        28 December 1970             27 March 1976      19 May 2011
## 4        12 February 1973           12 October 1978      19 May 2011
## 5             22 May 1972             18 April 1978 17 December 2013
## 6         26 October 1973           24 October 1979 17 December 2013
# Scatter plot of net capacity against construction start date, coloured by
# reactor type.
#
# Columns are referenced by bare (backticked) name inside aes(): ggplot2
# evaluates aes() against the data piped into ggplot(), and `Japan_list$...`
# bypasses that mechanism (it is discouraged by ggplot2 and can misalign with
# the plotted data).
#
# as.numeric() guards against the capacity column having been scraped as
# text, which would produce a categorical y axis; it is a no-op when the
# column is already numeric. NOTE(review): confirm the column type with
# str(Japan_list).
nuclear_power_graph <- Japan_list %>%
  ggplot(aes(
    x = dmy(`Construction Start Date`),
    y = as.numeric(`Net Capacity in MW`),
    col = `Reactor Type`
  )) +
  geom_point() +
  labs(
    x = "Construction Date",
    y = "Net",
    title = "Nuclear Graph Output",
    col = "Type"
  )

nuclear_power_graph
## Warning: Removed 3 rows containing missing values (geom_point).

# Build a per-reactor label by concatenating the plant name and reactor
# number. Columns are referenced by bare name so mutate() reads them from the
# piped data rather than reaching back into the global Japan_list.
mutated_table <- Japan_list %>%
  mutate(Site = paste0(Name, `Reactor No.`))

# One horizontal segment per reactor, running from the construction start
# date to the commercial operation date. x and y are mapped once in ggplot()
# and inherited by geom_segment(), which only adds the segment end points.
Construction_Delays <- mutated_table %>%
  ggplot(aes(x = dmy(`Construction Start Date`), y = Site)) +
  geom_segment(aes(
    xend = dmy(`Commercial Operation Date`),
    yend = Site
  )) +
  labs(y = "Name", x = "Operation Time", title = "Construction Delays")

Construction_Delays
## Warning: Removed 5 rows containing missing values (geom_segment).

UNIX Commands

mkdir LifeExpectancy_Project_Stats133

mv ~/Desktop/lifeexpectancy.csv lifespan

ls

less lifespan

wc -l lifespan

249 lines

152 , 177 , 202

cut -f 1,152,177,202 -d ',' lifespan

cat lifespan | cut -f 1,152,177,202 -d "," | egrep [0-9] > lifespan.clean.csv

cd MyDataScienceToolbox

vagrant up

vagrant ssh

R

install.packages("maps")

install.packages("fields")

R CMD BATCH

cat lifeexpectancy.csv

ls lifeexpectancy.csv

Extra Credit

  1. He uses a Scatterplot.
  2. We would need data on each country's size and average income in order to make a plot similar to the one in the video.
  3. Rosling labels the countries in the plot, updates the displayed year as the animation progresses, and color-codes his points based on certain factors. In R, this can be done by labelling the countries and mapping the point color to a factor.