Source file ⇒ Assignment_9.Rmd
# Address of the Wikipedia article and the XPath used to locate its tables.
page <- "http://en.wikipedia.org/wiki/List_of_nuclear_reactors"
xpath <- '//*[@id="mw-content-text"]/table'
# Read the page, select every node matched by `xpath`, and run html_table()
# over the matches; the result is indexed by position further down.
table_list <- html_table(
  html_nodes(read_html(page), xpath = xpath),
  fill = TRUE
)
# Column headings applied to the selected table.
new_names <- c(
  "Name", "Reactor No.", "Reactor Type", "Reactor Model", "Status",
  "Net Capacity in MW", "Gross Capacity in MW", "Construction Start Date",
  "Commercial Operation Date", "Closure"
)
# Take the 23rd scraped table, relabel its columns, and discard its first row.
Japan_list <- table_list[[23]] %>%
  setNames(new_names) %>%
  filter(row_number() != 1)
head(Japan_list)
## Name Reactor No. Reactor Type Reactor Model
## 1 Fukushima Daiichi 1 BWR BWR-3
## 2 Fukushima Daiichi 2 BWR BWR-4
## 3 Fukushima Daiichi 3 BWR BWR-4
## 4 Fukushima Daiichi 4 BWR BWR-4
## 5 Fukushima Daiichi 5 BWR BWR-4
## 6 Fukushima Daiichi 6 BWR BWR-5
## Status Net Capacity in MW Gross Capacity in MW
## 1 Inoperable 439 460
## 2 Inoperable 760 784
## 3 Inoperable 760 784
## 4 Shut down/ Inoperable 760 784
## 5 Shut down 760 784
## 6 Shut down 1067 1100
## Construction Start Date Commercial Operation Date Closure
## 1 25 July 1967 26 March 1971 19 May 2011
## 2 9 June 1969 18 July 1974 19 May 2011
## 3 28 December 1970 27 March 1976 19 May 2011
## 4 12 February 1973 12 October 1978 19 May 2011
## 5 22 May 1972 18 April 1978 17 December 2013
## 6 26 October 1973 24 October 1979 17 December 2013
# Scatter plot of net capacity against construction start date, coloured by
# reactor type.
#
# Columns are referenced by name inside aes() so they are looked up in the
# data piped into ggplot() rather than in the global `Japan_list` object.
# NOTE(review): `Net Capacity in MW` appears to be scraped as text (the table's
# first row held non-data text before it was dropped), so it is converted with
# as.numeric() to get a continuous y axis; this is a no-op if the column is
# already numeric. Entries that cannot be converted become NA -- confirm.
nuclear_power_graph <- Japan_list %>%
  ggplot(aes(
    x = dmy(`Construction Start Date`),
    y = as.numeric(`Net Capacity in MW`),
    col = `Reactor Type`
  )) +
  geom_point() +
  labs(
    x = "Construction Date",
    y = "Net",
    title = "Nuclear Graph Output",
    col = "Type"
  )
nuclear_power_graph
## Warning: Removed 3 rows containing missing values (geom_point).
# Add a `Site` label built from the plant name and reactor number. Columns are
# referenced by name so mutate() reads them from the piped data instead of
# reaching back into the global `Japan_list`.
mutated_table <- Japan_list %>%
  mutate(Site = paste0(Name, `Reactor No.`))
# One horizontal segment per site, running from the construction start date to
# the commercial operation date. geom_segment() inherits `x` and `y` from
# ggplot(), so only the segment end points are mapped in the layer.
Construction_Delays <- mutated_table %>%
  ggplot(aes(x = dmy(`Construction Start Date`), y = Site)) +
  geom_segment(aes(
    xend = dmy(`Commercial Operation Date`),
    yend = Site
  )) +
  labs(y = "Name", x = "Operation Time", title = "Construction Delays")
Construction_Delays
## Warning: Removed 5 rows containing missing values (geom_segment).
mkdir LifeExpectancy_Project_Stats133
mv ~/Desktop/lifeexpectancy.csv lifespan
ls
less lifespan
wc -l lifespan
249 lines
152 , 177 , 202
cut -f 1,152,177,202 -d ',' lifespan
cat lifespan | cut -f 1,152,177,202 -d "," | egrep [0-9] > lifespan.clean.csv
cd MyDataScienceToolbox vagrant up vagrant ssh R install.packages("maps") install.packages("fields") R CMD BATCH cat lifeexpectancy.csv ls lifeexpectancy.csv