#Import the data
library(readr)
# Load the data set from nuclear.csv (path is relative to the working
# directory); the first line of the file supplies the column names.
nuclearData <- read.csv(file = "nuclear.csv", header = TRUE)
# Show the full data frame.
nuclearData
## cost date t1 t2 cap pr ne ct bw cum.n pt
## 1 460.05 68.58 14 46 687 0 1 0 0 14 0
## 2 452.99 67.33 10 73 1065 0 0 1 0 1 0
## 3 443.22 67.33 10 85 1065 1 0 1 0 1 0
## 4 652.32 68.00 11 67 1065 0 1 1 0 12 0
## 5 642.23 68.00 11 78 1065 1 1 1 0 12 0
## 6 345.39 67.92 13 51 514 0 1 1 0 3 0
## 7 272.37 68.17 12 50 822 0 0 0 0 5 0
## 8 317.21 68.42 14 59 457 0 0 0 0 1 0
## 9 457.12 68.42 15 55 822 1 0 0 0 5 0
## 10 690.19 68.33 12 71 792 0 1 1 1 2 0
## 11 350.63 68.58 12 64 560 0 0 0 0 3 0
## 12 402.59 68.75 13 47 790 0 1 0 0 6 0
## 13 412.18 68.42 15 62 530 0 0 1 0 2 0
## 14 495.58 68.92 17 52 1050 0 0 0 0 7 0
## 15 394.36 68.92 13 65 850 0 0 0 1 16 0
## 16 423.32 68.42 11 67 778 0 0 0 0 3 0
## 17 712.27 69.50 18 60 845 0 1 0 0 17 0
## 18 289.66 68.42 15 76 530 1 0 1 0 2 0
## 19 881.24 69.17 15 67 1090 0 0 0 0 1 0
## 20 490.88 68.92 16 59 1050 1 0 0 0 8 0
## 21 567.79 68.75 11 70 913 0 0 1 1 15 0
## 22 665.99 70.92 22 57 828 1 1 0 0 20 0
## 23 621.45 69.67 16 59 786 0 0 1 0 18 0
## 24 608.80 70.08 19 58 821 1 0 0 0 3 0
## 25 473.64 70.42 19 44 538 0 0 1 0 19 0
## 26 697.14 71.08 20 57 1130 0 0 1 0 21 0
## 27 207.51 67.25 13 63 745 0 0 0 0 8 1
## 28 288.48 67.17 9 48 821 0 0 1 0 7 1
## 29 284.88 67.83 12 63 886 0 0 0 1 11 1
## 30 280.36 67.83 12 71 886 1 0 0 1 11 1
## 31 217.38 67.25 13 72 745 1 0 0 0 8 1
## 32 270.71 67.83 7 80 886 1 0 0 1 11 1
Question 1
# Per-column summary (min, quartiles, mean, max) of the full data set.
summary(object = nuclearData)
## cost date t1 t2
## Min. :207.5 Min. :67.17 Min. : 7.00 Min. :44.00
## 1st Qu.:310.3 1st Qu.:67.90 1st Qu.:11.75 1st Qu.:56.50
## Median :448.1 Median :68.42 Median :13.00 Median :62.50
## Mean :461.6 Mean :68.58 Mean :13.75 Mean :62.38
## 3rd Qu.:612.0 3rd Qu.:68.92 3rd Qu.:15.25 3rd Qu.:70.25
## Max. :881.2 Max. :71.08 Max. :22.00 Max. :85.00
## cap pr ne ct
## Min. : 457.0 Min. :0.0000 Min. :0.00 Min. :0.0000
## 1st Qu.: 745.0 1st Qu.:0.0000 1st Qu.:0.00 1st Qu.:0.0000
## Median : 822.0 Median :0.0000 Median :0.00 Median :0.0000
## Mean : 825.4 Mean :0.3125 Mean :0.25 Mean :0.4062
## 3rd Qu.: 947.2 3rd Qu.:1.0000 3rd Qu.:0.25 3rd Qu.:1.0000
## Max. :1130.0 Max. :1.0000 Max. :1.00 Max. :1.0000
## bw cum.n pt
## Min. :0.0000 Min. : 1.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.: 3.000 1st Qu.:0.0000
## Median :0.0000 Median : 7.500 Median :0.0000
## Mean :0.1875 Mean : 8.531 Mean :0.1875
## 3rd Qu.:0.0000 3rd Qu.:12.500 3rd Qu.:0.0000
## Max. :1.0000 Max. :21.000 Max. :1.0000
# Mean and median of the cost and cap columns for the full data set.
# `[[` is used so the column name must match exactly (no partial matching).
mean(nuclearData[["cost"]])
## [1] 461.5603
median(nuclearData[["cost"]])
## [1] 448.105
mean(nuclearData[["cap"]])
## [1] 825.375
median(nuclearData[["cap"]])
## [1] 822
# Preview the first six rows.
head(nuclearData, n = 6L)
## cost date t1 t2 cap pr ne ct bw cum.n pt
## 1 460.05 68.58 14 46 687 0 1 0 0 14 0
## 2 452.99 67.33 10 73 1065 0 0 1 0 1 0
## 3 443.22 67.33 10 85 1065 1 0 1 0 1 0
## 4 652.32 68.00 11 67 1065 0 1 1 0 12 0
## 5 642.23 68.00 11 78 1065 1 1 1 0 12 0
## 6 345.39 67.92 13 51 514 0 1 1 0 3 0
Question 2
# Build the reduced data frame: every other row (1, 3, 5, ...), the first
# five columns, and only rows whose date is greater than 68.
# local() keeps the helper variables out of the global workspace.
nuclearData2 <- local({
  odd_rows <- seq(from = 1, to = nrow(nuclearData), by = 2)
  first_five <- data.frame(nuclearData[odd_rows, seq_len(5)])
  after_68 <- first_five$date > 68
  # Rows where the comparison is NA are dropped, as subset() would do.
  first_five[after_68 & !is.na(after_68), , drop = FALSE]
})
nuclearData2
## cost date t1 t2 cap
## 1 460.05 68.58 14 46 687
## 7 272.37 68.17 12 50 822
## 9 457.12 68.42 15 55 822
## 11 350.63 68.58 12 64 560
## 13 412.18 68.42 15 62 530
## 15 394.36 68.92 13 65 850
## 17 712.27 69.50 18 60 845
## 19 881.24 69.17 15 67 1090
## 21 567.79 68.75 11 70 913
## 23 621.45 69.67 16 59 786
## 25 473.64 70.42 19 44 538
Question 3
library(plyr)
# Rename the five columns by position to more descriptive names.
nuclearData2 <- setNames(
  nuclearData2,
  c("Cost", "Date", "Time1", "Time2", "Capacity")
)
# Preview the first six rows with the new names.
head(nuclearData2, n = 6L)
## Cost Date Time1 Time2 Capacity
## 1 460.05 68.58 14 46 687
## 7 272.37 68.17 12 50 822
## 9 457.12 68.42 15 55 822
## 11 350.63 68.58 12 64 560
## 13 412.18 68.42 15 62 530
## 15 394.36 68.92 13 65 850
Question 4
# Per-column summary (min, quartiles, mean, max) of the reduced data set.
summary(object = nuclearData2)
## Cost Date Time1 Time2
## Min. :272.4 Min. :68.17 Min. :11.00 Min. :44.00
## 1st Qu.:403.3 1st Qu.:68.50 1st Qu.:12.50 1st Qu.:52.50
## Median :460.1 Median :68.75 Median :15.00 Median :60.00
## Mean :509.4 Mean :68.96 Mean :14.55 Mean :58.36
## 3rd Qu.:594.6 3rd Qu.:69.33 3rd Qu.:15.50 3rd Qu.:64.50
## Max. :881.2 Max. :70.42 Max. :19.00 Max. :70.00
## Capacity
## Min. : 530.0
## 1st Qu.: 623.5
## Median : 822.0
## Mean : 767.5
## 3rd Qu.: 847.5
## Max. :1090.0
# Mean and median of Cost and Capacity for the reduced data set.
# `[[` is used so the column name must match exactly (no partial matching).
mean(nuclearData2[["Cost"]])
## [1] 509.3727
median(nuclearData2[["Cost"]])
## [1] 460.05
mean(nuclearData2[["Capacity"]])
## [1] 767.5455
median(nuclearData2[["Capacity"]])
## [1] 822
# Preview the first six rows.
head(nuclearData2, n = 6L)
## Cost Date Time1 Time2 Capacity
## 1 460.05 68.58 14 46 687
## 7 272.37 68.17 12 50 822
## 9 457.12 68.42 15 55 822
## 11 350.63 68.58 12 64 560
## 13 412.18 68.42 15 62 530
## 15 394.36 68.92 13 65 850
Recall the previous values:
Mean - cost: 461.5603
Median - cost: 448.105
Mean - capacity: 825.375
Median - capacity: 822
New values for quick reference:
Mean - cost: 509.3727
Median - cost: 460.05
Mean - capacity: 767.5455
Median - capacity: 822
After subsetting the data, the mean Cost increased (461.56 to 509.37)
and the mean Capacity decreased (825.38 to 767.55). The median Cost
increased (448.11 to 460.05), and the median Capacity remained the same
(822). The second data frame differs from the first in three ways: it
keeps only every other row (rows 1, 3, 5, ...), of those only the rows
with a date greater than 68, and only the first five columns. This
excluded many observations: 11 of the original 32 rows remain. The mean
Cost increased because the removed rows had, on average, lower costs
than the rows that were kept. The mean Capacity decreased because the
removed rows had, on average, higher capacities. The median Cost
increased because more of the removed values lay below the original
median than above it. The median Capacity remained the same because the
middle value of the remaining observations is still 822. To see more
specific examples of changes, compare the summary statistics from
Question 1 and Question 4.
Question 5
library(base64)
# Recode the numeric Capacity column into labelled bands:
#   below 700            -> "Low"
#   700 up to below 900  -> "Medium"
#   900 up to below 1100 -> "High"
# cut() assigns every band in one pass on the original numbers, so the
# column is never converted to text and parsed back, and no coercion
# warnings have to be suppressed.
# The is.numeric() guard makes the step safe to re-run: a column that is
# already character (for example, already banded) is left untouched.
if (is.numeric(nuclearData2$Capacity)) {
  nuclearData2$Capacity <- local({
    capacity_values <- nuclearData2$Capacity
    capacity_band <- as.character(cut(
      capacity_values,
      breaks = c(-Inf, 700, 900, 1100),
      labels = c("Low", "Medium", "High"),
      right = FALSE  # intervals are [lower, upper), i.e. strict "<" upper
    ))
    # Values of 1100 or more (and missing values) fall outside every band;
    # keep them as their original text instead of turning them into NA.
    unbanded <- is.na(capacity_band)
    capacity_band[unbanded] <- as.character(capacity_values[unbanded])
    capacity_band
  })
}
nuclearData2
## Cost Date Time1 Time2 Capacity
## 1 460.05 68.58 14 46 Low
## 7 272.37 68.17 12 50 Medium
## 9 457.12 68.42 15 55 Medium
## 11 350.63 68.58 12 64 Low
## 13 412.18 68.42 15 62 Low
## 15 394.36 68.92 13 65 Medium
## 17 712.27 69.50 18 60 Medium
## 19 881.24 69.17 15 67 High
## 21 567.79 68.75 11 70 High
## 23 621.45 69.67 16 59 Medium
## 25 473.64 70.42 19 44 Low
Question 6
See the results after each of the questions above.
Question 7
# Read the same CSV directly from its raw GitHub URL; the first line of the
# file supplies the column names.
csvFile <- read.csv(
  file = "https://raw.githubusercontent.com/juliaDataScience-22/cuny-summer-23/main/nuclear.csv",
  header = TRUE
)
# Preview the first six rows to confirm the download parsed correctly.
head(csvFile, n = 6L)
## cost date t1 t2 cap pr ne ct bw cum.n pt
## 1 460.05 68.58 14 46 687 0 1 0 0 14 0
## 2 452.99 67.33 10 73 1065 0 0 1 0 1 0
## 3 443.22 67.33 10 85 1065 1 0 1 0 1 0
## 4 652.32 68.00 11 67 1065 0 1 1 0 12 0
## 5 642.23 68.00 11 78 1065 1 1 1 0 12 0
## 6 345.39 67.92 13 51 514 0 1 1 0 3 0