A description of the data set is available here: https://vincentarelbundock.github.io/Rdatasets/doc/boot/nuclear.html
#Import the data
library(readr)
# Read the local copy of the data set into a data frame
nuclearData <- read.csv(file = "nuclear.csv")

# Display every row of the imported data
print(nuclearData)
##      cost  date t1 t2  cap pr ne ct bw cum.n pt
## 1  460.05 68.58 14 46  687  0  1  0  0    14  0
## 2  452.99 67.33 10 73 1065  0  0  1  0     1  0
## 3  443.22 67.33 10 85 1065  1  0  1  0     1  0
## 4  652.32 68.00 11 67 1065  0  1  1  0    12  0
## 5  642.23 68.00 11 78 1065  1  1  1  0    12  0
## 6  345.39 67.92 13 51  514  0  1  1  0     3  0
## 7  272.37 68.17 12 50  822  0  0  0  0     5  0
## 8  317.21 68.42 14 59  457  0  0  0  0     1  0
## 9  457.12 68.42 15 55  822  1  0  0  0     5  0
## 10 690.19 68.33 12 71  792  0  1  1  1     2  0
## 11 350.63 68.58 12 64  560  0  0  0  0     3  0
## 12 402.59 68.75 13 47  790  0  1  0  0     6  0
## 13 412.18 68.42 15 62  530  0  0  1  0     2  0
## 14 495.58 68.92 17 52 1050  0  0  0  0     7  0
## 15 394.36 68.92 13 65  850  0  0  0  1    16  0
## 16 423.32 68.42 11 67  778  0  0  0  0     3  0
## 17 712.27 69.50 18 60  845  0  1  0  0    17  0
## 18 289.66 68.42 15 76  530  1  0  1  0     2  0
## 19 881.24 69.17 15 67 1090  0  0  0  0     1  0
## 20 490.88 68.92 16 59 1050  1  0  0  0     8  0
## 21 567.79 68.75 11 70  913  0  0  1  1    15  0
## 22 665.99 70.92 22 57  828  1  1  0  0    20  0
## 23 621.45 69.67 16 59  786  0  0  1  0    18  0
## 24 608.80 70.08 19 58  821  1  0  0  0     3  0
## 25 473.64 70.42 19 44  538  0  0  1  0    19  0
## 26 697.14 71.08 20 57 1130  0  0  1  0    21  0
## 27 207.51 67.25 13 63  745  0  0  0  0     8  1
## 28 288.48 67.17  9 48  821  0  0  1  0     7  1
## 29 284.88 67.83 12 63  886  0  0  0  1    11  1
## 30 280.36 67.83 12 71  886  1  0  0  1    11  1
## 31 217.38 67.25 13 72  745  1  0  0  0     8  1
## 32 270.71 67.83  7 80  886  1  0  0  1    11  1

Question 1

# Per-column summary statistics (min, quartiles, median, mean, max) for the
# full data set
summary(nuclearData)
##       cost            date             t1              t2       
##  Min.   :207.5   Min.   :67.17   Min.   : 7.00   Min.   :44.00  
##  1st Qu.:310.3   1st Qu.:67.90   1st Qu.:11.75   1st Qu.:56.50  
##  Median :448.1   Median :68.42   Median :13.00   Median :62.50  
##  Mean   :461.6   Mean   :68.58   Mean   :13.75   Mean   :62.38  
##  3rd Qu.:612.0   3rd Qu.:68.92   3rd Qu.:15.25   3rd Qu.:70.25  
##  Max.   :881.2   Max.   :71.08   Max.   :22.00   Max.   :85.00  
##       cap               pr               ne             ct        
##  Min.   : 457.0   Min.   :0.0000   Min.   :0.00   Min.   :0.0000  
##  1st Qu.: 745.0   1st Qu.:0.0000   1st Qu.:0.00   1st Qu.:0.0000  
##  Median : 822.0   Median :0.0000   Median :0.00   Median :0.0000  
##  Mean   : 825.4   Mean   :0.3125   Mean   :0.25   Mean   :0.4062  
##  3rd Qu.: 947.2   3rd Qu.:1.0000   3rd Qu.:0.25   3rd Qu.:1.0000  
##  Max.   :1130.0   Max.   :1.0000   Max.   :1.00   Max.   :1.0000  
##        bw             cum.n              pt        
##  Min.   :0.0000   Min.   : 1.000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.: 3.000   1st Qu.:0.0000  
##  Median :0.0000   Median : 7.500   Median :0.0000  
##  Mean   :0.1875   Mean   : 8.531   Mean   :0.1875  
##  3rd Qu.:0.0000   3rd Qu.:12.500   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :21.000   Max.   :1.0000
# Mean and median of the cost column over all rows
mean(nuclearData$cost)
## [1] 461.5603
median(nuclearData$cost)
## [1] 448.105
# Mean and median of the cap column over all rows
mean(nuclearData$cap)
## [1] 825.375
median(nuclearData$cap)
## [1] 822
# Preview the first six rows of the full data set
head(nuclearData)
##     cost  date t1 t2  cap pr ne ct bw cum.n pt
## 1 460.05 68.58 14 46  687  0  1  0  0    14  0
## 2 452.99 67.33 10 73 1065  0  0  1  0     1  0
## 3 443.22 67.33 10 85 1065  1  0  1  0     1  0
## 4 652.32 68.00 11 67 1065  0  1  1  0    12  0
## 5 642.23 68.00 11 78 1065  1  1  1  0    12  0
## 6 345.39 67.92 13 51  514  0  1  1  0     3  0

Question 2

# Take every other row (1, 3, 5, ...) and only the first five columns
nuclearData2 <- nuclearData[seq(from = 1, to = nrow(nuclearData), by = 2), 1:5]
# Of those, keep the rows whose date is strictly greater than 68
# (which() drops rows where the comparison is NA, as subset() does)
nuclearData2 <- nuclearData2[which(nuclearData2$date > 68), , drop = FALSE]

print(nuclearData2)
##      cost  date t1 t2  cap
## 1  460.05 68.58 14 46  687
## 7  272.37 68.17 12 50  822
## 9  457.12 68.42 15 55  822
## 11 350.63 68.58 12 64  560
## 13 412.18 68.42 15 62  530
## 15 394.36 68.92 13 65  850
## 17 712.27 69.50 18 60  845
## 19 881.24 69.17 15 67 1090
## 21 567.79 68.75 11 70  913
## 23 621.45 69.67 16 59  786
## 25 473.64 70.42 19 44  538

Question 3

library(plyr)
# Give the five retained columns descriptive names, in their existing order
names(nuclearData2) <- c("Cost", "Date", "Time1", "Time2", "Capacity")

# Preview the renamed data frame
print(head(nuclearData2))
##      Cost  Date Time1 Time2 Capacity
## 1  460.05 68.58    14    46      687
## 7  272.37 68.17    12    50      822
## 9  457.12 68.42    15    55      822
## 11 350.63 68.58    12    64      560
## 13 412.18 68.42    15    62      530
## 15 394.36 68.92    13    65      850

Question 4

# Per-column summary statistics (min, quartiles, median, mean, max) for the
# subset data frame
summary(nuclearData2)
##       Cost            Date           Time1           Time2      
##  Min.   :272.4   Min.   :68.17   Min.   :11.00   Min.   :44.00  
##  1st Qu.:403.3   1st Qu.:68.50   1st Qu.:12.50   1st Qu.:52.50  
##  Median :460.1   Median :68.75   Median :15.00   Median :60.00  
##  Mean   :509.4   Mean   :68.96   Mean   :14.55   Mean   :58.36  
##  3rd Qu.:594.6   3rd Qu.:69.33   3rd Qu.:15.50   3rd Qu.:64.50  
##  Max.   :881.2   Max.   :70.42   Max.   :19.00   Max.   :70.00  
##     Capacity     
##  Min.   : 530.0  
##  1st Qu.: 623.5  
##  Median : 822.0  
##  Mean   : 767.5  
##  3rd Qu.: 847.5  
##  Max.   :1090.0
# Mean and median of the Cost column in the subset
mean(nuclearData2$Cost)
## [1] 509.3727
median(nuclearData2$Cost)
## [1] 460.05
# Mean and median of the Capacity column in the subset
mean(nuclearData2$Capacity)
## [1] 767.5455
median(nuclearData2$Capacity)
## [1] 822
# Preview the first six rows of the subset
head(nuclearData2)
##      Cost  Date Time1 Time2 Capacity
## 1  460.05 68.58    14    46      687
## 7  272.37 68.17    12    50      822
## 9  457.12 68.42    15    55      822
## 11 350.63 68.58    12    64      560
## 13 412.18 68.42    15    62      530
## 15 394.36 68.92    13    65      850
Recall the previous values:
Mean - cost: 461.5603
Median - cost: 448.105
Mean - capacity: 825.375
Median - capacity: 822
New values for quick reference:
Mean - cost: 509.3727
Median - cost: 460.05
Mean - capacity: 767.5455
Median - capacity: 822
After subsetting the data set, the mean for Cost increased, and the mean for Capacity decreased. The median increased for Cost, and the median for Capacity remained the same. The second data frame differs from the first in two ways: it includes only every other row from the first data frame, and of those rows it keeps only the ones with a date greater than 68. This excluded many values. Where the mean increased, the values that were removed were, on balance, lower than the original mean. Where the mean decreased, the values that were removed were, on balance, higher than the original mean. Where the median increased, more of the removed values lay below the original median than above it. Where the median remained the same, the values removed from below and above the median were balanced closely enough that 822 is still the middle value. To see more specific examples of changes, compare the summary statistics from Question 1 and Question 4.

Question 5

library(base64)
# Recode Capacity from a number into a text band:
#   below 700            -> "Low"
#   700 up to below 900  -> "Medium"
#   900 up to below 1100 -> "High"
# Values of 1100 or more match no band and are kept as their number rendered
# as text; missing values stay missing. The column ends up as character.
#
# The bands are computed in one pass with cut() on the numeric values, so no
# label is ever re-parsed as a number and no coercion warning has to be
# suppressed. local() keeps the helper variables out of the global environment.
nuclearData2$Capacity <- local({
  capacity <- nuclearData2$Capacity
  if (!is.numeric(capacity)) {
    # Already recoded (e.g. the chunk was re-run): leave the column untouched
    capacity
  } else {
    band <- as.character(cut(
      capacity,
      breaks = c(-Inf, 700, 900, 1100),
      labels = c("Low", "Medium", "High"),
      right = FALSE  # intervals are [lower, upper), matching the "<" rules
    ))
    # cut() yields NA outside the breaks; fall back to the original value
    unbanded <- is.na(band)
    band[unbanded] <- as.character(capacity[unbanded])
    band
  }
})

nuclearData2
##      Cost  Date Time1 Time2 Capacity
## 1  460.05 68.58    14    46      Low
## 7  272.37 68.17    12    50   Medium
## 9  457.12 68.42    15    55   Medium
## 11 350.63 68.58    12    64      Low
## 13 412.18 68.42    15    62      Low
## 15 394.36 68.92    13    65   Medium
## 17 712.27 69.50    18    60   Medium
## 19 881.24 69.17    15    67     High
## 21 567.79 68.75    11    70     High
## 23 621.45 69.67    16    59   Medium
## 25 473.64 70.42    19    44      Low

Question 6

See the results after each of the questions above.

Question 7

# Read the same CSV directly from its GitHub-hosted URL
csvFile <- read.csv(
  file = "https://raw.githubusercontent.com/juliaDataScience-22/cuny-summer-23/main/nuclear.csv"
)

# Preview the downloaded data
print(head(csvFile))
##     cost  date t1 t2  cap pr ne ct bw cum.n pt
## 1 460.05 68.58 14 46  687  0  1  0  0    14  0
## 2 452.99 67.33 10 73 1065  0  0  1  0     1  0
## 3 443.22 67.33 10 85 1065  1  0  1  0     1  0
## 4 652.32 68.00 11 67 1065  0  1  1  0    12  0
## 5 642.23 68.00 11 78 1065  1  1  1  0    12  0
## 6 345.39 67.92 13 51  514  0  1  1  0     3  0