library(DMwR2)
## Warning: package 'DMwR2' was built under R version 4.2.1
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
data(algae)
algae
## # A tibble: 200 × 18
## season size speed mxPH mnO2 Cl NO3 NH4 oPO4 PO4 Chla a1
## <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 winter small medium 8 9.8 60.8 6.24 578 105 170 50 0
## 2 spring small medium 8.35 8 57.8 1.29 370 429. 559. 1.3 1.4
## 3 autumn small medium 8.1 11.4 40.0 5.33 347. 126. 187. 15.6 3.3
## 4 spring small medium 8.07 4.8 77.4 2.30 98.2 61.2 139. 1.4 3.1
## 5 autumn small medium 8.06 9 55.4 10.4 234. 58.2 97.6 10.5 9.2
## 6 winter small high 8.25 13.1 65.8 9.25 430 18.2 56.7 28.4 15.1
## 7 summer small high 8.15 10.3 73.2 1.54 110 61.2 112. 3.2 2.4
## 8 autumn small high 8.05 10.6 59.1 4.99 206. 44.7 77.4 6.9 18.2
## 9 winter small medium 8.7 3.4 22.0 0.886 103. 36.3 71 5.54 25.4
## 10 winter small high 7.93 9.9 8 1.39 5.8 27.2 46.6 0.8 17
## # … with 190 more rows, and 6 more variables: a2 <dbl>, a3 <dbl>, a4 <dbl>,
## # a5 <dbl>, a6 <dbl>, a7 <dbl>
data(iris)
iris
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
## 11 5.4 3.7 1.5 0.2 setosa
## 12 4.8 3.4 1.6 0.2 setosa
## 13 4.8 3.0 1.4 0.1 setosa
## 14 4.3 3.0 1.1 0.1 setosa
## 15 5.8 4.0 1.2 0.2 setosa
## 16 5.7 4.4 1.5 0.4 setosa
## 17 5.4 3.9 1.3 0.4 setosa
## 18 5.1 3.5 1.4 0.3 setosa
## 19 5.7 3.8 1.7 0.3 setosa
## 20 5.1 3.8 1.5 0.3 setosa
## 21 5.4 3.4 1.7 0.2 setosa
## 22 5.1 3.7 1.5 0.4 setosa
## 23 4.6 3.6 1.0 0.2 setosa
## 24 5.1 3.3 1.7 0.5 setosa
## 25 4.8 3.4 1.9 0.2 setosa
## 26 5.0 3.0 1.6 0.2 setosa
## 27 5.0 3.4 1.6 0.4 setosa
## 28 5.2 3.5 1.5 0.2 setosa
## 29 5.2 3.4 1.4 0.2 setosa
## 30 4.7 3.2 1.6 0.2 setosa
## 31 4.8 3.1 1.6 0.2 setosa
## 32 5.4 3.4 1.5 0.4 setosa
## 33 5.2 4.1 1.5 0.1 setosa
## 34 5.5 4.2 1.4 0.2 setosa
## 35 4.9 3.1 1.5 0.2 setosa
## 36 5.0 3.2 1.2 0.2 setosa
## 37 5.5 3.5 1.3 0.2 setosa
## 38 4.9 3.6 1.4 0.1 setosa
## 39 4.4 3.0 1.3 0.2 setosa
## 40 5.1 3.4 1.5 0.2 setosa
## 41 5.0 3.5 1.3 0.3 setosa
## 42 4.5 2.3 1.3 0.3 setosa
## 43 4.4 3.2 1.3 0.2 setosa
## 44 5.0 3.5 1.6 0.6 setosa
## 45 5.1 3.8 1.9 0.4 setosa
## 46 4.8 3.0 1.4 0.3 setosa
## 47 5.1 3.8 1.6 0.2 setosa
## 48 4.6 3.2 1.4 0.2 setosa
## 49 5.3 3.7 1.5 0.2 setosa
## 50 5.0 3.3 1.4 0.2 setosa
## 51 7.0 3.2 4.7 1.4 versicolor
## 52 6.4 3.2 4.5 1.5 versicolor
## 53 6.9 3.1 4.9 1.5 versicolor
## 54 5.5 2.3 4.0 1.3 versicolor
## 55 6.5 2.8 4.6 1.5 versicolor
## 56 5.7 2.8 4.5 1.3 versicolor
## 57 6.3 3.3 4.7 1.6 versicolor
## 58 4.9 2.4 3.3 1.0 versicolor
## 59 6.6 2.9 4.6 1.3 versicolor
## 60 5.2 2.7 3.9 1.4 versicolor
## 61 5.0 2.0 3.5 1.0 versicolor
## 62 5.9 3.0 4.2 1.5 versicolor
## 63 6.0 2.2 4.0 1.0 versicolor
## 64 6.1 2.9 4.7 1.4 versicolor
## 65 5.6 2.9 3.6 1.3 versicolor
## 66 6.7 3.1 4.4 1.4 versicolor
## 67 5.6 3.0 4.5 1.5 versicolor
## 68 5.8 2.7 4.1 1.0 versicolor
## 69 6.2 2.2 4.5 1.5 versicolor
## 70 5.6 2.5 3.9 1.1 versicolor
## 71 5.9 3.2 4.8 1.8 versicolor
## 72 6.1 2.8 4.0 1.3 versicolor
## 73 6.3 2.5 4.9 1.5 versicolor
## 74 6.1 2.8 4.7 1.2 versicolor
## 75 6.4 2.9 4.3 1.3 versicolor
## 76 6.6 3.0 4.4 1.4 versicolor
## 77 6.8 2.8 4.8 1.4 versicolor
## 78 6.7 3.0 5.0 1.7 versicolor
## 79 6.0 2.9 4.5 1.5 versicolor
## 80 5.7 2.6 3.5 1.0 versicolor
## 81 5.5 2.4 3.8 1.1 versicolor
## 82 5.5 2.4 3.7 1.0 versicolor
## 83 5.8 2.7 3.9 1.2 versicolor
## 84 6.0 2.7 5.1 1.6 versicolor
## 85 5.4 3.0 4.5 1.5 versicolor
## 86 6.0 3.4 4.5 1.6 versicolor
## 87 6.7 3.1 4.7 1.5 versicolor
## 88 6.3 2.3 4.4 1.3 versicolor
## 89 5.6 3.0 4.1 1.3 versicolor
## 90 5.5 2.5 4.0 1.3 versicolor
## 91 5.5 2.6 4.4 1.2 versicolor
## 92 6.1 3.0 4.6 1.4 versicolor
## 93 5.8 2.6 4.0 1.2 versicolor
## 94 5.0 2.3 3.3 1.0 versicolor
## 95 5.6 2.7 4.2 1.3 versicolor
## 96 5.7 3.0 4.2 1.2 versicolor
## 97 5.7 2.9 4.2 1.3 versicolor
## 98 6.2 2.9 4.3 1.3 versicolor
## 99 5.1 2.5 3.0 1.1 versicolor
## 100 5.7 2.8 4.1 1.3 versicolor
## 101 6.3 3.3 6.0 2.5 virginica
## 102 5.8 2.7 5.1 1.9 virginica
## 103 7.1 3.0 5.9 2.1 virginica
## 104 6.3 2.9 5.6 1.8 virginica
## 105 6.5 3.0 5.8 2.2 virginica
## 106 7.6 3.0 6.6 2.1 virginica
## 107 4.9 2.5 4.5 1.7 virginica
## 108 7.3 2.9 6.3 1.8 virginica
## 109 6.7 2.5 5.8 1.8 virginica
## 110 7.2 3.6 6.1 2.5 virginica
## 111 6.5 3.2 5.1 2.0 virginica
## 112 6.4 2.7 5.3 1.9 virginica
## 113 6.8 3.0 5.5 2.1 virginica
## 114 5.7 2.5 5.0 2.0 virginica
## 115 5.8 2.8 5.1 2.4 virginica
## 116 6.4 3.2 5.3 2.3 virginica
## 117 6.5 3.0 5.5 1.8 virginica
## 118 7.7 3.8 6.7 2.2 virginica
## 119 7.7 2.6 6.9 2.3 virginica
## 120 6.0 2.2 5.0 1.5 virginica
## 121 6.9 3.2 5.7 2.3 virginica
## 122 5.6 2.8 4.9 2.0 virginica
## 123 7.7 2.8 6.7 2.0 virginica
## 124 6.3 2.7 4.9 1.8 virginica
## 125 6.7 3.3 5.7 2.1 virginica
## 126 7.2 3.2 6.0 1.8 virginica
## 127 6.2 2.8 4.8 1.8 virginica
## 128 6.1 3.0 4.9 1.8 virginica
## 129 6.4 2.8 5.6 2.1 virginica
## 130 7.2 3.0 5.8 1.6 virginica
## 131 7.4 2.8 6.1 1.9 virginica
## 132 7.9 3.8 6.4 2.0 virginica
## 133 6.4 2.8 5.6 2.2 virginica
## 134 6.3 2.8 5.1 1.5 virginica
## 135 6.1 2.6 5.6 1.4 virginica
## 136 7.7 3.0 6.1 2.3 virginica
## 137 6.3 3.4 5.6 2.4 virginica
## 138 6.4 3.1 5.5 1.8 virginica
## 139 6.0 3.0 4.8 1.8 virginica
## 140 6.9 3.1 5.4 2.1 virginica
## 141 6.7 3.1 5.6 2.4 virginica
## 142 6.9 3.1 5.1 2.3 virginica
## 143 5.8 2.7 5.1 1.9 virginica
## 144 6.8 3.2 5.9 2.3 virginica
## 145 6.7 3.3 5.7 2.5 virginica
## 146 6.7 3.0 5.2 2.3 virginica
## 147 6.3 2.5 5.0 1.9 virginica
## 148 6.5 3.0 5.2 2.0 virginica
## 149 6.2 3.4 5.4 2.3 virginica
## 150 5.9 3.0 5.1 1.8 virginica
summary(algae)
## season size speed mxPH mnO2
## autumn:40 large :45 high :84 Min. :5.600 Min. : 1.500
## spring:53 medium:84 low :33 1st Qu.:7.700 1st Qu.: 7.725
## summer:45 small :71 medium:83 Median :8.060 Median : 9.800
## winter:62 Mean :8.012 Mean : 9.118
## 3rd Qu.:8.400 3rd Qu.:10.800
## Max. :9.700 Max. :13.400
## NA's :1 NA's :2
## Cl NO3 NH4 oPO4
## Min. : 0.222 Min. : 0.050 Min. : 5.00 Min. : 1.00
## 1st Qu.: 10.981 1st Qu.: 1.296 1st Qu.: 38.33 1st Qu.: 15.70
## Median : 32.730 Median : 2.675 Median : 103.17 Median : 40.15
## Mean : 43.636 Mean : 3.282 Mean : 501.30 Mean : 73.59
## 3rd Qu.: 57.824 3rd Qu.: 4.446 3rd Qu.: 226.95 3rd Qu.: 99.33
## Max. :391.500 Max. :45.650 Max. :24064.00 Max. :564.60
## NA's :10 NA's :2 NA's :2 NA's :2
## PO4 Chla a1 a2
## Min. : 1.00 Min. : 0.200 Min. : 0.00 Min. : 0.000
## 1st Qu.: 41.38 1st Qu.: 2.000 1st Qu.: 1.50 1st Qu.: 0.000
## Median :103.29 Median : 5.475 Median : 6.95 Median : 3.000
## Mean :137.88 Mean : 13.971 Mean :16.92 Mean : 7.458
## 3rd Qu.:213.75 3rd Qu.: 18.308 3rd Qu.:24.80 3rd Qu.:11.375
## Max. :771.60 Max. :110.456 Max. :89.80 Max. :72.600
## NA's :2 NA's :12
## a3 a4 a5 a6
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 1.550 Median : 0.000 Median : 1.900 Median : 0.000
## Mean : 4.309 Mean : 1.992 Mean : 5.064 Mean : 5.964
## 3rd Qu.: 4.925 3rd Qu.: 2.400 3rd Qu.: 7.500 3rd Qu.: 6.925
## Max. :42.800 Max. :44.600 Max. :44.400 Max. :77.600
##
## a7
## Min. : 0.000
## 1st Qu.: 0.000
## Median : 1.000
## Mean : 2.495
## 3rd Qu.: 2.400
## Max. :31.600
##
summary(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
“algae$mxPH” with “iris$Sepal.Length” and “algae$season” with “iris$Petal.Length”
Copy and/or take a screenshot of your results for both and include them in this assignment (I only need the first 20 to 40 rows of each sub-group).
# Most frequent value of a vector.
#
# x:     vector (numeric, character, factor, ...) to inspect.
# na.rm: when TRUE, NA entries are dropped before counting; when FALSE
#        (the default) NA is counted like any other value.
#
# Returns a length-one vector of the same type as `x`. If several values
# share the highest count, the one that appears first in `x` is returned.
Mode <- function(x, na.rm=FALSE){
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  distinct_vals <- unique(x)
  # Position of each element within the distinct values, tallied per value.
  counts <- tabulate(match(x, distinct_vals))
  distinct_vals[which.max(counts)]
}
Mode(iris$Sepal.Length)
## [1] 5
Mode(iris$Petal.Length)
## [1] 1.4
# Quantiles of sepal length within each species
aggregate(
  x = iris$Sepal.Length,
  by = list(Species = iris$Species),
  FUN = quantile
)
## Species x.0% x.25% x.50% x.75% x.100%
## 1 setosa 4.300 4.800 5.000 5.200 5.800
## 2 versicolor 4.900 5.600 5.900 6.300 7.000
## 3 virginica 4.900 6.225 6.500 6.900 7.900
# Quantiles of petal length within each species (formula interface)
aggregate(Petal.Length ~ Species, data = iris, FUN = quantile)
## Species Petal.Length.0% Petal.Length.25% Petal.Length.50% Petal.Length.75%
## 1 setosa 1.000 1.400 1.500 1.575
## 2 versicolor 3.000 4.000 4.350 4.600
## 3 virginica 4.500 5.100 5.550 5.875
## Petal.Length.100%
## 1 1.900
## 2 5.100
## 3 6.900
Mode(iris$Species)
## [1] setosa
## Levels: setosa versicolor virginica
# Count the missing values in each observation (row) of the algae data and
# report the overall total.
data(algae, package = "DMwR2")
# rowSums() over the logical NA mask gives the per-row counts directly;
# apply() would first coerce the mixed-type data frame to a matrix.
nasRow <- rowSums(is.na(algae))
cat("The algae dataset contains ", sum(nasRow)," NA values. \n")
## The algae dataset contains 33 NA values.
The code above counts the missing values in the algae dataset. (b) What results are we looking for? We count the NA entries in each row and then sum those per-row counts to get the total number of missing values in the dataset.
summary(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 4.2.1
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
describe(iris)
## iris
##
## 5 Variables 150 Observations
## --------------------------------------------------------------------------------
## Sepal.Length
## n missing distinct Info Mean Gmd .05 .10
## 150 0 35 0.998 5.843 0.9462 4.600 4.800
## .25 .50 .75 .90 .95
## 5.100 5.800 6.400 6.900 7.255
##
## lowest : 4.3 4.4 4.5 4.6 4.7, highest: 7.3 7.4 7.6 7.7 7.9
## --------------------------------------------------------------------------------
## Sepal.Width
## n missing distinct Info Mean Gmd .05 .10
## 150 0 23 0.992 3.057 0.4872 2.345 2.500
## .25 .50 .75 .90 .95
## 2.800 3.000 3.300 3.610 3.800
##
## lowest : 2.0 2.2 2.3 2.4 2.5, highest: 3.9 4.0 4.1 4.2 4.4
## --------------------------------------------------------------------------------
## Petal.Length
## n missing distinct Info Mean Gmd .05 .10
## 150 0 43 0.998 3.758 1.979 1.30 1.40
## .25 .50 .75 .90 .95
## 1.60 4.35 5.10 5.80 6.10
##
## lowest : 1.0 1.1 1.2 1.3 1.4, highest: 6.3 6.4 6.6 6.7 6.9
## --------------------------------------------------------------------------------
## Petal.Width
## n missing distinct Info Mean Gmd .05 .10
## 150 0 22 0.99 1.199 0.8676 0.2 0.2
## .25 .50 .75 .90 .95
## 0.3 1.3 1.8 2.2 2.3
##
## lowest : 0.1 0.2 0.3 0.4 0.5, highest: 2.1 2.2 2.3 2.4 2.5
## --------------------------------------------------------------------------------
## Species
## n missing distinct
## 150 0 3
##
## Value setosa versicolor virginica
## Frequency 50 50 50
## Proportion 0.333 0.333 0.333
## --------------------------------------------------------------------------------