Tasks
Task 1
# Show the working directory that relative file paths are resolved against.
print(getwd())
## [1] "C:/Users/aksel/OneDrive/Masaüstü/classes/ISEStats/Lab1"
Task 2
# Read the DDT data set from a CSV file in the working directory.
ddt <- read.csv(file = "DDT-1.csv")
# Preview the first six rows to check column names and value formats.
head(ddt, n = 6L)
## RIVER MILE SPECIES LENGTH WEIGHT DDT
## 1 FCM 5 CCATFISH 42.5 732 10
## 2 FCM 5 CCATFISH 44.0 795 16
## 3 FCM 5 CCATFISH 41.5 547 23
## 4 FCM 5 CCATFISH 39.0 465 21
## 5 FCM 5 CCATFISH 50.5 1252 50
## 6 FCM 5 CCATFISH 52.0 1255 150
Task 3
# To find the qualitative variables, we focus on the categorical columns, which are of type <chr> (character) in this data set. Selecting only the character columns gives a sub data frame containing just the qualitative variables, which are RIVER and SPECIES.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep only the columns for which is.character() is TRUE, then display them.
qualitative_ddt <- select(ddt, where(is.character))
print(qualitative_ddt)
## RIVER SPECIES
## 1 FCM CCATFISH
## 2 FCM CCATFISH
## 3 FCM CCATFISH
## 4 FCM CCATFISH
## 5 FCM CCATFISH
## 6 FCM CCATFISH
## 7 LCM CCATFISH
## 8 LCM CCATFISH
## 9 LCM CCATFISH
## 10 LCM CCATFISH
## 11 LCM CCATFISH
## 12 LCM CCATFISH
## 13 SCM CCATFISH
## 14 SCM CCATFISH
## 15 SCM CCATFISH
## 16 SCM CCATFISH
## 17 SCM CCATFISH
## 18 SCM CCATFISH
## 19 TRM CCATFISH
## 20 TRM CCATFISH
## 21 TRM CCATFISH
## 22 TRM CCATFISH
## 23 TRM CCATFISH
## 24 TRM CCATFISH
## 25 TRM CCATFISH
## 26 TRM CCATFISH
## 27 TRM CCATFISH
## 28 TRM CCATFISH
## 29 TRM CCATFISH
## 30 TRM CCATFISH
## 31 TRM SMBUFFALO
## 32 TRM SMBUFFALO
## 33 TRM SMBUFFALO
## 34 TRM SMBUFFALO
## 35 TRM SMBUFFALO
## 36 TRM SMBUFFALO
## 37 TRM CCATFISH
## 38 TRM CCATFISH
## 39 TRM CCATFISH
## 40 TRM CCATFISH
## 41 TRM CCATFISH
## 42 TRM CCATFISH
## 43 TRM LMBASS
## 44 TRM LMBASS
## 45 TRM LMBASS
## 46 TRM LMBASS
## 47 TRM LMBASS
## 48 TRM LMBASS
## 49 TRM CCATFISH
## 50 TRM CCATFISH
## 51 TRM CCATFISH
## 52 TRM CCATFISH
## 53 TRM CCATFISH
## 54 TRM CCATFISH
## 55 TRM SMBUFFALO
## 56 TRM SMBUFFALO
## 57 TRM SMBUFFALO
## 58 TRM SMBUFFALO
## 59 TRM SMBUFFALO
## 60 TRM SMBUFFALO
## 61 TRM CCATFISH
## 62 TRM CCATFISH
## 63 TRM CCATFISH
## 64 TRM CCATFISH
## 65 TRM CCATFISH
## 66 TRM CCATFISH
## 67 TRM CCATFISH
## 68 TRM CCATFISH
## 69 TRM CCATFISH
## 70 TRM CCATFISH
## 71 TRM CCATFISH
## 72 TRM CCATFISH
## 73 TRM SMBUFFALO
## 74 TRM SMBUFFALO
## 75 TRM SMBUFFALO
## 76 TRM SMBUFFALO
## 77 TRM SMBUFFALO
## 78 TRM SMBUFFALO
## 79 TRM CCATFISH
## 80 TRM CCATFISH
## 81 TRM CCATFISH
## 82 TRM CCATFISH
## 83 TRM CCATFISH
## 84 TRM CCATFISH
## 85 TRM CCATFISH
## 86 TRM CCATFISH
## 87 TRM CCATFISH
## 88 TRM CCATFISH
## 89 TRM CCATFISH
## 90 TRM CCATFISH
## 91 TRM SMBUFFALO
## 92 TRM SMBUFFALO
## 93 TRM SMBUFFALO
## 94 TRM SMBUFFALO
## 95 TRM SMBUFFALO
## 96 TRM SMBUFFALO
## 97 TRM CCATFISH
## 98 TRM CCATFISH
## 99 TRM CCATFISH
## 100 TRM CCATFISH
## 101 TRM CCATFISH
## 102 TRM CCATFISH
## 103 TRM CCATFISH
## 104 TRM CCATFISH
## 105 TRM CCATFISH
## 106 TRM CCATFISH
## 107 TRM CCATFISH
## 108 TRM CCATFISH
## 109 TRM SMBUFFALO
## 110 TRM SMBUFFALO
## 111 TRM SMBUFFALO
## 112 TRM SMBUFFALO
## 113 TRM SMBUFFALO
## 114 TRM SMBUFFALO
## 115 TRM CCATFISH
## 116 TRM CCATFISH
## 117 TRM CCATFISH
## 118 TRM CCATFISH
## 119 TRM CCATFISH
## 120 TRM CCATFISH
## 121 TRM CCATFISH
## 122 TRM CCATFISH
## 123 TRM CCATFISH
## 124 TRM CCATFISH
## 125 TRM CCATFISH
## 126 TRM CCATFISH
## 127 TRM SMBUFFALO
## 128 TRM SMBUFFALO
## 129 TRM CCATFISH
## 130 TRM CCATFISH
## 131 TRM CCATFISH
## 132 TRM CCATFISH
## 133 TRM CCATFISH
## 134 TRM CCATFISH
## 135 TRM SMBUFFALO
## 136 TRM SMBUFFALO
## 137 TRM SMBUFFALO
## 138 TRM SMBUFFALO
## 139 TRM LMBASS
## 140 TRM LMBASS
## 141 TRM LMBASS
## 142 TRM LMBASS
## 143 TRM LMBASS
## 144 TRM LMBASS
# To find the quantitative variables, we focus on the numerical columns, which are of type <int> and <dbl> in this data set. Selecting only the integer and double columns gives a sub data frame containing just the quantitative variables, which are MILE, LENGTH, WEIGHT and DDT.
# Keep only the columns for which is.numeric() is TRUE, then display them.
quant_ddt <- select(ddt, where(is.numeric))
print(quant_ddt)
## MILE LENGTH WEIGHT DDT
## 1 5 42.5 732 10.00
## 2 5 44.0 795 16.00
## 3 5 41.5 547 23.00
## 4 5 39.0 465 21.00
## 5 5 50.5 1252 50.00
## 6 5 52.0 1255 150.00
## 7 3 40.5 741 28.00
## 8 3 48.0 1151 7.70
## 9 3 48.0 1186 2.00
## 10 3 43.5 754 19.00
## 11 3 40.5 679 16.00
## 12 3 47.5 985 5.40
## 13 1 44.5 1133 2.60
## 14 1 46.0 1139 3.10
## 15 1 48.0 1186 3.50
## 16 1 45.0 984 9.10
## 17 1 43.0 965 7.80
## 18 1 45.0 1084 4.10
## 19 275 48.0 986 8.40
## 20 275 45.0 1023 15.00
## 21 275 49.0 1266 25.00
## 22 275 50.0 1086 5.60
## 23 275 46.0 1044 4.60
## 24 275 52.0 1770 8.20
## 25 280 48.0 1048 6.10
## 26 280 51.0 1641 13.00
## 27 280 48.5 1331 6.00
## 28 280 51.0 1728 6.60
## 29 280 44.0 917 5.50
## 30 280 51.0 1398 11.00
## 31 280 49.0 1763 4.50
## 32 280 46.0 1459 4.20
## 33 280 52.0 2302 3.00
## 34 280 46.0 1614 2.30
## 35 280 46.0 1444 2.50
## 36 280 48.0 2006 6.80
## 37 285 44.0 936 19.00
## 38 285 42.0 1058 7.20
## 39 285 42.5 800 6.00
## 40 285 45.5 1087 10.00
## 41 285 48.0 1329 12.00
## 42 285 44.0 897 2.80
## 43 285 28.5 778 0.48
## 44 285 26.0 532 0.18
## 45 285 25.5 441 0.34
## 46 285 25.0 544 0.11
## 47 285 23.0 393 0.22
## 48 285 28.0 733 0.80
## 49 290 41.0 961 8.70
## 50 290 44.0 886 22.00
## 51 290 41.0 678 13.00
## 52 290 42.0 1011 3.50
## 53 290 42.5 947 9.30
## 54 290 44.0 989 21.00
## 55 290 43.5 1291 3.40
## 56 290 46.5 1186 13.00
## 57 290 43.0 1293 5.60
## 58 290 47.0 1709 12.00
## 59 290 46.0 1425 21.00
## 60 290 41.0 1176 8.00
## 61 295 36.0 980 12.00
## 62 295 47.5 1176 6.00
## 63 295 41.5 989 4.70
## 64 295 49.5 1084 31.00
## 65 295 46.0 1115 5.20
## 66 295 46.5 724 27.00
## 67 300 36.0 847 18.00
## 68 300 37.0 876 7.50
## 69 300 35.0 844 3.00
## 70 300 36.0 908 13.00
## 71 300 48.0 1358 7.30
## 72 300 49.0 1019 15.00
## 73 300 35.5 1300 1.30
## 74 300 46.0 1365 4.80
## 75 300 45.0 1437 5.10
## 76 300 44.5 1460 5.10
## 77 300 49.0 1671 4.00
## 78 300 47.5 1717 10.00
## 79 305 35.0 613 12.00
## 80 305 51.0 353 22.00
## 81 305 42.5 909 10.00
## 82 305 38.0 886 11.00
## 83 305 41.0 890 17.00
## 84 305 47.0 1031 9.70
## 85 310 45.0 1083 12.00
## 86 310 45.5 864 4.70
## 87 310 45.0 886 6.00
## 88 310 45.0 965 3.80
## 89 310 39.0 537 17.00
## 90 310 40.5 630 12.00
## 91 310 46.0 1486 1.40
## 92 310 47.0 1743 6.10
## 93 310 48.5 2061 2.80
## 94 310 48.0 1707 4.80
## 95 310 38.0 862 5.70
## 96 310 38.5 911 3.30
## 97 315 29.5 476 3.30
## 98 315 42.0 743 3.70
## 99 315 47.5 1128 9.90
## 100 315 43.5 848 6.80
## 101 315 47.5 1091 13.00
## 102 315 43.5 715 8.80
## 103 320 47.5 983 57.00
## 104 320 51.5 1251 96.00
## 105 320 49.5 1255 360.00
## 106 320 47.0 1152 130.00
## 107 320 47.5 1085 13.00
## 108 320 47.0 1118 61.00
## 109 320 36.0 1285 12.00
## 110 320 34.5 1178 33.00
## 111 320 44.5 1492 48.00
## 112 320 46.0 1524 10.00
## 113 320 46.0 1473 44.00
## 114 320 32.5 520 0.43
## 115 325 46.0 863 1100.00
## 116 325 40.0 549 9.40
## 117 325 43.5 810 4.10
## 118 325 46.5 908 2.80
## 119 325 43.0 804 0.74
## 120 325 47.5 1179 14.00
## 121 330 32.0 556 22.00
## 122 330 40.5 659 9.10
## 123 330 51.5 1229 140.00
## 124 330 48.0 1050 4.20
## 125 330 47.0 952 12.00
## 126 330 41.0 826 2.00
## 127 330 33.5 599 0.30
## 128 330 47.0 1704 1.20
## 129 340 50.0 1207 7.10
## 130 340 45.0 911 180.00
## 131 340 49.0 1498 1.50
## 132 340 49.5 1496 2.40
## 133 340 50.0 1142 4.30
## 134 340 45.0 879 3.90
## 135 340 32.5 525 0.99
## 136 340 38.0 806 0.45
## 137 340 38.5 694 2.50
## 138 340 36.0 643 0.25
## 139 345 26.5 514 0.58
## 140 345 23.5 358 2.00
## 141 345 30.0 856 2.20
## 142 345 29.0 793 7.40
## 143 345 17.5 173 0.35
## 144 345 36.0 1433 1.90
# Number of distinct values in the SPECIES column: count the entries that
# are not a repeat of an earlier entry.
sum(!duplicated(ddt[["SPECIES"]]))
## [1] 3
# Rows whose SPECIES is "LMBASS" and whose WEIGHT is greater than 800.
with(ddt, ddt[SPECIES == "LMBASS" & WEIGHT > 800, ])
## RIVER MILE SPECIES LENGTH WEIGHT DDT
## 141 TRM 345 LMBASS 30 856 2.2
## 144 TRM 345 LMBASS 36 1433 1.9
# Rows whose RIVER is "SCM" and whose DDT is greater than 4.0.
with(ddt, ddt[RIVER == "SCM" & DDT > 4.0, ])
## RIVER MILE SPECIES LENGTH WEIGHT DDT
## 16 SCM 1 CCATFISH 45 984 9.1
## 17 SCM 1 CCATFISH 43 965 7.8
## 18 SCM 1 CCATFISH 45 1084 4.1
Clicker Questions
# Arithmetic mean of the LENGTH column.
mean(ddt[["LENGTH"]])
## [1] 42.8125
# Standard deviation of the WEIGHT column.
sd(ddt[["WEIGHT"]])
## [1] 376.5461
# Scatter plot with LENGTH on the x-axis and WEIGHT on the y-axis.
# Clicker answer: 1 (yes, this is the correct plot).
plot(x = ddt$LENGTH, y = ddt$WEIGHT)

# Divide the integers 1, 2, ..., 20 by 20; the last element is 20 / 20 = 1.
# Clicker answer: 1
v <- 1:20
v / 20
## [1] 0.05 0.10 0.15 0.20 0.25 0.30 0.35 0.40 0.45 0.50 0.55 0.60 0.65 0.70 0.75
## [16] 0.80 0.85 0.90 0.95 1.00
Task 4
# Tables
# One-way frequency table: number of rows recorded for each river.
riv <- ddt[["RIVER"]]
tab <- table(riv)
print(tab)
## riv
## FCM LCM SCM TRM
## 6 6 6 126
# Two-way frequency table: RIVER in the rows, SPECIES in the columns.
tab2 <- table(ddt[["RIVER"]], ddt[["SPECIES"]])
print(tab2)
##
## CCATFISH LMBASS SMBUFFALO
## FCM 6 0 0
## LCM 6 0 0
## SCM 6 0 0
## TRM 78 12 36
# Bar plots
# One bar per river; the number of colours follows the number of table cells
# instead of a hard-coded 4.
barplot(tab, col = rainbow(length(tab)))

# Stacked bars: one bar per column of tab2 (species), one colour per row of
# tab2 (river), so the palette size is taken from the row count.
barplot(tab2, col = rainbow(nrow(tab2)))

Task 5
# Pie Charts
# Slice sizes are the row counts per species.
spec <- table(ddt[["SPECIES"]])
pie(spec)

# Slice sizes are the row counts per river.
pie(table(ddt[["RIVER"]]))

Task 6
# Box Plots
# One box plot per measurement column: DDT, then WEIGHT, then LENGTH.
boxplot(ddt[["DDT"]])

boxplot(ddt[["WEIGHT"]])

boxplot(ddt[["LENGTH"]])

Task 7
# Coplot 1: WEIGHT against LENGTH, conditioned on RIVER.
# The formula is evaluated inside ddt, so the panels are labelled with the
# column names and no helper vectors (previously one was misspelled "lenght")
# are needed.
coplot(WEIGHT ~ LENGTH | RIVER, data = ddt)

# Coplot 2: DDT against WEIGHT, conditioned on SPECIES.
# The helper vector names are what coplot() prints as axis labels.
pollutant_DDT <- ddt[["DDT"]]
species <- ddt[["SPECIES"]]
weight <- ddt[["WEIGHT"]]
coplot(pollutant_DDT ~ weight | species)

Task 8
library(ggplot2)
# Box plots of WEIGHT for each SPECIES, with the fill colour set by RIVER.
ggplot(data = ddt, mapping = aes(x = SPECIES, y = WEIGHT, fill = RIVER)) +
  geom_boxplot() +
  labs(title = "Aksel Can Sozudogru", x = "SPECIES", y = "WEIGHT")

# Violin plots of LENGTH for each RIVER, with the fill colour set by SPECIES.
ggplot(data = ddt, mapping = aes(x = RIVER, y = LENGTH, fill = SPECIES)) +
  geom_violin() +
  labs(title = "Aksel Can Sozudogru", x = "RIVER", y = "LENGTH")

# Scatter plot of LENGTH against WEIGHT, with the point colour set by SPECIES.
ggplot(data = ddt, mapping = aes(x = WEIGHT, y = LENGTH, color = SPECIES)) +
  geom_point() +
  labs(title = "Aksel Can Sozudogru", x = "WEIGHT", y = "LENGTH")
