Tasks

Task 1

# Print the current working directory; the relative path "DDT-1.csv"
# passed to read.csv() is resolved against it.
getwd()
## [1] "C:/Users/aksel/OneDrive/Masaüstü/classes/ISEStats/Lab1"

Task 2

# Load the DDT data set from the working directory and preview its first rows.
ddt <- read.csv(file = "DDT-1.csv")
head(x = ddt, n = 6L)
##   RIVER MILE  SPECIES LENGTH WEIGHT DDT
## 1   FCM    5 CCATFISH   42.5    732  10
## 2   FCM    5 CCATFISH   44.0    795  16
## 3   FCM    5 CCATFISH   41.5    547  23
## 4   FCM    5 CCATFISH   39.0    465  21
## 5   FCM    5 CCATFISH   50.5   1252  50
## 6   FCM    5 CCATFISH   52.0   1255 150

Task 3

# To find the qualitative variables, we need to focus on the categorical columns, which are of type <chr> (character) in this data set. Creating a sub data frame that includes only the character columns gives a new data set with our qualitative variables, which are RIVER and SPECIES.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Keep only the character columns of ddt, then print the result.
qualitative_ddt <- select(ddt, where(is.character))
qualitative_ddt
##     RIVER   SPECIES
## 1     FCM  CCATFISH
## 2     FCM  CCATFISH
## 3     FCM  CCATFISH
## 4     FCM  CCATFISH
## 5     FCM  CCATFISH
## 6     FCM  CCATFISH
## 7     LCM  CCATFISH
## 8     LCM  CCATFISH
## 9     LCM  CCATFISH
## 10    LCM  CCATFISH
## 11    LCM  CCATFISH
## 12    LCM  CCATFISH
## 13    SCM  CCATFISH
## 14    SCM  CCATFISH
## 15    SCM  CCATFISH
## 16    SCM  CCATFISH
## 17    SCM  CCATFISH
## 18    SCM  CCATFISH
## 19    TRM  CCATFISH
## 20    TRM  CCATFISH
## 21    TRM  CCATFISH
## 22    TRM  CCATFISH
## 23    TRM  CCATFISH
## 24    TRM  CCATFISH
## 25    TRM  CCATFISH
## 26    TRM  CCATFISH
## 27    TRM  CCATFISH
## 28    TRM  CCATFISH
## 29    TRM  CCATFISH
## 30    TRM  CCATFISH
## 31    TRM SMBUFFALO
## 32    TRM SMBUFFALO
## 33    TRM SMBUFFALO
## 34    TRM SMBUFFALO
## 35    TRM SMBUFFALO
## 36    TRM SMBUFFALO
## 37    TRM  CCATFISH
## 38    TRM  CCATFISH
## 39    TRM  CCATFISH
## 40    TRM  CCATFISH
## 41    TRM  CCATFISH
## 42    TRM  CCATFISH
## 43    TRM    LMBASS
## 44    TRM    LMBASS
## 45    TRM    LMBASS
## 46    TRM    LMBASS
## 47    TRM    LMBASS
## 48    TRM    LMBASS
## 49    TRM  CCATFISH
## 50    TRM  CCATFISH
## 51    TRM  CCATFISH
## 52    TRM  CCATFISH
## 53    TRM  CCATFISH
## 54    TRM  CCATFISH
## 55    TRM SMBUFFALO
## 56    TRM SMBUFFALO
## 57    TRM SMBUFFALO
## 58    TRM SMBUFFALO
## 59    TRM SMBUFFALO
## 60    TRM SMBUFFALO
## 61    TRM  CCATFISH
## 62    TRM  CCATFISH
## 63    TRM  CCATFISH
## 64    TRM  CCATFISH
## 65    TRM  CCATFISH
## 66    TRM  CCATFISH
## 67    TRM  CCATFISH
## 68    TRM  CCATFISH
## 69    TRM  CCATFISH
## 70    TRM  CCATFISH
## 71    TRM  CCATFISH
## 72    TRM  CCATFISH
## 73    TRM SMBUFFALO
## 74    TRM SMBUFFALO
## 75    TRM SMBUFFALO
## 76    TRM SMBUFFALO
## 77    TRM SMBUFFALO
## 78    TRM SMBUFFALO
## 79    TRM  CCATFISH
## 80    TRM  CCATFISH
## 81    TRM  CCATFISH
## 82    TRM  CCATFISH
## 83    TRM  CCATFISH
## 84    TRM  CCATFISH
## 85    TRM  CCATFISH
## 86    TRM  CCATFISH
## 87    TRM  CCATFISH
## 88    TRM  CCATFISH
## 89    TRM  CCATFISH
## 90    TRM  CCATFISH
## 91    TRM SMBUFFALO
## 92    TRM SMBUFFALO
## 93    TRM SMBUFFALO
## 94    TRM SMBUFFALO
## 95    TRM SMBUFFALO
## 96    TRM SMBUFFALO
## 97    TRM  CCATFISH
## 98    TRM  CCATFISH
## 99    TRM  CCATFISH
## 100   TRM  CCATFISH
## 101   TRM  CCATFISH
## 102   TRM  CCATFISH
## 103   TRM  CCATFISH
## 104   TRM  CCATFISH
## 105   TRM  CCATFISH
## 106   TRM  CCATFISH
## 107   TRM  CCATFISH
## 108   TRM  CCATFISH
## 109   TRM SMBUFFALO
## 110   TRM SMBUFFALO
## 111   TRM SMBUFFALO
## 112   TRM SMBUFFALO
## 113   TRM SMBUFFALO
## 114   TRM SMBUFFALO
## 115   TRM  CCATFISH
## 116   TRM  CCATFISH
## 117   TRM  CCATFISH
## 118   TRM  CCATFISH
## 119   TRM  CCATFISH
## 120   TRM  CCATFISH
## 121   TRM  CCATFISH
## 122   TRM  CCATFISH
## 123   TRM  CCATFISH
## 124   TRM  CCATFISH
## 125   TRM  CCATFISH
## 126   TRM  CCATFISH
## 127   TRM SMBUFFALO
## 128   TRM SMBUFFALO
## 129   TRM  CCATFISH
## 130   TRM  CCATFISH
## 131   TRM  CCATFISH
## 132   TRM  CCATFISH
## 133   TRM  CCATFISH
## 134   TRM  CCATFISH
## 135   TRM SMBUFFALO
## 136   TRM SMBUFFALO
## 137   TRM SMBUFFALO
## 138   TRM SMBUFFALO
## 139   TRM    LMBASS
## 140   TRM    LMBASS
## 141   TRM    LMBASS
## 142   TRM    LMBASS
## 143   TRM    LMBASS
## 144   TRM    LMBASS
# To find the quantitative variables, we need to focus on the numerical columns, which are of type <int> and <dbl> in this data set. Creating a sub data frame that includes only the integer and double columns gives a new data set with our quantitative variables, which are MILE, LENGTH, WEIGHT and DDT.


# Keep only the numeric (integer and double) columns of ddt, then print them.
quant_ddt <- select(ddt, where(is.numeric))
quant_ddt
##     MILE LENGTH WEIGHT     DDT
## 1      5   42.5    732   10.00
## 2      5   44.0    795   16.00
## 3      5   41.5    547   23.00
## 4      5   39.0    465   21.00
## 5      5   50.5   1252   50.00
## 6      5   52.0   1255  150.00
## 7      3   40.5    741   28.00
## 8      3   48.0   1151    7.70
## 9      3   48.0   1186    2.00
## 10     3   43.5    754   19.00
## 11     3   40.5    679   16.00
## 12     3   47.5    985    5.40
## 13     1   44.5   1133    2.60
## 14     1   46.0   1139    3.10
## 15     1   48.0   1186    3.50
## 16     1   45.0    984    9.10
## 17     1   43.0    965    7.80
## 18     1   45.0   1084    4.10
## 19   275   48.0    986    8.40
## 20   275   45.0   1023   15.00
## 21   275   49.0   1266   25.00
## 22   275   50.0   1086    5.60
## 23   275   46.0   1044    4.60
## 24   275   52.0   1770    8.20
## 25   280   48.0   1048    6.10
## 26   280   51.0   1641   13.00
## 27   280   48.5   1331    6.00
## 28   280   51.0   1728    6.60
## 29   280   44.0    917    5.50
## 30   280   51.0   1398   11.00
## 31   280   49.0   1763    4.50
## 32   280   46.0   1459    4.20
## 33   280   52.0   2302    3.00
## 34   280   46.0   1614    2.30
## 35   280   46.0   1444    2.50
## 36   280   48.0   2006    6.80
## 37   285   44.0    936   19.00
## 38   285   42.0   1058    7.20
## 39   285   42.5    800    6.00
## 40   285   45.5   1087   10.00
## 41   285   48.0   1329   12.00
## 42   285   44.0    897    2.80
## 43   285   28.5    778    0.48
## 44   285   26.0    532    0.18
## 45   285   25.5    441    0.34
## 46   285   25.0    544    0.11
## 47   285   23.0    393    0.22
## 48   285   28.0    733    0.80
## 49   290   41.0    961    8.70
## 50   290   44.0    886   22.00
## 51   290   41.0    678   13.00
## 52   290   42.0   1011    3.50
## 53   290   42.5    947    9.30
## 54   290   44.0    989   21.00
## 55   290   43.5   1291    3.40
## 56   290   46.5   1186   13.00
## 57   290   43.0   1293    5.60
## 58   290   47.0   1709   12.00
## 59   290   46.0   1425   21.00
## 60   290   41.0   1176    8.00
## 61   295   36.0    980   12.00
## 62   295   47.5   1176    6.00
## 63   295   41.5    989    4.70
## 64   295   49.5   1084   31.00
## 65   295   46.0   1115    5.20
## 66   295   46.5    724   27.00
## 67   300   36.0    847   18.00
## 68   300   37.0    876    7.50
## 69   300   35.0    844    3.00
## 70   300   36.0    908   13.00
## 71   300   48.0   1358    7.30
## 72   300   49.0   1019   15.00
## 73   300   35.5   1300    1.30
## 74   300   46.0   1365    4.80
## 75   300   45.0   1437    5.10
## 76   300   44.5   1460    5.10
## 77   300   49.0   1671    4.00
## 78   300   47.5   1717   10.00
## 79   305   35.0    613   12.00
## 80   305   51.0    353   22.00
## 81   305   42.5    909   10.00
## 82   305   38.0    886   11.00
## 83   305   41.0    890   17.00
## 84   305   47.0   1031    9.70
## 85   310   45.0   1083   12.00
## 86   310   45.5    864    4.70
## 87   310   45.0    886    6.00
## 88   310   45.0    965    3.80
## 89   310   39.0    537   17.00
## 90   310   40.5    630   12.00
## 91   310   46.0   1486    1.40
## 92   310   47.0   1743    6.10
## 93   310   48.5   2061    2.80
## 94   310   48.0   1707    4.80
## 95   310   38.0    862    5.70
## 96   310   38.5    911    3.30
## 97   315   29.5    476    3.30
## 98   315   42.0    743    3.70
## 99   315   47.5   1128    9.90
## 100  315   43.5    848    6.80
## 101  315   47.5   1091   13.00
## 102  315   43.5    715    8.80
## 103  320   47.5    983   57.00
## 104  320   51.5   1251   96.00
## 105  320   49.5   1255  360.00
## 106  320   47.0   1152  130.00
## 107  320   47.5   1085   13.00
## 108  320   47.0   1118   61.00
## 109  320   36.0   1285   12.00
## 110  320   34.5   1178   33.00
## 111  320   44.5   1492   48.00
## 112  320   46.0   1524   10.00
## 113  320   46.0   1473   44.00
## 114  320   32.5    520    0.43
## 115  325   46.0    863 1100.00
## 116  325   40.0    549    9.40
## 117  325   43.5    810    4.10
## 118  325   46.5    908    2.80
## 119  325   43.0    804    0.74
## 120  325   47.5   1179   14.00
## 121  330   32.0    556   22.00
## 122  330   40.5    659    9.10
## 123  330   51.5   1229  140.00
## 124  330   48.0   1050    4.20
## 125  330   47.0    952   12.00
## 126  330   41.0    826    2.00
## 127  330   33.5    599    0.30
## 128  330   47.0   1704    1.20
## 129  340   50.0   1207    7.10
## 130  340   45.0    911  180.00
## 131  340   49.0   1498    1.50
## 132  340   49.5   1496    2.40
## 133  340   50.0   1142    4.30
## 134  340   45.0    879    3.90
## 135  340   32.5    525    0.99
## 136  340   38.0    806    0.45
## 137  340   38.5    694    2.50
## 138  340   36.0    643    0.25
## 139  345   26.5    514    0.58
## 140  345   23.5    358    2.00
## 141  345   30.0    856    2.20
## 142  345   29.0    793    7.40
## 143  345   17.5    173    0.35
## 144  345   36.0   1433    1.90
# Number of distinct values in the SPECIES column.
length(unique(ddt[["SPECIES"]]))
## [1] 3
# Rows of ddt where SPECIES is "LMBASS" and WEIGHT is greater than 800.
ddt[with(ddt, SPECIES == "LMBASS" & WEIGHT > 800), ]
##     RIVER MILE SPECIES LENGTH WEIGHT DDT
## 141   TRM  345  LMBASS     30    856 2.2
## 144   TRM  345  LMBASS     36   1433 1.9
# Subset: RIVER == "SCM" and DDT > 4.0

ddt[ddt$RIVER == "SCM" & ddt$DDT > 4.0, ]
##    RIVER MILE  SPECIES LENGTH WEIGHT DDT
## 16   SCM    1 CCATFISH     45    984 9.1
## 17   SCM    1 CCATFISH     43    965 7.8
## 18   SCM    1 CCATFISH     45   1084 4.1

Clicker Questions

# Average of the LENGTH column over every row of ddt.
mean(x = ddt[["LENGTH"]])
## [1] 42.8125
# Standard deviation of the WEIGHT column over every row of ddt.
sd(x = ddt[["WEIGHT"]])
## [1] 376.5461
# Scatter plot of WEIGHT (y) against LENGTH (x); this is the correct plot.
# Answer: 1
plot(x = ddt$LENGTH, y = ddt$WEIGHT)

# Divide the integers 1..20 by 20; the last element is 20/20 = 1.
# Answer: 1
v <- 1:20
v / 20
##  [1] 0.05 0.10 0.15 0.20 0.25 0.30 0.35 0.40 0.45 0.50 0.55 0.60 0.65 0.70 0.75
## [16] 0.80 0.85 0.90 0.95 1.00

Task 4

# Frequency table of the RIVER column.
# The column is copied into `riv` first so that name labels the table.
riv <- ddt[["RIVER"]]
tab <- table(riv)
tab
## riv
## FCM LCM SCM TRM 
##   6   6   6 126
# Two-way frequency table: RIVER in rows, SPECIES in columns.
tab2 <- table(ddt[["RIVER"]], ddt[["SPECIES"]])
tab2
##      
##       CCATFISH LMBASS SMBUFFALO
##   FCM        6      0         0
##   LCM        6      0         0
##   SCM        6      0         0
##   TRM       78     12        36
# Bar plots
# One bar per river; the number of colours is taken from the table itself
# instead of being hard-coded.
barplot(tab, col = rainbow(length(tab)))

# Stacked bars: one bar per species (columns of tab2), with one segment per
# river (rows of tab2). The legend maps each segment colour to its river.
barplot(tab2, col = rainbow(nrow(tab2)), legend.text = TRUE)

Task 5

# Pie charts of row counts, first by SPECIES and then by RIVER.
spec <- table(ddt[["SPECIES"]])
pie(x = spec)

pie(x = table(ddt[["RIVER"]]))

Task 6

# Box plots of the DDT, WEIGHT and LENGTH columns, drawn one after another.
for (column in c("DDT", "WEIGHT", "LENGTH")) {
  boxplot(ddt[[column]])
}

Task 7

# Coplot 1: WEIGHT against LENGTH, conditioned on RIVER.
# The columns are referenced through `data = ddt`, so the axis and panel
# labels show the correctly spelled column names and the block does not
# depend on helper vectors defined elsewhere.
coplot(WEIGHT ~ LENGTH | RIVER, data = ddt)

# Coplot 2: DDT against WEIGHT, conditioned on SPECIES.
# The vector names used in the formula become the axis and panel labels.
species <- ddt[["SPECIES"]]
weight <- ddt[["WEIGHT"]]
pollutant_DDT <- ddt[["DDT"]]
coplot(pollutant_DDT ~ weight | species)

Task 8

  library(ggplot2)

# Box plot of WEIGHT by SPECIES, filled by RIVER.
ggplot(ddt, aes(x = SPECIES, y = WEIGHT, fill = RIVER)) +
  geom_boxplot() +
  labs(title = "Aksel Can Sozudogru", x = "SPECIES", y = "WEIGHT")

# Violin plot of LENGTH by RIVER, filled by SPECIES.
ggplot(ddt, aes(x = RIVER, y = LENGTH, fill = SPECIES)) +
  geom_violin() +
  labs(title = "Aksel Can Sozudogru", x = "RIVER", y = "LENGTH")

# Scatter plot of LENGTH against WEIGHT, coloured by SPECIES.
ggplot(ddt, aes(x = WEIGHT, y = LENGTH, color = SPECIES)) +
  geom_point() +
  labs(title = "Aksel Can Sozudogru", x = "WEIGHT", y = "LENGTH")