# The workspace is intentionally NOT cleared here. `rm(list = ls())` deletes
# whatever objects the caller had in their session, yet leaves loaded
# packages and options in place, so it does not provide a clean state.
# Run this script in a fresh R session instead.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#install.packages("GGally")
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
#install.packages("AppliedPredictiveModeling")
#install.packages("ggplot2")
#library(ggplot2)
# Load the abalone data set into the tibble `ABALONE`.
# The CSV location defaults to the original machine-specific path but can be
# overridden with the ABALONE_CSV environment variable, so the script can run
# on other machines without editing the code.
abalone_csv <- Sys.getenv(
  "ABALONE_CSV",
  unset = "C:/Users/cu_dv/Documents/Abalone_DATA.csv"
)
# Fail early with an actionable message if the file is not where we expect it.
if (!file.exists(abalone_csv)) {
  stop(
    "Abalone CSV not found at '", abalone_csv,
    "'. Set the ABALONE_CSV environment variable to its location.",
    call. = FALSE
  )
}
ABALONE <- read_csv(abalone_csv)
## Rows: 4177 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Sex
## dbl (8): Length, Diameter, Height, Whole weight, Shucked weight, Viscera wei...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the first rows and column types of the loaded tibble
print(ABALONE)
## # A tibble: 4,177 × 9
## Sex Length Diameter Height `Whole weight` `Shucked weight` `Viscera weight`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 M 0.455 0.365 0.095 0.514 0.224 0.101
## 2 M 0.35 0.265 0.09 0.226 0.0995 0.0485
## 3 F 0.53 0.42 0.135 0.677 0.256 0.142
## 4 M 0.44 0.365 0.125 0.516 0.216 0.114
## 5 I 0.33 0.255 0.08 0.205 0.0895 0.0395
## 6 I 0.425 0.3 0.095 0.352 0.141 0.0775
## 7 F 0.53 0.415 0.15 0.778 0.237 0.142
## 8 F 0.545 0.425 0.125 0.768 0.294 0.150
## 9 M 0.475 0.37 0.125 0.509 0.216 0.112
## 10 F 0.55 0.44 0.15 0.894 0.314 0.151
## # ℹ 4,167 more rows
## # ℹ 2 more variables: `Shell weight` <dbl>, Rings <dbl>
# Variable descriptions
# Sex: Categorical, indicating Male (M), Female (F), or Infant (I).
# Physical measurements (continuous):
# Length - longest shell measurement
# Diameter - diameter perpendicular to the length
# Height - height with the meat inside the shell
# Weights:
# Whole weight - weight of the whole abalone
# Shucked weight - weight of the meat
# Viscera weight - gut weight (after bleeding)
# Shell weight - weight of the shell after being dried
# Rings: Integer, the number of rings on the shell; adding 1.5 to it approximates the age in years.
# Histograms of the continuous variables
# One histogram per measurement column. The columns are looked up by name and
# the title / x-axis label are set explicitly, so each plot is labelled with
# the variable name rather than the raw `ABALONE$...` expression.
continuous_vars <- c(
  "Length", "Diameter", "Height",
  "Whole weight", "Shucked weight", "Viscera weight", "Shell weight"
)
for (measure in continuous_vars) {
  hist(
    ABALONE[[measure]],
    main = paste("Histogram of", measure),
    xlab = measure
  )
}

# Frequency count of each level of the categorical variable Sex
table(ABALONE[["Sex"]])
##
## F I M
## 1307 1342 1528
# Summary statistics (min, quartiles, median, mean, max) for each continuous
# variable; the `##` lines are the rendered output of the call above them.
summary(ABALONE[["Length"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.075 0.450 0.545 0.524 0.615 0.815
summary(ABALONE[["Diameter"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0550 0.3500 0.4250 0.4079 0.4800 0.6500
summary(ABALONE[["Height"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.1150 0.1400 0.1395 0.1650 1.1300
summary(ABALONE[["Whole weight"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0020 0.4415 0.7995 0.8287 1.1530 2.8255
summary(ABALONE[["Shucked weight"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0010 0.1860 0.3360 0.3594 0.5020 1.4880
summary(ABALONE[["Viscera weight"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0005 0.0935 0.1710 0.1806 0.2530 0.7600
summary(ABALONE[["Shell weight"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0015 0.1300 0.2340 0.2388 0.3290 1.0050
# ggplot(ABALONE,aes(x=Sex)) +geom_bar()+
# ggtitle("FrequencyoftheSeasons")+coord_flip()
# Scatterplot of Height (y) against Length (x), one point per row
ggplot(data = ABALONE, mapping = aes(x = Length, y = Height)) +
  geom_point() +
  labs(
    x = "Length",
    y = "Height",
    title = "Relationship between Length and Height"
  )

# Boxplot comparing the distribution of `Whole weight` across the Sex levels
ggplot(
  data = ABALONE,
  mapping = aes(x = Sex, y = `Whole weight`)
) +
  geom_boxplot()

# Scatterplot matrices of columns 2 through 8 of ABALONE:
# first with base graphics, then with GGally
pairs(select(ABALONE, 2:8))

ggpairs(select(ABALONE, 2:8))

# Parallel coordinate plot of columns 2 through 8, lines grouped by Sex
ggparcoord(
  data = ABALONE,
  columns = 2:8,
  groupColumn = "Sex"
)
