R Markdown

Part 1

install.packages("psych", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpQrivO3/downloaded_packages
library("psych")
?psych

data(iris)
x <- iris 

describe(x)
##              vars   n mean   sd median trimmed  mad min max range  skew
## Sepal.Length    1 150 5.84 0.83   5.80    5.81 1.04 4.3 7.9   3.6  0.31
## Sepal.Width     2 150 3.06 0.44   3.00    3.04 0.44 2.0 4.4   2.4  0.31
## Petal.Length    3 150 3.76 1.77   4.35    3.76 1.85 1.0 6.9   5.9 -0.27
## Petal.Width     4 150 1.20 0.76   1.30    1.18 1.04 0.1 2.5   2.4 -0.10
## Species*        5 150 2.00 0.82   2.00    2.00 1.48 1.0 3.0   2.0  0.00
##              kurtosis   se
## Sepal.Length    -0.61 0.07
## Sepal.Width      0.14 0.04
## Petal.Length    -1.42 0.14
## Petal.Width     -1.36 0.06
## Species*        -1.52 0.07
nrow(iris)
## [1] 150
#they are 150 cases in the data set. 
summary(x)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
#There are 4 numerical variables in the data. 1-Sepal.length, 
#2-Sepal.Width, 3-petal.Length, 4- Petal.Width
?str
str(x)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
y <- sapply(x, is.numeric)
y
## Sepal.Length  Sepal.Width Petal.Length  Petal.Width      Species 
##         TRUE         TRUE         TRUE         TRUE        FALSE
#The numeric variables are continuous. 

# There is only one categorical variable "Species" 
?levels
levels(x$Species)
## [1] "setosa"     "versicolor" "virginica"
# The categories of Species are: "setosa, versicolor, virginica" 

Part 2

data(women)

w <- women 
w
##    height weight
## 1      58    115
## 2      59    117
## 3      60    120
## 4      61    123
## 5      62    126
## 6      63    129
## 7      64    132
## 8      65    135
## 9      66    139
## 10     67    142
## 11     68    146
## 12     69    150
## 13     70    154
## 14     71    159
## 15     72    164
summary(w)
##      height         weight     
##  Min.   :58.0   Min.   :115.0  
##  1st Qu.:61.5   1st Qu.:124.5  
##  Median :65.0   Median :135.0  
##  Mean   :65.0   Mean   :136.7  
##  3rd Qu.:68.5   3rd Qu.:148.0  
##  Max.   :72.0   Max.   :164.0
# this data is cross sectional 
class(w$height)
## [1] "numeric"
class(w$weight)
## [1] "numeric"
typeof(w$height)
## [1] "double"
typeof(w$weight)
## [1] "double"
# both variables are numeric and continuous.
?plot
## Help on topic 'plot' was found in the following packages:
## 
##   Package               Library
##   base                  /Library/Frameworks/R.framework/Resources/library
##   graphics              /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library
## 
## 
## Using the first match ...
plot(w$height, w$weight, main = "US women",
     xlab="Height", ylab= "Weight")

?abline
?lm
abline(lm(w$weight~w$height), col='purple')

install.packages("car", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpQrivO3/downloaded_packages
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
scatterplot(w$weight ~ w$height,
            xlab = "Height", ylab = "weight",
            main = "US women's DATA")

?scatterplotMatrix
scatterplotMatrix(~w$weight+w$height, data = w,
                  main = "US Woomen's")

install.packages("scatterplot3d", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpQrivO3/downloaded_packages
library(scatterplot3d)
scatterplot3d(w$weight,w$height, pch=16, highlight.3d = TRUE, 
              type = "h", main = "US Women's")