Discussion: Types of Data

Author

Betty Wang

library(AER)
Loading required package: car
Loading required package: carData
Loading required package: lmtest
Loading required package: zoo

Attaching package: 'zoo'
The following objects are masked from 'package:base':

    as.Date, as.Date.numeric
Loading required package: sandwich
Loading required package: survival
library(ggplot2)
library(AER)
library(MASS)
# Open the help page for each candidate dataset; every topic is looked up
# exactly once (mammals and menarche are the two datasets analysed below).
?Rabbit
?menarche
?cpus
?eagles
?immer
?mammals

# Bring both example datasets into the workspace in one call,
# then preview the first rows of mammals
data(list = c("mammals", "menarche"))
head(mammals)
                   body brain
Arctic fox        3.385  44.5
Owl monkey        0.480  15.5
Mountain beaver   1.350   8.1
Cow             465.000 423.0
Grey wolf        36.330 119.5
Goat             27.660 115.0
# Preview the first six rows of menarche (six is head()'s default)
head(x = menarche, n = 6L)
    Age Total Menarche
1  9.21   376        0
2 10.21   200        0
3 10.58    93        0
4 10.83   120        2
5 11.08    90        2
6 11.33    88        5
# Show the dimensions and column types of mammals
str(object = mammals)
'data.frame':   62 obs. of  2 variables:
 $ body : num  3.38 0.48 1.35 465 36.33 ...
 $ brain: num  44.5 15.5 8.1 423 119.5 ...
# Show the dimensions and column types of menarche
str(object = menarche)
'data.frame':   25 obs. of  3 variables:
 $ Age     : num  9.21 10.21 10.58 10.83 11.08 ...
 $ Total   : num  376 200 93 120 90 88 105 111 100 93 ...
 $ Menarche: num  0 0 0 2 2 5 10 17 16 29 ...
# Attach MASS, the package that supplies the mammals data
library("MASS")

# Make the mammals data frame available in the workspace
data("mammals")

# Preview the opening rows (head() shows six by default)
head(x = mammals, n = 6L)
                   body brain
Arctic fox        3.385  44.5
Owl monkey        0.480  15.5
Mountain beaver   1.350   8.1
Cow             465.000 423.0
Grey wolf        36.330 119.5
Goat             27.660 115.0
# Five-number summary plus mean for each mammals column
summary(object = mammals)
      body              brain        
 Min.   :   0.005   Min.   :   0.14  
 1st Qu.:   0.600   1st Qu.:   4.25  
 Median :   3.342   Median :  17.25  
 Mean   : 198.790   Mean   : 283.13  
 3rd Qu.:  48.202   3rd Qu.: 166.00  
 Max.   :6654.000   Max.   :5712.00  
# Brain weight against body weight, drawn through the formula interface
plot(brain ~ body, data = mammals,
     main = "Brain Weight vs. Body Weight",
     xlab = "Body Weight (kg)",
     ylab = "Brain Weight (g)")

# Pearson correlation of the two weight columns
with(mammals, cor(body, brain))
[1] 0.9341638
# The two largest observations on the scatterplot do not appear to be outliers, because they lie close to the best-fit line, which has a positive slope.

# List the first 20 mammals to see which species have the extreme weights
head(mammals, n = 20L)
                              body   brain
Arctic fox                   3.385   44.50
Owl monkey                   0.480   15.50
Mountain beaver              1.350    8.10
Cow                        465.000  423.00
Grey wolf                   36.330  119.50
Goat                        27.660  115.00
Roe deer                    14.830   98.20
Guinea pig                   1.040    5.50
Verbet                       4.190   58.00
Chinchilla                   0.425    6.40
Ground squirrel              0.101    4.00
Arctic ground squirrel       0.920    5.70
African giant pouched rat    1.000    6.60
Lesser short-tailed shrew    0.005    0.14
Star-nosed mole              0.060    1.00
Nine-banded armadillo        3.500   10.80
Tree hyrax                   2.000   12.30
N.A. opossum                 1.700    6.30
Asian elephant            2547.000 4603.00
Big brown bat                0.023    0.30
# Scatter plot of brain weight against body weight with a fitted linear trend.
# aes() must receive the unquoted column names: quoted strings are treated as
# length-1 constants, which puts every point on the same spot and makes ggplot2
# warn that "All aesthetics have length 1". Axis titles are set in labs().
ggplot(mammals, aes(x = body, y = brain)) +
  geom_point() +
  geom_smooth(method = "lm", color = "red", se = TRUE) +
  labs(
    title = "Brain Weight vs Body Weight",
    x = "Body Weight (kg)",
    y = "Brain Weight (g)"
  )
`geom_smooth()` using formula = 'y ~ x'

# Load the necessary libraries
library(MASS)    # For the 'mammals' dataset
library(ggplot2) # For plotting
library(broom)   # Kept from the original script; no broom function is called here

# Load the dataset
data(mammals)

# Fit brain weight (g) as a linear function of body weight (kg)
model <- lm(brain ~ body, data = mammals)

# Extract the fitted coefficients by name rather than by position, so the
# values cannot be swapped if the model formula changes
coefs <- coef(model)
intercept <- coefs[["(Intercept)"]]
slope <- coefs[["body"]]

# Build the equation label. The operator follows the slope's sign so that a
# negative slope is shown as "a - b * x" rather than "a + -b * x".
slope_operator <- if (isTRUE(slope < 0)) " - " else " + "
equation <- paste0(
  "y = ", round(intercept, 2), slope_operator, abs(round(slope, 2)), " * x"
)

# Scatter plot with the fitted line, its confidence band, and the equation
# anchored at the top-right corner of the data range.
# The object is deliberately not named `plot`, which would shadow base::plot().
mammals_plot <- ggplot(mammals, aes(x = body, y = brain)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  annotate(
    "text",
    x = max(mammals$body), y = max(mammals$brain),
    label = equation, hjust = 1, vjust = 1, size = 5, color = "blue"
  ) +
  labs(
    title = "Scatter Plot of Brain Weight vs. Body Weight",
    x = "Body Weight (kg)",
    y = "Brain Weight (g)"
  ) +
  theme_minimal()

# Display the plot
print(mammals_plot)
`geom_smooth()` using formula = 'y ~ x'

# Make the menarche data frame available in the workspace
data("menarche")

# Preview the opening rows (head() shows six by default)
head(x = menarche, n = 6L)
    Age Total Menarche
1  9.21   376        0
2 10.21   200        0
3 10.58    93        0
4 10.83   120        2
5 11.08    90        2
6 11.33    88        5
# Five-number summary plus mean for each menarche column
summary(object = menarche)
      Age            Total           Menarche      
 Min.   : 9.21   Min.   :  88.0   Min.   :   0.00  
 1st Qu.:11.58   1st Qu.:  98.0   1st Qu.:  10.00  
 Median :13.08   Median : 105.0   Median :  51.00  
 Mean   :13.10   Mean   : 156.7   Mean   :  92.32  
 3rd Qu.:14.58   3rd Qu.: 117.0   3rd Qu.:  92.00  
 Max.   :17.58   Max.   :1049.0   Max.   :1049.00  
# Menarche count against group age, drawn through the formula interface.
# NOTE(review): Menarche looks like a count out of Total, and Total differs
# between age groups (e.g. 376 vs. 88 in the preview), so raw counts may not
# be comparable across groups -- consider Menarche / Total; confirm.
plot(Menarche ~ Age, data = menarche,
     main = "Menarche vs. Age",
     xlab = "Age",
     ylab = "Menarche")

# Pearson correlation between Age and the Menarche count
with(menarche, cor(Age, Menarche))
[1] 0.6251757
# List the first 15 age groups to see where the counts start to rise
head(menarche, n = 15L)
     Age Total Menarche
1   9.21   376        0
2  10.21   200        0
3  10.58    93        0
4  10.83   120        2
5  11.08    90        2
6  11.33    88        5
7  11.58   105       10
8  11.83   111       17
9  12.08   100       16
10 12.33    93       29
11 12.58   100       39
12 12.83   108       51
13 13.08    99       47
14 13.33   106       67
15 13.58   105       81
# Scatter plot of the Menarche count against Age with a fitted linear trend.
# aes() must receive the unquoted column names: quoted strings are treated as
# length-1 constants, which puts every point on the same spot and makes ggplot2
# warn that "All aesthetics have length 1". Axis titles are set in labs().
ggplot(menarche, aes(x = Age, y = Menarche)) +
  geom_point() +
  geom_smooth(method = "lm", color = "red", se = TRUE) +
  labs(
    title = "Menarche vs Age",
    x = "Age",
    y = "Menarche"
  )
`geom_smooth()` using formula = 'y ~ x'

# Load the necessary libraries
library(MASS)    # For the 'menarche' dataset
library(ggplot2) # For plotting
library(broom)   # Kept from the original script; no broom function is called here

# Load the dataset
data(menarche)

# Fit the Menarche count as a linear function of Age.
# NOTE(review): Menarche looks like a count out of Total, and Total varies by
# age group, so a model on Menarche / Total may be more appropriate -- confirm.
model <- lm(Menarche ~ Age, data = menarche)

# Extract the fitted coefficients by name rather than by position, so the
# values cannot be swapped if the model formula changes
coefs <- coef(model)
intercept <- coefs[["(Intercept)"]]
slope <- coefs[["Age"]]

# Build the equation label. The operator follows the slope's sign so that a
# negative slope is shown as "a - b * x" rather than "a + -b * x".
slope_operator <- if (isTRUE(slope < 0)) " - " else " + "
equation <- paste0(
  "y = ", round(intercept, 2), slope_operator, abs(round(slope, 2)), " * x"
)

# Scatter plot with the fitted line, its confidence band, and the equation
# anchored at the top-right corner of the data range.
# The object is deliberately not named `plot`, which would shadow base::plot().
menarche_plot <- ggplot(menarche, aes(x = Age, y = Menarche)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  annotate(
    "text",
    x = max(menarche$Age), y = max(menarche$Menarche),
    label = equation, hjust = 1, vjust = 1, size = 5, color = "blue"
  ) +
  labs(
    title = "Scatter Plot of Number who have reached menarche vs. Average age of the group",
    x = "Age",
    y = "Menarche"
  ) +
  theme_minimal()

# Display the plot
print(menarche_plot)
`geom_smooth()` using formula = 'y ~ x'

[1] 4