#1
# Load the built-in `cars` dataset and report the median of its first column.
data("cars")
median(cars[[1]])
#> [1] 15
#2
# install.packages("jsonlite")
library(jsonlite)
# Request BTC/USD daily history (`histoday`) from the CryptoCompare API and
# report the highest closing price in the returned window.
url <- "https://min-api.cryptocompare.com/data/v2/histoday?fsym=BTC&tsym=USD&limit=99"
data_url <- fromJSON(url)

# Closing prices are read from the nested `Data$Data` element of the response.
close_prices <- data_url$Data$Data$close

# If the payload lacks these fields, `close_prices` is NULL and max(NULL) would
# only warn and return -Inf; fail loudly instead of printing a bogus maximum.
if (!is.numeric(close_prices) || length(close_prices) == 0) {
  stop("No numeric closing prices found in the API response.", call. = FALSE)
}

maximum_close <- max(close_prices)
print(maximum_close)
#> [1] 69020.94
#3
# 1) Project Title: College Salary
# 2) Research Questions:
#    - Does the college you attend significantly impact your salary?
#    - Does your undergraduate major influence your salary?
#    - What is the relationship between college type, region, and salary growth?
# 3) Relevant Data Sources:
#    Kaggle: Wall Street Journal / PayScale Inc. data:
#    - salaries-by-college-type.csv
#    - salaries-by-region.csv
#    - degrees-that-pay-back.csv
# 4 & 5) Code: Data Extraction, Preliminary Descriptive Analysis, and Data Cleaning
# Load necessary library
library(readr)
# Read the college-type salary file from the working directory.
# NOTE(review): readr reports every column as character for this file, so the
# salary fields are text rather than numbers until they are parsed.
salaries_by_college_type <- read_csv(file = "salaries-by-college-type.csv")
#> Rows: 269 Columns: 8
#> ── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (8): School Name, School Type, Starting Median Salary, Mid-Career Median Salary, Mid-Career 10th Percentile Salary, Mid-Career 25th Percentile Salary, Mid-Career 75th Percentile Salary...
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the first six rows, then a per-column summary of the college-type data.
print(head(salaries_by_college_type, n = 6L))
summary(object = salaries_by_college_type)
#> School Name School Type Starting Median Salary Mid-Career Median Salary Mid-Career 10th Percentile Salary Mid-Career 25th Percentile Salary Mid-Career 75th Percentile Salary
#> Length:269 Length:269 Length:269 Length:269 Length:269 Length:269 Length:269
#> Class :character Class :character Class :character Class :character Class :character Class :character Class :character
#> Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character
#> Mid-Career 90th Percentile Salary
#> Length:269
#> Class :character
#> Mode :character
# Read the per-region salary file from the working directory.
salaries_by_region <- read_csv(file = "salaries-by-region.csv")
#> Rows: 320 Columns: 8
#> ── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (8): School Name, Region, Starting Median Salary, Mid-Career Median Salary, Mid-Career 10th Percentile Salary, Mid-Career 25th Percentile Salary, Mid-Career 75th Percentile Salary, Mid...
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the first six rows, then a per-column summary of the region data.
print(head(salaries_by_region, n = 6L))
summary(object = salaries_by_region)
#> School Name Region Starting Median Salary Mid-Career Median Salary Mid-Career 10th Percentile Salary Mid-Career 25th Percentile Salary Mid-Career 75th Percentile Salary
#> Length:320 Length:320 Length:320 Length:320 Length:320 Length:320 Length:320
#> Class :character Class :character Class :character Class :character Class :character Class :character Class :character
#> Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character
#> Mid-Career 90th Percentile Salary
#> Length:320
#> Class :character
#> Mode :character
# Read the per-major salary file from the working directory.
degrees_that_pay_back <- read_csv(file = "degrees-that-pay-back.csv")
#> Rows: 50 Columns: 8
#> ── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (7): Undergraduate Major, Starting Median Salary, Mid-Career Median Salary, Mid-Career 10th Percentile Salary, Mid-Career 25th Percentile Salary, Mid-Career 75th Percentile Salary, Mid...
#> dbl (1): Percent change from Starting to Mid-Career Salary
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the first six rows, then a per-column summary of the majors data.
print(head(degrees_that_pay_back, n = 6L))
summary(object = degrees_that_pay_back)
#> Undergraduate Major Starting Median Salary Mid-Career Median Salary Percent change from Starting to Mid-Career Salary Mid-Career 10th Percentile Salary Mid-Career 25th Percentile Salary
#> Length:50 Length:50 Length:50 Min. : 23.40 Length:50 Length:50
#> Class :character Class :character Class :character 1st Qu.: 59.12 Class :character Class :character
#> Mode :character Mode :character Mode :character Median : 67.80 Mode :character Mode :character
#> Mean : 69.27
#> 3rd Qu.: 82.42
#> Max. :103.50
#> Mid-Career 75th Percentile Salary Mid-Career 90th Percentile Salary
#> Length:50 Length:50
#> Class :character Class :character
#> Mode :character Mode :character
# Check for missing values: count NA cells per column of the college-type data.
# The object loaded from "salaries-by-college-type.csv" is
# `salaries_by_college_type`; this script never creates `college_type`.
# NOTE(review): the columns were read as character, so is.na() only counts
# cells readr parsed as NA; placeholder text would not be flagged - confirm.
colSums(is.na(salaries_by_college_type))
#> School Name School Type Starting Median Salary Mid-Career Median Salary Mid-Career 10th Percentile Salary
#> 0 0 0 0 0
#> Mid-Career 25th Percentile Salary Mid-Career 75th Percentile Salary Mid-Career 90th Percentile Salary
#> 0 0 0
# Count NA cells per column of the region data. The object loaded from
# "salaries-by-region.csv" is `salaries_by_region`; this script never creates
# `region`.
colSums(is.na(salaries_by_region))
#> School Name Region Starting Median Salary Mid-Career Median Salary Mid-Career 10th Percentile Salary
#> 0 0 0 0 0
#> Mid-Career 25th Percentile Salary Mid-Career 75th Percentile Salary Mid-Career 90th Percentile Salary
#> 0 0 0
# Count NA cells per column of the majors data. The object loaded from
# "degrees-that-pay-back.csv" is `degrees_that_pay_back`; this script never
# creates `majors`.
colSums(is.na(degrees_that_pay_back))
#> Undergraduate Major Starting Median Salary Mid-Career Median Salary
#> 0 0 0
#> Percent change from Starting to Mid-Career Salary Mid-Career 10th Percentile Salary Mid-Career 25th Percentile Salary
#> 0 0 0
#> Mid-Career 75th Percentile Salary Mid-Career 90th Percentile Salary
#> 0 0
# 6) Future Data Preparation:
#    - Data Visualization: Create plots to show the differences in salaries by college type and region.
#    - Prediction Models: Use regression analysis to predict mid-career salary based on starting salary, region, and major.
#    - Further Cleaning: Standardize column names and formats across datasets for smooth merging.
#4 Extra Credit
# install.packages("tidyverse")
library(tidyverse)
# Copy the built-in mtcars data and keep only the mpg, wt and hp columns.
data <- mtcars
tidy_cars <- select(data, mpg, wt, hp)
print(head(tidy_cars, n = 6L))

# Fit a linear model of mpg on wt and hp, then print the fit summary.
model_1 <- lm(formula = mpg ~ wt + hp, data = mtcars)
print(summary(object = model_1))
#> Call:
#> lm(formula = mpg ~ wt + hp, data = mtcars)
#> Residuals:
#> Min 1Q Median 3Q Max
#> -3.941 -1.600 -0.182 1.050 5.854
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 37.22727 1.59879 23.285 < 2e-16 ***
#> wt -3.87783 0.63273 -6.129 1.12e-06 ***
#> hp -0.03177 0.00903 -3.519 0.00145 **
#> ---
#> Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> Residual standard error: 2.593 on 29 degrees of freedom
#> Multiple R-squared: 0.8268, Adjusted R-squared: 0.8148
#> F-statistic: 69.21 on 2 and 29 DF, p-value: 9.109e-12
# Scatter plot of mpg against wt, overlaid with a blue straight-line (lm) fit
# drawn without its confidence band.
ggplot(data = tidy_cars, mapping = aes(wt, mpg)) +
  geom_point() +
  geom_smooth(se = FALSE, method = "lm", color = "blue") +
  labs(
    x = "Weight (1000 lbs)",
    y = "MPG",
    title = "Car Weight vs MPG"
  )
#> `geom_smooth()` using formula = 'y ~ x'