# Load the built-in 'cars' dataset (columns: 'speed', then 'dist') and
# report the median of its first column, 'speed'.
data("cars")
median(cars[["speed"]])
## [1] 15
# 1. Load the jsonlite package (install it once beforehand with
#    install.packages("jsonlite") if it is not available).
library(jsonlite)
# 2. Construct the API URL.
# The endpoint for Daily Pair OHLCV is /data/v2/histoday.
# - fsym=BTC (From Symbol: Bitcoin)
# - tsym=USD (To Symbol: US Dollar)
# - limit=100 (Get the last 100 days of data)
api_url <- "https://min-api.cryptocompare.com/data/v2/histoday?fsym=BTC&tsym=USD&limit=100"
# 3. Use fromJSON() to retrieve and parse the data.
btc_raw_data <- fromJSON(api_url)
# 4. Extract the actual historical data frame.
btc_ohlcv_df <- btc_raw_data$Data$Data
# Fail fast when the payload is not the expected table. Without this check an
# error response would make max() below return -Inf with only a warning.
if (!is.data.frame(btc_ohlcv_df) ||
  nrow(btc_ohlcv_df) == 0 ||
  !"close" %in% names(btc_ohlcv_df)) {
  stop(
    "Unexpected API response: no OHLCV rows with a 'close' column.",
    call. = FALSE
  )
}
# 5. What is the maximum of the daily close price?
max_close_price <- max(btc_ohlcv_df$close, na.rm = TRUE)
# 6. Display the final result using print statements.
# The captured value below comes from a live API call and changes over time.
print("Maximum daily close price for BTC/USD over the last 100 days:")
## [1] "Maximum daily close price for BTC/USD over the last 100 days:"
print(max_close_price)
## [1] 124723
# Title: Major Decisions: A Comparative Analysis of Economic Outcomes for Recent College Graduates
# 3 Questions:
# 1. Which Major Categories yield the highest and lowest average median salaries?
# 2. What is the relationship between a major's median salary and the rate of low-wage job employment?
# 3. How does the total size of a major category correlate with its unemployment rate?
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr)
# Location of the FiveThirtyEight "recent-grads" CSV.
data_url <- "https://raw.githubusercontent.com/fivethirtyeight/data/master/college-majors/recent-grads.csv"
# Read the remote CSV into a data frame with base R.
raw_data <- read.csv(file = data_url)
print("STATUS: Data Extracted.")
## [1] "STATUS: Data Extracted."
# Report the row and column counts as a comma-separated pair.
print(paste("Dimensions of Raw Data (Rows, Columns):", toString(dim(raw_data))))
## [1] "Dimensions of Raw Data (Rows, Columns): 173, 21"
# Show each column's name, type and first few values.
print("--- Initial Data Structure (str) ---")
## [1] "--- Initial Data Structure (str) ---"
str(object = raw_data)
## 'data.frame': 173 obs. of 21 variables:
## $ Rank : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Major_code : int 2419 2416 2415 2417 2405 2418 6202 5001 2414 2408 ...
## $ Major : chr "PETROLEUM ENGINEERING" "MINING AND MINERAL ENGINEERING" "METALLURGICAL ENGINEERING" "NAVAL ARCHITECTURE AND MARINE ENGINEERING" ...
## $ Total : int 2339 756 856 1258 32260 2573 3777 1792 91227 81527 ...
## $ Men : int 2057 679 725 1123 21239 2200 2110 832 80320 65511 ...
## $ Women : int 282 77 131 135 11021 373 1667 960 10907 16016 ...
## $ Major_category : chr "Engineering" "Engineering" "Engineering" "Engineering" ...
## $ ShareWomen : num 0.121 0.102 0.153 0.107 0.342 ...
## $ Sample_size : int 36 7 3 16 289 17 51 10 1029 631 ...
## $ Employed : int 1976 640 648 758 25694 1857 2912 1526 76442 61928 ...
## $ Full_time : int 1849 556 558 1069 23170 2038 2924 1085 71298 55450 ...
## $ Part_time : int 270 170 133 150 5180 264 296 553 13101 12695 ...
## $ Full_time_year_round: int 1207 388 340 692 16697 1449 2482 827 54639 41413 ...
## $ Unemployed : int 37 85 16 40 1672 400 308 33 4650 3895 ...
## $ Unemployment_rate : num 0.0184 0.1172 0.0241 0.0501 0.0611 ...
## $ Median : int 110000 75000 73000 70000 65000 65000 62000 62000 60000 60000 ...
## $ P25th : int 95000 55000 50000 43000 50000 50000 53000 31500 48000 45000 ...
## $ P75th : int 125000 90000 105000 80000 75000 102000 72000 109000 70000 72000 ...
## $ College_jobs : int 1534 350 456 529 18314 1142 1768 972 52844 45829 ...
## $ Non_college_jobs : int 364 257 176 102 4440 657 314 500 16384 10874 ...
## $ Low_wage_jobs : int 193 50 0 0 972 244 259 220 3253 3170 ...
# 3.1. Drop rows with a missing value in any key column (like na.omit(), but
# restricted to the columns the analysis depends on).
rows_before <- nrow(raw_data)
# Keep only rows where Total, Unemployment_rate and Employed are all present.
cleaned_data <- raw_data[
  !is.na(raw_data$Total) &
    !is.na(raw_data$Unemployment_rate) &
    !is.na(raw_data$Employed),
]
rows_after <- nrow(cleaned_data)
# cat() is used instead of print(): print() shows the leading "\n" as a
# literal escape sequence rather than emitting a blank line.
cat(
  "\nSTATUS: NA Cleaning Complete. Removed ", rows_before - rows_after,
  " row(s). Retained ", rows_after, " rows.\n",
  sep = ""
)
##
## STATUS: NA Cleaning Complete. Removed 1 row(s). Retained 172 rows.
# Build the major-level analysis table: derive the low-wage job rate, give the
# salary and category columns clearer names, and keep only the needed columns.
analysis_data <- cleaned_data %>%
  mutate(
    # Q2 derivation: low-wage jobs as a percentage of employed graduates.
    # Majors with no employed graduates get NA rather than NaN/Inf from a
    # division by zero, so they cannot distort the means computed later.
    Low_Wage_Job_Rate = if_else(
      Employed > 0,
      (Low_wage_jobs / Employed) * 100,
      NA_real_
    )
  ) %>%
  # select() renames while selecting (new_name = old_name), so no duplicate
  # columns are created just to be dropped again.
  select(
    Major,
    Major_Category = Major_category,
    Total,
    Median_Salary = Median,
    Unemployment_rate,
    Low_Wage_Job_Rate
  )
# cat() is used instead of print(): print() shows the leading "\n" as a
# literal escape sequence rather than emitting a blank line.
cat("\nSTATUS: Derived metrics (Low_Wage_Job_Rate) calculated.\n")
##
## STATUS: Derived metrics (Low_Wage_Job_Rate) calculated.
# Collapse the major-level table to one row per major category, then order
# the categories from highest to lowest average median salary (for Q1).
category_summary <- group_by(analysis_data, Major_Category) %>%
  summarise(
    # Unweighted means across the majors in each category.
    Avg_Median_Salary = mean(Median_Salary, na.rm = TRUE),
    Avg_Unemployment_Rate = mean(Unemployment_rate, na.rm = TRUE),
    Avg_Low_Wage_Job_Rate = mean(Low_Wage_Job_Rate, na.rm = TRUE),
    # Category size: total graduates summed over its majors.
    Total_Graduates = sum(Total, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(Avg_Median_Salary))
# Q1: show the five highest- and five lowest-paying major categories.
# Headings use cat() because print() shows "\n" as a literal escape sequence
# rather than emitting a blank line.
#
# Digits per column of category_summary, in order: Major_Category (ignored,
# not numeric), Avg_Median_Salary, Avg_Unemployment_Rate,
# Avg_Low_Wage_Job_Rate, Total_Graduates. Avg_Unemployment_Rate is a
# proportion, so it needs decimals; a single digits = 0 rounds it to 0.
summary_digits <- c(0, 0, 3, 0, 0)
cat("\n--- Q1: Summary by Major Category (Highest and Lowest Paying) ---\n")
##
## --- Q1: Summary by Major Category (Highest and Lowest Paying) ---
cat("Highest Paying Categories:\n")
## Highest Paying Categories:
print(
  kable(
    head(category_summary, 5),
    caption = "Top 5 Major Categories by Average Median Salary",
    digits = summary_digits
  )
)
# NOTE: the captured output below was produced with digits = 0, which is why
# Avg_Unemployment_Rate reads 0 in every row. Re-run to refresh that column.
## Warning in attr(x, "align"): 'xfun::attr()' is deprecated.
## Use 'xfun::attr2()' instead.
## See help("Deprecated")
## Warning in attr(x, "format"): 'xfun::attr()' is deprecated.
## Use 'xfun::attr2()' instead.
## See help("Deprecated")
##
##
## Table: Top 5 Major Categories by Average Median Salary
##
## |Major_Category | Avg_Median_Salary| Avg_Unemployment_Rate| Avg_Low_Wage_Job_Rate| Total_Graduates|
## |:-----------------------|-----------------:|---------------------:|---------------------:|---------------:|
## |Engineering | 57383| 0| 6| 537583|
## |Business | 43538| 0| 11| 1302376|
## |Computers & Mathematics | 42745| 0| 7| 299008|
## |Law & Public Policy | 42200| 0| 13| 179107|
## |Physical Sciences | 41890| 0| 12| 185479|
cat("\nLowest Paying Categories:\n")
##
## Lowest Paying Categories:
# category_summary is sorted by descending salary, so tail() yields the five
# lowest-paying categories, still in descending order.
print(
  kable(
    tail(category_summary, 5),
    caption = "Bottom 5 Major Categories by Average Median Salary",
    digits = summary_digits
  )
)
# NOTE: the captured output below was produced with digits = 0, which is why
# Avg_Unemployment_Rate reads 0 in every row. Re-run to refresh that column.
## Warning in attr(x, "align"): 'xfun::attr()' is deprecated.
## Use 'xfun::attr2()' instead.
## See help("Deprecated")
## Warning in attr(x, "align"): 'xfun::attr()' is deprecated.
## Use 'xfun::attr2()' instead.
## See help("Deprecated")
##
##
## Table: Bottom 5 Major Categories by Average Median Salary
##
## |Major_Category | Avg_Median_Salary| Avg_Unemployment_Rate| Avg_Low_Wage_Job_Rate| Total_Graduates|
## |:---------------------------|-----------------:|---------------------:|---------------------:|---------------:|
## |Communications & Journalism | 34500| 0| 15| 392601|
## |Arts | 33062| 0| 23| 357130|
## |Education | 32350| 0| 9| 559129|
## |Humanities & Liberal Arts | 31913| 0| 18| 713468|
## |Psychology & Social Work | 30100| 0| 12| 481007|
# Correlations answering Q2 and Q3. Output uses cat() because print() shows
# "\n" as a literal escape sequence rather than emitting a blank line.
cat("\n--- Correlation Results (Answering Q2 and Q3) ---\n")
##
## --- Correlation Results (Answering Q2 and Q3) ---
# Q2: major-level correlation between median salary and low-wage job rate.
# use = "complete.obs" drops majors where either value is missing.
salary_low_wage_corr <- cor(
  analysis_data$Median_Salary,
  analysis_data$Low_Wage_Job_Rate,
  use = "complete.obs"
)
# Q3: category-level correlation between total graduates (size) and the
# average unemployment rate of the category's majors.
total_unemp_corr <- cor(
  category_summary$Total_Graduates,
  category_summary$Avg_Unemployment_Rate,
  use = "complete.obs"
)
cat(sprintf(
  "Q2 (Median Salary vs Low-Wage Job Rate): %.3f\n",
  salary_low_wage_corr
))
## Q2 (Median Salary vs Low-Wage Job Rate): -0.459
cat(sprintf(
  "Q3 (Total Graduates vs Avg Unemployment Rate): %.3f\n",
  total_unemp_corr
))
## Q3 (Total Graduates vs Avg Unemployment Rate): 0.151
# Preview the first five rows of the prepared major-level dataset. The heading
# uses cat() because print() shows "\n" as a literal escape sequence rather
# than emitting a blank line.
cat("\n--- Final Prepared Analysis Dataset (First 5 Rows) ---\n")
##
## --- Final Prepared Analysis Dataset (First 5 Rows) ---
print(head(analysis_data, 5))
## Major Major_Category Total Median_Salary
## 1 PETROLEUM ENGINEERING Engineering 2339 110000
## 2 MINING AND MINERAL ENGINEERING Engineering 756 75000
## 3 METALLURGICAL ENGINEERING Engineering 856 73000
## 4 NAVAL ARCHITECTURE AND MARINE ENGINEERING Engineering 1258 70000
## 5 CHEMICAL ENGINEERING Engineering 32260 65000
## Unemployment_rate Low_Wage_Job_Rate
## 1 0.01838053 9.767206
## 2 0.11724138 7.812500
## 3 0.02409639 0.000000
## 4 0.05012531 0.000000
## 5 0.06109771 3.782984