Problem 1
getwd()
## [1] "C:/Users/Al Haque/Documents"
## Problem 1: read the CSV directly from the raw-file URL on GitHub.
## NOTE(review): the URL carries a ?token= query parameter -- confirm the link
## still resolves before re-running, since such tokens may expire.
file <- read.csv(
  "https://raw.githubusercontent.com/alhaque1/R-Homework-2-/main/ChinaIncome.csv?token=GHSAT0AAAAAABQLSBKAFKYGPLHLQ5VTSGFOYPEWWWA",
  header = TRUE,
  sep = ","
)
file1 <- data.frame(file)
## Mean and median of the agriculture column
sprintf(
  "The mean of the agriculture column is: %.2f",
  mean(file1[["agriculture"]])
)
## [1] "The mean of the agriculture column is: 151.95"
sprintf(
  "The median of the agriculture column is: %.2f",
  median(file1[["agriculture"]])
)
## [1] "The median of the agriculture column is: 139.80"
## Mean and median of the industry column
sprintf(
  "The mean of the values under the industry column is: %.2f ",
  mean(file1[["industry"]])
)
## [1] "The mean of the values under the industry column is: 1244.24 "
sprintf(
  "The median of the values under the industry column is %.2f ",
  median(file1[["industry"]])
)
## [1] "The median of the values under the industry column is 863.00 "
Problem 2
## Problem 2: Make a new dataframe
## Store the subset of the file's data as data_subset
## and print it: the first 3 rows and first 3 columns of the data [edit!]
## Keep the first three rows and the first three columns of file1, then print
data_subset <- file1[1:3, 1:3]
data_subset
## X agriculture commerce
## 1 1 100.0 100.0
## 2 2 101.6 133.0
## 3 3 103.3 136.4
Problem 3
##Problem 3: Create new names for the column names
## Change the column names of the data frame data_subset. For "commerce", the name is changed to "Commerce" only where the original name matches; this method is preferred for changing column names. ##
## Rename the first two columns by position in a single assignment
names(data_subset)[1:2] <- c("x", "Agriculture")
## Rename by matching the current name; only a column called "commerce" changes
names(data_subset)[names(data_subset) == "commerce"] <- "Commerce"
data_subset
## x Agriculture Commerce
## 1 1 100.0 100.0
## 2 2 101.6 133.0
## 3 3 103.3 136.4
Problem 4
## Problem 4
## Print summary statistics (min, quartiles, median, mean, max) for each column of data_subset
summary(data_subset)
## x Agriculture Commerce
## Min. :1.0 Min. :100.0 Min. :100.0
## 1st Qu.:1.5 1st Qu.:100.8 1st Qu.:116.5
## Median :2.0 Median :101.6 Median :133.0
## Mean :2.0 Mean :101.6 Mean :123.1
## 3rd Qu.:2.5 3rd Qu.:102.5 3rd Qu.:134.7
## Max. :3.0 Max. :103.3 Max. :136.4
## Problem 4 Calculating the new values of the mean and median for the Agriculture column ##
sprintf(
  "The new mean of the agriculture column is: %.2f",
  mean(data_subset[["Agriculture"]])
)
## [1] "The new mean of the agriculture column is: 101.63"
sprintf(
  "The new median of the agriculture column is: %.2f",
  median(data_subset[["Agriculture"]])
)
## [1] "The new median of the agriculture column is: 101.60"
## Calculating the new values of the mean and median for the x column ##
sprintf(
  "The new mean of the X column is: %.2f",
  mean(data_subset[["x"]])
)
## [1] "The new mean of the X column is: 2.00"
sprintf(
  "The new median of the X column is: %.2f",
  median(data_subset[["x"]])
)
## [1] "The new median of the X column is: 2.00"
Problem 5
## Problem 5: Rename the 3 distinct columns and replace the values in the first column. In the naming scheme, "e" becomes "Excellent", "c" becomes "Cow", "D" stands for "Duck", "a" stands for "Average", and x becomes Varx. ##
## Rename each column by matching its current name, so a rename only applies
## when a column with that name is present.
names(data_subset)[names(data_subset) == "x"] <- "Varx"
names(data_subset)[names(data_subset) == "Agriculture"] <- "AVGgriculture"
names(data_subset)[names(data_subset) == "Commerce"] <- "CommercExcellent"
## Overwrite the three values of the first column (Varx) in one vectorized
## assignment. Integer literals replace strtoi(10), strtoi(20) and strtoi(30):
## strtoi() is meant for parsing character strings, and the literals give the
## same integer values directly.
data_subset[1:3, 1] <- c(10L, 20L, 30L)
## Print per-column summary statistics to confirm the new names and values
summary(data_subset)
## Varx AVGgriculture CommercExcellent
## Min. :10 Min. :100.0 Min. :100.0
## 1st Qu.:15 1st Qu.:100.8 1st Qu.:116.5
## Median :20 Median :101.6 Median :133.0
## Mean :20 Mean :101.6 Mean :123.1
## 3rd Qu.:25 3rd Qu.:102.5 3rd Qu.:134.7
## Max. :30 Max. :103.3 Max. :136.4
Problem 6
## Problem 6: Write a function that shows all the table changes from Problems 1 to 5
data_subset ## prints the current 3-row table
## Varx AVGgriculture CommercExcellent
## 1 10 100.0 100.0
## 2 20 101.6 133.0
## 3 30 103.3 136.4
## Print per-column summary statistics for the same table
summary(data_subset)
## Varx AVGgriculture CommercExcellent
## Min. :10 Min. :100.0 Min. :100.0
## 1st Qu.:15 1st Qu.:100.8 1st Qu.:116.5
## Median :20 Median :101.6 Median :133.0
## Mean :20 Mean :101.6 Mean :123.1
## 3rd Qu.:25 3rd Qu.:102.5 3rd Qu.:134.7
## Max. :30 Max. :103.3 Max. :136.4
## As shown above, I calculated the new means and medians of the columns in the new subset with the mean and median functions. For #7, I copied the direct GitHub link to the raw CSV file and used the read.csv function to make R read it directly from GitHub. ##