Problem 1
getwd()
## [1] "C:/Users/Al Haque/Documents"
## Problem 1: read ChinaIncome.csv directly from its raw GitHub URL.
## NOTE(review): the URL carries a `?token=` query parameter; such tokens are
## presumably short-lived -- confirm the link still resolves before re-running.
file <- read.csv(
  file = "https://raw.githubusercontent.com/alhaque1/R-Homework-2-/main/ChinaIncome.csv?token=GHSAT0AAAAAABQLSBKBPCNWCCZSLYISFPV6YO7GVKA",
  header = TRUE,
  sep = ","
)
## Keep the data under the name `file1`, which the later problems refer to.
file1 <- data.frame(file)
## Per-column summary statistics of the full data set.
summary(file1)
## X agriculture commerce construction industry
## Min. : 1 Min. : 83.6 Min. :100.0 Min. : 100.0 Min. : 100.0
## 1st Qu.:10 1st Qu.:111.9 1st Qu.:146.6 1st Qu.: 259.0 1st Qu.: 374.9
## Median :19 Median :139.8 Median :199.2 Median : 421.0 Median : 863.0
## Mean :19 Mean :151.9 Mean :261.0 Mean : 549.9 Mean :1244.2
## 3rd Qu.:28 3rd Qu.:168.4 3rd Qu.:316.8 3rd Qu.: 584.1 3rd Qu.:1814.7
## Max. :37 Max. :279.4 Max. :760.8 Max. :1884.0 Max. :4765.0
## transport
## Min. : 100.0
## 1st Qu.: 221.1
## Median : 370.8
## Mean : 449.5
## 3rd Qu.: 560.8
## Max. :1413.6
## Mean and median of the agriculture column, formatted to two decimals
sprintf(
  "The mean of the agriculture column is: %.2f",
  mean(file1[["agriculture"]])
)
## [1] "The mean of the agriculture column is: 151.95"
sprintf(
  "The median of the agriculture column is: %.2f",
  median(file1[["agriculture"]])
)
## [1] "The median of the agriculture column is: 139.80"
## Mean and median of the industry column, formatted to two decimals
sprintf(
  "The mean of the values under the industry column is: %.2f ",
  mean(file1[["industry"]])
)
## [1] "The mean of the values under the industry column is: 1244.24 "
sprintf(
  "The median of the values under the industry column is %.2f ",
  median(file1[["industry"]])
)
## [1] "The median of the values under the industry column is 863.00 "
Problem 2
## Problem 2: build a new data frame from the loaded data
## data_subset holds rows 1 to 3 of file1 (all columns kept)
data_subset <- file1[seq_len(3L), ]
## Print the subset
data_subset
## X agriculture commerce construction industry transport
## 1 1 100.0 100.0 100.0 100.0 100
## 2 2 101.6 133.0 138.1 133.6 120
## 3 3 103.3 136.4 133.3 159.1 136
Problem 3
## Problem 3: give the columns new (capitalised) names
## Every column is looked up by its current name rather than by position, so
## a change in column order cannot attach a new name to the wrong column.
## Columns whose name is not listed in the lookup table are left untouched.
names(data_subset) <- local({
  new_names <- c(
    X = "x",
    agriculture = "Agriculture",
    commerce = "Commerce",
    construction = "Construction",
    industry = "Industry",
    transport = "Transportation"
  )
  current <- names(data_subset)
  hit <- current %in% names(new_names)
  # unname() so only the replacement strings are written back
  current[hit] <- unname(new_names[current[hit]])
  current
})
## Print the subset with its new column names
data_subset
## x Agriculture Commerce Construction Industry Transportation
## 1 1 100.0 100.0 100.0 100.0 100
## 2 2 101.6 133.0 138.1 133.6 120
## 3 3 103.3 136.4 133.3 159.1 136
Problem 4
## Problem 4: print per-column summary statistics for the renamed subset
summary(data_subset)
## x Agriculture Commerce Construction Industry
## Min. :1.0 Min. :100.0 Min. :100.0 Min. :100.0 Min. :100.0
## 1st Qu.:1.5 1st Qu.:100.8 1st Qu.:116.5 1st Qu.:116.7 1st Qu.:116.8
## Median :2.0 Median :101.6 Median :133.0 Median :133.3 Median :133.6
## Mean :2.0 Mean :101.6 Mean :123.1 Mean :123.8 Mean :130.9
## 3rd Qu.:2.5 3rd Qu.:102.5 3rd Qu.:134.7 3rd Qu.:135.7 3rd Qu.:146.3
## Max. :3.0 Max. :103.3 Max. :136.4 Max. :138.1 Max. :159.1
## Transportation
## Min. :100.0
## 1st Qu.:110.0
## Median :120.0
## Mean :118.7
## 3rd Qu.:128.0
## Max. :136.0
## Problem 4: mean and median of the Agriculture column of the subset
sprintf(
  "The new mean of the agriculture column is: %.2f",
  mean(data_subset[["Agriculture"]])
)
## [1] "The new mean of the agriculture column is: 101.63"
sprintf(
  "The new median of the agriculture column is: %.2f",
  median(data_subset[["Agriculture"]])
)
## [1] "The new median of the agriculture column is: 101.60"
## Mean and median of the Transportation column of the subset
sprintf(
  "The new mean of the Transportation column is: %.2f",
  mean(data_subset[["Transportation"]])
)
## [1] "The new mean of the Transportation column is: 118.67"
sprintf(
  "The new median of the Transportation column is: %.2f",
  median(data_subset[["Transportation"]])
)
## [1] "The new median of the Transportation column is: 120.00"
Problem 5
## Problem 5: rename the columns again, matching each one by its current name.
## The lookup table maps old name -> new name; columns not listed are kept.
names(data_subset) <- local({
  new_names <- c(
    x = "Varx",
    Agriculture = "AVGgriculture",
    Commerce = "CommercExcellent",
    Construction = "CoWnstruction",
    Industry = "InDuckstry",
    Transportation = "TransportaVeragetion"
  )
  current <- names(data_subset)
  hit <- current %in% names(new_names)
  current[hit] <- new_names[current[hit]]
  current
})
## Summary statistics, now labelled with the new column names
summary(data_subset)
## Varx AVGgriculture CommercExcellent CoWnstruction InDuckstry
## Min. :1.0 Min. :100.0 Min. :100.0 Min. :100.0 Min. :100.0
## 1st Qu.:1.5 1st Qu.:100.8 1st Qu.:116.5 1st Qu.:116.7 1st Qu.:116.8
## Median :2.0 Median :101.6 Median :133.0 Median :133.3 Median :133.6
## Mean :2.0 Mean :101.6 Mean :123.1 Mean :123.8 Mean :130.9
## 3rd Qu.:2.5 3rd Qu.:102.5 3rd Qu.:134.7 3rd Qu.:135.7 3rd Qu.:146.3
## Max. :3.0 Max. :103.3 Max. :136.4 Max. :138.1 Max. :159.1
## TransportaVeragetion
## Min. :100.0
## 1st Qu.:110.0
## Median :120.0
## Mean :118.7
## 3rd Qu.:128.0
## Max. :136.0
Problem 6
## Problem 6: Write a function that shows all the table changes from Problems 1 to 5
data_subset ## prints the 3-row table with the renamed columns
## Varx AVGgriculture CommercExcellent CoWnstruction InDuckstry
## 1 1 100.0 100.0 100.0 100.0
## 2 2 101.6 133.0 138.1 133.6
## 3 3 103.3 136.4 133.3 159.1
## TransportaVeragetion
## 1 100
## 2 120
## 3 136
summary(data_subset) ## prints the summary statistics of the subset under the new column names
## Varx AVGgriculture CommercExcellent CoWnstruction InDuckstry
## Min. :1.0 Min. :100.0 Min. :100.0 Min. :100.0 Min. :100.0
## 1st Qu.:1.5 1st Qu.:100.8 1st Qu.:116.5 1st Qu.:116.7 1st Qu.:116.8
## Median :2.0 Median :101.6 Median :133.0 Median :133.3 Median :133.6
## Mean :2.0 Mean :101.6 Mean :123.1 Mean :123.8 Mean :130.9
## 3rd Qu.:2.5 3rd Qu.:102.5 3rd Qu.:134.7 3rd Qu.:135.7 3rd Qu.:146.3
## Max. :3.0 Max. :103.3 Max. :136.4 Max. :138.1 Max. :159.1
## TransportaVeragetion
## Min. :100.0
## 1st Qu.:110.0
## Median :120.0
## Mean :118.7
## 3rd Qu.:128.0
## Max. :136.0
## As shown above, I calculated the new means and medians of the subset's columns with the mean() and median() functions. For #7, I copied the direct GitHub link to the raw CSV file and used the read.csv() function to make R read it directly from GitHub. ##