Home_Work2.R

Problem 1

## Show the current working directory
getwd()

## [1] "C:/Users/Al Haque/Documents"

## Problem 1: read the ChinaIncome CSV straight from its raw GitHub URL.
## NOTE(review): the URL carries a `token` query parameter -- presumably a
## temporary access token; confirm the link still resolves before re-running.
file <- read.csv(
  file = "https://raw.githubusercontent.com/alhaque1/R-Homework-2-/main/ChinaIncome.csv?token=GHSAT0AAAAAABQLSBKAFKYGPLHLQ5VTSGFOYPEWWWA",
  header = TRUE,
  sep = ","
)
## Data-frame copy under the name the later statements read from
file1 <- data.frame(file)

## Mean and median of the full agriculture column, formatted to two decimals
sprintf(
  fmt = "The mean of the agriculture column is: %.2f",
  mean(file1[["agriculture"]])
)

## [1] "The mean of the agriculture column is: 151.95"

sprintf(
  fmt = "The median of the agriculture column is: %.2f",
  median(file1[["agriculture"]])
)

## [1] "The median of the agriculture column is: 139.80"

## Mean and median of the full industry column, formatted to two decimals
sprintf(
  fmt = "The mean of the values under the industry column is: %.2f ",
  mean(file1[["industry"]])
)

## [1] "The mean of the values under the industry column is: 1244.24 "

sprintf(
  fmt = "The median of the values under the industry column is %.2f ",
  median(file1[["industry"]])
)

## [1] "The median of the values under the industry column is 863.00 "

Problem 2

## Problem 2: build a new data frame from the loaded data.
## data_subset holds the first 3 rows and the first 3 columns of file1,
## and is printed right after it is created.
data_subset <- file1[1:3, 1:3]
data_subset

##   X agriculture commerce
## 1 1       100.0    100.0
## 2 2       101.6    133.0
## 3 3       103.3    136.4

Problem 3

## Problem 3: give the columns of data_subset new names.
## The first two columns are renamed by position. The third is renamed by
## matching its current name, so only a column called "commerce" becomes
## "Commerce" (matching by name is the preferred way to rename a column).
names(data_subset)[1:2] <- c("x", "Agriculture")
names(data_subset)[names(data_subset) == "commerce"] <- "Commerce"
data_subset

##   x Agriculture Commerce
## 1 1       100.0    100.0
## 2 2       101.6    133.0
## 3 3       103.3    136.4

Problem 4

## Problem 4: per-column summary statistics of data_subset
summary(data_subset)

##        x        Agriculture       Commerce    
##  Min.   :1.0   Min.   :100.0   Min.   :100.0  
##  1st Qu.:1.5   1st Qu.:100.8   1st Qu.:116.5  
##  Median :2.0   Median :101.6   Median :133.0  
##  Mean   :2.0   Mean   :101.6   Mean   :123.1  
##  3rd Qu.:2.5   3rd Qu.:102.5   3rd Qu.:134.7  
##  Max.   :3.0   Max.   :103.3   Max.   :136.4

## Problem 4: mean and median of the Agriculture column, recomputed on
## data_subset and formatted to two decimals

sprintf(
  fmt = "The new mean of the agriculture column is: %.2f",
  mean(data_subset[["Agriculture"]])
)

## [1] "The new mean of the agriculture column is: 101.63"

sprintf(
  fmt = "The new median of the agriculture column is: %.2f",
  median(data_subset[["Agriculture"]])
)

## [1] "The new median of the agriculture column is: 101.60"

## Mean and median of the x column of data_subset, formatted to two decimals
## (the statements below read the x column, not a transportation column)

sprintf(
  fmt = "The new mean of the X column is: %.2f",
  mean(data_subset[["x"]])
)

## [1] "The new mean of the X column is: 2.00"

sprintf(
  fmt = "The new median of the X column is: %.2f",
  median(data_subset[["x"]])
)

## [1] "The new median of the X column is: 2.00"

Problem 5

## Problem 5: rename all three columns and replace the values in the first one.
##   x           -> Varx
##   Agriculture -> AVGgriculture
##   Commerce    -> CommercExcellent
## Each rename matches on the current name, so a column is only touched when
## that name is present.
names(data_subset)[names(data_subset) == "x"] <- "Varx"
names(data_subset)[names(data_subset) == "Agriculture"] <- "AVGgriculture"
names(data_subset)[names(data_subset) == "Commerce"] <- "CommercExcellent"
## Overwrite rows 1-3 of the first column in a single vectorized assignment.
## Integer literals are used directly; strtoi() is meant for parsing strings
## and is unnecessary for numeric constants.
data_subset[1:3, 1] <- c(10L, 20L, 30L)
summary(data_subset)

##       Varx    AVGgriculture   CommercExcellent
##  Min.   :10   Min.   :100.0   Min.   :100.0   
##  1st Qu.:15   1st Qu.:100.8   1st Qu.:116.5   
##  Median :20   Median :101.6   Median :133.0   
##  Mean   :20   Mean   :101.6   Mean   :123.1   
##  3rd Qu.:25   3rd Qu.:102.5   3rd Qu.:134.7   
##  Max.   :30   Max.   :103.3   Max.   :136.4

Problem 6

## Problem 6: Write a function that shows all the table changes from Problems 1 to 5
## NOTE(review): no function is defined here; the table is printed directly.

data_subset   ## prints the current 3-row table

##   Varx AVGgriculture CommercExcellent
## 1   10         100.0            100.0
## 2   20         101.6            133.0
## 3   30         103.3            136.4

## Per-column summary statistics of the table printed above
summary(data_subset)

##       Varx    AVGgriculture   CommercExcellent
##  Min.   :10   Min.   :100.0   Min.   :100.0   
##  1st Qu.:15   1st Qu.:100.8   1st Qu.:116.5   
##  Median :20   Median :101.6   Median :133.0   
##  Mean   :20   Mean   :101.6   Mean   :123.1   
##  3rd Qu.:25   3rd Qu.:102.5   3rd Qu.:134.7   
##  Max.   :30   Max.   :103.3   Max.   :136.4

## As shown above, I calculated the new means and medians of the subset's columns with the mean and median functions. For #7, I copied the direct GitHub link to the raw CSV file and used the read.csv function so that R reads it directly from GitHub. ##