# Echo each chunk's source code alongside its output in the knitted report.
knitr::opts_chunk$set(echo = TRUE)
# NOTE(review): setwd() ties the script to one machine's directory layout. It
# is kept because the relative path passed to read_csv() below depends on it;
# consider a project-relative path instead.
setwd("~/Desktop/mydata")
# library() stops with an error when readr is not installed, whereas require()
# only returns FALSE and lets the script fail later at the first readr call.
library(readr)
## Loading required package: readr
Column Names and Dimensions of Data
# 1a. Attach readr, then read the social media users CSV into `data`
library(readr)
data <- read_csv(file = "social_media_users.csv")
## Rows: 10000 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Platform, Owner, Primary Usage, Country, Verified Account
## dbl (1): Daily Time Spent (min)
## date (1): Date Joined
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View the data: evaluating `data` at top level auto-prints the object that
# read_csv() returned.
data
## # A tibble: 10,000 × 7
## Platform Owner `Primary Usage` Country Daily Time Spent (mi…¹
## <chr> <chr> <chr> <chr> <dbl>
## 1 WhatsApp Meta Messaging Switze… 114.
## 2 WeChat Tencent Messaging and … Madaga… 49.6
## 3 Snapchat Snap Inc. Multimedia mes… Pitcai… 29.0
## 4 Instagram Meta Photo and vide… Timor-… 295.
## 5 Threads Meta Text-based soc… Bermuda 71.8
## 6 X (formerly Twitter) X Corp. Microblogging Falkla… 92.8
## 7 Instagram Meta Photo and vide… Maurit… 228.
## 8 TikTok ByteDance Short-form vid… Uganda 44.9
## 9 Quora Quora In… Q&A knowledge … Norway 140.
## 10 X (formerly Twitter) X Corp. Microblogging Anguil… 235.
## # ℹ 9,990 more rows
## # ℹ abbreviated name: ¹`Daily Time Spent (min)`
## # ℹ 2 more variables: `Verified Account` <chr>, `Date Joined` <date>
# List the name of every column in the data set
colnames(data)
## [1] "Platform" "Owner" "Primary Usage"
## [4] "Country" "Daily Time Spent (min)" "Verified Account"
## [7] "Date Joined"
# Report the size of the data set as (number of rows, number of columns)
c(nrow(data), ncol(data))
## [1] 10000 7
Basic Summary of Dataset
# Drop every row that contains at least one missing value, then print
# per-column summary statistics for the rows that remain.
data <- stats::na.omit(object = data)
summary(object = data)
## Platform Owner Primary Usage Country
## Length:10000 Length:10000 Length:10000 Length:10000
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Daily Time Spent (min) Verified Account Date Joined
## Min. : 5.02 Length:10000 Min. :2015-05-04
## 1st Qu.: 78.92 Class :character 1st Qu.:2017-10-25
## Median :152.74 Mode :character Median :2020-04-22
## Mean :152.21 Mean :2020-04-27
## 3rd Qu.:225.64 3rd Qu.:2022-10-20
## Max. :300.00 Max. :2025-05-03
Earliest and Latest Date Joined
# Store the join date in a syntactic column name, coerced to Date.
data$DateJoined <- as.Date(data$`Date Joined`)
# First row holding the earliest join date (kept as in the original script).
earliest_row <- data[which.min(data$DateJoined), c("Platform", "DateJoined")]
# which.min() returns only the FIRST index of the minimum, so when several
# rows share the earliest date it would name one arbitrary platform. Collect
# every distinct platform recorded on that date instead. %in% never yields NA,
# and an empty data set simply produces an empty platform list.
earliest_platforms <- unique(
  data$Platform[data$DateJoined %in% earliest_row$DateJoined]
)
# Print as a sentence; the text is identical to the original when one platform
# holds the earliest date, and lists all of them (comma-separated) otherwise.
print(paste(
  "The earliest date joined was", earliest_row$DateJoined,
  "by platform:", paste(earliest_platforms, collapse = ", ")
))
## [1] "The earliest date joined was 2015-05-04 by platform: Quora"
# First row holding the latest join date (kept as in the original script).
latest_row <- data[which.max(data$DateJoined), c("Platform", "DateJoined")]
# which.max() returns only the FIRST index of the maximum, so when several
# rows share the latest date it would name one arbitrary platform. Collect
# every distinct platform recorded on that date instead. %in% never yields NA,
# and an empty data set simply produces an empty platform list.
latest_platforms <- unique(
  data$Platform[data$DateJoined %in% latest_row$DateJoined]
)
# Print as a sentence; the text is identical to the original when one platform
# holds the latest date, and lists all of them (comma-separated) otherwise.
print(paste(
  "The latest date joined was", latest_row$DateJoined,
  "by platform:", paste(latest_platforms, collapse = ", ")
))
## [1] "The latest date joined was 2025-05-03 by platform: Instagram"