library(readr)
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ stringr   1.5.0
## ✔ forcats   1.0.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the social media dataset; note the path is an absolute Windows (D:) path.
data <- readr::read_csv(file = "D:/social media.csv")
## Rows: 1628 Columns: 26
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (9): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl  (3): Age, Latitude, Longitude
## num (14): How many followers do you have on Instagram? (In case of multiple ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Open the data viewer only when a person is at the console. View() needs an
# interactive viewer, so it is skipped in batch runs (e.g. Rscript).
if (interactive()) {
  View(data)
}

# Age summary. na.rm = TRUE keeps a single missing Age from turning every
# statistic into NA; with complete data the results are unchanged.
# Average age
mean(data$Age, na.rm = TRUE)
## [1] 26.85811
# Median age
median(data$Age, na.rm = TRUE)
## [1] 24
# Maximum age
max(data$Age, na.rm = TRUE)
## [1] 74
# Minimum age
min(data$Age, na.rm = TRUE)
## [1] 13
# Gender counts. Comparing against NA yields NA, so na.rm = TRUE is needed for
# the count to stay numeric if any Gender value is missing.
# Total number of males
sum(data$Gender == "Male", na.rm = TRUE)
## [1] 813
# Total number of females
sum(data$Gender == "Female", na.rm = TRUE)
## [1] 813
# Instagram follower summary. na.rm = TRUE ignores missing responses instead of
# returning NA; with complete data the results are unchanged.
# Average number of followers on Instagram
mean(
  data$`How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)`,
  na.rm = TRUE
)
## [1] 868.1474
# Median number of followers on Instagram
median(
  data$`How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)`,
  na.rm = TRUE
)
## [1] 370
# Maximum number of followers on Instagram
max(
  data$`How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)`,
  na.rm = TRUE
)
## [1] 116000
# Minimum number of followers on Instagram
min(
  data$`How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)`,
  na.rm = TRUE
)
## [1] 0
# Instagram post-count summary. na.rm = TRUE ignores missing responses instead
# of returning NA; with complete data the results are unchanged.
# Average number of posts on Instagram
mean(data$`How many posts do you have on Instagram?`, na.rm = TRUE)
## [1] 99.07985
# Median number of posts on Instagram
median(data$`How many posts do you have on Instagram?`, na.rm = TRUE)
## [1] 43.5
# Maximum number of posts on Instagram
max(data$`How many posts do you have on Instagram?`, na.rm = TRUE)
## [1] 2858
# Minimum number of posts on Instagram
min(data$`How many posts do you have on Instagram?`, na.rm = TRUE)
## [1] 0
# Total minutes spent per platform in the last week. na.rm = TRUE keeps one
# missing response from turning a whole total into NA.
# Total time spent on Facebook in last week
sum(data$`Time Spent on Facebook in last week (in minutes)`, na.rm = TRUE)
## [1] 285275
# Total time spent on Instagram in last week
sum(data$`Time Spent on Instagram in last week (in minutes)`, na.rm = TRUE)
## [1] 822407
# Total time spent on WhatsApp in last week
sum(data$`Time Spent on WhatsApp in last week (in minutes)`, na.rm = TRUE)
## [1] 1391726
# Summary of the `Total Social Media Usage` column. na.rm = TRUE ignores
# missing values instead of returning NA.
# Average total social media usage
mean(data$`Total Social Media Usage`, na.rm = TRUE)
## [1] 2120.885
# Median total social media usage
median(data$`Total Social Media Usage`, na.rm = TRUE)
## [1] 1658.5
# Maximum total social media usage
max(data$`Total Social Media Usage`, na.rm = TRUE)
## [1] 15780
#@**************************************************************************************************************

# Keep the rows whose Gender is "Female", then print the result.
females <- data %>%
  subset(subset = Gender == "Female")
females
## # A tibble: 813 × 26
##      Age City           `Current Status`     Do you own multiple profil…¹ Gender
##    <dbl> <chr>          <chr>                <chr>                        <chr> 
##  1    24 Delhi          Working professional No                           Female
##  2    39 Delhi          Working professional No                           Female
##  3    26 Bengaluru      Sabbatical           Yes                          Female
##  4    25 Vishakhapatnam Working professional Yes                          Female
##  5    45 Durgapur       Sabbatical           No                           Female
##  6    45 Delhi          Working professional No                           Female
##  7    21 Delhi          Working professional No                           Female
##  8    26 Delhi          Working professional No                           Female
##  9    25 Mumbai         Sabbatical           No                           Female
## 10    22 Kolkata        Student              No                           Female
## # ℹ 803 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>,
## #   `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Keep the rows whose Gender is "Male", then print the result.
males <- data %>%
  subset(subset = Gender == "Male")
males
## # A tibble: 813 × 26
##      Age City      `Current Status`     Do you own multiple profiles on…¹ Gender
##    <dbl> <chr>     <chr>                <chr>                             <chr> 
##  1    22 Mumbai    Working professional No                                Male  
##  2    50 Delhi     Working professional No                                Male  
##  3    52 Jaipur    Working professional No                                Male  
##  4    25 Bengaluru Student              No                                Male  
##  5    27 Delhi     Student              Yes                               Male  
##  6    27 Bengaluru Working professional No                                Male  
##  7    22 Delhi     Sabbatical           Yes                               Male  
##  8    26 Agra      Working professional No                                Male  
##  9    25 Ahmedabad Student              No                                Male  
## 10    18 Jaipur    Student              No                                Male  
## # ℹ 803 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>,
## #   `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Keep the rows whose Current Status is "Student", then print the result.
students <- data %>%
  subset(subset = `Current Status` == "Student")
students
## # A tibble: 637 × 26
##      Age City      `Current Status` Do you own multiple profiles on Ins…¹ Gender
##    <dbl> <chr>     <chr>            <chr>                                 <chr> 
##  1    25 Bengaluru Student          No                                    Male  
##  2    27 Delhi     Student          Yes                                   Male  
##  3    25 Ahmedabad Student          No                                    Male  
##  4    18 Jaipur    Student          No                                    Male  
##  5    22 Kolkata   Student          No                                    Female
##  6    26 Kolkata   Student          No                                    Female
##  7    23 Delhi     Student          No                                    Female
##  8    17 Mumbai    Student          Yes                                   Male  
##  9    23 Kolkata   Student          Yes                                   Female
## 10    22 Ahmedabad Student          No                                    Male  
## # ℹ 627 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>,
## #   `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Keep the rows whose Current Status is "Working professional", then print.
working_professionals <- data %>%
  subset(subset = `Current Status` == "Working professional")
working_professionals
## # A tibble: 796 × 26
##      Age City           `Current Status`     Do you own multiple profil…¹ Gender
##    <dbl> <chr>          <chr>                <chr>                        <chr> 
##  1    24 Delhi          Working professional No                           Female
##  2    39 Delhi          Working professional No                           Female
##  3    22 Mumbai         Working professional No                           Male  
##  4    50 Delhi          Working professional No                           Male  
##  5    25 Vishakhapatnam Working professional Yes                          Female
##  6    52 Jaipur         Working professional No                           Male  
##  7    27 Bengaluru      Working professional No                           Male  
##  8    45 Delhi          Working professional No                           Female
##  9    21 Delhi          Working professional No                           Female
## 10    26 Agra           Working professional No                           Male  
## # ℹ 786 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>,
## #   `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Keep the rows whose Zone is "Northern", then print the result.
northern <- data %>%
  subset(subset = Zone == "Northern")
northern
## # A tibble: 542 × 26
##      Age City   `Current Status`     Do you own multiple profiles on In…¹ Gender
##    <dbl> <chr>  <chr>                <chr>                                <chr> 
##  1    24 Delhi  Working professional No                                   Female
##  2    39 Delhi  Working professional No                                   Female
##  3    50 Delhi  Working professional No                                   Male  
##  4    52 Jaipur Working professional No                                   Male  
##  5    27 Delhi  Student              Yes                                  Male  
##  6    45 Delhi  Working professional No                                   Female
##  7    22 Delhi  Sabbatical           Yes                                  Male  
##  8    21 Delhi  Working professional No                                   Female
##  9    26 Agra   Working professional No                                   Male  
## 10    26 Delhi  Working professional No                                   Female
## # ℹ 532 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>,
## #   `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Keep the rows whose Zone is "Southern", then print the result.
southern <- data %>%
  subset(subset = Zone == "Southern")
southern
## # A tibble: 211 × 26
##      Age City           `Current Status`     Do you own multiple profil…¹ Gender
##    <dbl> <chr>          <chr>                <chr>                        <chr> 
##  1    26 Bengaluru      Sabbatical           Yes                          Female
##  2    25 Vishakhapatnam Working professional Yes                          Female
##  3    25 Bengaluru      Student              No                           Male  
##  4    27 Bengaluru      Working professional No                           Male  
##  5    32 Bengaluru      Working professional No                           Male  
##  6    27 Chennai        Working professional No                           Male  
##  7    23 Chennai        Student              No                           Female
##  8    22 Chennai        Student              Yes                          Female
##  9    32 Bengaluru      Working professional No                           Female
## 10    21 Chennai        Student              No                           Female
## # ℹ 201 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>,
## #   `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Keep the rows whose Zone is "Eastern", then print the result.
eastern <- data %>%
  subset(subset = Zone == "Eastern")
eastern
## # A tibble: 271 × 26
##      Age City        `Current Status`     Do you own multiple profiles …¹ Gender
##    <dbl> <chr>       <chr>                <chr>                           <chr> 
##  1    45 Durgapur    Sabbatical           No                              Female
##  2    24 Cooch-behar Working professional No                              Male  
##  3    22 Kolkata     Student              No                              Female
##  4    26 Kolkata     Student              No                              Female
##  5    50 Kolkata     Working professional No                              Female
##  6    23 Kolkata     Student              Yes                             Female
##  7    25 Kolkata     Working professional No                              Male  
##  8    45 Bagdogra    Working professional Yes                             Female
##  9    25 Kolkata     Student              No                              Male  
## 10    45 Kolkata     Working professional No                              Female
## # ℹ 261 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>,
## #   `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Keep the rows whose Zone is "Western", then print the result.
western <- data %>%
  subset(subset = Zone == "Western")
western
## # A tibble: 543 × 26
##      Age City      `Current Status`     Do you own multiple profiles on…¹ Gender
##    <dbl> <chr>     <chr>                <chr>                             <chr> 
##  1    22 Mumbai    Working professional No                                Male  
##  2    25 Ahmedabad Student              No                                Male  
##  3    25 Mumbai    Sabbatical           No                                Female
##  4    25 Ahmedabad Self Employed        Yes                               Male  
##  5    17 Mumbai    Student              Yes                               Male  
##  6    22 Ahmedabad Student              No                                Male  
##  7    24 Pune      Working professional No                                Female
##  8    21 Mumbai    Sabbatical           Yes                               Female
##  9    51 Ahmedabad Working professional No                                Male  
## 10    22 Mumbai    Sabbatical           No                                Male  
## # ℹ 533 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>,
## #   `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Keep the rows whose Phone OS is "Android", then print the result.
android <- data %>%
  subset(subset = `Phone OS` == "Android")
android
## # A tibble: 1,115 × 26
##      Age City           `Current Status`     Do you own multiple profil…¹ Gender
##    <dbl> <chr>          <chr>                <chr>                        <chr> 
##  1    22 Mumbai         Working professional No                           Male  
##  2    26 Bengaluru      Sabbatical           Yes                          Female
##  3    25 Vishakhapatnam Working professional Yes                          Female
##  4    52 Jaipur         Working professional No                           Male  
##  5    45 Durgapur       Sabbatical           No                           Female
##  6    25 Bengaluru      Student              No                           Male  
##  7    27 Delhi          Student              Yes                          Male  
##  8    27 Bengaluru      Working professional No                           Male  
##  9    21 Delhi          Working professional No                           Female
## 10    26 Agra           Working professional No                           Male  
## # ℹ 1,105 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>,
## #   `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Keep the rows whose Phone OS is "iOs" (spelled as in the data), then print.
ios <- data %>%
  subset(subset = `Phone OS` == "iOs")
ios
## # A tibble: 508 × 26
##      Age City     `Current Status`     Do you own multiple profiles on …¹ Gender
##    <dbl> <chr>    <chr>                <chr>                              <chr> 
##  1    24 Delhi    Working professional No                                 Female
##  2    39 Delhi    Working professional No                                 Female
##  3    50 Delhi    Working professional No                                 Male  
##  4    45 Delhi    Working professional No                                 Female
##  5    22 Delhi    Sabbatical           Yes                                Male  
##  6    18 Jaipur   Student              No                                 Male  
##  7    22 Chennai  Student              Yes                                Female
##  8    23 Guwahati Student              No                                 Female
##  9    45 Bagdogra Working professional Yes                                Female
## 10    28 Kolkata  Sabbatical           No                                 Female
## # ℹ 498 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>,
## #   `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Keep the rows whose Highest Education is "Graduation", then print.
graduation <- data %>%
  subset(subset = `Highest Education` == "Graduation")
graduation
## # A tibble: 950 × 26
##      Age City      `Current Status`     Do you own multiple profiles on…¹ Gender
##    <dbl> <chr>     <chr>                <chr>                             <chr> 
##  1    24 Delhi     Working professional No                                Female
##  2    22 Mumbai    Working professional No                                Male  
##  3    26 Bengaluru Sabbatical           Yes                               Female
##  4    50 Delhi     Working professional No                                Male  
##  5    45 Durgapur  Sabbatical           No                                Female
##  6    25 Bengaluru Student              No                                Male  
##  7    27 Delhi     Student              Yes                               Male  
##  8    27 Bengaluru Working professional No                                Male  
##  9    45 Delhi     Working professional No                                Female
## 10    21 Delhi     Working professional No                                Female
## # ℹ 940 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>,
## #   `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Keep the rows whose Highest Education is "Post graduation", then print.
post_graduation <- data %>%
  subset(subset = `Highest Education` == "Post graduation")
post_graduation
## # A tibble: 541 × 26
##      Age City           `Current Status`     Do you own multiple profil…¹ Gender
##    <dbl> <chr>          <chr>                <chr>                        <chr> 
##  1    39 Delhi          Working professional No                           Female
##  2    25 Vishakhapatnam Working professional Yes                          Female
##  3    52 Jaipur         Working professional No                           Male  
##  4    22 Delhi          Sabbatical           Yes                          Male  
##  5    26 Delhi          Working professional No                           Female
##  6    25 Mumbai         Sabbatical           No                           Female
##  7    22 Kolkata        Student              No                           Female
##  8    26 Kolkata        Student              No                           Female
##  9    27 Chennai        Working professional No                           Male  
## 10    32 Bengaluru      Working professional No                           Female
## # ℹ 531 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>,
## #   `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Keep the rows whose Highest Education is "High School", then print.
high_school <- data %>%
  subset(subset = `Highest Education` == "High School")
high_school
## # A tibble: 137 × 26
##      Age City      `Current Status`     Do you own multiple profiles on…¹ Gender
##    <dbl> <chr>     <chr>                <chr>                             <chr> 
##  1    18 Jaipur    Student              No                                Male  
##  2    35 Delhi     Working professional Yes                               Female
##  3    23 Delhi     Student              No                                Female
##  4    50 Kolkata   Working professional No                                Female
##  5    17 Mumbai    Student              Yes                               Male  
##  6    16 Kolkata   Student              No                                Male  
##  7    16 Jaipur    Student              No                                Female
##  8    15 Chennai   Student              Yes                               Female
##  9    20 Hyderabad Student              Yes                               Male  
## 10    16 Mumbai    Student              No                                Male  
## # ℹ 127 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>,
## #   `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Keep the rows that answered "Yes" to owning multiple Instagram profiles.
multiple_profiles <- data %>%
  subset(subset = `Do you own multiple profiles on Instagram?` == "Yes")
multiple_profiles
## # A tibble: 308 × 26
##      Age City           `Current Status`     Do you own multiple profil…¹ Gender
##    <dbl> <chr>          <chr>                <chr>                        <chr> 
##  1    26 Bengaluru      Sabbatical           Yes                          Female
##  2    25 Vishakhapatnam Working professional Yes                          Female
##  3    27 Delhi          Student              Yes                          Male  
##  4    22 Delhi          Sabbatical           Yes                          Male  
##  5    25 Ahmedabad      Self Employed        Yes                          Male  
##  6    35 Delhi          Working professional Yes                          Female
##  7    17 Mumbai         Student              Yes                          Male  
##  8    23 Kolkata        Student              Yes                          Female
##  9    26 Kanpur         Working professional Yes                          Male  
## 10    22 Chennai        Student              Yes                          Female
## # ℹ 298 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>,
## #   `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Keep the rows that answered "No" to owning multiple Instagram profiles.
single_profile <- data %>%
  subset(subset = `Do you own multiple profiles on Instagram?` == "No")
single_profile
## # A tibble: 1,316 × 26
##      Age City      `Current Status`     Do you own multiple profiles on…¹ Gender
##    <dbl> <chr>     <chr>                <chr>                             <chr> 
##  1    24 Delhi     Working professional No                                Female
##  2    39 Delhi     Working professional No                                Female
##  3    22 Mumbai    Working professional No                                Male  
##  4    50 Delhi     Working professional No                                Male  
##  5    52 Jaipur    Working professional No                                Male  
##  6    45 Durgapur  Sabbatical           No                                Female
##  7    25 Bengaluru Student              No                                Male  
##  8    27 Bengaluru Working professional No                                Male  
##  9    45 Delhi     Working professional No                                Female
## 10    21 Delhi     Working professional No                                Female
## # ℹ 1,306 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>,
## #   `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Keep the rows with strictly more than 500 Facebook minutes in the last week.
more_than_500_fb <- data %>%
  subset(subset = `Time Spent on Facebook in last week (in minutes)` > 500)
more_than_500_fb
## # A tibble: 130 × 26
##      Age City           `Current Status`     Do you own multiple profil…¹ Gender
##    <dbl> <chr>          <chr>                <chr>                        <chr> 
##  1    39 Delhi          Working professional No                           Female
##  2    26 Bengaluru      Sabbatical           Yes                          Female
##  3    50 Delhi          Working professional No                           Male  
##  4    25 Vishakhapatnam Working professional Yes                          Female
##  5    45 Durgapur       Sabbatical           No                           Female
##  6    25 Bengaluru      Student              No                           Male  
##  7    27 Bengaluru      Working professional No                           Male  
##  8    22 Delhi          Sabbatical           Yes                          Male  
##  9    18 Jaipur         Student              No                           Male  
## 10    25 Mumbai         Sabbatical           No                           Female
## # ℹ 120 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>,
## #   `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Keep the rows with strictly more than 500 Instagram minutes in the last week.
more_than_500_insta <- data %>%
  subset(subset = `Time Spent on Instagram in last week (in minutes)` > 500)
more_than_500_insta
## # A tibble: 559 × 26
##      Age City           `Current Status`     Do you own multiple profil…¹ Gender
##    <dbl> <chr>          <chr>                <chr>                        <chr> 
##  1    24 Delhi          Working professional No                           Female
##  2    22 Mumbai         Working professional No                           Male  
##  3    26 Bengaluru      Sabbatical           Yes                          Female
##  4    25 Vishakhapatnam Working professional Yes                          Female
##  5    25 Bengaluru      Student              No                           Male  
##  6    27 Delhi          Student              Yes                          Male  
##  7    45 Delhi          Working professional No                           Female
##  8    22 Delhi          Sabbatical           Yes                          Male  
##  9    18 Jaipur         Student              No                           Male  
## 10    25 Mumbai         Sabbatical           No                           Female
## # ℹ 549 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>,
## #   `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Keep the rows with strictly more than 500 WhatsApp minutes in the last week.
more_than_500_whatsapp <- data %>%
  subset(subset = `Time Spent on WhatsApp in last week (in minutes)` > 500)
more_than_500_whatsapp
## # A tibble: 913 × 26
##      Age City           `Current Status`     Do you own multiple profil…¹ Gender
##    <dbl> <chr>          <chr>                <chr>                        <chr> 
##  1    24 Delhi          Working professional No                           Female
##  2    39 Delhi          Working professional No                           Female
##  3    22 Mumbai         Working professional No                           Male  
##  4    26 Bengaluru      Sabbatical           Yes                          Female
##  5    50 Delhi          Working professional No                           Male  
##  6    25 Vishakhapatnam Working professional Yes                          Female
##  7    52 Jaipur         Working professional No                           Male  
##  8    45 Durgapur       Sabbatical           No                           Female
##  9    25 Bengaluru      Student              No                           Male  
## 10    27 Delhi          Student              Yes                          Male  
## # ℹ 903 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>,
## #   `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Respondents with more than 1000 Instagram followers.
# which() keeps only rows where the comparison is TRUE, so rows with a
# missing follower count are excluded.
more_than_1000_followers <- data[
  which(data$`How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` > 1000),
]
more_than_1000_followers
## # A tibble: 199 × 26
##      Age City      `Current Status`     Do you own multiple profiles on…¹ Gender
##    <dbl> <chr>     <chr>                <chr>                             <chr> 
##  1    25 Bengaluru Student              No                                Male  
##  2    23 Kolkata   Student              Yes                               Female
##  3    21 Chennai   Student              No                                Female
##  4    21 Chennai   Student              Yes                               Female
##  5    21 Mumbai    Sabbatical           Yes                               Female
##  6    57 Delhi     Sabbatical           Yes                               Male  
##  7    22 Delhi     Student              No                                Female
##  8    35 Mumbai    Working professional Yes                               Female
##  9    26 Ahmedabad Working professional No                                Male  
## 10    21 Mumbai    Student              No                                Female
## # ℹ 189 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>,
## #   `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Respondents with more than 100 Instagram posts.
# which() keeps only rows where the comparison is TRUE, so rows with a
# missing post count are excluded.
more_than_100_posts <- data[
  which(data$`How many posts do you have on Instagram?` > 100),
]
more_than_100_posts
## # A tibble: 444 × 26
##      Age City           `Current Status`     Do you own multiple profil…¹ Gender
##    <dbl> <chr>          <chr>                <chr>                        <chr> 
##  1    25 Vishakhapatnam Working professional Yes                          Female
##  2    25 Bengaluru      Student              No                           Male  
##  3    25 Ahmedabad      Self Employed        Yes                          Male  
##  4    50 Kolkata        Working professional No                           Female
##  5    21 Chennai        Student              No                           Female
##  6    25 Kolkata        Working professional No                           Male  
##  7    45 Bagdogra       Working professional Yes                          Female
##  8    38 Bengaluru      Working professional No                           Female
##  9    24 Kolkata        Student              Yes                          Male  
## 10    21 Chennai        Student              Yes                          Female
## # ℹ 434 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## #   `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## #   Zone <chr>,
## #   `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## #   `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Fit a linear regression of total social media usage on age, Instagram
# follower count and weekly Facebook time.
model <- lm(
  `Total Social Media Usage` ~ Age +
    `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` +
    `Time Spent on Facebook in last week (in minutes)`,
  data = data
)

# Pair every fitted value with the observation it was fitted to. Both columns
# are taken from the model itself: lm() drops rows with missing values, so
# reading the actual values straight from `data` could misalign with (or have
# a different length than) the predictions.
predictions <- data.frame(
  Actual = model.response(model.frame(model)),
  Predicted = fitted(model)
)

# Scatter plot of actual vs predicted values. The red reference line is
# y = x (a perfect prediction); the regression intercept and the Age slope
# are not meaningful on these axes.
ggplot(predictions, aes(x = Actual, y = Predicted)) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1, color = "red") +
  labs(title = "Actual vs Predicted Total Social Media Usage", x = "Actual", y = "Predicted")

# Base-graphics scatter plot of Instagram followers vs posts for two groups:
#   blue - respondents with more than 1000 followers
#   red  - respondents with at most 1000 followers but more than 100 posts
# Wrapped in local() so the helper vectors do not leak into the workspace.
local({
  followers <- data$`How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)`
  posts <- data$`How many posts do you have on Instagram?`

  # which() drops rows where the comparison is NA.
  many_followers <- which(followers > 1000)
  many_posts_only <- which(followers <= 1000 & posts > 100)
  shown <- c(many_followers, many_posts_only)

  # Size the axes from both groups. plot() on its own fits the axes to the
  # blue points only, so red points outside that range would be clipped.
  plot(
    followers[many_followers], posts[many_followers],
    col = "blue",
    xlim = range(followers[shown], na.rm = TRUE),
    ylim = range(posts[shown], na.rm = TRUE),
    xlab = "Number of followers", ylab = "Number of posts"
  )
  points(followers[many_posts_only], posts[many_posts_only], col = "red")
  legend("topright", legend = c("More than 1000 followers", "More than 100 posts"), col = c("blue", "red"), pch = 1)
})

# library() stops with an error when the package is missing, whereas
# require() only returns FALSE and lets the script fail later.
library(ggplot2)


# Gender distribution as a pie chart: a single stacked bar of row counts per
# gender, drawn in polar coordinates with the counts mapped to the angle.
data %>%
  ggplot(aes(x = "", fill = Gender)) +
  geom_bar(width = 1) +
  labs(
    title = "Gender Distribution",
    fill = "Gender"
  ) +
  coord_polar(theta = "y")

# Age distribution: histogram with 5-year bins, black bin outlines, and each
# distinct age filled with its own colour from the discrete viridis palette.
data %>%
  ggplot(aes(x = Age, fill = factor(Age))) +
  geom_histogram(binwidth = 5, color = "black") +
  scale_fill_viridis_d() +
  theme_minimal() +
  labs(
    title = "Age Distribution",
    x = "Age",
    y = "Count"
  )

# Pie chart of the total Instagram follower count per age.
# Note: the grouping is by each distinct Age value, not by binned age ranges.
# library() stops with an error when dplyr is missing; require() would only
# return FALSE and let the pipeline below fail later.
library(dplyr)
followers_by_age <- data %>%
  group_by(Age) %>%
  summarise(total_followers = sum(`How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)`))

# One stacked bar (a slice per age) wrapped into a circle; the slice size is
# the summed follower count and the fill is a pink-to-blue gradient over Age.
ggplot(followers_by_age, aes(x = "", y = total_followers, fill = Age)) +
  geom_bar(width = 1, stat = "identity") +
  coord_polar(theta = "y") +
  labs(title = "Number of Followers on Instagram by Age Group", fill = "Age") +
  theme_void() +
  scale_fill_gradient(low = "#FFC0CB", high = "#ADD8E6")

# Bar chart of respondent counts per education level, with one dodged bar per
# gender and the count printed above each bar.
ggplot(data, aes(x = `Highest Education`, fill = Gender)) +
  # `linewidth` sets the bar outline width; using `size` for lines is
  # deprecated since ggplot2 3.4.0 and raises a warning.
  geom_bar(color = "black", linewidth = 0.5, width = 0.7, position = position_dodge()) +
  labs(title = "Education Level Distribution", x = "Education Level", y = "Count") +
  theme_minimal() +
  theme(legend.position = "top", legend.title = element_blank()) +
  scale_fill_manual(values = c("#FFC0CB", "#ADD8E6", "#90EE90")) +
  guides(fill = guide_legend(reverse = TRUE)) +
  # Dodge width matches the bar width so each label sits above its own bar.
  geom_text(aes(label = after_stat(count)), stat = "count", position = position_dodge(width = 0.7), vjust = -0.5, size = 3)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Respondent counts per phone operating system, stacked by gender.
data %>%
  ggplot(aes(x = `Phone OS`, fill = Gender)) +
  geom_bar() +
  labs(
    title = "Phone Operating System Distribution",
    x = "Phone Operating System",
    y = "Count"
  )

# Phone operating system shares as a pie chart: a single stacked bar of row
# counts per OS, drawn in polar coordinates.
data %>%
  ggplot(aes(x = "", fill = `Phone OS`)) +
  geom_bar(width = 1) +
  labs(
    title = "Phone Operating System Distribution",
    fill = "Phone Operating System"
  ) +
  coord_polar(theta = "y")

# Scatter plot of weekly Facebook minutes against weekly Instagram minutes.
# The points use one fixed colour; no colour aesthetic is mapped, so no
# colour scale is needed (the previous scale_color_manual() had no effect).
ggplot(data, aes(x = `Time Spent on Facebook in last week (in minutes)`, y = `Time Spent on Instagram in last week (in minutes)`)) +
  geom_point(color = "#FFC0CB") +
  labs(title = "Time Spent on Facebook vs. Time Spent on Instagram", x = "Time Spent on Facebook (in minutes)", y = "Time Spent on Instagram (in minutes)")

# Distribution of total social media usage: histogram with 500-wide bins,
# pink bars with black outlines, on a light-blue plot background.
data %>%
  ggplot(aes(x = `Total Social Media Usage`)) +
  geom_histogram(binwidth = 500, fill = "#FFC0CB", color = "black") +
  labs(
    title = "Social Media Usage Distribution",
    x = "Total Social Media Usage (in minutes)",
    y = "Count"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, color = "black"),
    axis.title = element_text(size = 14, color = "black"),
    axis.text = element_text(size = 12, color = "black"),
    plot.background = element_rect(fill = "#ADD8E6")
  )

# Respondent locations (longitude/latitude) drawn over the world outline from
# borders(), with each point coloured by total social media usage on a
# light-blue-to-pink gradient.
data %>%
  ggplot(
    aes(x = Longitude, y = Latitude, color = `Total Social Media Usage`)
  ) +
  borders("world", colour = "gray50", fill = "lightgreen") +
  geom_point(size = 3) +
  scale_color_gradient(low = "#ADD8E6", high = "#FFC0CB") +
  labs(
    title = "Location vs. Social Media Usage",
    x = "Longitude",
    y = "Latitude",
    color = "Total Social Media Usage (in minutes)"
  )

# Distribution of weekly WhatsApp minutes: histogram with 100-minute bins,
# filled by gender.
data %>%
  ggplot(
    aes(x = `Time Spent on WhatsApp in last week (in minutes)`, fill = Gender)
  ) +
  geom_histogram(binwidth = 100) +
  labs(
    title = "Time Spent on WhatsApp Distribution",
    x = "Time Spent on WhatsApp (in minutes)",
    y = "Count"
  )

# Bar chart of social media usage by education level. The summary stat with
# fun = "mean" makes each bar's height the mean of Total Social Media Usage
# for that education level; bars are filled from a four-colour manual palette.
data %>%
  ggplot(
    aes(
      x = `Highest Education`,
      y = `Total Social Media Usage`,
      fill = `Highest Education`
    )
  ) +
  geom_bar(stat = "summary", fun = "mean") +
  scale_fill_manual(values = c("#FFC0CB", "#ADD8E6", "#90EE90", "#FFD700")) +
  labs(
    title = "Education Level vs. Social Media Usage",
    x = "Education Level",
    y = "Total Social Media Usage (in minutes)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, color = "black"),
    axis.title = element_text(size = 14, color = "black"),
    axis.text = element_text(size = 12, color = "black"),
    plot.background = element_rect(fill = "#F5F5F5")
  )

# Distribution of Instagram post counts: histogram with 50-post bins in
# light blue on a light-grey plot background.
data %>%
  ggplot(aes(x = `How many posts do you have on Instagram?`)) +
  geom_histogram(binwidth = 50, fill = "#ADD8E6") +
  labs(
    title = "Number of Posts on Instagram Distribution",
    x = "Number of Posts on Instagram",
    y = "Count"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, color = "black"),
    axis.title = element_text(size = 14, color = "black"),
    axis.text = element_text(size = 12, color = "black"),
    plot.background = element_rect(fill = "#F5F5F5")
  )

# Scatter plot of respondent age against Instagram post count.
data %>%
  ggplot(aes(x = Age, y = `How many posts do you have on Instagram?`)) +
  geom_point() +
  labs(
    title = "Age vs Number of Posts on Instagram",
    x = "Age",
    y = "Number of Posts on Instagram"
  )

# Distribution of weekly Facebook minutes: histogram with 100-minute bins in
# light blue on a light-grey plot background.
data %>%
  ggplot(aes(x = `Time Spent on Facebook in last week (in minutes)`)) +
  geom_histogram(binwidth = 100, fill = "#ADD8E6") +
  labs(
    title = "Time Spent on Facebook Distribution",
    x = "Time Spent on Facebook (in minutes)",
    y = "Count"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, color = "black"),
    axis.title = element_text(size = 14, color = "black"),
    axis.text = element_text(size = 12, color = "black"),
    plot.background = element_rect(fill = "#F5F5F5")
  )

# Scatter plot of respondent age against weekly Facebook minutes.
data %>%
  ggplot(
    aes(x = Age, y = `Time Spent on Facebook in last week (in minutes)`)
  ) +
  geom_point() +
  labs(
    title = "Age vs Time Spent on Facebook",
    x = "Age",
    y = "Time Spent on Facebook (in minutes)"
  )

# Scatter plot of respondent age against weekly Instagram minutes.
data %>%
  ggplot(
    aes(x = Age, y = `Time Spent on Instagram in last week (in minutes)`)
  ) +
  geom_point() +
  labs(
    title = "Age vs Time Spent on Instagram",
    x = "Age",
    y = "Time Spent on Instagram (in minutes)"
  )

# Distribution of total social media usage: histogram with 500-wide bins,
# filled by gender, with the legend placed below the plot.
data %>%
  ggplot(aes(x = `Total Social Media Usage`, fill = Gender)) +
  geom_histogram(binwidth = 500) +
  # Three colours are supplied so a third Gender value also gets a fill.
  scale_fill_manual(values = c("#ADD8E6", "#FFC0CB", "#FF0000")) +
  labs(
    title = "Total Social Media Usage Distribution",
    x = "Total Social Media Usage",
    y = "Count"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

# Scatter plot of respondent age against total social media usage.
data %>%
  ggplot(aes(x = Age, y = `Total Social Media Usage`)) +
  geom_point() +
  labs(
    title = "Age vs Total Social Media Usage",
    x = "Age",
    y = "Total Social Media Usage"
  )

# Scatter plot of Instagram follower count against Instagram post count.
data %>%
  ggplot(
    aes(
      x = `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)`,
      y = `How many posts do you have on Instagram?`
    )
  ) +
  geom_point() +
  labs(
    title = "Number of Followers vs Number of Posts on Instagram",
    x = "Number of Followers on Instagram",
    y = "Number of Posts on Instagram"
  )

# Current status shares as a pie chart: a single stacked bar of row counts
# per status, drawn in polar coordinates.
data %>%
  ggplot(aes(x = "", fill = `Current Status`)) +
  geom_bar(width = 1) +
  labs(
    title = "Current Status Distribution",
    fill = "Current Status"
  ) +
  coord_polar(theta = "y")

# Closing slide: a blank canvas with a centred "Thank You Mam" message.
# `size` is set outside aes() so it is a fixed text size; inside aes() it was
# mapped through a size scale, which changed the size and added a legend.
ggplot() +
  geom_text(aes(x = 0.5, y = 0.5, label = "Thank You Mam"), size = 10) +
  theme_void()