── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 4.0.0 ✔ tibble 3.3.0
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
✔ purrr 1.1.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Registered S3 method overwritten by 'mosaic':
method from
fortify.SpatialPolygonsDataFrame ggplot2
The 'mosaic' package masks several functions from core packages in order to add
additional features. The original behavior of these functions should not be affected by this.
Attaching package: 'mosaic'
The following object is masked from 'package:Matrix':
mean
The following objects are masked from 'package:dplyr':
count, do, tally
The following object is masked from 'package:purrr':
cross
The following object is masked from 'package:ggplot2':
stat
The following objects are masked from 'package:stats':
binom.test, cor, cor.test, cov, fivenum, IQR, median, prop.test,
quantile, sd, t.test, var
The following objects are masked from 'package:base':
max, mean, min, prod, range, sample, sum
Attaching package: 'skimr'
The following object is masked from 'package:mosaic':
n_missing
Attaching package: 'naniar'
The following object is masked from 'package:skimr':
n_complete
Attaching package: 'janitor'
The following objects are masked from 'package:stats':
chisq.test, fisher.test
Attaching package: 'tinytable'
The following object is masked from 'package:ggplot2':
theme_void
Classwork 2
Cleaning
Setup R packages
Reading csv file
fastfood <- read_csv("https://vincentarelbundock.github.io/Rdatasets/csv/openintro/fastfood.csv") %>%
clean_names(case="snake")
Rows: 515 Columns: 18
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): restaurant, item, salad
dbl (15): rownames, calories, cal_fat, total_fat, sat_fat, trans_fat, choles...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
fastfood
# A tibble: 515 × 18
rownames restaurant item calories cal_fat total_fat sat_fat trans_fat
<dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 Mcdonalds Artisan Gri… 380 60 7 2 0
2 2 Mcdonalds Single Baco… 840 410 45 17 1.5
3 3 Mcdonalds Double Baco… 1130 600 67 27 3
4 4 Mcdonalds Grilled Bac… 750 280 31 10 0.5
5 5 Mcdonalds Crispy Baco… 920 410 45 12 0.5
6 6 Mcdonalds Big Mac 540 250 28 10 1
7 7 Mcdonalds Cheeseburger 300 100 12 5 0.5
8 8 Mcdonalds Classic Chi… 510 210 24 4 0
9 9 Mcdonalds Double Chee… 430 190 21 11 1
10 10 Mcdonalds Double Quar… 770 400 45 21 2.5
# ℹ 505 more rows
# ℹ 10 more variables: cholesterol <dbl>, sodium <dbl>, total_carb <dbl>,
# fiber <dbl>, sugar <dbl>, protein <dbl>, vit_a <dbl>, vit_c <dbl>,
# calcium <dbl>, salad <chr>
Names function
names(fastfood)
[1] "rownames" "restaurant" "item" "calories" "cal_fat"
[6] "total_fat" "sat_fat" "trans_fat" "cholesterol" "sodium"
[11] "total_carb" "fiber" "sugar" "protein" "vit_a"
[16] "vit_c" "calcium" "salad"
Glimpse, Dim, str
dplyr::glimpse(fastfood)
Rows: 515
Columns: 18
$ rownames <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
$ restaurant <chr> "Mcdonalds", "Mcdonalds", "Mcdonalds", "Mcdonalds", "Mcdon…
$ item <chr> "Artisan Grilled Chicken Sandwich", "Single Bacon Smokehou…
$ calories <dbl> 380, 840, 1130, 750, 920, 540, 300, 510, 430, 770, 380, 62…
$ cal_fat <dbl> 60, 410, 600, 280, 410, 250, 100, 210, 190, 400, 170, 300,…
$ total_fat <dbl> 7, 45, 67, 31, 45, 28, 12, 24, 21, 45, 18, 34, 20, 34, 8, …
$ sat_fat <dbl> 2.0, 17.0, 27.0, 10.0, 12.0, 10.0, 5.0, 4.0, 11.0, 21.0, 4…
$ trans_fat <dbl> 0.0, 1.5, 3.0, 0.5, 0.5, 1.0, 0.5, 0.0, 1.0, 2.5, 0.0, 1.5…
$ cholesterol <dbl> 95, 130, 220, 155, 120, 80, 40, 65, 85, 175, 40, 95, 125, …
$ sodium <dbl> 1110, 1580, 1920, 1940, 1980, 950, 680, 1040, 1040, 1290, …
$ total_carb <dbl> 44, 62, 63, 62, 81, 46, 33, 49, 35, 42, 38, 48, 48, 67, 31…
$ fiber <dbl> 3, 2, 3, 2, 4, 3, 2, 3, 2, 3, 2, 3, 3, 5, 2, 2, 3, 3, 5, 2…
$ sugar <dbl> 11, 18, 18, 18, 18, 9, 7, 6, 7, 10, 5, 11, 11, 11, 6, 3, 1…
$ protein <dbl> 37, 46, 70, 55, 46, 25, 15, 25, 25, 51, 15, 32, 42, 33, 13…
$ vit_a <dbl> 4, 6, 10, 6, 6, 10, 10, 0, 20, 20, 2, 10, 10, 10, 2, 4, 6,…
$ vit_c <dbl> 20, 20, 20, 25, 20, 2, 2, 4, 4, 6, 0, 10, 20, 15, 2, 6, 15…
$ calcium <dbl> 20, 20, 50, 20, 20, 15, 10, 2, 15, 20, 15, 35, 35, 35, 4, …
$ salad <chr> "Other", "Other", "Other", "Other", "Other", "Other", "Oth…
base::dim(fastfood)
[1] 515 18
utils::str(fastfood)
spc_tbl_ [515 × 18] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ rownames : num [1:515] 1 2 3 4 5 6 7 8 9 10 ...
$ restaurant : chr [1:515] "Mcdonalds" "Mcdonalds" "Mcdonalds" "Mcdonalds" ...
$ item : chr [1:515] "Artisan Grilled Chicken Sandwich" "Single Bacon Smokehouse Burger" "Double Bacon Smokehouse Burger" "Grilled Bacon Smokehouse Chicken Sandwich" ...
$ calories : num [1:515] 380 840 1130 750 920 540 300 510 430 770 ...
$ cal_fat : num [1:515] 60 410 600 280 410 250 100 210 190 400 ...
$ total_fat : num [1:515] 7 45 67 31 45 28 12 24 21 45 ...
$ sat_fat : num [1:515] 2 17 27 10 12 10 5 4 11 21 ...
$ trans_fat : num [1:515] 0 1.5 3 0.5 0.5 1 0.5 0 1 2.5 ...
$ cholesterol: num [1:515] 95 130 220 155 120 80 40 65 85 175 ...
$ sodium : num [1:515] 1110 1580 1920 1940 1980 950 680 1040 1040 1290 ...
$ total_carb : num [1:515] 44 62 63 62 81 46 33 49 35 42 ...
$ fiber : num [1:515] 3 2 3 2 4 3 2 3 2 3 ...
$ sugar : num [1:515] 11 18 18 18 18 9 7 6 7 10 ...
$ protein : num [1:515] 37 46 70 55 46 25 15 25 25 51 ...
$ vit_a : num [1:515] 4 6 10 6 6 10 10 0 20 20 ...
$ vit_c : num [1:515] 20 20 20 25 20 2 2 4 4 6 ...
$ calcium : num [1:515] 20 20 50 20 20 15 10 2 15 20 ...
$ salad : chr [1:515] "Other" "Other" "Other" "Other" ...
- attr(*, "spec")=
.. cols(
.. rownames = col_double(),
.. restaurant = col_character(),
.. item = col_character(),
.. calories = col_double(),
.. cal_fat = col_double(),
.. total_fat = col_double(),
.. sat_fat = col_double(),
.. trans_fat = col_double(),
.. cholesterol = col_double(),
.. sodium = col_double(),
.. total_carb = col_double(),
.. fiber = col_double(),
.. sugar = col_double(),
.. protein = col_double(),
.. vit_a = col_double(),
.. vit_c = col_double(),
.. calcium = col_double(),
.. salad = col_character()
.. )
- attr(*, "problems")=<externalptr>
Finding Missing values
# Plot 1: overview of the whole data frame; per its subtitle, this view is
# meant to show the type of each variable.
visdat::vis_dat(fastfood) +
  labs(
    title = "Missing Data",
    subtitle = "Showing variable types"
  ) +
  theme(
    plot.title = element_text(size = 20),
    plot.subtitle = element_text(size = 14)
  )

# Plot 2: missing vs. present values for the same data frame, with the same
# title/subtitle font sizes as the first plot.
vis_miss(fastfood) +
  labs(
    title = "Missing Data in the FastFood Dataset",
    subtitle = "Showing missing & present data"
  ) +
  theme(
    plot.title = element_text(size = 20),
    plot.subtitle = element_text(size = 14)
  )
Removing missing data
fastfood_modified <- fastfood %>% tidyr::drop_na()
Displaying modified data table
fastfood_modified
# A tibble: 301 × 18
rownames restaurant item calories cal_fat total_fat sat_fat trans_fat
<dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 Mcdonalds Artisan Gri… 380 60 7 2 0
2 2 Mcdonalds Single Baco… 840 410 45 17 1.5
3 3 Mcdonalds Double Baco… 1130 600 67 27 3
4 4 Mcdonalds Grilled Bac… 750 280 31 10 0.5
5 5 Mcdonalds Crispy Baco… 920 410 45 12 0.5
6 6 Mcdonalds Big Mac 540 250 28 10 1
7 7 Mcdonalds Cheeseburger 300 100 12 5 0.5
8 8 Mcdonalds Classic Chi… 510 210 24 4 0
9 9 Mcdonalds Double Chee… 430 190 21 11 1
10 10 Mcdonalds Double Quar… 770 400 45 21 2.5
# ℹ 291 more rows
# ℹ 10 more variables: cholesterol <dbl>, sodium <dbl>, total_carb <dbl>,
# fiber <dbl>, sugar <dbl>, protein <dbl>, vit_a <dbl>, vit_c <dbl>,
# calcium <dbl>, salad <chr>
Removing the column entirely
fastfood_modified2 <- fastfood %>%
dplyr::select(-vit_a)
fastfood_modified2 <- fastfood %>%
dplyr::select(-c(vit_a, vit_c))
Converting into Factors
# Convert the three character columns (restaurant, salad, item) to factors in
# place; all other columns and the column order are left untouched.
fastfood_modified1 <- fastfood %>%
  mutate(across(c(restaurant, salad, item), as.factor))
dplyr::glimpse(fastfood_modified1)
Rows: 515
Columns: 18
$ rownames <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
$ restaurant <fct> Mcdonalds, Mcdonalds, Mcdonalds, Mcdonalds, Mcdonalds, Mcd…
$ item <fct> "Artisan Grilled Chicken Sandwich", "Single Bacon Smokehou…
$ calories <dbl> 380, 840, 1130, 750, 920, 540, 300, 510, 430, 770, 380, 62…
$ cal_fat <dbl> 60, 410, 600, 280, 410, 250, 100, 210, 190, 400, 170, 300,…
$ total_fat <dbl> 7, 45, 67, 31, 45, 28, 12, 24, 21, 45, 18, 34, 20, 34, 8, …
$ sat_fat <dbl> 2.0, 17.0, 27.0, 10.0, 12.0, 10.0, 5.0, 4.0, 11.0, 21.0, 4…
$ trans_fat <dbl> 0.0, 1.5, 3.0, 0.5, 0.5, 1.0, 0.5, 0.0, 1.0, 2.5, 0.0, 1.5…
$ cholesterol <dbl> 95, 130, 220, 155, 120, 80, 40, 65, 85, 175, 40, 95, 125, …
$ sodium <dbl> 1110, 1580, 1920, 1940, 1980, 950, 680, 1040, 1040, 1290, …
$ total_carb <dbl> 44, 62, 63, 62, 81, 46, 33, 49, 35, 42, 38, 48, 48, 67, 31…
$ fiber <dbl> 3, 2, 3, 2, 4, 3, 2, 3, 2, 3, 2, 3, 3, 5, 2, 2, 3, 3, 5, 2…
$ sugar <dbl> 11, 18, 18, 18, 18, 9, 7, 6, 7, 10, 5, 11, 11, 11, 6, 3, 1…
$ protein <dbl> 37, 46, 70, 55, 46, 25, 15, 25, 25, 51, 15, 32, 42, 33, 13…
$ vit_a <dbl> 4, 6, 10, 6, 6, 10, 10, 0, 20, 20, 2, 10, 10, 10, 2, 4, 6,…
$ vit_c <dbl> 20, 20, 20, 25, 20, 2, 2, 4, 4, 6, 0, 10, 20, 15, 2, 6, 15…
$ calcium <dbl> 20, 20, 50, 20, 20, 15, 10, 2, 15, 20, 15, 35, 35, 35, 4, …
$ salad <fct> Other, Other, Other, Other, Other, Other, Other, Other, Ot…
Relocate
# Build the cleaned table from the NA-free data:
#   1. turn the character columns into factors,
#   2. rename `item` to `dish`,
#   3. move every factor column to sit right after `rownames`, so the
#      qualitative variables come first and the quantitative ones follow.
fast_food_modified <- fastfood_modified %>%
  mutate(across(c(restaurant, salad, item), as.factor)) %>%
  rename(dish = item) %>%
  dplyr::relocate(where(is.factor), .after = rownames)
glimpse(fast_food_modified)
Rows: 301
Columns: 18
$ rownames <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
$ restaurant <fct> Mcdonalds, Mcdonalds, Mcdonalds, Mcdonalds, Mcdonalds, Mcd…
$ dish <fct> "Artisan Grilled Chicken Sandwich", "Single Bacon Smokehou…
$ salad <fct> Other, Other, Other, Other, Other, Other, Other, Other, Ot…
$ calories <dbl> 380, 840, 1130, 750, 920, 540, 300, 510, 430, 770, 380, 62…
$ cal_fat <dbl> 60, 410, 600, 280, 410, 250, 100, 210, 190, 400, 170, 300,…
$ total_fat <dbl> 7, 45, 67, 31, 45, 28, 12, 24, 21, 45, 18, 34, 20, 34, 8, …
$ sat_fat <dbl> 2.0, 17.0, 27.0, 10.0, 12.0, 10.0, 5.0, 4.0, 11.0, 21.0, 4…
$ trans_fat <dbl> 0.0, 1.5, 3.0, 0.5, 0.5, 1.0, 0.5, 0.0, 1.0, 2.5, 0.0, 1.5…
$ cholesterol <dbl> 95, 130, 220, 155, 120, 80, 40, 65, 85, 175, 40, 95, 125, …
$ sodium <dbl> 1110, 1580, 1920, 1940, 1980, 950, 680, 1040, 1040, 1290, …
$ total_carb <dbl> 44, 62, 63, 62, 81, 46, 33, 49, 35, 42, 38, 48, 48, 67, 31…
$ fiber <dbl> 3, 2, 3, 2, 4, 3, 2, 3, 2, 3, 2, 3, 3, 5, 2, 2, 3, 3, 5, 2…
$ sugar <dbl> 11, 18, 18, 18, 18, 9, 7, 6, 7, 10, 5, 11, 11, 11, 6, 3, 1…
$ protein <dbl> 37, 46, 70, 55, 46, 25, 15, 25, 25, 51, 15, 32, 42, 33, 13…
$ vit_a <dbl> 4, 6, 10, 6, 6, 10, 10, 0, 20, 20, 2, 10, 10, 10, 2, 4, 6,…
$ vit_c <dbl> 20, 20, 20, 25, 20, 2, 2, 4, 4, 6, 0, 10, 20, 15, 2, 6, 15…
$ calcium <dbl> 20, 20, 50, 20, 20, 15, 10, 2, 15, 20, 15, 35, 35, 35, 4, …
Displaying Data
fast_food_modified %>%
head(10) %>%
tinytable::tt(caption = "Fast Food Dataset (Clean)")

| rownames | restaurant | dish | salad | calories | cal_fat | total_fat | sat_fat | trans_fat | cholesterol | sodium | total_carb | fiber | sugar | protein | vit_a | vit_c | calcium |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | Mcdonalds | Artisan Grilled Chicken Sandwich | Other | 380 | 60 | 7 | 2 | 0.0 | 95 | 1110 | 44 | 3 | 11 | 37 | 4 | 20 | 20 |
| 2 | Mcdonalds | Single Bacon Smokehouse Burger | Other | 840 | 410 | 45 | 17 | 1.5 | 130 | 1580 | 62 | 2 | 18 | 46 | 6 | 20 | 20 |
| 3 | Mcdonalds | Double Bacon Smokehouse Burger | Other | 1130 | 600 | 67 | 27 | 3.0 | 220 | 1920 | 63 | 3 | 18 | 70 | 10 | 20 | 50 |
| 4 | Mcdonalds | Grilled Bacon Smokehouse Chicken Sandwich | Other | 750 | 280 | 31 | 10 | 0.5 | 155 | 1940 | 62 | 2 | 18 | 55 | 6 | 25 | 20 |
| 5 | Mcdonalds | Crispy Bacon Smokehouse Chicken Sandwich | Other | 920 | 410 | 45 | 12 | 0.5 | 120 | 1980 | 81 | 4 | 18 | 46 | 6 | 20 | 20 |
| 6 | Mcdonalds | Big Mac | Other | 540 | 250 | 28 | 10 | 1.0 | 80 | 950 | 46 | 3 | 9 | 25 | 10 | 2 | 15 |
| 7 | Mcdonalds | Cheeseburger | Other | 300 | 100 | 12 | 5 | 0.5 | 40 | 680 | 33 | 2 | 7 | 15 | 10 | 2 | 10 |
| 8 | Mcdonalds | Classic Chicken Sandwich | Other | 510 | 210 | 24 | 4 | 0.0 | 65 | 1040 | 49 | 3 | 6 | 25 | 0 | 4 | 2 |
| 9 | Mcdonalds | Double Cheeseburger | Other | 430 | 190 | 21 | 11 | 1.0 | 85 | 1040 | 35 | 2 | 7 | 25 | 20 | 4 | 15 |
| 10 | Mcdonalds | Double Quarter Pounder® with Cheese | Other | 770 | 400 | 45 | 21 | 2.5 | 175 | 1290 | 42 | 3 | 10 | 51 | 20 | 6 | 20 |