MACS 30500
University of Chicago
scorecard
scorecard
## # A tibble: 1,849 × 12
## unitid name state type
## <int> <chr> <chr> <chr>
## 1 450234 ITT Technical Institute-Wichita KS Private, for-profit
## 2 448479 ITT Technical Institute-Swartz Creek MI Private, for-profit
## 3 456427 ITT Technical Institute-Concord CA Private, for-profit
## 4 459596 ITT Technical Institute-Tallahassee FL Private, for-profit
## 5 459851 Herzing University-Brookfield WI Private, for-profit
## 6 482477 DeVry University-Illinois IL Private, for-profit
## 7 482547 DeVry University-Nevada NV Private, for-profit
## 8 482592 DeVry University-Oregon OR Private, for-profit
## 9 482617 DeVry University-Tennessee TN Private, for-profit
## 10 482662 DeVry University-Washington WA Private, for-profit
## # ... with 1,839 more rows, and 8 more variables: cost <int>,
## # admrate <dbl>, satavg <dbl>, avgfacsal <dbl>, pctpell <dbl>,
## # comprate <dbl>, firstgen <dbl>, debt <dbl>
# using arrange() and desc()
scorecard %>%
arrange(desc(cost))
## # A tibble: 1,849 × 12
## unitid name state
## <int> <chr> <chr>
## 1 195304 Sarah Lawrence College NY
## 2 179867 Washington University in St Louis MO
## 3 144050 University of Chicago IL
## 4 190150 Columbia University in the City of New York NY
## 5 182670 Dartmouth College NH
## 6 130697 Wesleyan University CT
## 7 147767 Northwestern University IL
## 8 120254 Occidental College CA
## 9 115409 Harvey Mudd College CA
## 10 230816 Bennington College VT
## # ... with 1,839 more rows, and 9 more variables: type <chr>, cost <int>,
## # admrate <dbl>, satavg <dbl>, avgfacsal <dbl>, pctpell <dbl>,
## # comprate <dbl>, firstgen <dbl>, debt <dbl>
# use filter() and percent_rank() to calculate percentage
scorecard %>%
select(name, type, cost) %>%
filter(type == "Private, nonprofit") %>%
mutate(cost_rank = percent_rank(cost)) %>%
filter(name == "University of Chicago")
## # A tibble: 1 × 4
## name type cost cost_rank
## <chr> <chr> <int> <dbl>
## 1 University of Chicago Private, nonprofit 62425 0.9981464
# use group_by() and summarize()
scorecard %>%
group_by(type) %>%
summarize(mean_sat = mean(satavg, na.rm = TRUE))
## # A tibble: 3 × 2
## type mean_sat
## <chr> <dbl>
## 1 Private, for-profit 1002.500
## 2 Private, nonprofit 1075.287
## 3 Public 1037.410
# using a boxplot
ggplot(scorecard, aes(type, satavg)) +
geom_boxplot()
# using a frequency polygon
ggplot(scorecard, aes(satavg, y = ..density.., color = type)) +
geom_freqpoly()
# using a histogram and facets
ggplot(scorecard, aes(satavg)) +
geom_histogram() +
facet_wrap(~ type)
# scatterplot and smoothing line
ggplot(scorecard, aes(cost, avgfacsal)) +
geom_point() +
geom_smooth()
# same as before, but use scale_color to visualize different relationships
# by college type and use alpha to make points semi-transparent
ggplot(scorecard, aes(cost, avgfacsal, color = type)) +
geom_point(alpha = .2) +
geom_smooth()
# scatterplot and smoothing line
ggplot(scorecard, aes(pctpell, debt)) +
geom_point() +
geom_smooth()
x <- c(1, 2, "abc", TRUE)
x
## [1] "1" "2" "abc" "TRUE"
parse_logical(x)
## [1] TRUE NA NA TRUE
## attr(,"problems")
## # A tibble: 2 × 4
## row col expected actual
## <int> <int> <chr> <chr>
## 1 2 NA 1/0/T/F/TRUE/FALSE 2
## 2 3 NA 1/0/T/F/TRUE/FALSE abc
parse_integer(x)
## [1] 1 2 NA NA
## attr(,"problems")
## # A tibble: 2 × 4
## row col expected actual
## <int> <int> <chr> <chr>
## 1 3 NA an integer abc
## 2 4 NA an integer TRUE
parse_character(x)
## [1] "1" "2" "abc" "TRUE"
col_integer()
col_character()
col_logical()
mutate()
as.character()
as.numeric()
NA
)