Vectors and iteration

MACS 30500
University of Chicago

October 12, 2016

fizzbuzz function

# FizzBuzz classifier for a single number x.
#
# Branches are tried from top to bottom and the first match wins:
#   - x divisible by 3 -> "fizz"
#   - x divisible by 5 -> "buzz"
#   - otherwise        -> x itself, unchanged
#
# NOTE: the combined "divisible by 3 and by 5" check sits after the two
# single-divisor checks, so it can never be reached: any such x has already
# matched the first branch. As a result fizzbuzz(15) gives "fizz".
fizzbuzz <- function(x) {
  # TRUE when x leaves no remainder after division by d
  is_multiple <- function(d) x %% d == 0

  if (is_multiple(3)) {
    "fizz"
  } else if (is_multiple(5)) {
    "buzz"
  } else if (is_multiple(3) && is_multiple(5)) {
    "fizzbuzz" # unreachable in this ordering, see note above
  } else {
    x
  }
}
fizzbuzz(15)
## [1] "fizz"

fizzbuzz function (combined check first)

# FizzBuzz classifier for a single number x.
#
# Returns "fizzbuzz" when x is divisible by both 3 and 5, "fizz" when it is
# divisible by 3 only, "buzz" when it is divisible by 5 only, and x itself
# (unchanged) in every other case.
fizzbuzz <- function(x) {
  divisible_by_3 <- x %% 3 == 0
  divisible_by_5 <- x %% 5 == 0

  # The joint condition has to be tested before the single-divisor ones;
  # otherwise a multiple of 15 would be claimed by the "fizz" check.
  if (divisible_by_3 && divisible_by_5) {
    return("fizzbuzz")
  }
  if (divisible_by_3) {
    return("fizz")
  }
  if (divisible_by_5) {
    return("buzz")
  }
  x
}
fizzbuzz(3)
## [1] "fizz"
fizzbuzz(5)
## [1] "buzz"
fizzbuzz(15)
## [1] "fizzbuzz"
fizzbuzz(4)
## [1] 4

Types of vectors

  • Logical
  • Numeric
  • Character

Subsetting vectors

x <- c("one", "two", "three", "four", "five")
x[c(3, 2, 5)]
## [1] "three" "two"   "five"
x <- c(10, 3, NA, 5, 8, 1, NA)

# All non-missing values of x
x[!is.na(x)]
## [1] 10  3  5  8  1
# All even (or missing!) values of x
# (NA %% 2 == 0 is NA, and subsetting with NA returns NA)
x[x %% 2 == 0]
## [1] 10 NA  8 NA

Vector recycling

(x <- sample(10))
##  [1]  5  2  8  4  9  6  1  7 10  3
x + c(100, 100, 100, 100, 100, 100, 100, 100, 100, 100)
##  [1] 105 102 108 104 109 106 101 107 110 103
x + 100
##  [1] 105 102 108 104 109 106 101 107 110 103

Vector recycling

1:10 + 1:2
##  [1]  2  4  4  6  6  8  8 10 10 12

Lists

  1. Store a mix of objects.

    y <- list("a", 1L, 1.5, TRUE)
    str(y)
    ## List of 4
    ##  $ : chr "a"
    ##  $ : int 1
    ##  $ : num 1.5
    ##  $ : logi TRUE
  2. Contain other lists.

    z <- list(list(1, 2), list(3, 4))
    str(z)
    ## List of 2
    ##  $ :List of 2
    ##   ..$ : num 1
    ##   ..$ : num 2
    ##  $ :List of 2
    ##   ..$ : num 3
    ##   ..$ : num 4

Iteration with for loop

df <- tibble(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10),
  d = rnorm(10)
)

Compute the median for each column

median(df$a)
## [1] 0.3112762
median(df$b)
## [1] 0.2503242
median(df$c)
## [1] 0.6354677
median(df$d)
## [1] -0.001551442

Iteration with for loop

# Median of every column of df, computed with a for loop built from three
# parts: a preallocated result vector, the index sequence, and the loop body.
output <- vector("double", ncol(df))  # 1. output: one double slot per column
for (i in seq_along(df)) {            # 2. sequence: one index per column of df
  output[[i]] <- median(df[[i]])      # 3. body: median of column i goes in slot i
}
output
## [1]  0.311276168  0.250324212  0.635467671 -0.001551442

Preallocation

x <- rnorm(1e04, mean = 0, sd = 1)
str(x)
##  num [1:10000] -0.0829 0.5971 0.2258 0.2622 1.4578 ...
# don't preallocate
system.time({
  # Start from a zero-length vector, so it has to be enlarged on every pass.
  output <- vector("numeric", 0)

  for(i in seq_along(x)){
    # c() builds a new, one-element-longer vector each iteration; the
    # repeated copying is the cost this timing is meant to show.
    output <- c(output, x[[i]] + 1)
  }
})
##    user  system elapsed 
##   0.231   0.008   0.240
# preallocate
system.time({
  # Allocate the full-length result once, before the loop starts.
  output <- vector("numeric", length(x))

  for(i in seq_along(x)){
    # Write into the existing slot i; the vector never changes length.
    output[[i]] <- x[[i]] + 1
  }
})
##    user  system elapsed 
##   0.012   0.000   0.013

Write a for loop

  • Output - a numeric vector of length 30
  • Sequence - i in seq_along(x)
  • Body - square the \(i\)th element of the vector x and store the new value as the \(i\)th element of the vector output
x <- 1:30
x
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
## [24] 24 25 26 27 28 29 30

Write a for loop

  • Output - a list of length 10 (10 columns in diamonds)
  • Sequence - i in seq_along(diamonds)
  • Body - get the maximum value of the \(i\)th column of the data frame diamonds and store the new value as the \(i\)th element of the list output
diamonds
## # A tibble: 53,940 × 10
##    carat       cut color clarity depth table price     x     y     z
##    <dbl>     <ord> <ord>   <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1   0.23     Ideal     E     SI2  61.5    55   326  3.95  3.98  2.43
## 2   0.21   Premium     E     SI1  59.8    61   326  3.89  3.84  2.31
## 3   0.23      Good     E     VS1  56.9    65   327  4.05  4.07  2.31
## 4   0.29   Premium     I     VS2  62.4    58   334  4.20  4.23  2.63
## 5   0.31      Good     J     SI2  63.3    58   335  4.34  4.35  2.75
## 6   0.24 Very Good     J    VVS2  62.8    57   336  3.94  3.96  2.48
## 7   0.24 Very Good     I    VVS1  62.3    57   336  3.95  3.98  2.47
## 8   0.26 Very Good     H     SI1  61.9    55   337  4.07  4.11  2.53
## 9   0.22      Fair     E     VS2  65.1    61   337  3.87  3.78  2.49
## 10  0.23 Very Good     H     VS1  59.4    61   338  4.00  4.05  2.39
## # ... with 53,930 more rows

Map functions

  • map() makes a list.
  • map_lgl() makes a logical vector.
  • map_int() makes an integer vector.
  • map_dbl() makes a double vector.
  • map_chr() makes a character vector.

Map functions

map_dbl(df, mean)
##         a         b         c         d 
## 0.2564294 0.2869979 0.2941796 0.1978090
map_dbl(df, median)
##            a            b            c            d 
##  0.311276168  0.250324212  0.635467671 -0.001551442
map_dbl(df, sd)
##         a         b         c         d 
## 0.7397457 1.1843367 1.2112464 0.8923603

Map functions

map_dbl(df, mean, na.rm = TRUE)
##         a         b         c         d 
## 0.2564294 0.2869979 0.2941796 0.1978090

Map functions

df %>%
  map_dbl(mean)
##         a         b         c         d 
## 0.2564294 0.2869979 0.2941796 0.1978090

Rewrite our for loops using a map() function

Squared value

x <- 1:30
x
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
## [24] 24 25 26 27 28 29 30

Maximum value in each column of diamonds