library(knitr)
options(digits = 2)

Summary

ifelse can be made significantly faster without loss of backwards-compatibility by using anyNA and falling back to the original code whenever backwards-compatibility cannot be assured.

Current ifelse:

old_ifelse <- function (test, yes, no)
{
    if(is.atomic(test)) { # do not lose attributes
        if (typeof(test) != "logical")
            storage.mode(test) <- "logical"
        ## quick return for cases where 'new_ifelse(a, x, y)' is used
        ## instead of 'if (a) x else y'
        if (length(test) == 1 && is.null(attributes(test))) {
            if (is.na(test)) return(NA)
            else if (test) {
                if (length(yes) == 1) {
                    yat <- attributes(yes)
                    if (is.null(yat) || (is.function(yes) &&
                                         identical(names(yat), "srcref")))
                        return(yes)
                }
            }
            else if (length(no) == 1) {
                nat <- attributes(no)
                if (is.null(nat) || (is.function(no) &&
                                     identical(names(nat), "srcref")))
                    return(no)
            }
        }
    }
    else ## typically a "class"; storage.mode<-() typically fails
    test <- if(isS4(test)) methods::as(test, "logical") else as.logical(test)
    ans <- test
    ok <- !is.na(test)
    if (any(test[ok]))
    ans[test & ok] <- rep(yes, length.out = length(ans))[test & ok]
    if (any(!test[ok]))
    ans[!test & ok] <- rep(no, length.out = length(ans))[!test & ok]
    ans
}

The proposal is to change ifelse to take advantage of the speed of anyNA(), use is.na only once after (and only if needed), and to take advantage of the predictable nature of ifelse when the lengths of test, yes, and no are equal or length-one.

I also correct an apparent bug in ifelse where the attributes of test are not saved if isS4(test).

.new_ifelse <- function(test, yes, no) {
  ans <- test
  ok <- !is.na(test)
  if (any(test[ok]))
    ans[test & ok] <- rep(yes, length.out = length(ans))[test & ok]
  if (any(!test[ok]))
    ans[!test & ok] <- rep(no, length.out = length(ans))[!test & ok]
  ans
}

new_ifelse <- function (test, yes, no)
{
  attributes_of_test <- attributes(test)
  
  if(is.atomic(test)) { # do not lose attributes
    if (typeof(test) != "logical")
      storage.mode(test) <- "logical"
    ## quick return for cases where 'new_ifelse(a, x, y)' is used
    ## instead of 'if (a) x else y'
    if (length(test) == 1 && is.null(attributes_of_test)) {
      if (is.na(test)) return(NA)
      else if (test) {
        if (length(yes) == 1) {
          yat <- attributes(yes)
          if (is.null(yat) || (is.function(yes) &&
                               identical(names(yat), "srcref")))
            return(yes)
        }
      }
      else if (length(no) == 1) {
        nat <- attributes(no)
        if (is.null(nat) || (is.function(no) &&
                             identical(names(nat), "srcref")))
          return(no)
      }
    }
  }
  else ## typically a "class"; storage.mode<-() typically fails
    test <- if(isS4(test)) methods::as(test, "logical") else as.logical(test)
  
  # Give up attempting backwards-compatibility under these conditions:
  if (typeof(yes) %in% c("logical", "integer", "double", "character") &&
      typeof(no)  %in% c("logical", "integer", "double", "character") &&
      !is.factor(yes) &&
      !is.factor(no) &&
      length(no) != 0L &&
      length(yes) != 0L) {
    if (length(no) == length(test)) {
      out <- no
    } else if (length(no) == 1L) {
      out <- rep_len(no, length(test))
    } else {
      return(.new_ifelse(test, yes, no))
    }
    
    if (length(yes) != 1L && length(yes) != length(test)) {
      return(.new_ifelse(test, yes, no))
    }
    
    
    if (anyNA(test)) {
      # no benefit to saving the na results
      Yes <- which(test)
      out[is.na(test)] <- NA
      if (length(yes) == 1L) {
        out[Yes] <- yes
      } else if (length(yes) == length(test)) {
        out[Yes] <- yes[Yes]
      } else {
        return(.new_ifelse(test, yes, no))
      }
    } else {
      # No NAs to deal with
      if (length(yes) == 1L) {
        out[test] <- yes
      } else if (length(yes) == length(test)) {
        wtest <- which(test) # faster than test directly
        out[wtest] <- yes[wtest]
      } else {
        return(.new_ifelse(test, yes, no))
      }
    }
    if (!is.null(attributes_of_test)) {
      attributes(out) <- attributes_of_test
    }
    
    out
  } else {
    ans <- test
    ok <- !is.na(test)
    if (any(test[ok]))
      ans[test & ok] <- rep(yes, length.out = length(ans))[test & ok]
    if (any(!test[ok]))
      ans[!test & ok] <- rep(no, length.out = length(ans))[!test & ok]
    ans
  }
}

new_ifelse100 <- function (test, yes, no)
{
  attributes_of_test <- attributes(test)
  
  if(is.atomic(test)) { # do not lose attributes
    if (typeof(test) != "logical")
      storage.mode(test) <- "logical"
    ## quick return for cases where 'new_ifelse(a, x, y)' is used
    ## instead of 'if (a) x else y'
    if (length(test) == 1 && is.null(attributes_of_test)) {
      if (is.na(test)) return(NA)
      else if (test) {
        if (length(yes) == 1) {
          yat <- attributes(yes)
          if (is.null(yat) || (is.function(yes) &&
                               identical(names(yat), "srcref")))
            return(yes)
        }
      }
      else if (length(no) == 1) {
        nat <- attributes(no)
        if (is.null(nat) || (is.function(no) &&
                             identical(names(nat), "srcref")))
          return(no)
      }
    }
  }
  else ## typically a "class"; storage.mode<-() typically fails
    test <- if(isS4(test)) methods::as(test, "logical") else as.logical(test)
  
  # Give up attempting backwards-compatibility under these conditions:
  if (length(test) > 100L &&
      typeof(yes) %in% c("logical", "integer", "double", "character") &&
      typeof(no)  %in% c("logical", "integer", "double", "character") &&
      !is.factor(yes) &&
      !is.factor(no) &&
      length(no) != 0L &&
      length(yes) != 0L) {
    if (length(no) == length(test)) {
      out <- no
    } else if (length(no) == 1L) {
      out <- rep_len(no, length(test))
    } else {
      return(.new_ifelse(test, yes, no))
    }
    
    if (length(yes) != 1L && length(yes) != length(test)) {
      return(.new_ifelse(test, yes, no))
    }
    
    
    if (anyNA(test)) {
      # no benefit to saving the na results
      Yes <- which(test)
      out[is.na(test)] <- NA
      if (length(yes) == 1L) {
        out[Yes] <- yes
      } else if (length(yes) == length(test)) {
        out[Yes] <- yes[Yes]
      } else {
        return(.new_ifelse(test, yes, no))
      }
    } else {
      # No NAs to deal with
      if (length(yes) == 1L) {
        out[test] <- yes
      } else if (length(yes) == length(test)) {
        wtest <- which(test) # faster than test directly
        out[wtest] <- yes[wtest]
      } else {
        return(.new_ifelse(test, yes, no))
      }
    }
    if (!is.null(attributes_of_test)) {
      attributes(out) <- attributes_of_test
    }
    
    out
  } else {
    ans <- test
    ok <- !is.na(test)
    if (any(test[ok]))
      ans[test & ok] <- rep(yes, length.out = length(ans))[test & ok]
    if (any(!test[ok]))
      ans[!test & ok] <- rep(no, length.out = length(ans))[!test & ok]
    ans
  }
}

Unit tests

test_identicals <- function(test, yes, no) {
  old <- old_ifelse(test, yes, no)
  new <- new_ifelse(test, yes, no)
  if (!identical(old, new)) {
    # Will take a few minutes otherwise
    if (length(old)  + length(new) < 50) {
      print(old)
      print(new)
    }
    cat("Non-identical result\n")
    return(invisible(list(old = old, 
                          new = new)))
  }
}
# Attributes?
test1 <- c(a = FALSE, b = FALSE, c = TRUE, d = TRUE, e = TRUE)
test_identicals(test1, 1:4 + 0.0, 3)
yes1 <- 1:4 + 0
names(yes1) <- "srcref"
test_identicals(test1, yes1, 2:3)

test2 <- sample(test1, size = 1e7, replace = TRUE)
test_identicals(test2, 1:4 + 0.0, 3)
test2[5e5] <- NA
test_identicals(test2, 1:4 + 0.0, 3)

As aformentioned, S4 classes did not retain attributes:

if (requireNamespace("Matrix", quietly = TRUE)) {
  library(Matrix)
  M <- Matrix(-10 + 1:28, 4, 7)
  test_identicals(M, 1, 2)
}
## Non-identical result

These examples simply fall back to the defaults.

test_identicals(c(TRUE, TRUE), "a", raw(0x95))
test_identicals(c(TRUE, TRUE), "a", factor("a"))
test_identicals(c(FALSE, FALSE), "a", factor("a"))
test_identicals(c(FALSE, TRUE), 1 , factor("a"))
test_identicals(c(FALSE, TRUE, NA), 1, factor("a"))

Benchmarks

library(data.table)
library(ggplot2)
library(microbenchmark)
library(scales)
get_avg_time <- function(n, yes1 = TRUE, include_NA = FALSE) {
  test <- sample(c(TRUE, FALSE, if (include_NA) NA), size = n, replace = TRUE)
  if (yes1) {
    yes <- -1L
  } else {
    yes <- sample.int(n)
  }
  no <- 1L
  old <- as.data.table(microbenchmark(old_ifelse(test, yes, no)), times = if (n < 1e3) 100 else 10)
  new <- as.data.table(microbenchmark(new_ifelse(test, yes, no)), times = if (n < 1e3) 100 else 10)
  new100 <- as.data.table(microbenchmark(new_ifelse100(test, yes, no)), times = if (n < 1e3) 100 else 10)
  
  nmed <- new100[, median(time)]
  omed <- old[, median(time)]
  list(old[, median(time)], 
       new[, median(time)],
       new100[, median(time)], 
       prop_faster = abs(new100[, mean(time < omed)] - old[, mean(time < nmed)]))
}

DT <- 
  CJ(n = c(1:10, 10*(2:10), 50*(3:10), 750, 1000*(1:10), 10e3, 50e3, 100e3),
     yes1 = c(TRUE, FALSE), 
     include_NA = c(TRUE, FALSE))
DT[, I := .I]
DT[, c("old", "new", "new100", "prop_faster") := get_avg_time(n, yes1, include_NA), by = "I"]
DT[, f1 := "without NAs"]
DT[(include_NA), f1 := "with NAs"]
DT[, f2 := "yes length-n"]
DT[(yes1), f2 := "yes length-1"]
ggplot(melt(DT, measure.vars = c("old", "new", "new100"))[, `time (ms)` := value / 1e6],
       aes(x = n, y = `time (ms)`, color = variable, alpha = prop_faster)) + 
  geom_line(size = 1.1) + 
  facet_wrap(~ f2 + f1) + 
  scale_x_log10(labels = scales::comma, breaks = c(1, 10, 100, 1000, 100e3)) + 
  scale_y_log10(labels = scales::comma)