A new ifelse

Summary

ifelse can be made significantly faster without loss of backwards-compatibility by using anyNA and falling back to the original code whenever backwards-compatibility cannot be assured.

Current `ifelse`:

old_ifelse <- function (test, yes, no)
{
    if(is.atomic(test)) { # do not lose attributes
        if (typeof(test) != "logical")
            storage.mode(test) <- "logical"
        ## quick return for cases where 'new_ifelse(a, x, y)' is used
        ## instead of 'if (a) x else y'
        if (length(test) == 1 && is.null(attributes(test))) {
            if (is.na(test)) return(NA)
            else if (test) {
                if (length(yes) == 1) {
                    yat <- attributes(yes)
                    if (is.null(yat) || (is.function(yes) &&
                                         identical(names(yat), "srcref")))
                        return(yes)
                }
            }
            else if (length(no) == 1) {
                nat <- attributes(no)
                if (is.null(nat) || (is.function(no) &&
                                     identical(names(nat), "srcref")))
                    return(no)
            }
        }
    }
    else ## typically a "class"; storage.mode<-() typically fails
    test <- if(isS4(test)) methods::as(test, "logical") else as.logical(test)
    ans <- test
    ok <- !is.na(test)
    if (any(test[ok]))
    ans[test & ok] <- rep(yes, length.out = length(ans))[test & ok]
    if (any(!test[ok]))
    ans[!test & ok] <- rep(no, length.out = length(ans))[!test & ok]
    ans
}

The proposal is to change ifelse to take advantage of the speed of anyNA(), use is.na only once after (and only if needed), and to take advantage of the predictable nature of ifelse when the lengths of test, yes, and no are equal or length-one.

I also correct an apparent bug in ifelse where the attributes of test are not saved if isS4(test).

.new_ifelse <- function(test, yes, no) {
  ans <- test
  ok <- !is.na(test)
  if (any(test[ok]))
    ans[test & ok] <- rep(yes, length.out = length(ans))[test & ok]
  if (any(!test[ok]))
    ans[!test & ok] <- rep(no, length.out = length(ans))[!test & ok]
  ans
}

new_ifelse <- function (test, yes, no)
{
  attributes_of_test <- attributes(test)
  
  if(is.atomic(test)) { # do not lose attributes
    if (typeof(test) != "logical")
      storage.mode(test) <- "logical"
    ## quick return for cases where 'new_ifelse(a, x, y)' is used
    ## instead of 'if (a) x else y'
    if (length(test) == 1 && is.null(attributes_of_test)) {
      if (is.na(test)) return(NA)
      else if (test) {
        if (length(yes) == 1) {
          yat <- attributes(yes)
          if (is.null(yat) || (is.function(yes) &&
                               identical(names(yat), "srcref")))
            return(yes)
        }
      }
      else if (length(no) == 1) {
        nat <- attributes(no)
        if (is.null(nat) || (is.function(no) &&
                             identical(names(nat), "srcref")))
          return(no)
      }
    }
  }
  else ## typically a "class"; storage.mode<-() typically fails
    test <- if(isS4(test)) methods::as(test, "logical") else as.logical(test)
  
  # Give up attempting backwards-compatibility under these conditions:
  if (typeof(yes) %in% c("logical", "integer", "double", "character") &&
      typeof(no)  %in% c("logical", "integer", "double", "character") &&
      !is.factor(yes) &&
      !is.factor(no) &&
      length(no) != 0L &&
      length(yes) != 0L) {
    if (length(no) == length(test)) {
      out <- no
    } else if (length(no) == 1L) {
      out <- rep_len(no, length(test))
    } else {
      return(.new_ifelse(test, yes, no))
    }
    
    if (length(yes) != 1L && length(yes) != length(test)) {
      return(.new_ifelse(test, yes, no))
    }
    
    
    if (anyNA(test)) {
      # no benefit to saving the na results
      Yes <- which(test)
      out[is.na(test)] <- NA
      if (length(yes) == 1L) {
        out[Yes] <- yes
      } else if (length(yes) == length(test)) {
        out[Yes] <- yes[Yes]
      } else {
        return(.new_ifelse(test, yes, no))
      }
    } else {
      # No NAs to deal with
      if (length(yes) == 1L) {
        out[test] <- yes
      } else if (length(yes) == length(test)) {
        wtest <- which(test) # faster than test directly
        out[wtest] <- yes[wtest]
      } else {
        return(.new_ifelse(test, yes, no))
      }
    }
    if (!is.null(attributes_of_test)) {
      attributes(out) <- attributes_of_test
    }
    
    out
  } else {
    ans <- test
    ok <- !is.na(test)
    if (any(test[ok]))
      ans[test & ok] <- rep(yes, length.out = length(ans))[test & ok]
    if (any(!test[ok]))
      ans[!test & ok] <- rep(no, length.out = length(ans))[!test & ok]
    ans
  }
}

new_ifelse100 <- function (test, yes, no)
{
  attributes_of_test <- attributes(test)
  
  if(is.atomic(test)) { # do not lose attributes
    if (typeof(test) != "logical")
      storage.mode(test) <- "logical"
    ## quick return for cases where 'new_ifelse(a, x, y)' is used
    ## instead of 'if (a) x else y'
    if (length(test) == 1 && is.null(attributes_of_test)) {
      if (is.na(test)) return(NA)
      else if (test) {
        if (length(yes) == 1) {
          yat <- attributes(yes)
          if (is.null(yat) || (is.function(yes) &&
                               identical(names(yat), "srcref")))
            return(yes)
        }
      }
      else if (length(no) == 1) {
        nat <- attributes(no)
        if (is.null(nat) || (is.function(no) &&
                             identical(names(nat), "srcref")))
          return(no)
      }
    }
  }
  else ## typically a "class"; storage.mode<-() typically fails
    test <- if(isS4(test)) methods::as(test, "logical") else as.logical(test)
  
  # Give up attempting backwards-compatibility under these conditions:
  if (length(test) > 100L &&
      typeof(yes) %in% c("logical", "integer", "double", "character") &&
      typeof(no)  %in% c("logical", "integer", "double", "character") &&
      !is.factor(yes) &&
      !is.factor(no) &&
      length(no) != 0L &&
      length(yes) != 0L) {
    if (length(no) == length(test)) {
      out <- no
    } else if (length(no) == 1L) {
      out <- rep_len(no, length(test))
    } else {
      return(.new_ifelse(test, yes, no))
    }
    
    if (length(yes) != 1L && length(yes) != length(test)) {
      return(.new_ifelse(test, yes, no))
    }
    
    
    if (anyNA(test)) {
      # no benefit to saving the na results
      Yes <- which(test)
      out[is.na(test)] <- NA
      if (length(yes) == 1L) {
        out[Yes] <- yes
      } else if (length(yes) == length(test)) {
        out[Yes] <- yes[Yes]
      } else {
        return(.new_ifelse(test, yes, no))
      }
    } else {
      # No NAs to deal with
      if (length(yes) == 1L) {
        out[test] <- yes
      } else if (length(yes) == length(test)) {
        wtest <- which(test) # faster than test directly
        out[wtest] <- yes[wtest]
      } else {
        return(.new_ifelse(test, yes, no))
      }
    }
    if (!is.null(attributes_of_test)) {
      attributes(out) <- attributes_of_test
    }
    
    out
  } else {
    ans <- test
    ok <- !is.na(test)
    if (any(test[ok]))
      ans[test & ok] <- rep(yes, length.out = length(ans))[test & ok]
    if (any(!test[ok]))
      ans[!test & ok] <- rep(no, length.out = length(ans))[!test & ok]
    ans
  }
}

Unit tests

test_identicals <- function(test, yes, no) {
  old <- old_ifelse(test, yes, no)
  new <- new_ifelse(test, yes, no)
  if (!identical(old, new)) {
    # Will take a few minutes otherwise
    if (length(old)  + length(new) < 50) {
      print(old)
      print(new)
    }
    cat("Non-identical result\n")
    return(invisible(list(old = old, 
                          new = new)))
  }
}

# Attributes?
test1 <- c(a = FALSE, b = FALSE, c = TRUE, d = TRUE, e = TRUE)
test_identicals(test1, 1:4 + 0.0, 3)
yes1 <- 1:4 + 0
names(yes1) <- "srcref"
test_identicals(test1, yes1, 2:3)

test2 <- sample(test1, size = 1e7, replace = TRUE)
test_identicals(test2, 1:4 + 0.0, 3)
test2[5e5] <- NA
test_identicals(test2, 1:4 + 0.0, 3)

As aformentioned, S4 classes did not retain attributes:

if (requireNamespace("Matrix", quietly = TRUE)) {
  library(Matrix)
  M <- Matrix(-10 + 1:28, 4, 7)
  test_identicals(M, 1, 2)
}

## Non-identical result

These examples simply fall back to the defaults.

test_identicals(c(TRUE, TRUE), "a", raw(0x95))
test_identicals(c(TRUE, TRUE), "a", factor("a"))
test_identicals(c(FALSE, FALSE), "a", factor("a"))
test_identicals(c(FALSE, TRUE), 1 , factor("a"))
test_identicals(c(FALSE, TRUE, NA), 1, factor("a"))

A new ifelse

Hugh Parsonage

3 May 2018

Summary

Current `ifelse`:

Unit tests

Benchmarks

A new ifelse

Hugh Parsonage

3 May 2018

Summary

Current ifelse:

Unit tests

Benchmarks

Current `ifelse`: