diff --git a/NEWS.md b/NEWS.md index 45a36dcf0..9de21ce6f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -56,6 +56,8 @@ 11. `between()` now supports `Date` and `IDate` bounds with default `NAbounds=TRUE`, avoiding errors like "Not yet implemented NAbounds=TRUE for this non-numeric and non-character type" when date bounds contain `NA`, [#7281](https://github.com/Rdatatable/data.table/issues/7281). Thanks @grcatlin for the report and fix, and @ben-schwen and @aitap for assistance. +12. `print.data.table()` now truncates long character columns and list-column summaries by default to avoid horizontal console overflow, [#7718](https://github.com/Rdatatable/data.table/issues/7718). When `datatable.prettyprint.char` is `NULL` (the default), the truncation limit is now dynamically calculated based on the available console width. Use `options(datatable.prettyprint.char=Inf)` for the old default behavior (never truncate). Thanks @tdhock for the report and @venom1204 for the fix. + ### Notes 1. {data.table} now depends on R 3.5.0 (2018). diff --git a/R/print.data.table.R b/R/print.data.table.R index 88ff4ea50..ffb6bc522 100644 --- a/R/print.data.table.R +++ b/R/print.data.table.R @@ -88,7 +88,13 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"), } require_bit64_if_needed(x) classes = classes1(toprint) - toprint=format.data.table(toprint, na.encode=FALSE, timezone = timezone, ...) # na.encode=FALSE so that NA in character cols print as + + trunc.char = getOption("datatable.prettyprint.char") + if (is.null(trunc.char)) { + rn_w = if (isTRUE(row.names)) nchar(as.character(max(rn))) + 2L else 0L + trunc.char = max(0L, getOption("width") - rn_w - 3L) + } + toprint=format.data.table(toprint, na.encode=FALSE, timezone = timezone, trunc.char = trunc.char, ...) 
# na.encode=FALSE so that NA in character cols print as # FR #353 - add row.names = logical argument to print.data.table if (isTRUE(row.names)) rownames(toprint)=paste0(format(rn,right=TRUE,scientific=FALSE),":") else rownames(toprint)=rep.int("", nrow(toprint)) @@ -155,12 +161,12 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"), invisible(x) } -format.data.table = function(x, ..., justify="none") { +format.data.table = function(x, ..., trunc.char = getOption("datatable.prettyprint.char"), justify="none") { if (is.atomic(x) && !is.null(x)) { ## future R can use if (is.atomic(x)) stopf("Internal structure doesn't seem to be a list. Possibly corrupt data.table.") } - do.call(cbind, lapply(x, format_col, ..., justify=justify)) + do.call(cbind, lapply(x, format_col, ..., trunc.char = trunc.char, justify=justify)) } shouldPrint = function(x) { @@ -198,13 +204,13 @@ has_format_method = function(x) { any(vapply_1b(class(x), f)) } -format_col.default = function(x, ...) { +format_col.default = function(x, ..., trunc.char = getOption("datatable.prettyprint.char")) { if (!is.null(dim(x))) "" else if (is.list(x)) - vapply_1c(x, format_list_item, ...) + vapply_1c(x, format_list_item, ..., trunc.char = trunc.char) else - format(char.trunc(x), ...) # relevant to #37 + format(char.trunc(x, trunc.char = trunc.char), ...) # relevant to #37 } # #2842 -- different columns can have different tzone, so force usage in output @@ -221,20 +227,21 @@ format_col.POSIXct = function(x, ..., timezone=FALSE) { } # #3011 -- expression columns can wrap to newlines which breaks printing -format_col.expression = function(x, ...) format(char.trunc(as.character(x)), ...) +format_col.expression = function(x, ..., trunc.char = getOption("datatable.prettyprint.char")) { + format(char.trunc(as.character(x), trunc.char = trunc.char), ...) +} -format_list_item.default = function(x, ...) 
{ - if (is.null(x)) # NULL item in a list column - "[NULL]" # not '' or 'NULL' to distinguish from those "common" string values in data - else if (is.atomic(x) || inherits(x, "formula")) # FR #2591 - format.data.table issue with columns of class "formula" - paste(c(format(head(x, 6L), ...), if (length(x) > 6L) sprintf("...[%d]", length(x))), collapse=",") # fix for #5435, #37, and #605 - format has to be added here... +format_list_item.default = function(x, ..., trunc.char = getOption("datatable.prettyprint.char")) { + res = if (is.null(x)) # NULL item in a list column + "[NULL]" + else if (is.atomic(x) || inherits(x, "formula")) + paste(c(format(head(x, 6L), ...), if (length(x) > 6L) sprintf("...[%d]", length(x))), collapse=",") else if (has_format_method(x) && length(formatted<-format(x, ...))==1L) { - # the column's class does not have a format method (otherwise it would have been used by format_col and this - # format_list_item would not be reached) but this particular list item does have a format method so use it formatted } else { paste0("<", class1(x), paste_dims(x), ">") } + char.trunc(res, trunc.char = trunc.char) } # #6592 -- nested 1-column frames breaks printing @@ -247,12 +254,14 @@ format_list_item.data.frame = function(x, ...) { # Current implementation may have issues when dealing with strings that have combinations of full-width and half-width characters, # if this becomes a problem in the future, we could consider string traversal instead. 
char.trunc = function(x, trunc.char = getOption("datatable.prettyprint.char")) { + if (is.null(trunc.char)) return(x) trunc.char = max(0L, suppressWarnings(as.integer(trunc.char[1L])), na.rm=TRUE) if (!is.character(x) || trunc.char <= 0L) return(x) - nchar_width = nchar(x, 'width') # Check whether string is full-width or half-width, #5096 - nchar_chars = nchar(x, 'char') + nchar_width = nchar(x, 'width', allowNA = TRUE) + nchar_chars = nchar(x, 'char', allowNA = TRUE) is_full_width = nchar_width > nchar_chars - idx = !is.na(x) & pmin(nchar_width, nchar_chars) > trunc.char + is_full_width[is.na(is_full_width)] = FALSE + idx = !is.na(x) & !is.na(nchar_width) & pmin(nchar_width, nchar_chars) > trunc.char x[idx] = paste0(strtrim(x[idx], trunc.char * fifelse(is_full_width[idx], 2L, 1L)), "...") x } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 687bf929e..2de4d7544 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21660,3 +21660,12 @@ test(2374.08, key(DT[, .(a, a)]), NULL) test(2374.09, key(subset(DT, select=c(a, a))), NULL) DT = data.table(a=1:2, a.1=3:4, val=10:11) test(2374.10, key(DT[, .(a.1, sum(val)), keyby=.(a, a)]), NULL) + +# print.data.table truncates long character columns based on width +test(2375.1, capture.output(print(data.table(x="1234567890"))), output="1234...", options=list(width=10, datatable.prettyprint.char=NULL)) +test(2375.11, print(data.table(x="1234567890")), output="1234567890", options=list(width=10, datatable.prettyprint.char=Inf)) +test(2375.2, capture.output(print(data.table(L=list(1:20)))), output="1,2,3,4,...", options=list(width=15, datatable.prettyprint.char=NULL)) +test(2375.3, capture.output(print(data.table(x=c("short", "abcdefghijklmnopqrstuvwxyz")))), output="abcdefghijklmn...", options=list(width=20, datatable.prettyprint.char=NULL)) +test(2375.4, capture.output(print(data.table(x="abcdefghijklmnopqrstuvwxyz"))), output="abcdefghijklmnopqrstuvwxyz", options=list(width=200, 
datatable.prettyprint.char=NULL)) +test(2375.5, capture.output(print(data.table(id=1L, score=99.1, txt="abcdefghijklmnopqrstuvwxyz"))), output="abcdefghijklmn...", options=list(width=20, datatable.prettyprint.char=NULL)) +test(2375.6, tail(capture.output(print(data.table(x=rep("ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1000000)), topn=1)), 1), output="1000000: ABCDEFGHIJKLM...", options=list(width=25, datatable.prettyprint.char=NULL)) diff --git a/man/print.data.table.Rd b/man/print.data.table.Rd index 7d51a55fc..442194b23 100644 --- a/man/print.data.table.Rd +++ b/man/print.data.table.Rd @@ -30,17 +30,18 @@ timezone=FALSE, \dots) format_col(x, \dots) - \method{format_col}{default}(x, \dots) + \method{format_col}{default}(x, \dots, trunc.char = getOption("datatable.prettyprint.char")) \method{format_col}{POSIXct}(x, \dots, timezone=FALSE) - \method{format_col}{expression}(x, \dots) + \method{format_col}{expression}(x, \dots, trunc.char = getOption("datatable.prettyprint.char")) format_list_item(x, \dots) - \method{format_list_item}{default}(x, \dots) + \method{format_list_item}{default}(x, \dots, trunc.char = getOption("datatable.prettyprint.char")) } \arguments{ \item{x}{ A \code{data.table}. } \item{topn}{ The number of rows to be printed from the beginning and end of tables with more than \code{nrows} rows. } \item{nrows}{ The number of rows which will be printed before truncation is enforced. } + \item{trunc.char}{ The maximum number of characters shown for each element of a character column or list-column summary; longer values are cut and suffixed with \code{"..."}. Defaults to \code{getOption("datatable.prettyprint.char")}. When that option is \code{NULL} (the default), \code{print.data.table} derives the limit from \code{getOption("width")} less the room needed for the row labels, whereas the \code{format_col} and \code{format_list_item} methods leave values untruncated. Use \code{Inf} to never truncate. } \item{class}{ If \code{TRUE}, the resulting output will include above each column its storage class (or a self-evident abbreviation thereof). When combined with \code{col.names="auto"} and tables >20 rows, classes will also appear at the bottom.} \item{row.names}{ If \code{TRUE}, row indices will be printed alongside \code{x}. 
} \item{col.names}{ One of three flavours for controlling the display of column names in output. \code{"auto"} includes column names above the data, as well as below the table if \code{nrow(x) > 20} (when \code{class=TRUE}, column classes will also appear at the bottom). \code{"top"} excludes this lower register when applicable, and \code{"none"} suppresses column names altogether (as well as column classes if \code{class = TRUE}. } @@ -134,5 +135,13 @@ iris_agg = iris[ , .(reg = list(lm(Sepal.Length ~ Petal.Length))), by = Species] format_list_item.lm = function(x, ...) sprintf('<lm:%s>', format(x$call$formula)) print(iris_agg) + + # Truncation based on console width + old = options(width = 25, datatable.prettyprint.char = NULL) + data.table(x = "abcdefghijklmnopqrstuvwxyz", L = list(1:25)) + + # Dynamic truncation: Content shrinks as row labels grow + print(data.table(x = rep("ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1e6)), topn = 1) + options(old) }