diff --git a/DESCRIPTION b/DESCRIPTION
index bc5e3973..b2e7bc2b 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -5,7 +5,7 @@ Description: Base 'DataSHIELD' functions for the server side. 'DataSHIELD' is a
been designed to only share non disclosive summary statistics, with built in automated output
checking based on statistical disclosure control. With data sites setting the threshold values for
the automated output checks. For more details, see 'citation("dsBase")'.
-Version: 6.3.5.9000
+Version: 6.4.0.9000
Authors@R: c(person(given = "Paul",
family = "Burton",
role = c("aut"),
@@ -71,10 +71,16 @@ Imports:
gamlss,
gamlss.dist,
mice,
- childsds
+ childsds,
+ purrr,
+ tibble,
+ tidyselect,
+ stats,
+ lubridate,
+ tidytable
Suggests:
spelling,
testthat
-RoxygenNote: 7.3.3
+RoxygenNote: 8.0.0
Encoding: UTF-8
Language: en-GB
diff --git a/NAMESPACE b/NAMESPACE
index e52e5d10..0474b2b1 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -3,6 +3,7 @@
export(BooleDS)
export(absDS)
export(asCharacterDS)
+export(asDataFrameDS)
export(asDataMatrixDS)
export(asFactorDS1)
export(asFactorDS2)
@@ -35,13 +36,19 @@ export(dataFrameFillDS)
export(dataFrameSortDS)
export(dataFrameSubsetDS1)
export(dataFrameSubsetDS2)
+export(dateDS)
export(densityGridDS)
export(dimDS)
export(dmtC2SDS)
export(elsplineDS)
export(extractQuantilesDS1)
export(extractQuantilesDS2)
+export(fixClassDS)
+export(fixColsDS)
+export(fixLevelsDS)
export(gamlssDS)
+export(getAllLevelsDS)
+export(getClassAllColsDS)
export(getWGSRDS)
export(glmDS1)
export(glmDS2)
@@ -93,6 +100,7 @@ export(minMaxRandDS)
export(namesDS)
export(nsDS)
export(numNaDS)
+export(predictDS)
export(qlsplineDS)
export(quantileMeanDS)
export(rBinomDS)
@@ -112,8 +120,10 @@ export(recodeValuesDS)
export(repDS)
export(replaceNaDS)
export(rmDS)
+export(roundDS)
export(rowColCalcDS)
export(sampleDS)
+export(scaleDS)
export(scatterPlotDS)
export(seqDS)
export(setSeedDS)
@@ -139,5 +149,14 @@ import(dplyr)
import(gamlss)
import(gamlss.dist)
import(mice)
+importFrom(dplyr,"%>%")
+importFrom(dplyr,across)
+importFrom(dplyr,mutate)
+importFrom(dplyr,select)
importFrom(gamlss.dist,pST3)
importFrom(gamlss.dist,qST3)
+importFrom(purrr,map)
+importFrom(purrr,set_names)
+importFrom(tibble,as_tibble)
+importFrom(tidyselect,all_of)
+importFrom(tidyselect,peek_vars)
diff --git a/R/BooleDS.R b/R/BooleDS.R
index 0f54dfca..3a905953 100644
--- a/R/BooleDS.R
+++ b/R/BooleDS.R
@@ -30,127 +30,77 @@ BooleDS <- function(V1.name=NULL, V2.name=NULL, Boolean.operator.n=NULL, na.assi
# Check Permissive Privacy Control Level.
dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot'))
-#########################################################################
-# DataSHIELD MODULE: CAPTURE THE nfilter SETTINGS #
-thr <- dsBase::listDisclosureSettingsDS() #
-#nfilter.tab<-as.numeric(thr$nfilter.tab) #
-#nfilter.glm<-as.numeric(thr$nfilter.glm) #
-#nfilter.subset<-as.numeric(thr$nfilter.subset) #
-#nfilter.string<-as.numeric(thr$nfilter.string) #
-#nfilter.stringShort<-as.numeric(thr$nfilter.stringShort) #
-#nfilter.kNN<-as.numeric(thr$nfilter.kNN) #
-#datashield.privacyLevel<-as.numeric(thr$datashield.privacyLevel) #
-#########################################################################
-
-
-#V1: numeric, factor or logical vector or scalar in .GlobalEnv
-#V2: numeric, factor or logical vector or scalar in .GlobalEnv or client specified scalar with which to compare V1
-
-#EVAL V1 and V2
-
-##########CHECK NOT LONG SPECIFIED VECTOR##############
-
-V1<-eval(parse(text=V1.name), envir = parent.frame())
-V2<-eval(parse(text=V2.name), envir = parent.frame())
-
-
-if(is.character(V1)){
- studysideMessage<-"FAILED: V_i is character, please convert to numeric, factor or logical before running Boole"
- stop(studysideMessage, call. = FALSE)
- }
-
-if(is.character(V2)){
- studysideMessage<-"FAILED: V_ii is character, please convert to numeric, factor or logical before running Boole"
- stop(studysideMessage, call. = FALSE)
- }
-
-V1.length<-length(V1)
-V2.length<-length(V2)
-
-if(!((V1.length == V2.length) | (V2.length==1))){
- studysideMessage<-"FAILED: V_ii must either be of length one or of length equal to V_i"
- stop(studysideMessage, call. = FALSE)
-}
-
-if(!is.numeric(Boolean.operator.n) | Boolean.operator.n==0){
- studysideMessage<-"FAILED: Boolean.operator specified incorrectly. Must be: '==', '!=', '<', '<=', '>' or '>='"
- stop(studysideMessage, call. = FALSE)
-}
-
-Boolean.operator<-" "
-if(Boolean.operator.n==1) Boolean.operator<-"=="
-if(Boolean.operator.n==2) Boolean.operator<-"!="
-if(Boolean.operator.n==3) Boolean.operator<-"<"
-if(Boolean.operator.n==4) Boolean.operator<-"<="
-if(Boolean.operator.n==5) Boolean.operator<-">"
-if(Boolean.operator.n==6) Boolean.operator<-">="
-
-
-#APPLY BOOLEAN OPERATOR SPECIFIED
-
-Boolean.indicator<-integer(length=V1.length)
-
-#EVALUATE DIFFERENTLY IF V2 IS SAME LENGTH AS V1 OR OF LENGTH 1
-if(V2.length==V1.length){
-for(j in 1:V1.length){
-command.text<-paste0(V1.name,"[",j,"] ",Boolean.operator," ",V2.name,"[",j,"]")
-Boolean.indicator[j]<-eval(parse(text=command.text), envir = parent.frame())*1
-}
-}
-
-if(V2.length==1){
-for(j in 1:V1.length){
-command.text<-paste0(V1.name,"[",j,"] ",Boolean.operator," ",V2.name)
-Boolean.indicator[j]<-eval(parse(text=command.text), envir = parent.frame())*1
-}
-}
-
-
-#BY DEFAULT NAs REMAIN AS NAs BUT IF YOU WANT TO YOU CAN FORCE THEM TO 1 OR 0 USING ARGUMENT
-
-if(na.assign.text=="1"){
-Boolean.indicator[is.na(Boolean.indicator)==1]<-1
-}
-
-if(na.assign.text=="0"){
-Boolean.indicator[is.na(Boolean.indicator)==1]<-0
-}
-
-
-outobj.b<-as.logical(Boolean.indicator)
-outobj<-Boolean.indicator
-
-
-
-#COMMENT OUT THIS CODE BLOCK BECAUSE TESTS OF MINIMUM CELL SIZE SHOULD ALL BE
-#ENACTED IN AGGREGATE FUNCTIONS. NO VECTOR IS DISCLOSIVE UNTIL IT RETURNS
-#SOMETHING TO THE CLIENTSIDE. I AM LEAVING THIS COMMENTED BUT UNDELETED
-#IN CASE WE LATER DECIDE TO CHANGE THIS STRATEGY
-#CHECK OUTPUT VECTOR VALIDITY
-# outobj.invalid<-0
-#
-# unique.values.outobj<-unique(outobj)
-# unique.values.noNA.outobj<-unique.values.outobj[complete.cases(unique.values.outobj)]
-#
-# #Boolean and can therefore only be binary so check this:
-# if(length(unique.values.noNA.outobj)>2) outobj.invalid<-1
-#
-# tabvar<-table(outobj,useNA="no")[table(outobj,useNA="no")>=1]
-# min.category<-min(tabvar)
-# if(min.category' or '>='"
+ stop(studysideMessage, call. = FALSE)
+ }
+
+ Boolean.operator <- " "
+ if(Boolean.operator.n==1) Boolean.operator <- "=="
+ if(Boolean.operator.n==2) Boolean.operator <- "!="
+ if(Boolean.operator.n==3) Boolean.operator <- "<"
+ if(Boolean.operator.n==4) Boolean.operator <- "<="
+ if(Boolean.operator.n==5) Boolean.operator <- ">"
+ if(Boolean.operator.n==6) Boolean.operator <- ">="
+
+ # APPLY BOOLEAN OPERATOR SPECIFIED
+ Boolean.indicator <- integer(length=V1.length)
+
+ # EVALUATE DIFFERENTLY IF V2 IS SAME LENGTH AS V1 OR OF LENGTH 1
+ if(V2.length==V1.length){
+ for(j in 1:V1.length){
+ command.text <- paste0(V1.name,"[",j,"] ",Boolean.operator," ",V2.name,"[",j,"]")
+ Boolean.indicator[j]<-eval(parse(text=command.text), envir = parent.frame())*1
+ }
+ }
+
+ if(V2.length==1){
+ for(j in 1:V1.length){
+ command.text<-paste0(V1.name,"[",j,"] ",Boolean.operator," ",V2.name)
+ Boolean.indicator[j]<-eval(parse(text=command.text), envir = parent.frame())*1
+ }
+ }
+
+ # BY DEFAULT NAs REMAIN AS NAs BUT IF YOU WANT TO YOU CAN FORCE THEM TO 1 OR 0 USING ARGUMENT
+ if(na.assign.text=="1"){
+ Boolean.indicator[is.na(Boolean.indicator)==1]<-1
+ }
+
+ if(na.assign.text=="0"){
+ Boolean.indicator[is.na(Boolean.indicator)==1]<-0
+ }
+
+ outobj.b <- as.logical(Boolean.indicator)
+ outobj <- Boolean.indicator
+
+ if(numeric.output==TRUE){
+ Boole.obj <- outobj
+ }else{
+ Boole.obj <- outobj.b
+ }
+
+ return(Boole.obj)
}
-#ASSIGN FUNCTION
+# ASSIGN FUNCTION
# BooleDS
diff --git a/R/asDataFrameDS.R b/R/asDataFrameDS.R
new file mode 100644
index 00000000..0e31cdd2
--- /dev/null
+++ b/R/asDataFrameDS.R
@@ -0,0 +1,29 @@
+#' @title asDataFrameDS a serverside assign function called by ds.asDataFrame
+#' @description Coerces an R object into a matrix maintaining original
+#' class for all columns in data.frames.
+#' @details This assign function is based on the native R function \code{data.frame}
+#' @param x.name the name of the input object to be coerced to class
+#' data.frame. Must be specified in inverted commas. But this argument is
+#' usually specified directly by argument of the clientside function
+#' \code{ds.asDataFrame}
+#' @return the object specified by the argument (or its default name
+#' "asdataframe.newobj") which is written to the serverside. For further
+#' details see help on the clientside function \code{ds.asDataMatrix}
+#' @author Tim Cadman
+#' @export
+asDataFrameDS <- function (x.name){
+
+ if(is.character(x.name)){
+ x<-eval(parse(text=x.name), envir = parent.frame())
+
+ }else{
+ studysideMessage<-"ERROR: x.name must be specified as a character string"
+ stop(studysideMessage, call. = FALSE)
+ }
+
+ output <- data.frame(x)
+
+ return(output)
+}
+# ASSIGN FUNCTION
+# asDataFrameDS
diff --git a/R/asDataMatrixDS.R b/R/asDataMatrixDS.R
index 3fff528b..2980965d 100644
--- a/R/asDataMatrixDS.R
+++ b/R/asDataMatrixDS.R
@@ -1,4 +1,4 @@
-#' @title asDataMatrixDS a serverside assign function called by ds.asDataMatrix
+#' @title asDataFrameDS a serverside assign function called by ds.asDataFrame
#' @description Coerces an R object into a matrix maintaining original
#' class for all columns in data.frames.
#' @details This assign function is based on the native R function \code{data.matrix}
@@ -8,13 +8,13 @@
#' the data.frame to a matrix but maintains all data columns in their
#' original class
#' @param x.name the name of the input object to be coerced to class
-#' data.matrix. Must be specified in inverted commas. But this argument is
+#' data.frame. Must be specified in inverted commas. But this argument is
#' usually specified directly by argument of the clientside function
-#' \code{ds.asDataMatrix}
+#' \code{ds.asDataFrame}
#' @return the object specified by the argument (or its default name
-#' "asdatamatrix.newobj") which is written to the serverside. For further
+#' "asdataframe.newobj") which is written to the serverside. For further
#' details see help on the clientside function \code{ds.asDataMatrix}
-#' @author Paul Burton for DataSHIELD Development Team
+#' @author Tim Cadman
#' @export
asDataMatrixDS <- function(x.name) {
if (is.character(x.name)) {
@@ -24,9 +24,9 @@ asDataMatrixDS <- function(x.name) {
stop(studysideMessage, call. = FALSE)
}
- output <- data.matrix(x)
+ output <- data.frame(x)
return(output)
}
# ASSIGN FUNCTION
-# asDataMatrixDS
+# asDataFrameDS
diff --git a/R/asFactorDS1.R b/R/asFactorDS1.R
index 6d4f0507..7a3a4e1a 100644
--- a/R/asFactorDS1.R
+++ b/R/asFactorDS1.R
@@ -10,6 +10,7 @@
#' @export
#'
asFactorDS1 <- function(input.var.name=NULL){
+
##################################################################
#MODULE 1: CAPTURE THE nfilter SETTINGS #
thr<-dsBase::listDisclosureSettingsDS() #
@@ -26,27 +27,29 @@ asFactorDS1 <- function(input.var.name=NULL){
input.var <- eval(parse(text=input.var.name), envir = parent.frame())
factor.levels.present.in.source <- levels(factor(input.var))
- num.levels<-length(factor.levels.present.in.source)
+ num.levels <- length(factor.levels.present.in.source)
max.levels.by.density<-nfilter.levels.density*length(input.var)
- if(num.levels>nfilter.levels.max)
- {
- error.message<-
- paste0("FAILED: this variable has too many levels and may be disclosive. It exceeds the max number of levels allowed by nfilter.levels.max: that is ",nfilter.levels.max,". In this study this variable has ",num.levels," factor levels")
+ if(num.levels > nfilter.levels.max){
+ error.message <- paste0("FAILED: this variable has too many levels and may be disclosive.
+ It exceeds the max number of levels allowed by nfilter.levels.max:
+ that is ", nfilter.levels.max, ". In this study this variable has ",
+ num.levels," factor levels")
stop(error.message, call. = FALSE)
}
- if(num.levels>(length(input.var)*nfilter.levels.density))
- {
- error.message<-
- paste0("FAILED: this variable has too many levels and may be disclosive. The number of factor levels must not exceed ", (nfilter.levels.density*100), "% of the length of the variable being converted to a factor. The max number of levels in this study is therefore ",max.levels.by.density," but this variable has ",num.levels," factor levels")
+ if(num.levels > (length(input.var)*nfilter.levels.density)){
+ error.message <- paste0("FAILED: this variable has too many levels and may be disclosive.
+ The number of factor levels must not exceed ", (nfilter.levels.density*100),
+ "% of the length of the variable being converted to a factor. The max number
+ of levels in this study is therefore ",max.levels.by.density," but this
+ variable has ", num.levels, " factor levels")
stop(error.message, call. = FALSE)
}
return(factor.levels.present.in.source)
}
-#AGGREGATE FUNCTION
+# AGGREGATE FUNCTION
# asFactorDS1
-
diff --git a/R/asFactorDS2.R b/R/asFactorDS2.R
index 74af1d67..6fecabfe 100644
--- a/R/asFactorDS2.R
+++ b/R/asFactorDS2.R
@@ -4,7 +4,7 @@
#' a factor type that presented as a vector or as a matrix with dummy variables.
#' @details The functions converts the input variable into a factor which is presented as a vector
#' if the \code{fixed.dummy.vars} is set to FALSE or as a matrix with dummy variables if the
-#' \code{fixed.dummy.vars} is set to TRUE (see the help file of ds.asFactor.b for more details).
+#' \code{fixed.dummy.vars} is set to TRUE (see the help file of ds.asFactor for more details).
#' @param input.var.name the name of the variable that is to be converted to a factor.
#' @param all.unique.levels.transmit the levels that the variable will be transmitted to.
#' @param fixed.dummy.vars a boolean that determines whether the new object will be represented as
diff --git a/R/asListDS.R b/R/asListDS.R
index 31da5f0b..0029526b 100644
--- a/R/asListDS.R
+++ b/R/asListDS.R
@@ -1,3 +1,4 @@
+#'
#' @title asListDS a serverside aggregate function called by ds.asList
#' @description Coerces an R object into a list
#' @details Unlike most other class coercing functions this is
@@ -21,25 +22,26 @@
#' the class of the output object should usually be 'list'
#' @author Amadou Gaye, Paul Burton for DataSHIELD Development Team
#' @export
+#'
asListDS <- function (x.name, newobj){
newobj.class <- NULL
if(is.character(x.name)){
- active.text<-paste0(newobj,"<-as.list(",x.name,")")
+ active.text <- paste0(newobj,"<-as.list(",x.name,")")
eval(parse(text=active.text), envir = parent.frame())
- active.text2<-paste0("class(",newobj,")")
+ active.text2 <- paste0("class(",newobj,")")
assign("newobj.class", eval(parse(text=active.text2), envir = parent.frame()))
}else{
- studysideMessage<-"ERROR: x.name must be specified as a character string"
+ studysideMessage <- "ERROR: x.name must be specified as a character string"
stop(studysideMessage, call. = FALSE)
}
- return.message<-paste0("New object <",newobj,"> created")
- object.class.text<-paste0("Class of <",newobj,"> is '",newobj.class,"'")
+ return.message <- paste0("New object <", newobj, "> created")
+ object.class.text <- paste0("Class of <", newobj, "> is '", newobj.class, "'")
- return(list(return.message=return.message,class.of.newobj=object.class.text))
+ return(list(return.message=return.message, class.of.newobj=object.class.text))
}
# AGGEGATE FUNCTION
# asListDS
diff --git a/R/asLogicalDS.R b/R/asLogicalDS.R
index 4a1725f5..26199153 100644
--- a/R/asLogicalDS.R
+++ b/R/asLogicalDS.R
@@ -1,3 +1,4 @@
+#'
#' @title Coerces an R object into class numeric
#' @description this function is based on the native R function \code{as.numeric}
#' @details See help for function \code{as.logical} in native R
@@ -10,24 +11,25 @@
#' details see help on the clientside function \code{ds.asLogical}
#' @author Amadou Gaye, Paul Burton for DataSHIELD Development Team
#' @export
+#'
asLogicalDS <- function (x.name){
-if(is.character(x.name)){
- x<-eval(parse(text=x.name), envir = parent.frame())
+ if(is.character(x.name)){
+ x <- eval(parse(text=x.name), envir = parent.frame())
}else{
- studysideMessage<-"ERROR: x.name must be specified as a character string"
+ studysideMessage <- "ERROR: x.name must be specified as a character string"
stop(studysideMessage, call. = FALSE)
- }
+ }
- if(!is.numeric(x)&&!is.integer(x)&&!is.character(x)&&!is.matrix(x)){
- studysideMessage<-"ERROR: for ds.asLogical function, x.name must specify an input object of class numeric, integer, character or matrix"
- stop(studysideMessage, call. = FALSE)
+ if(!is.numeric(x) && !is.integer(x) && !is.character(x) && !is.matrix(x)){
+ studysideMessage <- "ERROR: for ds.asLogical function, x.name must specify an input object of class numeric, integer, character or matrix"
+ stop(studysideMessage, call. = FALSE)
}
output <- as.logical(x)
return(output)
}
-#ASSIGN FUNCTION
+# ASSIGN FUNCTION
# asLogicalDS
diff --git a/R/aucDS.R b/R/aucDS.R
index 6afda830..02d7f638 100644
--- a/R/aucDS.R
+++ b/R/aucDS.R
@@ -31,6 +31,6 @@ aucDS <- function(pred=pred, y=y){
q2 <- 2*AUC^2/(1+AUC)-AUC^2
se <- sqrt((q0+(n0-1)*q1+(n1-1)*q2)/(n0*n1))
- return(list(AUC=AUC,se=se))
+ return(list(AUC=AUC, se=se))
}
\ No newline at end of file
diff --git a/R/dateDS.R b/R/dateDS.R
new file mode 100644
index 00000000..f6b442cd
--- /dev/null
+++ b/R/dateDS.R
@@ -0,0 +1,186 @@
+#'
+#' @title dateDS
+#' @description Takes an object that is either a data-frame column or a vector, and can do extraction of
+#' components of full date (\code{extractdate}), can combine date components to a full date (\code{makedate}),
+#' or can calculate the time between two dates (\code{timebetween}).
+#'
+#' @details
+#' If the input is a data-frame column, it must be provided in the \code{x} argument as data-frame$column.
+#' Inputs for \code{extractdate} and \code{timebetween} must be date objects.
+#' For \code{makedate}, three numeric vectors (year, month, day) must be provided in the correct order.
+#' The \code{add.column} argument determines whether the result is added as a new column in the existing
+#' data-frame (\code{TRUE}), or created as a new server-side object (\code{FALSE}).
+#' Note: \code{add.column = TRUE} is only valid for data-frame inputs.
+#'
+#'
+#' @param x Character vector specifying the server-side object(s). For data-frame columns, use the format \code{df$column}.
+#' @param type Character string specifying the operation: \code{"extractdate"}, \code{"makedate"}, or \code{"timebetween"}.
+#' @param newobj Character string for the name of the object that will be created on the server. Default is \code{"date.result"}.
+#' @param unit Character string specifying the unit for \code{extractdate} or \code{timebetween}: \code{"days"}, \code{"months"}, or \code{"years"}.
+#' @param add.column Logical. If \code{FALSE}, the result is created as a new server-side object;
+#' if \code{TRUE}, the result is added as a new column in the existing data-frame. Default is \code{FALSE}.
+#'
+#'
+#' @author Zulal Bekerecioglu
+#' @return the created numeric vector or date object, or the updated dataframe with the added column
+#' @export
+#'
+#'
+dateDS <- function(x=NULL, type=NULL,
+ newobj=NULL, unit=NULL, add.column=NULL) {
+
+ add.column <- as.logical(add.column)
+
+ # If argument not in c("extractdate", "makedate", "timebetween") throw an error
+ if (!(type %in% c("extractdate", "makedate", "timebetween"))) {
+ stop("Invalid argument. Must be one of: ", paste(c("extractdate", "makedate", "timebetween"), collapse=", "))
+ }
+
+ # Check if input(s) are valid.
+ error_message <- "Input object couldn't be found. Please provide the correct format.
+ For vectors, supply an existing object name; for columns, use the format dataframe$column,
+ where 'dataframe' is the name of the data frame and 'column' is the column name,
+ and ensure that both exist."
+
+
+ # If x is NULL, throw and error.
+ if (is.null(x)) {
+ stop(error_message, call. = FALSE)
+ }
+
+ # Each argument takes a unique number of elements, check if they match.
+ if (length(x) != 1 && type=="extractdate") {
+ stop(paste0("Invalid input length for argument '", type, "'. Expected ", 1,
+ " elements, but received ", length(x), ". Please provide exactly ", 1,
+ " object name(s) or column(s)."), call. = FALSE)
+ }
+
+ if (length(x) != 3 && type=="makedate") {
+ stop(paste0("Invalid input length for argument '", type, "'. Expected ", 3,
+ " elements, but received ", length(x), ". Please provide exactly ", 3,
+ " object name(s) or column(s)."), call. = FALSE)
+ }
+
+ if (length(x) != 2 && type=="timebetween") {
+ stop(paste0("Invalid input length for argument '", type, "'. Expected ", 2,
+ " elements, but received ", length(x), ". Please provide exactly ", 2,
+ " object name(s) or column(s)."), call. = FALSE)
+ }
+
+ inputs <- vector("list", length(x))
+
+ # When add.column = TRUE, the client function ensures that at least one input is a column,
+ # and if multiple columns are provided, they all come from the same data-frame.
+ # Therefore, it is safe to use any of these data-frames as common_df on the server.
+ common_df <- NULL
+
+ for (i in seq_along(x)) {
+ element <- x[i]
+
+ # Each element must be a single string
+ if (!is.character(element) || length(element) != 1) stop(error_message, call. = FALSE)
+
+ # Check if it is a df$col reference
+ if (grepl("\\$", element, perl = TRUE)) {
+ parts <- strsplit(element, "\\$", perl = TRUE)[[1]]
+
+ # Validate both parts
+ if (length(parts) != 2 || !nzchar(parts[1]) || !nzchar(parts[2])) stop(error_message, call. = FALSE)
+
+ # Try to get the dataframe
+ df <- tryCatch(get(parts[1]), error = function(e) NULL)
+ if (is.null(df)) stop(error_message, call. = FALSE)
+
+ # Check that column exists
+ if (!(parts[2] %in% names(df))) stop(error_message, call. = FALSE)
+
+ # Save the column values in the list
+ inputs[[i]] <- df[[parts[2]]]
+ common_df <- parts[1]
+
+ } else {
+ # It's a plain object, just get it
+ obj <- tryCatch(get(element), error = function(e) NULL)
+ if (is.null(obj)) stop(error_message, call. = FALSE)
+
+ inputs[[i]] <- obj
+ }
+ }
+
+
+ # extractdate
+ # x should be a column name or object with a date format, type should be provided,
+ # a new column or object with outputcolname will be created
+ if (type == "extractdate") {
+
+ # Only one input is expected
+ date_input <- inputs[[1]]
+
+ # Extract the requested component
+ result <- switch(unit,
+ days = lubridate::day(date_input),
+ months = lubridate::month(date_input),
+ years = lubridate::year(date_input),
+ stop("Invalid unit. Must be one of: days, months, years"))
+ }
+
+
+ # makedate
+ # inputcolname should be list of 3 strings: year-month-day
+ if (type == "makedate") {
+
+ # inputs[[1]] = year, inputs[[2]] = month, inputs[[3]] = day
+ year_vec <- as.numeric(inputs[[1]])
+ month_vec <- as.numeric(inputs[[2]])
+ day_vec <- as.numeric(inputs[[3]])
+
+ # Basic plausibility checks
+ if (length(unique(sapply(list(year_vec, month_vec, day_vec), length))) != 1) {
+ stop("Inputs for 'makedate' must be of equal length.", call. = FALSE)
+ }
+
+ if (any(year_vec < 1000 | year_vec > 3000, na.rm = TRUE)) {
+ stop("The 'year' input in 'makedate' must contain plausible 4-digit years (1000-3000).
+ Check that year, month, and day are given in the correct order (year, month, day).", call. = FALSE)
+ }
+ if (any(month_vec < 1 | month_vec > 12, na.rm = TRUE)) {
+ stop("The 'month' input in 'makedate' must contain values between 1 and 12.
+ Check that year, month, and day are given in the correct order (year, month, day).", call. = FALSE)
+ }
+ if (any(day_vec < 1 | day_vec > 31, na.rm = TRUE)) {
+ stop("The 'day' input in 'makedate' must contain values between 1 and 31.
+ Check that year, month, and day are given in the correct order (year, month, day).", call. = FALSE)
+ }
+
+
+ result <- lubridate::make_date(year = year_vec,
+ month = month_vec,
+ day = day_vec)
+ }
+
+
+ # timebetween
+ # inputcolname should be a list of 2 strings: start and end date
+ if (type == "timebetween") {
+
+ # inputs[[1]] = start date, inputs[[2]] = end date
+ units <- list(
+ years = lubridate::period(years = 1),
+ months = lubridate::period(months = 1),
+ days = lubridate::period(days = 1)
+ )
+
+ result <- lubridate::interval(inputs[[1]], inputs[[2]]) %/% units[[unit]]
+ }
+
+ # Save result based on add.column
+ if (!add.column) {
+ return(result)
+ } else {
+ # Assign to common_df as a new column
+ df <- get(common_df)
+ df[[newobj]] <- result
+ return(df)
+ }
+
+}
diff --git a/R/glmSLMADS.assign.R b/R/glmSLMADS.assign.R
index 275e1f47..f7b5fff4 100644
--- a/R/glmSLMADS.assign.R
+++ b/R/glmSLMADS.assign.R
@@ -18,40 +18,25 @@
#' @export
glmSLMADS.assign <- function(formula, family, offsetName, weightsName, dataName){
-#############################################################
-#MODULE 1: CAPTURE THE nfilter SETTINGS #
-thr <- dsBase::listDisclosureSettingsDS() #
-nfilter.tab <- as.numeric(thr$nfilter.tab) #
-nfilter.glm <- as.numeric(thr$nfilter.glm) #
-#nfilter.subset<-as.numeric(thr$nfilter.subset) #
-#nfilter.string<-as.numeric(thr$nfilter.string) #
-#############################################################
-
-########################################
-############
-#Convert transmitable text for special link variance combinations back to full representation
-if(family=="quasigamma.link_log")
-{family<-"quasi(link=log,variance=mu^2)"}
-
-if(family=="Gamma.link_log")
-{family<-"Gamma(link=log)"}
-#############
-
-#Activate family object (this may not be necessary as character string may already be OK
-#but just checking
-final.family.object<-eval(parse(text=family))
-
-
-#Correctly name offset, weights and data objects in function call
-#(to allow glmPredict to work correctly later)
-calltext<-paste0("mg<-glm(formula,family=",family,",offset=",
- offsetName,",weights=",weightsName,",data=", dataName,",x=TRUE)")
-
-eval(parse(text=calltext))
-
-return(mg)
+ # Convert transmitable text for special link variance combinations back to full representation
+ if(family=="quasigamma.link_log")
+ {family<-"quasi(link=log,variance=mu^2)"}
+
+ if(family=="Gamma.link_log")
+ {family<-"Gamma(link=log)"}
+
+ # Correctly name offset, weights and data objects in function call
+ # (to allow glmPredict to work correctly later)
+ calltext <- paste0("mg<-glm(formula,family=",family,",offset=",
+ offsetName,",weights=",weightsName,",data=", dataName,",x=TRUE)")
+
+ eval(parse(text=calltext))
+
+ # update the call object to include the actual formula
+ mg$call$formula <- formula
+
+ return(mg)
}
-
# ASSIGN FUNCTION
# glmSLMADS.assign
diff --git a/R/global.R b/R/global.R
index a3c9454e..f4402723 100644
--- a/R/global.R
+++ b/R/global.R
@@ -2,4 +2,5 @@
utils::globalVariables(c('offset.to.use', 'weights.to.use', 'out.table.real', 'out.table.dim', 'out.table.dimnames', 'list.obj', 'mg',
'blackbox.output.df', 'blackbox.ranks.df', 'global.bounds.df', 'global.ranks.quantiles.df', 'sR4.df',
- 'min.max.df','sR5.df','input.mean.sd.df','input.ranks.sd.df','RS','CG','mixed','x','y','.old_seed','.Random.seed'))
+ 'min.max.df','sR5.df','input.mean.sd.df','input.ranks.sd.df','RS','CG','mixed','x','y','.old_seed','.Random.seed',
+ 'subsetByClassHelper1', 'subsetByClassHelper2', 'subsetByClassHelper3'))
diff --git a/R/minMaxRandDS.R b/R/minMaxRandDS.R
index 64caf7ae..8846147f 100644
--- a/R/minMaxRandDS.R
+++ b/R/minMaxRandDS.R
@@ -1,3 +1,4 @@
+#'
#' @title Secure ranking of "V2BR" (vector to be ranked) across all sources
#' @description Creates a minimum value that is more negative, and less positive
#' than any real value in V2BR and a maximum value that is more positive and
@@ -68,9 +69,5 @@ rand.min.max<-c(rand.min,rand.max)
return(rand.min.max)
}
-#AGGREGATE
+# AGGREGATE
# minMaxRandDS
-
-
-
-
diff --git a/R/predictDS.R b/R/predictDS.R
new file mode 100644
index 00000000..e3311f91
--- /dev/null
+++ b/R/predictDS.R
@@ -0,0 +1,137 @@
+#'
+#' @title predictDS
+#' @description Generates server-side predictions using the client-side output from \code{ds.glm}.
+#'
+#' @details
+#' This function uses the components supplied by the client-side function (coefficients, family, formula,
+#' and any categorical variables) to generate predictions on the server. To use the base R \code{predict()} function,
+#' a "dummy" glm object is created using the same model formula, family, and link function as the original model.
+#' The dummy model's coefficients are then replaced with the client-side coefficient estimates.
+#'
+#' To avoid mismatches between the factors used in the original glm and those in the dummy glm, the categorical
+#' variables saved by the client-side function are applied to the newdata.
+#'
+#' For intercept-only models, the function simply returns a vector of predicted values equal to the model intercept, with the appropriate length
+#' based on the row length of \code{newdataname}.
+#'
+#'
+#' @param newdataname A character string specifying the name of the new dataset to be used for predictions.
+#' @param traindataname A character string specifying the name of the dataset used for model training.
+#' @param type A character string specifying the type of prediction. Options are \code{"response"} or \code{"link"}.
+#' @param na.action A character string to specify the action to take if missing values are present. Default is \code{"na.pass"}.
+#'
+#'
+#' @author Zulal Bekerecioglu
+#' @return a numeric vector containing the predicted values
+#' @export
+#'
+#'
+predictDS <- function(newdataname, traindataname, type = c("response", "link"),
+ na.action = "na.pass") {
+
+ # Get the objects saved by the client function ds.predict
+ coefficients<-get("predictDS_coefficients")
+ model_formula <-get("predictDS_formula")
+ family <- get("predictDS_family")
+ categorical_variables <- get("predictDS_categorical_variables")
+
+
+
+ if(!is.null(traindataname))
+ {
+ traindata<-get(traindataname)
+
+ }else{
+ stop("'traindataname' couldn't be found, please provide a valid object name.", call.=FALSE)
+ }
+
+
+ if(!is.null(newdataname))
+ {
+ newdf<-get(newdataname)
+
+ }else{
+ stop("'newdataname' couldn't be found, please provide a valid object name.", call.=FALSE)
+ }
+
+ if (!is.character(type) || !(type %in% c("link","response"))) {
+ stop("Invalid argument. Must be one of: 'link','response'.", call. = FALSE)
+ }
+
+ # Convert the family object to it's corresponding function, i.e. poisson.link.log -> poisson(link= "log")
+ family_dist <- strsplit(family, "\\.")[[1]]
+
+ family_name <- family_dist[1] # "binomial"
+ link_name <- family_dist[3] # "logit"
+
+ family_func <- get(family_name) # gets the function binomial()
+ family_obj <- family_func(link = link_name)
+
+
+
+ # SPECIAL CASE HANDLING: y ~ 1, intercept only ######################
+ # A numeric vector will be created wit the mean, with the same length as the row number in newdataname
+ special_case <- length(attr(stats::terms(stats::formula(model_formula)), "term.labels")) == 0
+
+ if(special_case){
+ intercept <- coefficients
+
+ # If the input is just a numeric vector, get the length
+ if(all(c("numeric") %in% class(newdf))){
+ predictions.f <- rep(intercept, length(newdf))
+ } else if(all(c("data.frame") %in% class(newdf))){
+ predictions.f <- rep(intercept, nrow(newdf)) # Otherwise use the number of rows
+ } else {
+ stop("Invalid input: The object called 'newdataname' must be either a numeric vector or a data.frame.",, call. = FALSE)
+ }
+
+ if (type == "link") {
+ predictions.f <- predictions.f
+ } else if (family_name == "gaussian") { # if type is 'response'
+ # identity
+ predictions.f <- predictions.f
+ } else if (family_name == "poisson") {
+ # log
+ predictions.f <- exp(predictions.f)
+ } else if (family_name == "binomial") {
+ # logit
+ predictions.f <- stats::plogis(predictions.f)
+ } else {
+ stop("Unsupported family for intercept-only prediction: Family must be either Gaussian, Poisson, or Binomial.", call. = FALSE)
+ }
+
+ return(predictions.f)
+ }
+ # END OF SPECIAL CASE ######################
+
+
+
+ # Fix factor levels if any exists
+ for (var in categorical_variables) {
+
+ # First get all the factor levels from the train data
+ traindata[[var]] <- factor(traindata[[var]])
+
+ # Ensure new data variable is a factor with the SAME levels (this is needed if the newdf is missing some categories)
+ newdf[[var]] <- factor(newdf[[var]],
+ levels = levels(traindata[[var]]))
+ }
+
+ # Get the na.action argument
+ na.action.fun <- match.fun(na.action)
+
+ # Use a dummy glm object with the correct formula and family
+ dummy_fit <- stats::glm(model_formula,
+ data = traindata,
+ family = family_obj,
+ control = stats::glm.control(maxit = 1))
+
+ # Change its coefficients with the correct ones
+ names(coefficients) <- names(dummy_fit$coefficients)
+ dummy_fit$coefficients <- coefficients
+
+ # New predictions
+ prediction <- stats::predict(dummy_fit, newdata = newdf, type = type, na.action = na.action.fun)
+
+ return(prediction)
+}
diff --git a/R/roundDS.R b/R/roundDS.R
new file mode 100644
index 00000000..6e0d0c10
--- /dev/null
+++ b/R/roundDS.R
@@ -0,0 +1,77 @@
+#'
+#' @title roundDS
+#' @description Generates objects from a server-side object, which can be either a vector or
+#' a data-frame column. Supports five operations:
+#' 1. (\code{round})
+#' 2. (\code{ceiling})
+#' 3. (\code{floor})
+#' 4. (\code{trunc})
+#' 5. (\code{signif})
+#' where each function in baseR is applied on the server side to the specified object.
+#'
+#' @details
+#' Note: \code{add.column = TRUE} is only valid for data-frame inputs.
+#'
+#'
+#' @param x Character vector specifying the server-side object(s). For data-frame columns, use the format \code{df$column}.
+#' @param type Character string specifying the operation: \code{"round"}, \code{"ceiling"}, \code{"floor"},
+#' \code{trunc}, or \code{"signif"}.
+#' @param digits Number of digits to be used in arguments \code{"round"} and \code{"signif"}.
+#' @param add.column Logical. If \code{FALSE}, the result is created as a new server-side object;
+#' if \code{TRUE}, the result is added as a new column in the existing data-frame. Default is \code{FALSE}.
+#' @param newobj Character string for the name of the object that will be created on the server. Default is \code{"rounding.result"}.
+#'
+#'
+#' @author Zulal Bekerecioglu
+#' @return the created numeric vector or the updated dataframe with the added column
+#' @export
+#'
+#'
+roundDS <- function(x, type, digits, add.column, newobj) {
+
+ # If x is NULL, throw and error.
+ if (is.null(x)) {
+ stop("Input object couldn't be found, please provide an object for rounding in the correct format.
+ For vectors, supply an existing object name; for columns, use the format dataframe$column,
+ where 'dataframe' is the name of the data frame and 'column' is the column name,
+ and ensure that both exist.", call. = FALSE)
+ }
+
+ add.column <- as.logical(add.column)
+
+ # Check if object is a column (contains $), if it is then save the dataframe name for later use if necessary
+ if(grepl("\\$", x)) {
+ is_column <- TRUE
+ dataframe_name <- strsplit(x, "\\$")[[1]][1]
+ column_name <- strsplit(x, "\\$")[[1]][2]
+ } else {
+ is_column <- FALSE
+ }
+
+
+ if(!is.null(x)&&!(is_column))
+ {
+ object <- get(x)
+
+ } else if(!is.null(x)&&is_column){
+ df <- get(dataframe_name)
+ object <- df[[column_name]]
+ }
+
+ result <- switch(type,
+ round = round(object, digits = digits),
+ ceiling = ceiling(object),
+ floor = floor(object),
+ trunc = trunc(object),
+ signif = signif(object, digits = digits))
+
+ if(!(is_column)){ # if the object was a numerical vector, save the result in a new object
+ return(result)
+ } else if((is_column)&&!add.column){ # if the object was a column and add.column is FALSE, save the result in a new object
+ return(result)
+ } else if((is_column)&&add.column){ # if the object was a column and add.column is TRUE, save the result as a column
+ df[[newobj]] <- result
+ return(df)
+ }
+
+}
diff --git a/R/scaleDS.R b/R/scaleDS.R
new file mode 100644
index 00000000..96cbd103
--- /dev/null
+++ b/R/scaleDS.R
@@ -0,0 +1,61 @@
+#'
+#' @title scaleDS
+#' @description Generates scaled objects using a server-side object, which can be either a vector or
+#' a data-frame column.
+#'
+#' @details
+#' Note: \code{add.column = TRUE} is only valid for data-frame inputs.
+#'
+#'
+#' @param x Character string specifying the server-side vector For data-frame columns, use the format \code{df$column}.
+#' @param newobj Character string for the name of the object that will be created on the server. Default is \code{"scaled.data"}.
+#' @param add.column Logical. If \code{FALSE}, the result is created as a new server-side object;
+#' if \code{TRUE}, the result is added as a new column in the existing data-frame. Default is \code{FALSE}.
+#'
+#' @author Zulal Bekerecioglu
+#' @return the created numeric vector or the updated dataframe with the added column
+#' @export
+#'
+#'
+scaleDS <- function(x=NULL, newobj=NULL, add.column=NULL) {
+
+ add.column <- as.logical(add.column)
+
+ error_message <- "Input object couldn't found, please provide the correct format. For vectors, supply an existing object name;
+ for columns, use df$colname and ensure the dataframe and column exist."
+
+ # If x is NULL, throw and error.
+ if (is.null(x)) {
+ stop(error_message, call. = FALSE)
+ } else {
+ is_dataframe <- grepl("\\$", x)
+
+ if(is_dataframe) {
+ # Extract dataframe name
+ dataframe_name <- strsplit(x, "\\$")[[1]][1]
+ column_name <- strsplit(x, "\\$")[[1]][2]
+
+ df <-get(dataframe_name)
+ } else {
+
+ object <-get(x)
+ }
+ }
+
+
+ if(is_dataframe) {
+ result <- as.numeric(scale(df[[column_name]])) # scale the column
+ } else {
+ result <- as.numeric(scale(object)) # scale the vector
+ }
+
+ # Return the dataframe with the added column, or the new object.
+ if(is_dataframe&&add.column) {
+ df[[newobj]] <- result
+ return(df)
+ } else {
+ return(result)
+ }
+
+
+}
diff --git a/R/standardiseDfDS.R b/R/standardiseDfDS.R
new file mode 100644
index 00000000..35e15e6e
--- /dev/null
+++ b/R/standardiseDfDS.R
@@ -0,0 +1,88 @@
+#' Get the Class of All Columns in a Data Frame
+#' @param df.name A string representing the name of the data frame.
+#' @return A tibble with the class of each column in the data frame.
+#' @importFrom dplyr %>%
+#' @importFrom tibble as_tibble
+#' @importFrom purrr map
+#' @export
+getClassAllColsDS <- function(df.name){
+ df.name <- eval(parse(text = df.name), envir = parent.frame())
+ all_classes <- map(df.name, class) %>% as_tibble()
+ return(all_classes)
+}
+
+#' Change Class of Target Variables in a Data Frame
+#' @param df.name A string representing the name of the data frame.
+#' @param target_vars A character vector specifying the columns to be modified.
+#' @param target_class A character vector specifying the new classes for each column (1 = factor,
+#' 2 = integer, 3 = numeric, 4 = character, 5 = logical).
+#' @return A modified data frame with the specified columns converted to the target classes.
+#' @importFrom dplyr mutate across
+#' @importFrom tidyselect all_of
+#' @export
+fixClassDS <- function(df.name, target_vars, target_class) {
+ df <- eval(parse(text = df.name), envir = parent.frame())
+ df_transformed <- df %>%
+ mutate(
+ across(all_of(target_vars),
+ ~ .convertClass(.x, target_class[which(target_vars == cur_column())])))
+ return(df_transformed)
+}
+
+#' Convert a Vector to a Specified Class
+#' @param x The vector to be converted.
+#' @param class_name A string indicating the target class (1 = factor, 2 = integer, 3 = numeric,
+#' 4 = character, 5 = logical).
+#' @return The converted vector.
+#' @noRd
+.convertClass <- function(target_var, target_class_code) {
+ switch(target_class_code,
+ "1" = as.factor(target_var),
+ "2" = as.integer(target_var),
+ "3" = as.numeric(target_var),
+ "4" = as.character(target_var),
+ "5" = as.logical(target_var)
+ )
+}
+
+#' Add Missing Columns with NA Values
+#' @param .data A string representing the name of the data frame.
+#' @param cols A character vector specifying the columns to be added if missing.
+#' @return A modified data frame with missing columns added and filled with NA.
+#' @importFrom dplyr mutate select
+#' @importFrom tidyselect peek_vars
+#' @importFrom purrr set_names
+#' @export
+fixColsDS <- function(.data, cols) {
+ .data <- eval(parse(text = .data), envir = parent.frame())
+ missing <- setdiff(cols, colnames(.data))
+ out <- .data %>%
+ mutate(!!!set_names(rep(list(NA), length(missing)), missing)) %>%
+ select(sort(peek_vars()))
+ return(out)
+}
+
+#' Retrieve Factor Levels for Specific Columns
+#' @param df.name A string representing the name of the data frame.
+#' @param factor_vars A character vector specifying the factor columns.
+#' @return A list of factor levels for the specified columns.
+#' @importFrom tidyselect all_of
+#' @importFrom purrr map
+#' @export
+getAllLevelsDS <- function(df.name, factor_vars) {
+ df <- eval(parse(text = df.name), envir = parent.frame())
+ return(df %>% dplyr::select(all_of(factor_vars)) %>% map(levels))
+}
+
+
+#' Set Factor Levels for Specific Columns in a Data Frame
+#' @param df.name A string representing the name of the data frame to modify.
+#' @param vars A character vector specifying the columns to be modified.
+#' @param levels A named list where each element contains the levels for the corresponding factor variable.
+#' @return A modified data frame with the specified columns converted to factors with the provided levels.
+#' @export
+fixLevelsDS <- function(df.name, vars, levels) {
+ df.name <- eval(parse(text = df.name), envir = parent.frame())
+ out <- df.name %>%
+ mutate(across(all_of(vars), ~factor(., levels = levels[[dplyr::cur_column()]])))
+}
diff --git a/R/subsetByClassHelper1.R b/R/subsetByClassHelper1.R
deleted file mode 100644
index 7c288da3..00000000
--- a/R/subsetByClassHelper1.R
+++ /dev/null
@@ -1,41 +0,0 @@
-#'
-#' @title generates subsets vectors from a factor vector
-#' @description This is an internal function called by the function 'subsetByClassDS'.
-#' @details The function generates subsets if the input of 'subsetByClassDS' is a factor vector.
-#' @param xvect a vector of type factor.
-#' @param xname the name of the vector.
-#' @param filter the minimum number observation (i.e. rows) that are allowed.
-#' @return a list which contains the subsets.
-#' @keywords internal
-#' @noRd
-#' @author Gaye, A.
-#'
-subsetByClassHelper1 <- function(xvect=NULL, xname=NULL, filter=NULL){
- vectname <- xname
- subsets <- list()
- names.of.subsets <- c()
- categories <- levels(xvect)
- for(i in 1:length(categories)){
- indices <- which(xvect == as.numeric(categories[i]))
- if(!(length(indices) < filter)){
- subsets[[i]] <- xvect[indices]
- name.of.subD <- paste(vectname,".level_", categories[i], sep="")
- names.of.subsets <- append(names.of.subsets, name.of.subD)
- }else{
- # if any one category has between 0 and 'filter' observation turn subset content into missing values
- if(length(indices) == 0){
- subsets[[i]] <- xvect[-c(1:length(xvect))]
- name.of.subD <- paste(vectname,".level_", categories[i], "_EMPTY", sep="")
- }else{
- temp1 <- xvect[indices]
- temp1[1:length(temp1)] <- NA
- subsets[[i]] <- temp1
- name.of.subD <- paste(vectname,".level_", categories[i], "_INVALID", sep="")
- }
- names.of.subsets <- append(names.of.subsets, name.of.subD)
- }
- names(subsets) <- names.of.subsets
- output <- subsets
- }
- return(output)
-}
diff --git a/R/subsetByClassHelper2.R b/R/subsetByClassHelper2.R
deleted file mode 100644
index f59cc907..00000000
--- a/R/subsetByClassHelper2.R
+++ /dev/null
@@ -1,64 +0,0 @@
-#'
-#' @title generates subset tables from a data frame
-#' @description This is an internal function called by the function 'subsetByClassDS'
-#' @details The function generates subsets if the input of 'subsetByClassDS' is a data frame
-#' and if the number variables(columns) to subset by are greater than 1; i.e. this
-#' function is called if the user specified more than one variable or no variable to subset by
-#' (if no variables are specified the function 'subsetByClassDS' produces a subset for each category
-#' in each variable).
-#' @param df a data frame.
-#' @param iter the indices of columns to loop trough.
-#' @param filter the minimum number of observations (i.e. rows) that are allowed.
-#' @return a list which contains the subsets, their names and an integer that indicates how many columns were
-#' not factors.
-#' @keywords internal
-#' @noRd
-#' @author Gaye, A.
-#'
-subsetByClassHelper2 <- function(df=NULL, iter=NULL, filter=NULL){
- # various counters and temporary variables to hold info
- subsets <- list()
- names.of.subsets <- c()
- count <- 0
- nonfactorvars <- 0
- ncols <- length(colnames(df))
- for(i in iter){
- var <- df[,i]
- varname <- colnames(df)[i]
- if(is.factor(var)){
- # get the levels
- categories <- levels(var)
- # loop through the levels and generate a dataset for each level
- # if the number of observations for that level > 0 and < 'filter'
- for(j in 1:length(categories)){
- indices <- which(var == as.numeric(categories[j]))
- if(!(length(indices) < filter)){
- count <- count+1
- subD <- df[indices,]
- subsets[[count]] <- subD
- name.of.subD <- paste(varname,".level_", categories[j], sep="")
- names.of.subsets <- append(names.of.subsets, name.of.subD)
- }else{
- # if any one category has between 1 and 'filter' number of observation turn subset content into missing values
- count <- count+1
- if(length(indices) == 0){
- subsets[[count]] <- df[-c(1:dim(df)[1]),]
- name.of.subD <- paste(varname,".level_", categories[j], "_EMPTY",sep="")
- }else{
- subD <- df[indices,]
- subD[] <- NA
- subsets[[count]] <- subD
- name.of.subD <- paste(varname,".level_", categories[j], "_INVALID",sep="")
- }
- colnames(subsets[[count]]) <- colnames(df)
- names.of.subsets <- append(names.of.subsets, name.of.subD)
- }
- }
- names(subsets) <- names.of.subsets
- }else{
- # if a variable is not a factor increment the below counter
- nonfactorvars <- nonfactorvars + 1
- }
- }
- return(list(subsets, nonfactorvars))
-}
\ No newline at end of file
diff --git a/R/subsetByClassHelper3.R b/R/subsetByClassHelper3.R
deleted file mode 100644
index de401f62..00000000
--- a/R/subsetByClassHelper3.R
+++ /dev/null
@@ -1,59 +0,0 @@
-#'
-#' @title generates subset tables from a data frame
-#' @description This is an internal function called by the function 'subsetByClassDS'
-#' @details The function generates subsets if the input of 'subsetByClassDS' is a data frame
-#' and if the number of variables (columns) to subset by is 1; i.e. this
-#' function is called if the user specified one variable to subset by.
-#' @param df a data frame.
-#' @param indx1 the column index of the variable specified by the user.
-#' @param filter the minimum number of observations (i.e. rows) that are allowed.
-#' @return a list which contains the subsets, their names and an integer that indicates if
-#' the variable specified by user is a factor.
-#' @keywords internal
-#' @noRd
-#' @author Gaye, A.
-#'
-subsetByClassHelper3 <- function(df=NULL, indx1=NULL, filter=NULL){
- # various counters and temporary variables to hold info
- subsets <- list()
- names.of.subsets <- c()
- count <- 0
- nonfactorvars <- 0
- ncols <- length(colnames(df))
- var <- df[,indx1]
- varname <- colnames(df)[indx1]
- if(is.factor(var)){
- # get the levels
- categories <- levels(var)
- # loop through the levels and generate a dataset for each level
- # if the number of observations for that level > 0 and < 'filter'
- for(j in 1:length(categories)){
- indices <- which(var == as.numeric(categories[j]))
- if(!(length(indices) < filter)){
- count <- count+1
- subD <- df[indices,]
- subsets[[count]] <- subD
- name.of.subD <- paste(varname,".level_", categories[j], sep="")
- names.of.subsets <- append(names.of.subsets, name.of.subD)
- }else{
- # if any one category has between 1 and 'filter' number of observations turn subset content into missing values
- count <- count+1
- if(length(indices) == 0){
- subsets[[count]] <- df[-c(1:dim(df)[1]),]
- name.of.subD <- paste(varname,".level_", categories[j], "_EMPTY",sep="")
- }else{
- subD <- df[indices,]
- subD[] <- NA
- subsets[[count]] <- subD
- name.of.subD <- paste(varname,".level_", categories[j], "_INVALID",sep="")
- }
- colnames(subsets[[count]]) <- colnames(df)
- names.of.subsets <- append(names.of.subsets, name.of.subD)
- }
- }
- names(subsets) <- names.of.subsets
- }else{
- nonfactorvars <- 1
- }
- return(list(subsets, nonfactorvars))
-}
\ No newline at end of file
diff --git a/R/tableDS.R b/R/tableDS.R
index c190ffc3..cf636c02 100644
--- a/R/tableDS.R
+++ b/R/tableDS.R
@@ -1,3 +1,4 @@
+#'
#' @title tableDS is the first of two serverside aggregate functions
#' called by ds.table
#' @description creates 1-dimensional, 2-dimensional and 3-dimensional
@@ -34,153 +35,118 @@
#' @return For information see help for \code{ds.table}
#' @author Paul Burton for DataSHIELD Development Team, 13/11/2019
#' @export
-tableDS<-function(rvar.transmit, cvar.transmit, stvar.transmit, rvar.all.unique.levels.transmit, cvar.all.unique.levels.transmit,
- stvar.all.unique.levels.transmit, exclude.transmit, useNA.transmit, force.nfilter.transmit){
-
-
-#########################################################################
-# DataSHIELD MODULE: CAPTURE THE nfilter SETTINGS #
-thr<-dsBase::listDisclosureSettingsDS() #
-nfilter.tab<-as.numeric(thr$nfilter.tab) #
-#nfilter.glm<-as.numeric(thr$nfilter.glm) #
-#nfilter.subset<-as.numeric(thr$nfilter.subset) #
-#nfilter.string<-as.numeric(thr$nfilter.string) #
-#nfilter.stringShort<-as.numeric(thr$nfilter.stringShort) #
-#nfilter.kNN<-as.numeric(thr$nfilter.kNN) #
-#datashield.privacyLevel<-as.numeric(thr$datashield.privacyLevel) #
-#########################################################################
-
-#Force higher value of nfilter
-
-
-if(!is.null(force.nfilter.transmit))
-{
-force.nfilter.active<-eval(parse(text=force.nfilter.transmit), envir = parent.frame())
-
- if(force.nfilter.active= to nfilter.tab i.e.",nfilter.tab)
- stop(return.message, call. = FALSE)
- }
-}
-else
-{
-force.nfilter.active<-NULL
-}
-
-if(!is.null(force.nfilter.active)&&!is.na(force.nfilter.active)&&force.nfilter.active>nfilter.tab)
-{
-nfilter.tab<-force.nfilter.active
-}
-
+#'
+tableDS <- function(rvar.transmit, cvar.transmit, stvar.transmit, rvar.all.unique.levels.transmit,
+ cvar.all.unique.levels.transmit, stvar.all.unique.levels.transmit, exclude.transmit,
+ useNA.transmit, force.nfilter.transmit){
+
+ # DataSHIELD MODULE: CAPTURE THE nfilter SETTINGS
+ thr <- dsBase::listDisclosureSettingsDS()
+ nfilter.tab <- as.numeric(thr$nfilter.tab)
+
+ # Force higher value of nfilter
+ if(!is.null(force.nfilter.transmit)){
+ force.nfilter.active <- eval(parse(text=force.nfilter.transmit), envir = parent.frame())
+ if(force.nfilter.active < nfilter.tab){
+ return.message <- paste0("Failed: if force.nfilter is non-null it must be >= to nfilter.tab i.e.",
+ nfilter.tab)
+ stop(return.message, call. = FALSE)
+ }
+ }else{
+ force.nfilter.active <- NULL
+ }
+ if(!is.null(force.nfilter.active) && !is.na(force.nfilter.active) && force.nfilter.active>nfilter.tab){
+ nfilter.tab <- force.nfilter.active
+ }
-#Activate via eval when needed
-#rvar
- rvar<-eval(parse(text=rvar.transmit), envir = parent.frame())
- if(!is.factor(rvar))
- {
- rvar.all.unique.levels <- unlist(strsplit(rvar.all.unique.levels.transmit,split=","))
- rvar<-factor(as.factor(rvar), levels=rvar.all.unique.levels)
+ # Activate via eval when needed
+
+ # rvar
+ rvar <- eval(parse(text=rvar.transmit), envir = parent.frame())
+ if(!is.factor(rvar)){
+ rvar.all.unique.levels <- unlist(strsplit(rvar.all.unique.levels.transmit, split=","))
+ rvar <- factor(as.factor(rvar), levels=rvar.all.unique.levels)
}else{
- rvar.all.unique.levels <- unlist(strsplit(rvar.all.unique.levels.transmit,split=","))
- rvar<-factor(rvar, levels=rvar.all.unique.levels)
+ rvar.all.unique.levels <- unlist(strsplit(rvar.all.unique.levels.transmit, split=","))
+ rvar <- factor(rvar, levels=rvar.all.unique.levels)
}
-#cvar
- if(!is.null(cvar.transmit))
-{
- cvar<-eval(parse(text=cvar.transmit), envir = parent.frame())
- if(!is.factor(cvar))
- {
+
+ # cvar
+ if(!is.null(cvar.transmit)){
+ cvar <- eval(parse(text=cvar.transmit), envir = parent.frame())
+ if(!is.factor(cvar)){
cvar.all.unique.levels <- unlist(strsplit(cvar.all.unique.levels.transmit,split=","))
- cvar<-factor(as.factor(cvar), levels=cvar.all.unique.levels)
+ cvar <- factor(as.factor(cvar), levels=cvar.all.unique.levels)
}else{
cvar.all.unique.levels <- unlist(strsplit(cvar.all.unique.levels.transmit,split=","))
- cvar<-factor(cvar, levels=cvar.all.unique.levels)
+ cvar <- factor(cvar, levels=cvar.all.unique.levels)
}
-
-}
-else
-{
-cvar<-NULL
-}
-#stvar
-if(!is.null(stvar.transmit))
-{
- stvar<-eval(parse(text=stvar.transmit), envir = parent.frame())
- if(!is.factor(stvar))
- {
- stvar.all.unique.levels<- unlist(strsplit(stvar.all.unique.levels.transmit,split=","))
- stvar<-factor(as.factor(stvar), levels=stvar.all.unique.levels)
}else{
- stvar.all.unique.levels<- unlist(strsplit(stvar.all.unique.levels.transmit,split=","))
- stvar<-factor(stvar, levels=stvar.all.unique.levels)
+ cvar <- NULL
+ }
+
+ # stvar
+ if(!is.null(stvar.transmit)){
+ stvar <- eval(parse(text=stvar.transmit), envir = parent.frame())
+ if(!is.factor(stvar)){
+ stvar.all.unique.levels <- unlist(strsplit(stvar.all.unique.levels.transmit,split=","))
+ stvar <- factor(as.factor(stvar), levels=stvar.all.unique.levels)
+ }else{
+ stvar.all.unique.levels <- unlist(strsplit(stvar.all.unique.levels.transmit,split=","))
+ stvar <- factor(stvar, levels=stvar.all.unique.levels)
+ }
+ }else{
+ stvar <- NULL
}
-}
-else
-{
-stvar<-NULL
-}
-
-#exclude
-if(!is.null(exclude.transmit))
-{
-exclude.text<-strsplit(exclude.transmit, split=",")
-exclude<-eval(parse(text=exclude.text), envir = parent.frame())
-}
-else
-{
-exclude<-NULL
-}
-
-if(!is.null(rvar)&&!is.null(cvar)&&!is.null(stvar))
-{
-#Check cell counts valid without NAs or NaNs
-counts.valid<-TRUE
-test.outobj<-table(rvar,cvar,stvar,exclude="NaN",useNA="no")
-
-numcells<-length(test.outobj)
- for (cell in 1:numcells)
- {
- if(test.outobj[cell]>0&&test.outobj[cell]0 && test.outobj[cell]0&&test.outobj[cell]0 && test.outobj[cell]dsBase
- 6.3.5.9000
+ 6.4.0.9000
diff --git a/docs/LICENSE.html b/docs/LICENSE.html
index 5faa2d0e..f1cadf0d 100644
--- a/docs/LICENSE.html
+++ b/docs/LICENSE.html
@@ -17,7 +17,7 @@
dsBase
- 6.3.5.9000
+ 6.4.0.9000
diff --git a/docs/authors.html b/docs/authors.html
index a1aec549..3526a873 100644
--- a/docs/authors.html
+++ b/docs/authors.html
@@ -17,7 +17,7 @@
dsBase
- 6.3.5.9000
+ 6.4.0.9000
@@ -102,12 +102,12 @@
Citation
Burton P, Wilson R, Butters O, Ryser-Welch P, Westerberg A, Abarrategui L, Villegas-Diaz R, Avraam D, Marcon Y, Bishop T, Gaye A, Escribà-Montagut X, Wheater S (????).
dsBase: 'DataSHIELD' Server Side Base Functions.
-R package version 6.3.5.9000.
+R package version 6.4.0.9000.
@Manual{,
title = {dsBase: 'DataSHIELD' Server Side Base Functions},
author = {Paul Burton and Rebecca Wilson and Olly Butters and Patricia Ryser-Welch and Alex Westerberg and Leire Abarrategui and Roberto Villegas-Diaz and Demetris Avraam and Yannick Marcon and Tom Bishop and Amadou Gaye and Xavier Escribà-Montagut and Stuart Wheater},
- note = {R package version 6.3.5.9000},
+ note = {R package version 6.4.0.9000},
}
Gaye A, Marcon Y, Isaeva J, LaFlamme P, Turner A, Jones E, Minion J, Boyd A, Newby C, Nuotio M, Wilson R, Butters O, Murtagh B, Demir I, Doiron D, Giepmans L, Wallace S, Budin-Ljøsne I, Schmidt C, Boffetta P, Boniol M, Bota M, Carter K, deKlerk N, Dibben C, Francis R, Hiekkalinna T, Hveem K, Kvaløy K, Millar S, Perry I, Peters A, Phillips C, Popham F, Raab G, Reischl E, Sheehan N, Waldenberger M, Perola M, van den Heuvel E, Macleod J, Knoppers B, Stolk R, Fortier I, Harris J, Woffenbuttel B, Murtagh M, Ferretti V, Burton P (2014).
“DataSHIELD: taking the analysis to the data, not the data to the analysis.”
diff --git a/docs/index.html b/docs/index.html
index 1f70741d..34fba0b5 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -33,7 +33,7 @@
dsBase
- 6.3.5.9000
+ 6.4.0.9000
@@ -70,10 +70,10 @@
install.packages("remotes")
-remotes::install_github("datashield/dsBase", "<BRANCH>")
+remotes::install_github("datashield/dsBase", "<BRANCH>")# Install v6.3.4 with the following
-remotes::install_github("datashield/dsBase", "6.3.4")
asDataFrameDS a serverside assign function called by ds.asDataFrame
+
+
asDataFrameDS.Rd
+
+
+
+
Coerces an R object into a matrix maintaining original
+class for all columns in data.frames.
+
+
+
+
asDataFrameDS(x.name)
+
+
+
+
Arguments
+
+
+
x.name
+
the name of the input object to be coerced to class
+data.frame. Must be specified in inverted commas. But this argument is
+usually specified directly by <x.name> argument of the clientside function
+ds.asDataFrame
+
+
+
+
Value
+
the object specified by the <newobj> argument (or its default name
+"asdataframe.newobj") which is written to the serverside. For further
+details see help on the clientside function ds.asDataMatrix
+
+
+
Details
+
This assign function is based on the native R function data.frame
Takes an object that is either a data-frame column or a vector, and can do extraction of
+components of full date (extractdate), can combine date components to a full date (makedate),
+or can calculate the time between two dates (timebetween).
+
+
+
+
dateDS(x =NULL, type =NULL, newobj =NULL, unit =NULL, add.column =NULL)
+
+
+
+
Arguments
+
+
+
x
+
Character vector specifying the server-side object(s). For data-frame columns, use the format df$column.
+
+
+
type
+
Character string specifying the operation: "extractdate", "makedate", or "timebetween".
+
+
+
newobj
+
Character string for the name of the object that will be created on the server. Default is "date.result".
+
+
+
unit
+
Character string specifying the unit for extractdate or timebetween: "days", "months", or "years".
+
+
+
add.column
+
Logical. If FALSE, the result is created as a new server-side object;
+if TRUE, the result is added as a new column in the existing data-frame. Default is FALSE.
+
+
+
+
Value
+
the created numeric vector or date object, or the updated dataframe with the added column
+
+
+
Details
+
If the input is a data-frame column, it must be provided in the x argument as data-frame$column.
+Inputs for extractdate and timebetween must be date objects.
+For makedate, three numeric vectors (year, month, day) must be provided in the correct order.
+The add.column argument determines whether the result is added as a new column in the existing
+data-frame (TRUE), or created as a new server-side object (FALSE).
+Note: add.column = TRUE is only valid for data-frame inputs.
the column index of the variable specified by the user.
+
target_vars
+
A character vector specifying the columns to be modified.
-
filter
-
the minimum number of observations (i.e. rows) that are allowed.
+
target_class
+
A character vector specifying the new classes for each column (1 = factor,
+2 = integer, 3 = numeric, 4 = character, 5 = logical).
Value
-
a list which contains the subsets, their names and an integer that indicates if
-the variable specified by user is a factor.
-
-
-
Details
-
The function generates subsets if the input of 'subsetByClassDS' is a data frame
-and if the number of variables (columns) to subset by is 1; i.e. this
-function is called if the user specified one variable to subset by.
-
-
-
Author
-
Gaye, A.
+
A modified data frame with the specified columns converted to the target classes.
@@ -87,11 +77,11 @@
Author
diff --git a/docs/reference/fixColsDS.html b/docs/reference/fixColsDS.html
new file mode 100644
index 00000000..7a294694
--- /dev/null
+++ b/docs/reference/fixColsDS.html
@@ -0,0 +1,90 @@
+
+Add Missing Columns with NA Values — fixColsDS • dsBase
+
+
+
A list of factor levels for the specified columns.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/reference/getClassAllColsDS.html b/docs/reference/getClassAllColsDS.html
new file mode 100644
index 00000000..0f95926f
--- /dev/null
+++ b/docs/reference/getClassAllColsDS.html
@@ -0,0 +1,86 @@
+
+Get the Class of All Columns in a Data Frame — getClassAllColsDS • dsBase
+
+
+
A character string specifying the name of the new dataset to be used for predictions.
+
+
+
traindataname
+
A character string specifying the name of the dataset used for model training.
+
+
+
type
+
A character string specifying the type of prediction. Options are "response" or "link".
+
+
+
na.action
+
A character string to specify the action to take if missing values are present. Default is "na.pass".
+
+
+
+
Value
+
a numeric vector containing the predicted values
+
+
+
Details
+
This function uses the components supplied by the client-side function (coefficients, family, formula,
+and any categorical variables) to generate predictions on the server. To use the base R predict() function,
+a "dummy" glm object is created using the same model formula, family, and link function as the original model.
+The dummy model's coefficients are then replaced with the client-side coefficient estimates.
+
To avoid mismatches between the factors used in the original glm and those in the dummy glm, the categorical
+variables saved by the client-side function are applied to the newdata.
+
For intercept-only models, the function simply returns a vector of predicted values equal to the model intercept, with the appropriate length
+based on the row length of newdataname.
Generates objects from a server-side object, which can be either a vector or
+a data-frame column. Supports five operations:
+1. (round)
+2. (ceiling)
+3. (floor)
+4. (trunc)
+5. (signif)
+where each function in baseR is applied on the server side to the specified object.
+
+
+
+
roundDS(x, type, digits, add.column, newobj)
+
+
+
+
Arguments
+
+
+
x
+
Character vector specifying the server-side object(s). For data-frame columns, use the format df$column.
+
+
+
type
+
Character string specifying the operation: "round", "ceiling", "floor",
+trunc, or "signif".
+
+
+
digits
+
Number of digits to be used in arguments "round" and "signif".
+
+
+
add.column
+
Logical. If FALSE, the result is created as a new server-side object;
+if TRUE, the result is added as a new column in the existing data-frame. Default is FALSE.
+
+
+
newobj
+
Character string for the name of the object that will be created on the server. Default is "rounding.result".
+
+
+
+
Value
+
the created numeric vector or the updated dataframe with the added column
+
+
+
Details
+
Note: add.column = TRUE is only valid for data-frame inputs.
Generates scaled objects using a server-side object, which can be either a vector or
+a data-frame column.
+
+
+
+
scaleDS(x =NULL, newobj =NULL, add.column =NULL)
+
+
+
+
Arguments
+
+
+
x
+
Character string specifying the server-side vector For data-frame columns, use the format df$column.
+
+
+
newobj
+
Character string for the name of the object that will be created on the server. Default is "scaled.data".
+
+
+
add.column
+
Logical. If FALSE, the result is created as a new server-side object;
+if TRUE, the result is added as a new column in the existing data-frame. Default is FALSE.
+
+
+
+
Value
+
the created numeric vector or the updated dataframe with the added column
+
+
+
Details
+
Note: add.column = TRUE is only valid for data-frame inputs.
+
+
+
Author
+
Zulal Bekerecioglu
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/reference/scatterPlotDS.html b/docs/reference/scatterPlotDS.html
index e7391a39..858452d4 100644
--- a/docs/reference/scatterPlotDS.html
+++ b/docs/reference/scatterPlotDS.html
@@ -18,7 +18,7 @@
dsBase
- 6.3.5.9000
+ 6.4.0.9000
diff --git a/docs/reference/seqDS.html b/docs/reference/seqDS.html
index 5436c17f..ca737326 100644
--- a/docs/reference/seqDS.html
+++ b/docs/reference/seqDS.html
@@ -17,7 +17,7 @@
dsBase
- 6.3.5.9000
+ 6.4.0.9000
diff --git a/docs/reference/setFilterDS.html b/docs/reference/setFilterDS.html
index dc702f6d..252a4d6f 100644
--- a/docs/reference/setFilterDS.html
+++ b/docs/reference/setFilterDS.html
@@ -19,7 +19,11 @@
dsBase
+<<<<<<< HEAD
+ 6.4.0.9000
+=======
6.3.3
+>>>>>>> origin/v6.3.3-dev
diff --git a/docs/reference/setSeedDS.html b/docs/reference/setSeedDS.html
index 3f93d5d5..f7fbf96f 100644
--- a/docs/reference/setSeedDS.html
+++ b/docs/reference/setSeedDS.html
@@ -18,7 +18,7 @@
dsBase
- 6.3.5.9000
+ 6.4.0.9000
diff --git a/docs/reference/skewnessDS1.html b/docs/reference/skewnessDS1.html
index 7fcff17c..d75060b2 100644
--- a/docs/reference/skewnessDS1.html
+++ b/docs/reference/skewnessDS1.html
@@ -17,7 +17,7 @@
dsBase
- 6.3.5.9000
+ 6.4.0.9000
diff --git a/docs/reference/skewnessDS2.html b/docs/reference/skewnessDS2.html
index 96ca37f2..3a1f7b67 100644
--- a/docs/reference/skewnessDS2.html
+++ b/docs/reference/skewnessDS2.html
@@ -18,7 +18,7 @@
dsBase
- 6.3.5.9000
+ 6.4.0.9000
diff --git a/docs/reference/sqrtDS.html b/docs/reference/sqrtDS.html
index 7b232fd0..575ce4f7 100644
--- a/docs/reference/sqrtDS.html
+++ b/docs/reference/sqrtDS.html
@@ -17,7 +17,7 @@
dsBase
- 6.3.5.9000
+ 6.4.0.9000
diff --git a/docs/reference/subsetByClassDS.html b/docs/reference/subsetByClassDS.html
index fc546a46..c9f7a973 100644
--- a/docs/reference/subsetByClassDS.html
+++ b/docs/reference/subsetByClassDS.html
@@ -19,7 +19,7 @@
dsBase
- 6.3.5.9000
+ 6.4.0.9000
diff --git a/docs/reference/subsetDS.html b/docs/reference/subsetDS.html
index 82f92ba4..6ebe3cc2 100644
--- a/docs/reference/subsetDS.html
+++ b/docs/reference/subsetDS.html
@@ -19,7 +19,7 @@
dsBase
- 6.3.5.9000
+ 6.4.0.9000
diff --git a/docs/reference/table1DDS.html b/docs/reference/table1DDS.html
index 574f90e4..ff21fd5d 100644
--- a/docs/reference/table1DDS.html
+++ b/docs/reference/table1DDS.html
@@ -18,7 +18,7 @@
dsBase
- 6.3.5.9000
+ 6.4.0.9000
diff --git a/docs/reference/table2DDS.html b/docs/reference/table2DDS.html
index b120d9ab..b3009107 100644
--- a/docs/reference/table2DDS.html
+++ b/docs/reference/table2DDS.html
@@ -18,7 +18,7 @@
dsBase
- 6.3.5.9000
+ 6.4.0.9000
diff --git a/docs/reference/tableDS.assign.html b/docs/reference/tableDS.assign.html
index 5c5ea7d7..7df8b1d3 100644
--- a/docs/reference/tableDS.assign.html
+++ b/docs/reference/tableDS.assign.html
@@ -18,7 +18,7 @@
dsBase
- 6.3.5.9000
+ 6.4.0.9000
diff --git a/docs/reference/tableDS.html b/docs/reference/tableDS.html
index 96b9b031..d0fb2550 100644
--- a/docs/reference/tableDS.html
+++ b/docs/reference/tableDS.html
@@ -18,7 +18,7 @@
dsBase
- 6.3.5.9000
+ 6.4.0.9000
diff --git a/docs/reference/tableDS2.html b/docs/reference/tableDS2.html
index 360bd692..713d4f3a 100644
--- a/docs/reference/tableDS2.html
+++ b/docs/reference/tableDS2.html
@@ -18,7 +18,7 @@
dsBase
- 6.3.5.9000
+ 6.4.0.9000
diff --git a/docs/reference/tapplyDS.assign.html b/docs/reference/tapplyDS.assign.html
index 325a9d7c..cdb99f48 100644
--- a/docs/reference/tapplyDS.assign.html
+++ b/docs/reference/tapplyDS.assign.html
@@ -19,7 +19,7 @@
dsBase
- 6.3.5.9000
+ 6.4.0.9000
diff --git a/docs/reference/tapplyDS.html b/docs/reference/tapplyDS.html
index ba1dec6d..c9a9b945 100644
--- a/docs/reference/tapplyDS.html
+++ b/docs/reference/tapplyDS.html
@@ -19,7 +19,7 @@
dsBase
- 6.3.5.9000
+ 6.4.0.9000
diff --git a/docs/reference/testObjExistsDS.html b/docs/reference/testObjExistsDS.html
index d8baf8bd..7bed49f9 100644
--- a/docs/reference/testObjExistsDS.html
+++ b/docs/reference/testObjExistsDS.html
@@ -1,5 +1,9 @@
+<<<<<<< HEAD
+testObjExistsDS — testObjExistsDS • dsBasetestObjExistsDS — testObjExistsDS • dsBase
@@ -17,7 +21,15 @@
dsBase
+<<<<<<< HEAD
+<<<<<<< HEAD
+ 6.4.0-9000
+=======
+ 6.3.4
+>>>>>>> origin/v6.3.5-dev
+=======
6.3.5.9000
+>>>>>>> origin/v6.3.5-dev
diff --git a/docs/reference/unListDS.html b/docs/reference/unListDS.html
index c59d955e..f613a8d0 100644
--- a/docs/reference/unListDS.html
+++ b/docs/reference/unListDS.html
@@ -19,7 +19,7 @@
dsBase
- 6.3.5.9000
+ 6.4.0.9000
diff --git a/docs/reference/uniqueDS.html b/docs/reference/uniqueDS.html
index c65fdb87..65899058 100644
--- a/docs/reference/uniqueDS.html
+++ b/docs/reference/uniqueDS.html
@@ -17,7 +17,7 @@
dsBase
- 6.3.5.9000
+ 6.4.0.9000
diff --git a/docs/reference/varDS.html b/docs/reference/varDS.html
index f4a05a39..86e0bac6 100644
--- a/docs/reference/varDS.html
+++ b/docs/reference/varDS.html
@@ -17,7 +17,7 @@
dsBase
- 6.3.5.9000
+ 6.4.0.9000
diff --git a/docs/reference/vectorDS.html b/docs/reference/vectorDS.html
index 36faaa54..6fbba822 100644
--- a/docs/reference/vectorDS.html
+++ b/docs/reference/vectorDS.html
@@ -17,7 +17,7 @@
dsBase
- 6.3.5.9000
+ 6.4.0.9000
diff --git a/docs/sitemap.xml b/docs/sitemap.xml
index 0d5e1008..8f4e88ad 100644
--- a/docs/sitemap.xml
+++ b/docs/sitemap.xml
@@ -6,6 +6,7 @@
/reference/BooleDS.html/reference/absDS.html/reference/asCharacterDS.html
+/reference/asDataFrameDS.html/reference/asDataMatrixDS.html/reference/asFactorDS1.html/reference/asFactorDS2.html
@@ -38,6 +39,7 @@
/reference/dataFrameSortDS.html/reference/dataFrameSubsetDS1.html/reference/dataFrameSubsetDS2.html
+/reference/dateDS.html/reference/densityGridDS.html/reference/dimDS.html/reference/dmtC2SDS.html
@@ -46,7 +48,12 @@
/reference/extract.html/reference/extractQuantilesDS1.html/reference/extractQuantilesDS2.html
+/reference/fixClassDS.html
+/reference/fixColsDS.html
+/reference/fixLevelsDS.html/reference/gamlssDS.html
+/reference/getAllLevelsDS.html
+/reference/getClassAllColsDS.html/reference/getWGSRDS.html/reference/glmDS1.html/reference/glmDS2.html
@@ -100,6 +107,7 @@
/reference/namesDS.html/reference/nsDS.html/reference/numNaDS.html
+/reference/predictDS.html/reference/qlsplineDS.html/reference/quantileMeanDS.html/reference/rBinomDS.html
@@ -119,8 +127,10 @@
/reference/repDS.html/reference/replaceNaDS.html/reference/rmDS.html
+/reference/roundDS.html/reference/rowColCalcDS.html/reference/sampleDS.html
+/reference/scaleDS.html/reference/scatterPlotDS.html/reference/seqDS.html/reference/setFilterDS.html
@@ -131,7 +141,6 @@
/reference/subsetByClassDS.html/reference/subsetByClassHelper1.html/reference/subsetByClassHelper2.html
-/reference/subsetByClassHelper3.html/reference/subsetDS.html/reference/table1DDS.html/reference/table2DDS.html
diff --git a/man/asDataFrameDS.Rd b/man/asDataFrameDS.Rd
new file mode 100644
index 00000000..8e1b1029
--- /dev/null
+++ b/man/asDataFrameDS.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/asDataFrameDS.R
+\name{asDataFrameDS}
+\alias{asDataFrameDS}
+\title{asDataFrameDS a serverside assign function called by ds.asDataFrame}
+\usage{
+asDataFrameDS(x.name)
+}
+\arguments{
+\item{x.name}{the name of the input object to be coerced to class
+data.frame. Must be specified in inverted commas. But this argument is
+usually specified directly by argument of the clientside function
+\code{ds.asDataFrame}}
+}
+\value{
+the object specified by the argument (or its default name
+"asdataframe.newobj") which is written to the serverside. For further
+details see help on the clientside function \code{ds.asDataMatrix}
+}
+\description{
+Coerces an R object into a matrix maintaining original
+class for all columns in data.frames.
+}
+\details{
+This assign function is based on the native R function \code{data.frame}
+}
+\author{
+Tim Cadman
+}
diff --git a/man/asDataMatrixDS.Rd b/man/asDataMatrixDS.Rd
index 3170e9af..82bebec3 100644
--- a/man/asDataMatrixDS.Rd
+++ b/man/asDataMatrixDS.Rd
@@ -2,19 +2,19 @@
% Please edit documentation in R/asDataMatrixDS.R
\name{asDataMatrixDS}
\alias{asDataMatrixDS}
-\title{asDataMatrixDS a serverside assign function called by ds.asDataMatrix}
+\title{asDataFrameDS a serverside assign function called by ds.asDataFrame}
\usage{
asDataMatrixDS(x.name)
}
\arguments{
\item{x.name}{the name of the input object to be coerced to class
-data.matrix. Must be specified in inverted commas. But this argument is
+data.frame. Must be specified in inverted commas. But this argument is
usually specified directly by argument of the clientside function
-\code{ds.asDataMatrix}}
+\code{ds.asDataFrame}}
}
\value{
the object specified by the argument (or its default name
-"asdatamatrix.newobj") which is written to the serverside. For further
+"asdataframe.newobj") which is written to the serverside. For further
details see help on the clientside function \code{ds.asDataMatrix}
}
\description{
@@ -30,5 +30,5 @@ the data.frame to a matrix but maintains all data columns in their
original class
}
\author{
-Paul Burton for DataSHIELD Development Team
+Tim Cadman
}
diff --git a/man/asFactorDS2.Rd b/man/asFactorDS2.Rd
index 16b4a50f..fff7b73b 100644
--- a/man/asFactorDS2.Rd
+++ b/man/asFactorDS2.Rd
@@ -35,5 +35,5 @@ a factor type that presented as a vector or as a matrix with dummy variables.
\details{
The functions converts the input variable into a factor which is presented as a vector
if the \code{fixed.dummy.vars} is set to FALSE or as a matrix with dummy variables if the
-\code{fixed.dummy.vars} is set to TRUE (see the help file of ds.asFactor.b for more details).
+\code{fixed.dummy.vars} is set to TRUE (see the help file of ds.asFactor for more details).
}
diff --git a/man/dateDS.Rd b/man/dateDS.Rd
new file mode 100644
index 00000000..91546ab7
--- /dev/null
+++ b/man/dateDS.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dateDS.R
+\name{dateDS}
+\alias{dateDS}
+\title{dateDS}
+\usage{
+dateDS(x = NULL, type = NULL, newobj = NULL, unit = NULL, add.column = NULL)
+}
+\arguments{
+\item{x}{Character vector specifying the server-side object(s). For data-frame columns, use the format \code{df$column}.}
+
+\item{type}{Character string specifying the operation: \code{"extractdate"}, \code{"makedate"}, or \code{"timebetween"}.}
+
+\item{newobj}{Character string for the name of the object that will be created on the server. Default is \code{"date.result"}.}
+
+\item{unit}{Character string specifying the unit for \code{extractdate} or \code{timebetween}: \code{"days"}, \code{"months"}, or \code{"years"}.}
+
+\item{add.column}{Logical. If \code{FALSE}, the result is created as a new server-side object;
+if \code{TRUE}, the result is added as a new column in the existing data-frame. Default is \code{FALSE}.}
+}
+\value{
+the created numeric vector or date object, or the updated dataframe with the added column
+}
+\description{
+Takes an object that is either a data-frame column or a vector, and can do extraction of
+components of full date (\code{extractdate}), can combine date components to a full date (\code{makedate}),
+or can calculate the time between two dates (\code{timebetween}).
+}
+\details{
+If the input is a data-frame column, it must be provided in the \code{x} argument as data-frame$column.
+Inputs for \code{extractdate} and \code{timebetween} must be date objects.
+For \code{makedate}, three numeric vectors (year, month, day) must be provided in the correct order.
+The \code{add.column} argument determines whether the result is added as a new column in the existing
+data-frame (\code{TRUE}), or created as a new server-side object (\code{FALSE}).
+Note: \code{add.column = TRUE} is only valid for data-frame inputs.
+}
+\author{
+Zulal Bekerecioglu
+}
diff --git a/man/dsBase-package.Rd b/man/dsBase-package.Rd
index 725c4131..be7f5744 100644
--- a/man/dsBase-package.Rd
+++ b/man/dsBase-package.Rd
@@ -13,6 +13,7 @@ Base 'DataSHIELD' functions for the server side. 'DataSHIELD' is a software pack
Authors:
\itemize{
+ \item Stuart Wheater \email{stuart.wheater@arjuna.com} (\href{https://orcid.org/0009-0003-2419-1964}{ORCID})
\item Paul Burton (\href{https://orcid.org/0000-0001-5799-9634}{ORCID})
\item Rebecca Wilson (\href{https://orcid.org/0000-0003-2294-593X}{ORCID})
\item Olly Butters (\href{https://orcid.org/0000-0003-0354-8461}{ORCID})
diff --git a/man/fixClassDS.Rd b/man/fixClassDS.Rd
new file mode 100644
index 00000000..d7b6bf17
--- /dev/null
+++ b/man/fixClassDS.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/standardiseDfDS.R
+\name{fixClassDS}
+\alias{fixClassDS}
+\title{Change Class of Target Variables in a Data Frame}
+\usage{
+fixClassDS(df.name, target_vars, target_class)
+}
+\arguments{
+\item{df.name}{A string representing the name of the data frame.}
+
+\item{target_vars}{A character vector specifying the columns to be modified.}
+
+\item{target_class}{A character vector specifying the new classes for each column (1 = factor,
+2 = integer, 3 = numeric, 4 = character, 5 = logical).}
+}
+\value{
+A modified data frame with the specified columns converted to the target classes.
+}
+\description{
+Change Class of Target Variables in a Data Frame
+}
diff --git a/man/fixColsDS.Rd b/man/fixColsDS.Rd
new file mode 100644
index 00000000..709d9472
--- /dev/null
+++ b/man/fixColsDS.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/standardiseDfDS.R
+\name{fixColsDS}
+\alias{fixColsDS}
+\title{Add Missing Columns with NA Values}
+\usage{
+fixColsDS(.data, cols)
+}
+\arguments{
+\item{.data}{A string representing the name of the data frame.}
+
+\item{cols}{A character vector specifying the columns to be added if missing.}
+}
+\value{
+A modified data frame with missing columns added and filled with NA.
+}
+\description{
+Add Missing Columns with NA Values
+}
diff --git a/man/fixLevelsDS.Rd b/man/fixLevelsDS.Rd
new file mode 100644
index 00000000..096757a9
--- /dev/null
+++ b/man/fixLevelsDS.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/standardiseDfDS.R
+\name{fixLevelsDS}
+\alias{fixLevelsDS}
+\title{Set Factor Levels for Specific Columns in a Data Frame}
+\usage{
+fixLevelsDS(df.name, vars, levels)
+}
+\arguments{
+\item{df.name}{A string representing the name of the data frame to modify.}
+
+\item{vars}{A character vector specifying the columns to be modified.}
+
+\item{levels}{A named list where each element contains the levels for the corresponding factor variable.}
+}
+\value{
+A modified data frame with the specified columns converted to factors with the provided levels.
+}
+\description{
+Set Factor Levels for Specific Columns in a Data Frame
+}
diff --git a/man/getAllLevelsDS.Rd b/man/getAllLevelsDS.Rd
new file mode 100644
index 00000000..e5030725
--- /dev/null
+++ b/man/getAllLevelsDS.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/standardiseDfDS.R
+\name{getAllLevelsDS}
+\alias{getAllLevelsDS}
+\title{Retrieve Factor Levels for Specific Columns}
+\usage{
+getAllLevelsDS(df.name, factor_vars)
+}
+\arguments{
+\item{df.name}{A string representing the name of the data frame.}
+
+\item{factor_vars}{A character vector specifying the factor columns.}
+}
+\value{
+A list of factor levels for the specified columns.
+}
+\description{
+Retrieve Factor Levels for Specific Columns
+}
diff --git a/man/getClassAllColsDS.Rd b/man/getClassAllColsDS.Rd
new file mode 100644
index 00000000..cb2de0e7
--- /dev/null
+++ b/man/getClassAllColsDS.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/standardiseDfDS.R
+\name{getClassAllColsDS}
+\alias{getClassAllColsDS}
+\title{Get the Class of All Columns in a Data Frame}
+\usage{
+getClassAllColsDS(df.name)
+}
+\arguments{
+\item{df.name}{A string representing the name of the data frame.}
+}
+\value{
+A tibble with the class of each column in the data frame.
+}
+\description{
+Get the Class of All Columns in a Data Frame
+}
diff --git a/man/predictDS.Rd b/man/predictDS.Rd
new file mode 100644
index 00000000..d795b422
--- /dev/null
+++ b/man/predictDS.Rd
@@ -0,0 +1,43 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/predictDS.R
+\name{predictDS}
+\alias{predictDS}
+\title{predictDS}
+\usage{
+predictDS(
+ newdataname,
+ traindataname,
+ type = c("response", "link"),
+ na.action = "na.pass"
+)
+}
+\arguments{
+\item{newdataname}{A character string specifying the name of the new dataset to be used for predictions.}
+
+\item{traindataname}{A character string specifying the name of the dataset used for model training.}
+
+\item{type}{A character string specifying the type of prediction. Options are \code{"response"} or \code{"link"}.}
+
+\item{na.action}{A character string to specify the action to take if missing values are present. Default is \code{"na.pass"}.}
+}
+\value{
+a numeric vector containing the predicted values
+}
+\description{
+Generates server-side predictions using the client-side output from \code{ds.glm}.
+}
+\details{
+This function uses the components supplied by the client-side function (coefficients, family, formula,
+and any categorical variables) to generate predictions on the server. To use the base R \code{predict()} function,
+a "dummy" glm object is created using the same model formula, family, and link function as the original model.
+The dummy model's coefficients are then replaced with the client-side coefficient estimates.
+
+To avoid mismatches between the factors used in the original glm and those in the dummy glm, the categorical
+variables saved by the client-side function are applied to the newdata.
+
+For intercept-only models, the function simply returns a vector of predicted values equal to the model intercept, with the appropriate length
+based on the row length of \code{newdataname}.
+}
+\author{
+Zulal Bekerecioglu
+}
diff --git a/man/roundDS.Rd b/man/roundDS.Rd
new file mode 100644
index 00000000..00d0e106
--- /dev/null
+++ b/man/roundDS.Rd
@@ -0,0 +1,40 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/roundDS.R
+\name{roundDS}
+\alias{roundDS}
+\title{roundDS}
+\usage{
+roundDS(x, type, digits, add.column, newobj)
+}
+\arguments{
+\item{x}{Character vector specifying the server-side object(s). For data-frame columns, use the format \code{df$column}.}
+
+\item{type}{Character string specifying the operation: \code{"round"}, \code{"ceiling"}, \code{"floor"},
+\code{trunc}, or \code{"signif"}.}
+
+\item{digits}{Number of digits to be used in arguments \code{"round"} and \code{"signif"}.}
+
+\item{add.column}{Logical. If \code{FALSE}, the result is created as a new server-side object;
+if \code{TRUE}, the result is added as a new column in the existing data-frame. Default is \code{FALSE}.}
+
+\item{newobj}{Character string for the name of the object that will be created on the server. Default is \code{"rounding.result"}.}
+}
+\value{
+the created numeric vector or the updated dataframe with the added column
+}
+\description{
+Generates objects from a server-side object, which can be either a vector or
+a data-frame column. Supports five operations:
+1. (\code{round})
+2. (\code{ceiling})
+3. (\code{floor})
+4. (\code{trunc})
+5. (\code{signif})
+where each function in baseR is applied on the server side to the specified object.
+}
+\details{
+Note: \code{add.column = TRUE} is only valid for data-frame inputs.
+}
+\author{
+Zulal Bekerecioglu
+}
diff --git a/man/scaleDS.Rd b/man/scaleDS.Rd
new file mode 100644
index 00000000..a7ab4e25
--- /dev/null
+++ b/man/scaleDS.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/scaleDS.R
+\name{scaleDS}
+\alias{scaleDS}
+\title{scaleDS}
+\usage{
+scaleDS(x = NULL, newobj = NULL, add.column = NULL)
+}
+\arguments{
+\item{x}{Character string specifying the server-side vector For data-frame columns, use the format \code{df$column}.}
+
+\item{newobj}{Character string for the name of the object that will be created on the server. Default is \code{"scaled.data"}.}
+
+\item{add.column}{Logical. If \code{FALSE}, the result is created as a new server-side object;
+if \code{TRUE}, the result is added as a new column in the existing data-frame. Default is \code{FALSE}.}
+}
+\value{
+the created numeric vector or the updated dataframe with the added column
+}
+\description{
+Generates scaled objects using a server-side object, which can be either a vector or
+a data-frame column.
+}
+\details{
+Note: \code{add.column = TRUE} is only valid for data-frame inputs.
+}
+\author{
+Zulal Bekerecioglu
+}
diff --git a/tests/testthat/disclosure/set_disclosure_settings.R b/tests/testthat/disclosure/set_disclosure_settings.R
index 4642cc53..4ee70213 100644
--- a/tests/testthat/disclosure/set_disclosure_settings.R
+++ b/tests/testthat/disclosure/set_disclosure_settings.R
@@ -24,3 +24,27 @@ set.standard.disclosure.settings <- function() {
options(default.nfilter.levels.density = "0.33")
options(default.nfilter.levels.max = "40")
}
+
+set.specific.disclosure.settings <- function(datashield.privacyControlLevel='permissive',
+ nfilter.tab='3',
+ nfilter.subset='3',
+ nfilter.glm='0.33',
+ nfilter.string='80',
+ nfilter.stringShort='20',
+ nfilter.kNN='3',
+ nfilter.levels.density='0.33',
+ nfilter.levels.max='40',
+ nfilter.noise='0.25',
+ nfilter.privacy.old='5') {
+ options(datashield.privacyLevel = nfilter.privacy.old)
+ options(default.datashield.privacyControlLevel = datashield.privacyControlLevel)
+ options(default.nfilter.glm = nfilter.glm)
+ options(default.nfilter.kNN = nfilter.kNN)
+ options(default.nfilter.string = nfilter.string)
+ options(default.nfilter.subset = nfilter.subset)
+ options(default.nfilter.stringShort = nfilter.stringShort)
+ options(default.nfilter.tab = nfilter.tab)
+ options(default.nfilter.noise = nfilter.noise)
+ options(default.nfilter.levels.density = nfilter.levels.density)
+ options(default.nfilter.levels.max = nfilter.levels.max)
+}
diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R
new file mode 100644
index 00000000..403fdfb9
--- /dev/null
+++ b/tests/testthat/helper.R
@@ -0,0 +1,162 @@
+#' Create a mixed dataframe with factor and other types of columns
+#'
+#' This function generates a dataframe with a specified number of rows,
+#' factor columns, and other columns (integer, numeric, and string).
+#'
+#' @param n_rows Number of rows in the dataframe. Default is 10,000.
+#' @param n_factor_cols Number of factor columns in the dataframe. Default is 15.
+#' @param n_other_cols Number of other columns (integer, numeric, and string) in the dataframe. Default is 15.
+#'
+#' @return A dataframe with the specified number of rows and columns, containing mixed data types.
+#' @importFrom dplyr bind_cols
+#' @importFrom purrr map_dfc
+#' @examples
+#' df <- create_mixed_dataframe(n_rows = 100, n_factor_cols = 10, n_other_cols = 5)
+create_mixed_dataframe <- function(n_rows = 10000, n_factor_cols = 15, n_other_cols = 15) {
+
+ create_factor_column <- function(levels, n = n_rows) {
+ set.seed(123) # Set seed before sample
+ factor(sample(levels, n, replace = TRUE))
+ }
+
+ factor_levels <- list(
+ c("Low", "Medium", "High"),
+ c("Red", "Green", "Blue"),
+ c("Yes", "No"),
+ c("A", "B", "C"),
+ c("One", "Two", "Three"),
+ c("Cat", "Dog", "Bird"),
+ c("Small", "Medium", "Large"),
+ c("Alpha", "Beta", "Gamma"),
+ c("True", "False"),
+ c("Left", "Right"),
+ c("North", "South", "East", "West"),
+ c("Day", "Night"),
+ c("Up", "Down"),
+ c("Male", "Female"),
+ c("Summer", "Winter", "Spring", "Fall")
+ )
+
+ factor_columns <- purrr::map_dfc(factor_levels[1:n_factor_cols], create_factor_column)
+ colnames(factor_columns) <- paste0("fac_col", 1:n_factor_cols)
+
+ create_other_column <- function(type, n = n_rows) {
+ set.seed(123) # Set seed before sample
+ switch(type,
+ "int" = sample(1:100, n, replace = TRUE),
+ "num" = runif(n, 0, 100),
+ "str" = sample(letters, n, replace = TRUE)
+ )
+ }
+
+ column_types <- c(
+ "int", "int", "num", "num", "str",
+ "str", "int", "num", "str", "int",
+ "num", "str", "int", "num", "str"
+ )
+
+ other_columns <- purrr::map_dfc(column_types[1:n_other_cols], create_other_column)
+ colnames(other_columns) <- paste0("col", (n_factor_cols + 1):(n_factor_cols + n_other_cols))
+ df <- bind_cols(factor_columns, other_columns)
+
+ return(df)
+}
+
+
+#' Modify factor levels for partial overlap
+#'
+#' This function takes two sets of factor levels, computes the common and unique levels,
+#' and returns a new set of levels with partial overlap.
+#'
+#' @param levels1 First set of factor levels.
+#' @param levels2 Second set of factor levels.
+#'
+#' @return A character vector of new factor levels with partial overlap.
+#' @examples
+#' new_levels <- partial_overlap_levels(c("A", "B", "C"), c("B", "C", "D"))
+partial_overlap_levels <- function(levels1, levels2) {
+ common <- intersect(levels1, levels2)
+ unique1 <- setdiff(levels1, common)
+ unique2 <- setdiff(levels2, common)
+
+ # Set seed before each sample call
+ set.seed(123)
+ sampled_unique1 <- sample(unique1, length(unique1) * 0.5)
+
+ set.seed(123)
+ sampled_unique2 <- sample(unique2, length(unique2) * 0.5)
+
+ new_levels <- c(common, sampled_unique1, sampled_unique2)
+ return(new_levels)
+}
+
+
+#' Create additional dataframes with specific conditions
+#'
+#' This function generates additional dataframes based on an input dataframe, modifying column classes and levels,
+#' and adding new columns with unique names. Different seeds are used for each iteration of the loop,
+#' ensuring reproducibility of the generated dataframes.
+#'
+#' @param base_df The base dataframe used to create the additional dataframes.
+#' @param n_rows Number of rows in the additional dataframes. Default is 10,000.
+#' @param df_names Names of the additional dataframes to be created. Default is c("df1", "df2", "df3").
+#'
+#' @return A list of dataframes with the specified modifications.
+#' @importFrom dplyr bind_cols
+#' @examples
+#' base_df <- create_mixed_dataframe(n_rows = 100, n_factor_cols = 10, n_other_cols = 5)
+#' additional_dfs <- create_additional_dataframes(base_df, n_rows = 1000, df_names = c("df1", "df2"))
+create_additional_dataframes <- function(base_df, n_rows = 10000, df_names = c("df1", "df2", "df3")) {
+
+ # Define a fixed sequence of seeds, one for each dataframe to be created
+ seeds <- c(123, 456, 789, 101112)
+
+ df_list <- list()
+
+ for (i in seq_along(df_names)) {
+ # Set the seed for this iteration based on the pre-defined seeds
+ set.seed(seeds[i])
+
+ # Proceed with the dataframe generation process
+ overlap_cols <- sample(colnames(base_df), size = round(0.8 * ncol(base_df)))
+ df <- base_df
+ cols_to_modify_class <- sample(overlap_cols, size = round(0.2 * length(overlap_cols)))
+
+ # Modify columns to have different data types
+ for (col in cols_to_modify_class) {
+ current_class <- class(df[[col]])
+ new_class <- switch(current_class,
+ "factor" = as.character(df[[col]]),
+ "character" = as.factor(df[[col]]),
+ "numeric" = as.integer(df[[col]]),
+ "integer" = as.numeric(df[[col]]),
+ df[[col]])
+ df[[col]] <- new_class
+ }
+
+ # Modify factor levels for partial overlap
+ factor_cols <- colnames(base_df)[sapply(base_df, is.factor)]
+ overlap_factor_cols <- intersect(overlap_cols, factor_cols)
+ cols_to_modify_levels <- sample(overlap_factor_cols, size = round(0.5 * length(overlap_factor_cols)))
+
+ for (col in cols_to_modify_levels) {
+ original_levels <- levels(base_df[[col]])
+ new_levels <- partial_overlap_levels(original_levels, original_levels)
+ df[[col]] <- factor(df[[col]], levels = new_levels)
+ }
+
+ # Create new random columns for each dataframe (these will vary by seed)
+ set.seed(seeds[i]) # Set the seed again for generating new columns
+ n_new_cols <- round(0.2 * ncol(base_df))
+ new_col_names <- paste0(df_names[i], "_new_col_", 1:n_new_cols)
+ new_cols <- data.frame(matrix(runif(n_rows * n_new_cols), ncol = n_new_cols))
+ colnames(new_cols) <- new_col_names
+
+ # Bind new columns to the dataframe
+ df <- bind_cols(df, new_cols)
+ df_list[[df_names[i]]] <- df
+ }
+
+ return(df_list)
+}
+
diff --git a/tests/testthat/test-smk-BooleDS.R b/tests/testthat/test-smk-BooleDS.R
index dd10c782..c7ee5af0 100644
--- a/tests/testthat/test-smk-BooleDS.R
+++ b/tests/testthat/test-smk-BooleDS.R
@@ -18,6 +18,30 @@ context("BooleDS::smk::setup")
# Tests
#
+context("BooleDS::smk::make errors")
+test_that("make errors, character V1", {
+ input <- data.frame(v1 = c('0.0', '1.0', '2.0', '3.0', '4.0'), v2 = c(4.0, 3.0, 2.0, 1.0, 0.0))
+ expect_error(BooleDS("input$v1", "input$v2", 1, "NA", TRUE), "FAILED: V1 is character, please convert to numeric, factor or logical before running Boole", fixed=TRUE)
+})
+
+test_that("make errors, character V2", {
+ input <- data.frame(v1 = c(0.0, 1.0, 2.0, 3.0, 4.0), v2 = c('4.0', '3.0', '2.0', '1.0', '0.0'))
+ expect_error(BooleDS("input$v1", "input$v2", 1, "NA", TRUE), "FAILED: V2 is character, please convert to numeric, factor or logical before running Boole", fixed=TRUE)
+})
+
+test_that("make errors, V1 and V2 are vectors of different length", {
+ v1 = c(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0)
+ v2 = c(4.0, 3.0, 2.0, 1.0, 0.0)
+ expect_error(BooleDS("v1", "v2", 1, "NA", TRUE), "FAILED: V2 must either be of length one or of length equal to V1", fixed=TRUE)
+})
+
+test_that("make errors, incorrect Boolean operator", {
+ v1 = c(0.0, 1.0, 2.0, 3.0, 4.0)
+ v2 = c(4.0, 3.0, 2.0, 1.0, 0.0)
+ boolean.op = 10
+ expect_error(BooleDS("v1", "v2", boolean.op, "NA", TRUE), "FAILED: Boolean.operator specified incorrectly. Must be: '==', '!=', '<', '<=', '>' or '>='", fixed=TRUE)
+})
+
context("BooleDS::smk::simple equal")
test_that("simple BooleDS, equal numeric", {
input <- data.frame(v1 = c(0.0, 1.0, 2.0, 3.0, 4.0), v2 = c(4.0, 3.0, 2.0, 1.0, 0.0))
diff --git a/tests/testthat/test-smk-asDataFrameDS.R b/tests/testthat/test-smk-asDataFrameDS.R
new file mode 100644
index 00000000..2d967f38
--- /dev/null
+++ b/tests/testthat/test-smk-asDataFrameDS.R
@@ -0,0 +1,47 @@
+#-------------------------------------------------------------------------------
+# Copyright (c) 2019-2022 University of Newcastle upon Tyne. All rights reserved.
+#
+# This program and the accompanying materials
+# are made available under the terms of the GNU Public License v3.0.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+#-------------------------------------------------------------------------------
+
+#
+# Set up
+#
+
+context("asDataFrameDS::smk::setup")
+
+#
+# Tests
+#
+
+context("asDataFrameDS::smk::simple")
+test_that("simple asDataFrameDS", {
+ input <- tibble(v1 = c(0.0, 1.0, 2.0, 3.0, 4.0), v2 = c(4.0, 3.0, 2.0, 1.0, 0.0))
+
+ res <- asDataFrameDS("input")
+
+ res.class <- class(res)
+ expect_length(res.class, 1)
+ expect_true("data.frame" %in% res.class)
+ expect_equal(
+ dim(res), c(5, 2)
+ )
+
+ expect_equal(res$v1, 0:4)
+ expect_equal(res$v2, 4:0)
+ res.colnames <- colnames(res)
+ expect_length(res.colnames, 2)
+ expect_equal(res.colnames[1], "v1")
+ expect_equal(res.colnames[2], "v2")
+})
+
+#
+# Done
+#
+
+context("asDataMatrixDS::smk::shutdown")
+context("asDataMatrixDS::smk::done")
diff --git a/tests/testthat/test-smk-asDataMatrixDS.R b/tests/testthat/test-smk-asDataMatrixDS.R
index 6c4f567c..cfcd5f2f 100644
--- a/tests/testthat/test-smk-asDataMatrixDS.R
+++ b/tests/testthat/test-smk-asDataMatrixDS.R
@@ -1,5 +1,6 @@
#-------------------------------------------------------------------------------
# Copyright (c) 2019-2022 University of Newcastle upon Tyne. All rights reserved.
+# Copyright (c) 2026 Arjuna Technologies, Newcastle upon Tyne. All rights reserved.
#
# This program and the accompanying materials
# are made available under the terms of the GNU Public License v3.0.
@@ -29,25 +30,53 @@ test_that("simple asDataMatrixDS", {
{
expect_length(res.class, 1)
expect_true("matrix" %in% res.class)
+
+ expect_length(res, 10)
+ expect_equal(res[1], 0)
+ expect_equal(res[2], 1)
+ expect_equal(res[3], 2)
+ expect_equal(res[4], 3)
+ expect_equal(res[5], 4)
+ expect_equal(res[6], 4)
+ expect_equal(res[7], 3)
+ expect_equal(res[8], 2)
+ expect_equal(res[9], 1)
+ expect_equal(res[10], 0)
}
- else
+ else if (base::getRversion() < '4.0.1')
{
expect_length(res.class, 2)
- expect_true("matrix" %in% res.class)
- expect_true("array" %in% res.class)
+ expect_true(all(c("matrix", "array") %in% res.class))
+
+ expect_length(res, 10)
+ expect_equal(res[1], 0)
+ expect_equal(res[2], 1)
+ expect_equal(res[3], 2)
+ expect_equal(res[4], 3)
+ expect_equal(res[5], 4)
+ expect_equal(res[6], 4)
+ expect_equal(res[7], 3)
+ expect_equal(res[8], 2)
+ expect_equal(res[9], 1)
+ expect_equal(res[10], 0)
}
+ else
+ {
+ expect_length(res.class, 1)
+ expect_true(all("data.frame" %in% res.class))
- expect_length(res, 10)
- expect_equal(res[1], 0)
- expect_equal(res[2], 1)
- expect_equal(res[3], 2)
- expect_equal(res[4], 3)
- expect_equal(res[5], 4)
- expect_equal(res[6], 4)
- expect_equal(res[7], 3)
- expect_equal(res[8], 2)
- expect_equal(res[9], 1)
- expect_equal(res[10], 0)
+ expect_length(res, 2)
+ expect_equal(res[1, 1], 0)
+ expect_equal(res[2, 1], 1)
+ expect_equal(res[3, 1], 2)
+ expect_equal(res[4, 1], 3)
+ expect_equal(res[5, 1], 4)
+ expect_equal(res[1, 2], 4)
+ expect_equal(res[2, 2], 3)
+ expect_equal(res[3, 2], 2)
+ expect_equal(res[4, 2], 1)
+ expect_equal(res[5, 2], 0)
+ }
res.colnames <- colnames(res)
expect_length(res.colnames, 2)
diff --git a/tests/testthat/test-smk-asFactorDS1.R b/tests/testthat/test-smk-asFactorDS1.R
index fd59cefc..78d44bf7 100644
--- a/tests/testthat/test-smk-asFactorDS1.R
+++ b/tests/testthat/test-smk-asFactorDS1.R
@@ -33,6 +33,49 @@ test_that("simple asFactorDS1", {
expect_equal(res[3], "3")
})
+context("asFactorDS1::smk::make errors")
+test_that("make errors, vector with more unique values than nfilter.levels.max", {
+ input <- c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0)
+
+ factor.levels.present.in.source <- levels(factor(input))
+ num.levels <- length(factor.levels.present.in.source)
+
+ thr <- dsBase::listDisclosureSettingsDS()
+ nfilter.levels.density <- as.numeric(thr$nfilter.levels.density)
+ nfilter.levels.max <- as.numeric(thr$nfilter.levels.max)
+ max.levels.by.density <- nfilter.levels.density*length(input)
+
+ error.message <- paste0("FAILED: this variable has too many levels and may be disclosive.
+ The number of factor levels must not exceed ", (nfilter.levels.density*100),
+ "% of the length of the variable being converted to a factor. The max number
+ of levels in this study is therefore ",max.levels.by.density," but this
+ variable has ", num.levels, " factor levels")
+
+ expect_error(asFactorDS1("input"), error.message, fixed=TRUE)
+})
+
+test_that("make errors, vector with more levels than nfilter.levels.max", {
+ input <- c(2.0, 1.0, 3.0, 3.0, 3.0, 1.0, 2.0, 2.0, 1.0, 2.0)
+
+ factor.levels.present.in.source <- levels(factor(input))
+ num.levels <- length(factor.levels.present.in.source)
+
+ set.specific.disclosure.settings(nfilter.levels.max='2')
+
+ thr <- dsBase::listDisclosureSettingsDS()
+ nfilter.levels.density <- as.numeric(thr$nfilter.levels.density)
+ nfilter.levels.max <- as.numeric(thr$nfilter.levels.max)
+ max.levels.by.density <- nfilter.levels.density*length(input)
+
+ error.message <- paste0("FAILED: this variable has too many levels and may be disclosive.
+ It exceeds the max number of levels allowed by nfilter.levels.max:
+ that is ", nfilter.levels.max, ". In this study this variable has ",
+ num.levels," factor levels")
+
+ expect_error(asFactorDS1("input"), error.message, fixed=TRUE)
+})
+
+
#
# Done
#
diff --git a/tests/testthat/test-smk-standardiseDfDS.R b/tests/testthat/test-smk-standardiseDfDS.R
new file mode 100644
index 00000000..1bfd6720
--- /dev/null
+++ b/tests/testthat/test-smk-standardiseDfDS.R
@@ -0,0 +1,163 @@
+#-------------------------------------------------------------------------------
+# Copyright (c) 2025 University Medical Center Groningen (UCMG), Netherlands. All rights reserved.
+#
+# This program and the accompanying materials
+# are made available under the terms of the GNU Public License v3.0.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+#-------------------------------------------------------------------------------
+
+#
+# Set up
+#
+
+context("standardiseDfDS::smk::setup")
+
+df <- create_mixed_dataframe()
+df_list <- create_additional_dataframes(df)
+
+df_1 <- df
+df_2 <- df_list[[1]]
+df_3 <- df_list[[2]]
+df_4 <- df_list[[3]]
+
+context("standardiseDfDS::smk")
+test_that("getClassAllColsDS returns correct classes", {
+ expect_equal(
+ getClassAllColsDS("df_1"),
+ tibble(
+ fac_col1 = "factor", fac_col2 = "factor", fac_col3 = "factor", fac_col4 = "factor", fac_col5 = "factor",
+ fac_col6 = "factor", fac_col7 = "factor", fac_col8 = "factor", fac_col9 = "factor", fac_col10 = "factor",
+ fac_col11 = "factor", fac_col12 = "factor", fac_col13 = "factor", fac_col14 = "factor", fac_col15 = "factor",
+ col16 = "integer", col17 = "integer", col18 = "numeric", col19 = "numeric", col20 = "character",
+ col21 = "character", col22 = "integer", col23 = "numeric", col24 = "character", col25 = "integer",
+ col26 = "numeric", col27 = "character", col28 = "integer", col29 = "numeric", col30 = "character"
+ )
+ )
+})
+
+test_that("fixClassDS sets classes correctly", {
+
+ cols_to_set <- c("fac_col13", "fac_col5", "col22", "col19", "col25", "col20", "col28",
+ "fac_col14", "fac_col3", "fac_col8")
+
+ classes_to_set <- c("4", "1", "3", "5", "3", "2", "5", "5", "3", "2")
+
+ expect_warning(
+ classes_changed_df <- fixClassDS("df_1", cols_to_set, classes_to_set)
+ )
+
+ expect_equal(
+ classes_changed_df %>% tidytable::map_chr(class) %>% unname(),
+ c("factor", "factor", "numeric", "factor", "factor", "factor", "factor", "integer", "factor",
+ "factor", "factor", "factor", "character", "logical", "factor", "integer", "integer",
+ "numeric", "logical", "integer", "character", "numeric", "numeric", "character", "numeric",
+ "numeric", "character", "logical", "numeric", "character")
+ )
+})
+
+test_that("convert_class calls the correct function", {
+
+ result <- .convertClass(c(1, 2, 3), "1")
+ expect_true(is.factor(result))
+
+ result <- .convertClass(c(1.5, 2.5, 3.7), "2")
+ expect_true(is.integer(result))
+
+ result <- .convertClass(c("1", "2", "3"), "3")
+ expect_true(is.numeric(result))
+
+ result <- .convertClass(c(1, 2, 3), "4")
+ expect_true(is.character(result))
+
+ result <- .convertClass(c(0, 1, 0), "5")
+ expect_true(is.logical(result))
+
+})
+
+test_that("fixColsDS correctly adds missing columns", {
+
+ all_cols <- unique(c(colnames(df_1), colnames(df_2), colnames(df_3), colnames(df_4)))
+ out <- fixColsDS("df_3", all_cols)
+
+ expect_equal(
+ colnames(out),
+ sort(all_cols))
+
+})
+
+test_that("getAllLevelsDS correctly retrieves the levels of specified factor columns", {
+
+ factor_vars <- c("fac_col1", "fac_col2", "fac_col3", "fac_col4", "fac_col5", "fac_col6", "fac_col7",
+ "fac_col8", "fac_col9", "fac_col10", "fac_col11", "fac_col12", "fac_col14",
+ "fac_col15", "col27")
+
+ observed <- getAllLevelsDS("df_3", factor_vars)
+
+ expected <- list(
+ fac_col1 = c("High", "Low", "Medium"),
+ fac_col2 = c("Blue", "Green", "Red"),
+ fac_col3 = c("No", "Yes"),
+ fac_col4 = c("A", "B", "C"),
+ fac_col5 = c("One", "Three", "Two"),
+ fac_col6 = c("Bird", "Cat", "Dog"),
+ fac_col7 = c("Large", "Medium", "Small"),
+ fac_col8 = c("Alpha", "Beta", "Gamma"),
+ fac_col9 = c("False", "True"),
+ fac_col10 = c("Left", "Right"),
+ fac_col11 = c("East", "North", "South", "West"),
+ fac_col12 = c("Day", "Night"),
+ fac_col14 = c("Female", "Male"),
+ fac_col15 = c("Fall", "Spring", "Summer", "Winter"),
+ col27 = letters
+ )
+
+ expect_equal(expected, observed)
+
+})
+
+example_df <- data.frame(
+ col1 = c("A", "B", "A", "C"),
+ col2 = c("X", "Y", "X", "Z"),
+ col3 = c("Yes", "No", "Yes", "No"),
+ stringsAsFactors = FALSE
+)
+
+test_that("fixLevelsDS sets factor levels correctly", {
+
+ levels <- list(
+ col1 = c("A", "B", "C"),
+ col2 = c("X", "Y", "Z"),
+ col3 = c("Yes", "No")
+ )
+
+ modified_df <- fixLevelsDS("example_df", c("col1", "col2", "col3"), levels)
+
+ expect_s3_class(modified_df$col1, "factor")
+ expect_s3_class(modified_df$col2, "factor")
+ expect_s3_class(modified_df$col3, "factor")
+
+ expect_equal(levels(modified_df$col1), levels$col1)
+ expect_equal(levels(modified_df$col2), levels$col2)
+ expect_equal(levels(modified_df$col3), levels$col3)
+
+})
+
+test_that("fixLevelsDS throws an error for invalid input", {
+
+ levels <- list(
+ col1 = c("A", "B", "C"),
+ col2 = c("X", "Y", "Z")
+ )
+
+ expect_error(fixLevelsDS("example_df", c("col1", "non_existent_col"), levels))
+})
+
+#
+# Done
+#
+
+context("standardiseDfDS::smk::shutdown")
+
+context("standardiseDfDS::smk::done")
diff --git a/tests/testthat/test-smk-subsetByClassDS.R b/tests/testthat/test-smk-subsetByClassDS.R
deleted file mode 100644
index 8de5114b..00000000
--- a/tests/testthat/test-smk-subsetByClassDS.R
+++ /dev/null
@@ -1,87 +0,0 @@
-#-------------------------------------------------------------------------------
-# Copyright (c) 2019-2022 University of Newcastle upon Tyne. All rights reserved.
-#
-# This program and the accompanying materials
-# are made available under the terms of the GNU Public License v3.0.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
-#-------------------------------------------------------------------------------
-
-#
-# Set up
-#
-
-context("subsetByClassDS::smk::setup")
-
-set.standard.disclosure.settings()
-
-#
-# Tests
-#
-
-context("subsetByClassDS::smk")
-test_that("simple subsetByClassDS, data.frame, unspecified variables", {
- data <- data.frame(v1 = factor(c(0, 0, 0, 1, 1, 1, 2, 1, 2, 2)), v2 = c(4.0, 0.0, 3.0, 1.0, 2.0, 2.0, 1.0, 3.0, 0.0, 4.0), v3 = c(1:10), v4 = c(1:10))
- variables <- NULL
-
- res <- subsetByClassDS("data", variables)
-
- expect_equal(class(res), "list")
- expect_length(res, 3)
-
- expect_equal(class(res$v1.level_0), "data.frame")
- expect_length(res$v1.level_0, 4)
-
- expect_equal(class(res$v1.level_1), "data.frame")
- expect_length(res$v1.level_1, 4)
-
- expect_equal(class(res$v1.level_2), "data.frame")
- expect_length(res$v1.level_2, 4)
-})
-
-test_that("simple subsetByClassDS, data.frame, specified variables", {
- data <- data.frame(v1 = factor(c(0, 0, 0, 1, 1, 1, 2, 1, 2, 2)), v2 = c(4.0, 0.0, 3.0, 1.0, 2.0, 2.0, 1.0, 3.0, 0.0, 4.0), v3 = c(1:10), v4 = c(1:10))
- variables <- c("v1")
-
- res <- subsetByClassDS("data", variables)
-
- expect_equal(class(res), "list")
- expect_length(res, 3)
-
- expect_equal(class(res$v1.level_0), "data.frame")
- expect_length(res$v1.level_0, 4)
-
- expect_equal(class(res$v1.level_1), "data.frame")
- expect_length(res$v1.level_1, 4)
-
- expect_equal(class(res$v1.level_2), "data.frame")
- expect_length(res$v1.level_2, 4)
-})
-
-test_that("simple subsetByClassDS, factor vector, specified variables", {
- data <- factor(c(0, 0, 0, 1, 1, 1, 2, 1, 2, 2))
- variables <- NULL
-
- res <- subsetByClassDS("data", variables)
-
- expect_equal(class(res), "list")
- expect_length(res, 3)
-
- expect_equal(class(res$data.level_0), "factor")
- expect_length(res$data.level_0, 3)
-
- expect_equal(class(res$data.level_1), "factor")
- expect_length(res$data.level_1, 4)
-
- expect_equal(class(res$data.level_2), "factor")
- expect_length(res$data.level_2, 3)
-})
-
-#
-# Done
-#
-
-context("subsetByClassDS::smk::shutdown")
-
-context("subsetByClassDS::smk::done")
diff --git a/tests/testthat/test-smk-subsetDS.R b/tests/testthat/test-smk-subsetDS.R
deleted file mode 100644
index f34119c4..00000000
--- a/tests/testthat/test-smk-subsetDS.R
+++ /dev/null
@@ -1,125 +0,0 @@
-#-------------------------------------------------------------------------------
-# Copyright (c) 2019-2022 University of Newcastle upon Tyne. All rights reserved.
-#
-# This program and the accompanying materials
-# are made available under the terms of the GNU Public License v3.0.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
-#-------------------------------------------------------------------------------
-
-#
-# Set up
-#
-
-context("subsetDS::smk::setup")
-
-set.standard.disclosure.settings()
-
-#
-# Tests
-#
-
-context("subsetDS::smk")
-test_that("simple subsetDS, no NAs", {
- data <- data.frame(v1 = c(0, 0, 1, 1, 2, 2, 3, 3, 4, 4), v2 = c(4.0, 0.0, 3.0, 1.0, 2.0, 2.0, 1.0, 3.0, 0.0, 4.0))
- complt <- FALSE
- rs <- NULL
- cs <- NULL
- lg <- 2
- th <- 2
- varname <- "v1"
-
- res <- subsetDS("data", complt, rs, cs, lg, th, varname)
-
- expect_equal(class(res), "data.frame")
- expect_length(res, 2)
-
- expect_equal(class(res$v1), "numeric")
- expect_length(res$v1, 6)
- expect_equal(res$v1[1], 2)
- expect_equal(res$v1[2], 2)
- expect_equal(res$v1[3], 3)
- expect_equal(res$v1[4], 3)
- expect_equal(res$v1[5], 4)
- expect_equal(res$v1[6], 4)
-
- expect_equal(class(res$v2), "numeric")
- expect_length(res$v2, 6)
- expect_equal(res$v2[1], 2.0)
- expect_equal(res$v2[2], 2.0)
- expect_equal(res$v2[3], 1.0)
- expect_equal(res$v2[4], 3.0)
- expect_equal(res$v2[5], 0.0)
- expect_equal(res$v2[6], 4.0)
-})
-
-test_that("simple subsetDS, NAs, complete.case FALSE", {
- data <- data.frame(v1 = c(0, 0, 1, 1, 2, 2, 3, 3, 4, 4), v2 = c(4.0, 0.0, 3.0, 1.0, 2.0, 2.0, 1.0, NA, NA, 4.0))
- complt <- FALSE
- rs <- NULL
- cs <- NULL
- lg <- 2
- th <- 2
- varname <- "v1"
-
- res <- subsetDS("data", complt, rs, cs, lg, th, varname)
-
- expect_equal(class(res), "data.frame")
- expect_length(res, 2)
-
- expect_equal(class(res$v1), "numeric")
- expect_length(res$v1, 6)
- expect_equal(res$v1[1], 2)
- expect_equal(res$v1[2], 2)
- expect_equal(res$v1[3], 3)
- expect_equal(res$v1[4], 3)
- expect_equal(res$v1[5], 4)
- expect_equal(res$v1[6], 4)
-
- expect_equal(class(res$v2), "numeric")
- expect_length(res$v2, 6)
- expect_equal(res$v2[1], 2.0)
- expect_equal(res$v2[2], 2.0)
- expect_equal(res$v2[3], 1.0)
- expect_true(is.na(res$v2[4]))
- expect_true(is.na(res$v2[5]))
- expect_equal(res$v2[6], 4.0)
-})
-
-test_that("simple subsetDS, NAs, complete.case TRUE", {
- data <- data.frame(v1 = c(0, 0, 1, 1, 2, 2, 3, 3, 4, 4), v2 = c(4.0, 0.0, 3.0, 1.0, 2.0, 2.0, 1.0, NA, NA, 4.0))
- complt <- TRUE
- rs <- NULL
- cs <- NULL
- lg <- 2
- th <- 2
- varname <- "v1"
-
- res <- subsetDS("data", complt, rs, cs, lg, th, varname)
-
- expect_equal(class(res), "data.frame")
- expect_length(res, 2)
-
- expect_equal(class(res$v1), "numeric")
- expect_length(res$v1, 4)
- expect_equal(res$v1[1], 2)
- expect_equal(res$v1[2], 2)
- expect_equal(res$v1[3], 3)
- expect_equal(res$v1[4], 4)
-
- expect_equal(class(res$v2), "numeric")
- expect_length(res$v2, 4)
- expect_equal(res$v2[1], 2.0)
- expect_equal(res$v2[2], 2.0)
- expect_equal(res$v2[3], 1.0)
- expect_equal(res$v2[4], 4.0)
-})
-
-#
-# Done
-#
-
-context("subsetDS::smk::shutdown")
-
-context("subsetDS::smk::done")