diff --git a/R/associators.R b/R/associators.R index 8f67eef..76a86cd 100644 --- a/R/associators.R +++ b/R/associators.R @@ -19,9 +19,9 @@ function(name, class = NULL) } RWeka_build_associator <- -function(x, control, name) +function(x, control, name, weights = NULL) { - instances <- read_data_into_Weka(x) + instances <- read_data_into_Weka(x, weights) ## Build the associator. associator <- .jnew(name) diff --git a/R/classifiers.R b/R/classifiers.R index b689825..7b9a317 100644 --- a/R/classifiers.R +++ b/R/classifiers.R @@ -34,7 +34,7 @@ function(name, class = NULL, handlers = list()) if(is.na(match("data", names(handlers)))) handlers$data <- .default_data_handler_for_classifiers - out <- function(formula, data, subset, na.action, + out <- function(formula, data, subset, na.action, weights = NULL, control = Weka_control(), options = NULL) { ## The "usual" way of creating a model frame from the call. @@ -45,7 +45,7 @@ function(name, class = NULL, handlers = list()) mf <- eval(mf, parent.frame()) .structure(c(RWeka_build_classifier(mf, control, name, handlers, - options), + options, weights), list(call = mc, handlers = handlers, levels = levels(mf[[1L]]), terms = attr(mf, "terms"))), @@ -55,7 +55,7 @@ function(name, class = NULL, handlers = list()) } RWeka_build_classifier <- -function(mf, control, name, handlers, options) +function(mf, control, name, handlers, options, weights = NULL) { out <- list() @@ -67,7 +67,7 @@ function(mf, control, name, handlers, options) out$model <- mf mf <- .compose_and_funcall(handlers$data, mf) - instances <- read_model_frame_into_Weka(mf) + instances <- read_model_frame_into_Weka(mf, weights) ## Build the classifier. classifier <- .jnew(name) @@ -239,10 +239,10 @@ function(formula, ...) { if(!is.null(mf <- formula$model)) return(mf) dots <- list(...) 
- nargs <- dots[match(c("data", "na.action", "subset"), + nargs <- dots[match(c("data", "na.action", "subset", "weights"), names(dots), 0L)] mf <- formula$call - mf <- mf[c(1L, match(c("formula", "data", "subset", "na.action"), + mf <- mf[c(1L, match(c("formula", "data", "subset", "na.action", "weights"), names(mf), 0L))] mf$drop.unused.levels <- TRUE mf[[1L]] <- as.name("model.frame") diff --git a/R/clusterers.R b/R/clusterers.R index b021979..63c1326 100644 --- a/R/clusterers.R +++ b/R/clusterers.R @@ -19,9 +19,9 @@ function(name, class = NULL) } RWeka_build_clusterer <- -function(x, control, name) +function(x, control, name, weights = NULL) { - instances <- read_data_into_Weka(x) + instances <- read_data_into_Weka(x, weights) ## Build the clusterer. clusterer <- .jnew(name) diff --git a/R/readers.R b/R/readers.R index 45b8754..b06bae6 100644 --- a/R/readers.R +++ b/R/readers.R @@ -27,10 +27,10 @@ function(file) read_model_frame_into_Weka <- -function(mf) +function(mf, weights = NULL) { ## Model frame has the class variable in first position. - read_data_into_Weka(mf, 1L) + read_data_into_Weka(mf, weights, 1L) } write.arff <- @@ -147,7 +147,7 @@ function(x) } read_data_into_Weka <- -function(x, classIndex = 0L) +function(x, weights = NULL, classIndex = 0L) { ## FastVector was deprecated in Weka >= 3-7-1. Now we have to use ## the List interface (see the cast of ArrayList in the Attribute @@ -228,8 +228,12 @@ function(x, classIndex = 0L) ## Populate. x <- unlist(x, use.names = FALSE) x[is.na(x)] <- NaN # Weka missing value. + if(is.null(weights)) { + weights <- rep(1.0, n) + } + weights[is.na(weights)] <- 1.0 .jcall("RWekaInterfaces", "V", "addInstances", - instances, .jarray(x), as.integer(n)) + instances, .jarray(round(x, digits=6)), .jarray(weights), as.integer(n)) ## Note that using dim and dimnames attributes would result in a ## matrix, which seems a bad idea. 
diff --git a/java/RWekaInterfaces.java b/java/RWekaInterfaces.java index 7d9530b..82db092 100644 --- a/java/RWekaInterfaces.java +++ b/java/RWekaInterfaces.java @@ -102,19 +102,19 @@ public abstract class RWekaInterfaces { // Instance is now an Interface, Weka >= 3-7-1. The equivalent // class seems to be DenseInstance. public static void addInstances(Instances instances, double[] data, - int nrow) + double[] weights, int nrow) throws Exception { int i, j, ncol = instances.numAttributes(); - if (data.length / ncol != nrow) { + if ((double) data.length / (double) ncol != nrow) { throw new Exception("invalid number of rows 'nrow'"); } for (i = 0; i < nrow; i++) { Instance instance = new DenseInstance(ncol); - for (j = 0; j < ncol; j++) - instance.setValue(j, data[i+j*nrow]); + for (j = 0; j < ncol; j++) { instance.setValue(j, data[i+j*nrow]); } + instance.setWeight(weights[i]); instances.add(instance); } } diff --git a/man/Weka_associators.Rd b/man/Weka_associators.Rd index 549c6a3..39a9780 100644 --- a/man/Weka_associators.Rd +++ b/man/Weka_associators.Rd @@ -6,8 +6,8 @@ R interfaces to Weka association rule learning algorithms. } \usage{ -Apriori(x, control = NULL) -Tertius(x, control = NULL) +Apriori(x, control = NULL, weights = NULL) +Tertius(x, control = NULL, weights = NULL) } \arguments{ \item{x}{an R object with the data to be associated.} @@ -15,6 +15,8 @@ Tertius(x, control = NULL) character vector of control options, or \code{NULL} (default). Available options can be obtained on-line using the Weka Option Wizard \code{\link{WOW}}, or the Weka documentation.} + \item{weights}{an optional list of weights, one for each row in data. 
If + \code{NULL}, each row is assigned the weight 1.} } \value{ A list inheriting from class \code{Weka_associators} with components diff --git a/man/Weka_classifier_functions.Rd b/man/Weka_classifier_functions.Rd index e7f0fed..2e6f43e 100644 --- a/man/Weka_classifier_functions.Rd +++ b/man/Weka_classifier_functions.Rd @@ -9,11 +9,11 @@ } \usage{ LinearRegression(formula, data, subset, na.action, - control = Weka_control(), options = NULL) + control = Weka_control(), options = NULL, weights = NULL) Logistic(formula, data, subset, na.action, - control = Weka_control(), options = NULL) + control = Weka_control(), options = NULL, weights = NULL) SMO(formula, data, subset, na.action, - control = Weka_control(), options = NULL) + control = Weka_control(), options = NULL, weights = NULL) } \arguments{ \item{formula}{a symbolic description of the model to be fit.} @@ -29,6 +29,8 @@ SMO(formula, data, subset, na.action, the Weka documentation.} \item{options}{a named list of further options, or \code{NULL} (default). See \bold{Details}.} + \item{weights}{an optional numeric vector of weights, one for each row in data. If + \code{NULL}, each row is assigned the weight 1.} } \value{ A list inheriting from classes \code{Weka_functions} and diff --git a/man/Weka_classifier_lazy.Rd b/man/Weka_classifier_lazy.Rd index fbd022c..be3ab01 100644 --- a/man/Weka_classifier_lazy.Rd +++ b/man/Weka_classifier_lazy.Rd @@ -8,9 +8,9 @@ } \usage{ IBk(formula, data, subset, na.action, - control = Weka_control(), options = NULL) + control = Weka_control(), options = NULL, weights = NULL) LBR(formula, data, subset, na.action, - control = Weka_control(), options = NULL) + control = Weka_control(), options = NULL, weights = NULL) } \arguments{ \item{formula}{a symbolic description of the model to be fit.} @@ -26,6 +26,8 @@ LBR(formula, data, subset, na.action, the Weka documentation.} \item{options}{a named list of further options, or \code{NULL} (default). 
See \bold{Details}.} + \item{weights}{an optional numeric vector of weights, one for each row in data. If + \code{NULL}, each row is assigned the weight 1.} } \value{ A list inheriting from classes \code{Weka_lazy} and diff --git a/man/Weka_classifier_meta.Rd b/man/Weka_classifier_meta.Rd index 4f8f847..9c9dd85 100644 --- a/man/Weka_classifier_meta.Rd +++ b/man/Weka_classifier_meta.Rd @@ -12,17 +12,18 @@ } \usage{ AdaBoostM1(formula, data, subset, na.action, - control = Weka_control(), options = NULL) + control = Weka_control(), options = NULL, weights = NULL) Bagging(formula, data, subset, na.action, - control = Weka_control(), options = NULL) + control = Weka_control(), options = NULL, weights = NULL) LogitBoost(formula, data, subset, na.action, - control = Weka_control(), options = NULL) + control = Weka_control(), options = NULL, weights = NULL) MultiBoostAB(formula, data, subset, na.action, - control = Weka_control(), options = NULL) + control = Weka_control(), options = NULL, weights = NULL) Stacking(formula, data, subset, na.action, - control = Weka_control(), options = NULL) + control = Weka_control(), options = NULL, weights = NULL) CostSensitiveClassifier(formula, data, subset, na.action, - control = Weka_control(), options = NULL) + control = Weka_control(), options = NULL, + weights = NULL) } \arguments{ \item{formula}{a symbolic description of the model to be fit.} @@ -42,6 +43,8 @@ CostSensitiveClassifier(formula, data, subset, na.action, function.} \item{options}{a named list of further options, or \code{NULL} (default). See \bold{Details}.} + \item{weights}{an optional numeric vector of weights, one for each row in data. 
If + \code{NULL}, each row is assigned the weight 1.} } \value{ A list inheriting from classes \code{Weka_meta} and diff --git a/man/Weka_classifier_rules.Rd b/man/Weka_classifier_rules.Rd index 4c0d1d6..6326f00 100644 --- a/man/Weka_classifier_rules.Rd +++ b/man/Weka_classifier_rules.Rd @@ -10,13 +10,13 @@ } \usage{ JRip(formula, data, subset, na.action, - control = Weka_control(), options = NULL) + control = Weka_control(), options = NULL, weights = NULL) M5Rules(formula, data, subset, na.action, - control = Weka_control(), options = NULL) + control = Weka_control(), options = NULL, weights = NULL) OneR(formula, data, subset, na.action, - control = Weka_control(), options = NULL) + control = Weka_control(), options = NULL, weights = NULL) PART(formula, data, subset, na.action, - control = Weka_control(), options = NULL) + control = Weka_control(), options = NULL, weights = NULL) } \arguments{ \item{formula}{a symbolic description of the model to be fit.} @@ -32,6 +32,8 @@ PART(formula, data, subset, na.action, the Weka documentation.} \item{options}{a named list of further options, or \code{NULL} (default). See \bold{Details}.} + \item{weights}{an optional numeric vector of weights, one for each row in data. 
If + \code{NULL}, each row is assigned the weight 1.} } \value{ A list inheriting from classes \code{Weka_rules} and diff --git a/man/Weka_classifier_trees.Rd b/man/Weka_classifier_trees.Rd index 57ffbb9..35df486 100644 --- a/man/Weka_classifier_trees.Rd +++ b/man/Weka_classifier_trees.Rd @@ -11,13 +11,13 @@ } \usage{ J48(formula, data, subset, na.action, - control = Weka_control(), options = NULL) + control = Weka_control(), options = NULL, weights = NULL) LMT(formula, data, subset, na.action, - control = Weka_control(), options = NULL) + control = Weka_control(), options = NULL, weights = NULL) M5P(formula, data, subset, na.action, - control = Weka_control(), options = NULL) + control = Weka_control(), options = NULL, weights = NULL) DecisionStump(formula, data, subset, na.action, - control = Weka_control(), options = NULL) + control = Weka_control(), options = NULL, weights = NULL) } \arguments{ \item{formula}{a symbolic description of the model to be fit.} @@ -33,6 +33,8 @@ DecisionStump(formula, data, subset, na.action, the Weka documentation.} \item{options}{a named list of further options, or \code{NULL} (default). See \bold{Details}.} + \item{weights}{an optional numeric vector of weights, one for each row in data. If + \code{NULL}, each row is assigned the weight 1.} } \value{ A list inheriting from classes \code{Weka_tree} and diff --git a/man/Weka_classifiers.Rd b/man/Weka_classifiers.Rd index a45faad..5855c8d 100644 --- a/man/Weka_classifiers.Rd +++ b/man/Weka_classifiers.Rd @@ -12,12 +12,13 @@ R interface functions to Weka classifiers are created by \code{\link{make_Weka_classifier}}, and have formals \code{formula}, - \code{data}, \code{subset}, \code{na.action}, and \code{control} - (default: none), where the first four have the \dQuote{usual} meanings - for statistical modeling functions in R, and the last again specifies - the control options to be employed by the Weka learner. 
By default, - the model formulae should only use the \samp{+} and \samp{-} operators - to indicate the variables to be included or not used, respectively. + \code{data}, \code{subset}, \code{na.action}, \code{control} + (default: none), and \code{weights} (default: \code{NULL}), where the first + four have the \dQuote{usual} meanings for statistical modeling functions in R, + the fifth again specifies the control options to be employed by the Weka + learner, and the last gives the weight attached to each row in \code{data}. By + default, the model formulae should only use the \samp{+} and \samp{-} + operators to indicate the variables to be included or not used, respectively. Objects created by these interfaces always inherit from class \code{Weka_classifier}, and have at least suitable \code{print}, diff --git a/man/Weka_clusterers.Rd b/man/Weka_clusterers.Rd index e0341d6..9ec56e0 100644 --- a/man/Weka_clusterers.Rd +++ b/man/Weka_clusterers.Rd @@ -9,11 +9,11 @@ R interfaces to Weka clustering algorithms. } \usage{ -Cobweb(x, control = NULL) -FarthestFirst(x, control = NULL) -SimpleKMeans(x, control = NULL) -XMeans(x, control = NULL) -DBScan(x, control = NULL) +Cobweb(x, control = NULL, weights = NULL) +FarthestFirst(x, control = NULL, weights = NULL) +SimpleKMeans(x, control = NULL, weights = NULL) +XMeans(x, control = NULL, weights = NULL) +DBScan(x, control = NULL, weights = NULL) } \arguments{ \item{x}{an R object with the data to be clustered.} @@ -21,6 +21,8 @@ DBScan(x, control = NULL) character vector of control options, or \code{NULL} (default). Available options can be obtained on-line using the Weka Option Wizard \code{\link{WOW}}, or the Weka documentation.} + \item{weights}{an optional numeric vector of weights, one for each row in data. 
If + \code{NULL}, each row is assigned the weight 1.} } \value{ A list inheriting from class \code{Weka_clusterers} with components diff --git a/man/Weka_interfaces.Rd b/man/Weka_interfaces.Rd index c468ed3..b7a3c05 100644 --- a/man/Weka_interfaces.Rd +++ b/man/Weka_interfaces.Rd @@ -10,9 +10,9 @@ available interfaces. } \usage{ -make_Weka_associator(name, class = NULL) -make_Weka_classifier(name, class = NULL, handlers = list()) -make_Weka_clusterer(name, class = NULL) +make_Weka_associator(name, class = NULL, weights = NULL) +make_Weka_classifier(name, class = NULL, handlers = list(), weights = NULL) +make_Weka_clusterer(name, class = NULL, weights = NULL) make_Weka_filter(name, class = NULL) list_Weka_interfaces() } @@ -25,6 +25,8 @@ list_Weka_interfaces() representing associators, classifiers, and clusterers).} \item{handlers}{a named list of special handler functions, see \bold{Details}.} + \item{weights}{an optional numeric vector of weights, one for each row in data. If + \code{NULL}, each row is assigned the weight 1.} } \details{ \code{make_Weka_associator} and \code{make_Weka_clusterer} create an R