added tests for calculations

zachary-foster · zachary-foster · commit e24ca2b15c3f · 2017-12-19T12:58:27.000-08:00
diff --git a/R/option_parsers.R b/R/option_parsers.R
@@ -82,7 +82,8 @@ get_taxmap_data <- function(obj, dataset) {
   # Check that dataset exists
   error_msg <- paste0('The dataset "', dataset,
                       '" is not in the object supplied. Datasets found include:\n  ',
-                      limited_print(paste0("[", seq_along(obj$data), "] ", names(obj$data)), type = "silent"))
+                      limited_print(paste0("[", seq_along(obj$data), "] ", names(obj$data)),
+                                    type = "silent"))
   if (is.character(dataset)) {
     if (! dataset %in% names(obj$data)) {
       stop(error_msg, call. = FALSE)
@@ -281,7 +282,8 @@ get_taxmap_other_cols <- function(obj, dataset, cols, other_cols = NULL) {
   in_both <- other_cols %in% cols
   if (sum(in_both) > 0) {
     warning(paste0("The following columns will be replaced in the output:\n  ",
-                   limited_print(other_cols[in_both])))
+                   limited_print(other_cols[in_both], type = "silent")),
+            call. = FALSE)
   }
   result <- other_cols[! in_both]
   
diff --git a/tests/testthat/test--calculations.R b/tests/testthat/test--calculations.R
@@ -35,55 +35,67 @@ test_that("Counting the number of samples with reads", {
 
 test_that("Observation proportions", {
   # Calculate proportions for all numeric columns
-  calc_obs_props(x, "tax_data")
+  result <- calc_obs_props(x, "tax_data")
+  expect_equal(colnames(x$data$tax_data)[-(1:3)], colnames(result)[-1])
+  expect_equal(result$`700016050`, x$data$tax_data$`700016050` / sum(x$data$tax_data$`700016050`))
   
   # Calculate proportions for a subset of columns
-  calc_obs_props(x, "tax_data", cols = c("700035949", "700097855", "700100489"))
-  calc_obs_props(x, "tax_data", cols = 4:6)
-  calc_obs_props(x, "tax_data", cols = startsWith(colnames(x$data$tax_data), "70001"))
+  col_subset <- c("700035949", "700097855", "700100489")
+  result <- calc_obs_props(x, "tax_data", cols = col_subset)
+  expect_equal(col_subset, colnames(result)[-1])
+  
+  result <- calc_obs_props(x, "tax_data", cols = 4:6)
+  expect_equal(col_subset, colnames(result)[-1])
+
+  result <- calc_obs_props(x, "tax_data",
+                           cols = startsWith(colnames(x$data$tax_data), "70001"))
+  expect_equal(colnames(x$data$tax_data)[startsWith(colnames(x$data$tax_data), "70001")],
+               colnames(result)[-1])
   
   # Including all other columns in ouput
-  calc_obs_props(x, "tax_data", other_cols = TRUE)
+  expect_warning(result <- calc_obs_props(x, "tax_data", other_cols = TRUE))
+  expect_true(all(c("otu_id", "lineage") %in% colnames(result)))
   
   # Inlcuding specific columns in output
-  calc_obs_props(x, "tax_data", cols = c("700035949", "700097855", "700100489"),
+  result <- calc_obs_props(x, "tax_data", cols = col_subset,
                  other_cols = 2:3)
+  expect_true(all(c("otu_id", "lineage") %in% colnames(result)))
   
   # Rename output columns
-  calc_obs_props(x, "tax_data", cols = c("700035949", "700097855", "700100489"),
+  result <- calc_obs_props(x, "tax_data", cols = col_subset,
                  out_names = c("a", "b", "c"))
+  expect_equal(colnames(result), c("taxon_id", "a", "b", "c"))
+  
 })
 
 
 test_that("Summing counts per taxon", {
   # Calculate the taxon abundance for each numeric column (i.e. sample)
-  calc_taxon_abund(x, "tax_data")
+  result <- calc_taxon_abund(x, "tax_data")
+  expect_equal(sum(x$data$tax_data$`700035949`), result$`700035949`[1])
   
   # Calculate the taxon abundance for a subset of columns
-  calc_taxon_abund(x, "tax_data", cols = 4:5)
-  calc_taxon_abund(x, "tax_data", cols = c("700035949", "700097855"))
-  calc_taxon_abund(x, "tax_data", cols = startsWith(colnames(x$data$tax_data), "70001"))
-  
+  expect_equal(calc_taxon_abund(x, "tax_data", cols = 4:5), 
+               calc_taxon_abund(x, "tax_data", cols = c("700035949", "700097855")))
+
   # Calculate the taxon abundance for groups of columns (e.g. treatments)
   #  Note that we do not need to use the "cols" option for this since all
   #  numeric columns are samples in this dataset. If there were numeric columns
   #  that were not samples present in hmp_samples, the "cols" would be needed.
-  calc_taxon_abund(x, "tax_data", groups = hmp_samples$sex)
-  calc_taxon_abund(x, "tax_data", groups = hmp_samples$body_site)
-  
-  # The above example using the "cols" option, even though not needed in this case
-  calc_taxon_abund(x, "tax_data", cols = hmp_samples$sample_id,
-                   groups = hmp_samples$sex)
+  result <- calc_taxon_abund(x, "tax_data", groups = hmp_samples$sex)
+  expect_equal(colnames(result), c("taxon_id", "female", "male"))
   
   # Rename the output columns
-  calc_taxon_abund(x, "tax_data", cols = hmp_samples$sample_id[1:10],
-                   out_names = letters[1:10])
-  calc_taxon_abund(x, "tax_data", groups = hmp_samples$sex,
-                   out_names = c("Women", "Men"))
+  total_counts <- sum(x$data$tax_data[, hmp_samples$sample_id])
+  result <- calc_taxon_abund(x, "tax_data", groups = hmp_samples$sex,
+                             out_names = c("Women", "Men"))
+  expect_equal(colnames(result), c("taxon_id", "Women", "Men"))
+  expect_equal(total_counts, sum(result[1, c("Women", "Men")]))
   
   # Geting a total for all columns 
-  calc_taxon_abund(x, "tax_data", cols = hmp_samples$sample_id,
-                   groups = rep("total", nrow(hmp_samples)))
+  result <- calc_taxon_abund(x, "tax_data", cols = hmp_samples$sample_id,
+                             groups = rep("total", nrow(hmp_samples)))
+  expect_equal(total_counts, result$total[1])
 })
 
 
@@ -95,52 +107,24 @@ test_that("Comparing groups of samples", {
   x$data$tax_table <- calc_taxon_abund(x, dataset = "otu_table", cols = hmp_samples$sample_id)
   
   # Calculate difference between groups
-  x$data$diff_table <- compare_treatments(x, dataset = "tax_table",
+  expect_warning(x$data$diff_table <- compare_treatments(x, dataset = "tax_table",
                                           cols = hmp_samples$sample_id,
-                                          groups = hmp_samples$body_site)
+                                          groups = hmp_samples$body_site))
+  expect_equal(nrow(x$data$diff_table),
+               ncol(combn(length(unique(hmp_samples$body_site)), 2)) * nrow(x$data$tax_table))
   
 })
 
 
 test_that("Rarefying observation counts", {
   # Rarefy all numeric columns
-  rarefy_obs(x, "tax_data")
-  
-  # Rarefy a subset of columns
-  rarefy_obs(x, "tax_data", cols = c("700035949", "700097855", "700100489"))
-  rarefy_obs(x, "tax_data", cols = 4:6)
-  rarefy_obs(x, "tax_data", cols = startsWith(colnames(x$data$tax_data), "70001"))
-  
-  # Including all other columns in ouput
-  rarefy_obs(x, "tax_data", other_cols = TRUE)
-  
-  # Inlcuding specific columns in output
-  rarefy_obs(x, "tax_data", cols = c("700035949", "700097855", "700100489"),
-             other_cols = 2:3)
-  
-  # Rename output columns
-  rarefy_obs(x, "tax_data", cols = c("700035949", "700097855", "700100489"),
-             out_names = c("a", "b", "c"))
+  result <- rarefy_obs(x, "tax_data")
+  expect_equal(length(unique(colSums(result[, hmp_samples$sample_id]))), 1)
 })
 
 
 test_that("Converting low counts to zero", {
   # Calculate proportions for all numeric columns
-  calc_obs_props(x, "tax_data")
-  
-  # Calculate proportions for a subset of columns
-  calc_obs_props(x, "tax_data", cols = c("700035949", "700097855", "700100489"))
-  calc_obs_props(x, "tax_data", cols = 4:6)
-  calc_obs_props(x, "tax_data", cols = startsWith(colnames(x$data$tax_data), "70001"))
-  
-  # Including all other columns in ouput
-  calc_obs_props(x, "tax_data", other_cols = TRUE)
-  
-  # Inlcuding specific columns in output
-  calc_obs_props(x, "tax_data", cols = c("700035949", "700097855", "700100489"),
-                 other_cols = 2:3)
-  
-  # Rename output columns
-  calc_obs_props(x, "tax_data", cols = c("700035949", "700097855", "700100489"),
-                 out_names = c("a", "b", "c"))
+  result <- zero_low_counts(x, "tax_data")
+  expect_equal(sum(result[, hmp_samples$sample_id] == 1), 0)
 })