Knowledge base

Run R code in parallel

Introduction

This example shows how to run R code in parallel with the "foreach" and "doParallel" packages.

Code example

library(foreach)
library(doParallel)
# Number of worker processes to start; adjust to the cores available.
n_workers <- 2
# Create the worker cluster and register it as the backend for %dopar%.
cl <- makeCluster(n_workers)
registerDoParallel(cl)

# Split dt_test into a named list with one table per distinct Cluster value.
# Elements are always keyed by the cluster value converted to character, so
# numeric cluster ids are stored by name rather than by position and
# names(dt_test_lst) is populated for every cluster.
cluster_ids <- unique(dt_test$Cluster)
dt_test_lst <- vector("list", length(cluster_ids))
names(dt_test_lst) <- as.character(cluster_ids)
for (cluster_id in cluster_ids) {
  dt_test_lst[[as.character(cluster_id)]] <- dt_test[Cluster == cluster_id, ]
}

# Run the per-cluster tests in parallel.
# Each task receives one element of dt_test_lst, so a worker is sent only the
# table it processes instead of the whole list. NULL entries are dropped so
# every task gets a table. The per-task results are row-bound into pvalue_dt.
# `expr` and `umap_dt` are read from the calling environment.
pvalue_dt <- tryCatch(
  foreach(
    cluster_dt = Filter(Negate(is.null), dt_test_lst),
    .combine = rbind,
    .packages = "data.table"
  ) %dopar% {
    cluster_dt[, {
      # Rows of expr inside vs. outside the current cluster, for column Var.
      in_cluster <- umap_dt$Cluster == Cluster
      inside <- expr[in_cluster, Var]
      outside <- expr[!in_cluster, Var]
      list(
        Change = 2^(mean(inside) - mean(outside)),
        Pvalue = wilcox.test(inside, outside)$p.value
      )
    }, by = .(Var, Cluster)]
  },
  # Shut the explicitly created workers down even if a task fails.
  finally = stopCluster(cl)
)

# Write the combined results to an RDS file in the working directory.
saveRDS(object = pvalue_dt, file = "factor_pvalue.rds")

Leave a Reply

Your email address will not be published. Required fields are marked *