#!/usr/bin/env Rscript
#
# Validate the format of a CAGI submission file and print its MD5 hash.
#
# Usage: Rscript cagi-validate.R <infile>
#
# The input must be a tab-delimited text file with exactly 39 non-blank lines
# (one header line followed by 38 data lines) and 9 columns per line:
#   1. Nucleotide_position    allele descriptor(s), comma separated
#   2. Variant                allele descriptor(s), comma separated
#   3. P-value_MO             numeric or "*"
#   4. Standard_deviation     numeric or "*"
#   5. P-value_MO+WT          numeric or "*"
#   6. Standard_deviation     numeric or "*"
#   7. Functional_effect      numeric or "*"
#   8. Confidence:            numeric or "*"
#   9. Comments               free text, may be empty
#
# The script stops with an error message at the first violated rule.
#
# Regular-expression reference:
# https://cran.r-project.org/web/packages/stringr/vignettes/regular-expressions.html

# Only referenced by the commented-out hashing alternative further down; the
# dependency is kept so that alternative can be re-enabled without other edits.
library(digest)

# Command line handling ----

args <- commandArgs(trailingOnly = TRUE)

# check if file argument was provided
if (length(args) < 1) {
  stop("Usage: Rscript cagi-validate.R <infile>")
}
infile <- args[[1]]

# check if file exists
if (!file.exists(infile)) {
  stop("File ", infile, " does not exist!")
}
# and if file can be read (file.access() returns -1 on failure)
if (file.access(infile, mode = 4) < 0) {
  stop("No permission to read file ", infile, "!")
}

# Read and split the table ----

# read the lines in the file (scan() skips blank lines by default)
lines <- scan(infile, what = character(), sep = "\n")
if (length(lines) != 39) {
  stop("File does not have the required 39 lines")
}

# Break lines into data fields. strsplit() silently drops a trailing empty
# field, which would make a row with an empty (optional) Comments column look
# like it has only 8 fields. Appending one sentinel tab keeps that field.
fields <- strsplit(paste0(lines, "\t"), "\t", fixed = TRUE)

# check if each line has nine fields
n_fields <- vapply(fields, length, integer(1))
if (any(n_fields != 9L)) {
  # Comments are optional, but the column itself must be present
  stop(
    "Must have 9 tab-delimited columns including ",
    "Nucleotide position and Variant!"
  )
}

# Header ----

header <- fields[[1]]
valid_header <- c(
  "Nucleotide_position", "Variant", "P-value_MO", "Standard_deviation",
  "P-value_MO+WT", "Standard_deviation", "Functional_effect", "Confidence:",
  "Comments"
)
if (any(header != valid_header)) {
  stop("Invalid table headers!")
}

# Body ----

# extract data columns for the remaining lines: one character vector per column
body <- fields[-1]
body_cols <- lapply(
  seq_len(9),
  function(i) vapply(body, `[[`, character(1), i)
)

# One or more comma separated allele descriptors of the form
# <word char><digits><word char>; ^ marks start and $ marks end of the cell.
allele_pattern <- "^\\w\\d+\\w(,\\w\\d+\\w)*$"

# check if the first column contains valid allele descriptors
if (!all(grepl(allele_pattern, body_cols[[1]]))) {
  stop("First column must contain valid alleles!")
}
# check if the second (variant) column contains valid allele descriptors
if (!all(grepl(allele_pattern, body_cols[[2]]))) {
  stop("Second column must contain valid alleles!")
}

# TRUE for every cell that is either the placeholder "*" or parses as a number.
# The coercion warning is suppressed because a failed parse is reported through
# stop() below rather than through R's generic "NAs introduced by coercion".
is_numeric_or_star <- function(x) {
  x == "*" | !is.na(suppressWarnings(as.numeric(x)))
}

# check if columns three to eight hold numerical values or stars
for (i in 3:8) {
  if (!all(is_numeric_or_star(body_cols[[i]]))) {
    stop(
      "Column ", i, " (", valid_header[[i]],
      ") must be numeric or \"*\"!"
    )
  }
}

cat("\nCongratulations!\nYour submitted file has a validated format.\n")

# Checksums (optional) ----

# Only attempted when the required external tools are available.
if (file_test("-x", "/usr/bin/md5sum") && file_test("-x", "/usr/bin/cut")) {
  # Alternative using package digest (disabled):
  # cat("MD5 hash for your file is:", digest(body, algo="md5", serialize=TRUE,
  #     file=FALSE, length=Inf, skip="auto", ascii=FALSE, raw=FALSE, seed=0,
  #     errormode="warn"), "\n")
  cat("MD5 hash for your file is:")

  # Drop PATH so that only the absolute tool paths used below can be executed.
  Sys.unsetenv("PATH")

  # The file name must be spliced into the command from R; a literal "$infile"
  # would be expanded by the shell, where no such variable exists. shQuote()
  # protects names containing spaces or shell metacharacters.
  quoted_infile <- shQuote(infile)
  system(paste("/usr/bin/md5sum", quoted_infile, "| /usr/bin/cut -c 1-32"))
  cat("\n")

  # The receipt code is the MD5 of cagi.txt concatenated with the submission.
  if (file_test("-r", "cagi.txt") && file_test("-x", "/bin/cat")) {
    cat("Receipt code for your file is: ")
    system(paste(
      "/bin/cat cagi.txt", quoted_infile,
      "| /usr/bin/md5sum | /usr/bin/cut -c 1-32"
    ))
    cat("\n")
  }
  quit()
}