Mercurial > repos > davidvanzessen > shm_csr
changeset 18:949a30f04d9b draft
Uploaded
author | davidvanzessen |
---|---|
date | Mon, 28 Nov 2016 04:41:22 -0500 |
parents | b95fa7e426c3 |
children | fff3c83ec9b8 |
files | merge_and_filter.r shm_csr.xml wrapper.sh |
diffstat | 3 files changed, 26 insertions(+), 28 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/merge_and_filter.r Thu Nov 24 10:24:19 2016 -0500
+++ b/merge_and_filter.r Mon Nov 28 04:41:22 2016 -0500
@@ -96,8 +96,6 @@
 result$JGene = gsub("^Homsap ", "", result$J.GENE.and.allele)
 result$JGene = gsub("[*].*", "", result$JGene)
-print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")])
-
 splt = strsplit(class.filter, "_")[[1]]
 chunk_hit_threshold = as.numeric(splt[1])
 nt_hit_threshold = as.numeric(splt[2])
@@ -112,8 +110,6 @@
 result$best_match = "all"
 }
-print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")])
-
 write.table(x=result, file=gsub("merged.txt$", "before_filters.txt", output), sep="\t",quote=F,row.names=F,col.names=T)
 print(paste("Number of empty CDR1 sequences:", sum(result$CDR1.IMGT.seq == "")))
@@ -139,8 +135,6 @@
 filtering.steps = rbind(filtering.steps, c("After empty CDR2, FR3 filter", nrow(result)))
 }
-print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")])
-
 if(empty.region.filter == "leader"){
 result = result[!(grepl("n|N", result$FR1.IMGT.seq) | grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR1.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),]
 } else if(empty.region.filter == "FR1"){
@@ -151,9 +145,6 @@
 result = result[!(grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),]
 }
-print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")])
-print(result[result$Sequence.ID == "JY8QFUQ01BNS72",c("Sequence.ID","best_match")])
-
 print(paste("Number of sequences in result after n filtering:", nrow(result)))
 filtering.steps = rbind(filtering.steps, c("After N filter", nrow(result)))
@@ -192,9 +183,6 @@
 result = result[!duplicated(result$unique.def),]
 }
-print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")])
-print(result[result$Sequence.ID == "JY8QFUQ01BNS72",c("Sequence.ID","best_match")])
-
 write.table(result, gsub("before_unique_filter.txt", "after_unique_filter.txt", before.unique.file), sep="\t", quote=F,row.names=F,col.names=T)
 filtering.steps = rbind(filtering.steps, c("After filter unique sequences", nrow(result)))
@@ -217,11 +205,6 @@
 result = result[!(duplicated(result$past)), ]
-
-
-print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")])
-
-
 result = result[,!(names(result) %in% c("past", "best_match_class"))]
 print(paste("Number of sequences in result after", unique.type, "filtering:", nrow(result)))
```
```diff
--- a/shm_csr.xml Thu Nov 24 10:24:19 2016 -0500
+++ b/shm_csr.xml Mon Nov 28 04:41:22 2016 -0500
@@ -33,13 +33,15 @@
 <option value="CDR3.IMGT.seq">CDR3 (nt)</option>
 <option value="Sequence.ID" selected="true">Don't remove duplicates</option>
 </param>
- <param name="class_filter" type="select" label="Human Class/Subclass filter" help="" >
- <option value="70_70" selected="true">>70% class and >70% subclass</option>
- <option value="60_55">>60% class and >55% subclass</option>
- <option value="70_0">>70% class</option>
- <option value="60_0">>60% class</option>
- <option value="101_101">Do not assign (sub)class</option>
- </param>
+ <conditional name="class_filter_cond">
+ <param name="class_filter" type="select" label="Human Class/Subclass filter" help="" >
+ <option value="70_70" selected="true">>70% class and >70% subclass</option>
+ <option value="60_55">>60% class and >55% subclass</option>
+ <option value="70_0">>70% class</option>
+ <option value="60_0">>60% class</option>
+ <option value="101_101">Do not assign (sub)class</option>
+ </param>
+ </conditional>
 <conditional name="naive_output_cond">
 <param name="naive_output" type="select" label="Output new IMGT archives per class into your history?">
 <option value="yes">Yes</option>
@@ -59,15 +61,23 @@
 <data format="html" name="out_file" label = "SHM & CSR on ${in_file.name}"/>
 <data format="imgt_archive" name="naive_output_ca" label = "Naive CA input data from ${in_file.name}" >
 <filter>naive_output_cond['naive_output'] == "yes"</filter>
+ <filter>class_filter_cond['class_filter'] != "101_101"</filter>
 </data>
 <data format="imgt_archive" name="naive_output_cg" label = "Naive CG input data from ${in_file.name}" >
 <filter>naive_output_cond['naive_output'] == "yes"</filter>
+ <filter>class_filter_cond['class_filter'] != "101_101"</filter>
 </data>
 <data format="imgt_archive" name="naive_output_cm" label = "Naive CM input data from ${in_file.name}" >
 <filter>naive_output_cond['naive_output'] == "yes"</filter>
+ <filter>class_filter_cond['class_filter'] != "101_101"</filter>
 </data>
 <data format="imgt_archive" name="naive_output_ce" label = "Naive CE input data from ${in_file.name}" >
 <filter>naive_output_cond['naive_output'] == "yes"</filter>
+ <filter>class_filter_cond['class_filter'] != "101_101"</filter>
+ </data>
+ <data format="imgt_archive" name="naive_output_ca" label = "Naive input data from ${in_file.name}" >
+ <filter>naive_output_cond['naive_output'] == "yes"</filter>
+ <filter>class_filter_cond['class_filter'] == "101_101"</filter>
 </data>
 </outputs>
 <citations>
```
```diff
--- a/wrapper.sh Thu Nov 24 10:24:19 2016 -0500
+++ b/wrapper.sh Mon Nov 28 04:41:22 2016 -0500
@@ -651,10 +651,15 @@
 if [[ "$naive_output" == "yes" ]]
 then
- cp $outdir/new_IMGT_IGA.txz ${naive_output_ca}
- cp $outdir/new_IMGT_IGG.txz ${naive_output_cg}
- cp $outdir/new_IMGT_IGM.txz ${naive_output_cm}
- cp $outdir/new_IMGT_IGE.txz ${naive_output_ce}
+ if [[ "${class_filter}" == "101_101" ]]
+ then
+ cp $outdir/new_IMGT.txz ${naive_output_ca}
+ else
+ cp $outdir/new_IMGT_IGA.txz ${naive_output_ca}
+ cp $outdir/new_IMGT_IGG.txz ${naive_output_cg}
+ cp $outdir/new_IMGT_IGM.txz ${naive_output_cm}
+ cp $outdir/new_IMGT_IGE.txz ${naive_output_ce}
+ fi
 fi
 echo "</table>" >> $outdir/base_overview.html
```