changeset 14:59765d2c8890 draft

Uploaded
author davidvanzessen
date Fri, 11 Nov 2016 07:31:48 -0500 (2016-11-11)
parents 933fb21568ce
children 61d0a6318711
files merge_and_filter.r shm_csr.xml wrapper.sh
diffstat 3 files changed, 21 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/merge_and_filter.r	Fri Nov 11 03:49:30 2016 -0500
+++ b/merge_and_filter.r	Fri Nov 11 07:31:48 2016 -0500
@@ -6,22 +6,24 @@
 mutationanalysisfile = args[3]
 mutationstatsfile = args[4]
 hotspotsfile = args[5]
-gene_identification_file= args[6]
-output = args[7]
-before.unique.file = args[8]
-unmatchedfile = args[9]
-method=args[10]
-functionality=args[11]
-unique.type=args[12]
-filter.unique=args[13]
-class.filter=args[14]
-empty.region.filter=args[15]
+aafile = args[6]
+gene_identification_file= args[7]
+output = args[8]
+before.unique.file = args[9]
+unmatchedfile = args[10]
+method=args[11]
+functionality=args[12]
+unique.type=args[13]
+filter.unique=args[14]
+class.filter=args[15]
+empty.region.filter=args[16]
 
 summ = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
 sequences = read.table(sequencesfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
 mutationanalysis = read.table(mutationanalysisfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
 mutationstats = read.table(mutationstatsfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
 hotspots = read.table(hotspotsfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
+AAs = read.table(aafile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
 gene_identification = read.table(gene_identification_file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
 
 if(method == "blastn"){
@@ -81,6 +83,10 @@
 names(sequences) = c("Sequence.ID", "FR1.IMGT.seq", "CDR1.IMGT.seq", "FR2.IMGT.seq", "CDR2.IMGT.seq", "FR3.IMGT.seq", "CDR3.IMGT.seq")
 result = merge(result, sequences, by="Sequence.ID", all.x=T)
 
+AAs = AAs[,c("Sequence.ID", "CDR3.IMGT")]
+names(AAs) = c("Sequence.ID", "CDR3.IMGT.AA")
+result = merge(result, AAs, by="Sequence.ID", all.x=T)
+
 print(paste("Number of sequences in result after merging with sequences:", nrow(result)))
 
 result$VGene = gsub("^Homsap ", "", result$V.GENE.and.allele)
--- a/shm_csr.xml	Fri Nov 11 03:49:30 2016 -0500
+++ b/shm_csr.xml	Fri Nov 11 07:31:48 2016 -0500
@@ -22,10 +22,10 @@
 			<option value="no">No</option>
 		</param>
 		<param name="unique" type="select" label="Remove duplicates based on" help="" >
-			<option value="VGene,AA.JUNCTION,best_match_class">Top.V.Gene, CDR3 (AA), C region</option>
-			<option value="VGene,AA.JUNCTION">Top.V.Gene, CDR3 (AA)</option>
-			<option value="AA.JUNCTION,best_match_class">CDR3 (AA), C region</option>
-			<option value="AA.JUNCTION">CDR3 (AA)</option>
+			<option value="VGene,CDR3.IMGT.AA,best_match_class">Top.V.Gene, CDR3 (AA), C region</option>
+			<option value="VGene,CDR3.IMGT.AA">Top.V.Gene, CDR3 (AA)</option>
+			<option value="CDR3.IMGT.AA,best_match_class">CDR3 (AA), C region</option>
+			<option value="CDR3.IMGT.AA">CDR3 (AA)</option>
 			
 			<option value="VGene,CDR3.IMGT.seq,best_match_class">Top.V.Gene, CDR3.nt.Seq, C region</option>
 			<option value="VGene,CDR3.IMGT.seq">Top.V.Gene, CDR3 (nt)</option>
--- a/wrapper.sh	Fri Nov 11 03:49:30 2016 -0500
+++ b/wrapper.sh	Fri Nov 11 07:31:48 2016 -0500
@@ -62,7 +62,7 @@
 echo "---------------- merge_and_filter.r ----------------"
 echo "---------------- merge_and_filter.r ----------------<br />" >> $log
 
-Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} ${empty_region_filter} 2>&1
+Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $PWD/aa.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} ${empty_region_filter} 2>&1
 
 if [[ "$fast" == "no" ]] ; then