# HG changeset patch
# User davidvanzessen
# Date 1481891432 18000
# Node ID d001d0c05dbe8d918de49e3435d82aa3f50e3329
# Parent  bcec7bb4e08974217f69e4cde821f037e9ed6b1b
Uploaded
diff -r bcec7bb4e089 -r d001d0c05dbe experimental_design.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/experimental_design.xml	Fri Dec 16 07:30:32 2016 -0500
@@ -0,0 +1,57 @@
+
+	 
+	
+		experimental_design/experimental_design.sh 
+		#for $i, $f in enumerate($patients)
+            "$f.id"
+            #for $j, $g in enumerate($f.samples)
+            	${g.sample}
+            #end for
+		#end for
+		$out_file
+	
+	
+		
+            
+                
+            
+			
+		
+	
+	
+		
+	
+	
+Takes the ARGalaxy proprietary format and merges several samples and/or patients together.
+	
+ 
+    
+    10.1093/bioinformatics/btq281
+
+    
+    @ARTICLE{Kim07aninterior-point,
+    author = {Seung-jean Kim and Kwangmoo Koh and Michael Lustig and Stephen Boyd and Dimitry Gorinevsky},
+    title = {An interior-point method for large-scale l1-regularized logistic regression},
+    journal = {Journal of Machine Learning Research},
+    year = {2007},
+    volume = {8},
+    pages = {1519-1555}
+    }
+  
+  
+    
+      
+      
+      
+      
+      
+    
+    
+      
+      
+      
+      
+      
+    
+  
+
diff -r bcec7bb4e089 -r d001d0c05dbe igblast/igblast.sh
--- a/igblast/igblast.sh	Mon Dec 12 05:22:57 2016 -0500
+++ b/igblast/igblast.sh	Fri Dec 16 07:30:32 2016 -0500
@@ -7,6 +7,9 @@
 locus=$3
 output=$4
 
+declare -A speciesdict  # associative array: species label (with or without a " functional"/" non-functional" suffix, or already CamelCase) -> CamelCase species name
+
+speciesdict=(["Bos taurus functional"]="BosTaurus" ["Bos taurus non-functional"]="BosTaurus" ["Camelus dromedarius functional"]="CamelusDromedarius" ["Camelus dromedarius non-functional"]="CamelusDromedarius" ["Canis lupus familiaris functional"]="CanisLupusFamiliaris" ["Canis lupus familiaris non-functional"]="CanisLupusFamiliaris" ["Danio rerio non-functional"]="DanioRerio" ["Danio rerio functional"]="DanioRerio" ["Homo sapiens functional"]="HomoSapiens" ["Homo sapiens non-functional"]="HomoSapiens" ["Macaca mulatta non-functional"]="MacacaMulatta" ["Macaca mulatta functional"]="MacacaMulatta" ["Mus musculus functional"]="MusMusculus" ["Mus musculus non-functional"]="MusMusculus" ["Mus spretus functional"]="MusSpretus" ["Mus spretus non-functional"]="MusSpretus" ["Oncorhynchus mykiss functional"]="OncorhynchusMykiss" ["Oncorhynchus mykiss non-functional"]="OncorhynchusMykiss" ["Oryctolagus cuniculus functional"]="OryctolagusCuniculus" ["Oryctolagus cuniculus non-functional"]="OryctolagusCuniculus" ["Rattus norvegicus functional"]="RattusNorvegicus" ["Rattus norvegicus non-functional"]="RattusNorvegicus" ["Sus scrofa functional"]="SusScrofa" ["Sus scrofa non-functional"]="SusScrofa" ["BosTaurus"]="BosTaurus" ["CamelusDromedarius"]="CamelusDromedarius" ["CanisLupusFamiliaris"]="CanisLupusFamiliaris" ["DanioRerio"]="DanioRerio" ["HomoSapiens"]="HomoSapiens" ["MacacaMulatta"]="MacacaMulatta" ["MusMusculus"]="MusMusculus" ["MusSpretus"]="MusSpretus" ["OncorhynchusMykiss"]="OncorhynchusMykiss" ["OryctolagusCuniculus"]="OryctolagusCuniculus" ["RattusNorvegicus"]="RattusNorvegicus" ["SusScrofa"]="SusScrofa")
 
 echo "$input $species $locus $output"
 
diff -r bcec7bb4e089 -r d001d0c05dbe igblastn.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/igblastn.xml	Fri Dec 16 07:30:32 2016 -0500
@@ -0,0 +1,107 @@
+
+     
+    
+		igblast/igblast.sh $input $species $locus $output
+	
+	
+		
+		
+			
+			
+			
+			
+			
+			
+			
+			
+			
+			
+			
+			
+			
+			
+				
+		
+			
+			
+			
+			
+			
+			
+			
+		
+	
+	
+		
+	
+	
+	
+		igblastwrp
+	
+	
+============
+iReport
+============
+
+This tool uses the online igBLAST website hosted by NCBI to BLAST a FASTA file; it then retrieves the result and generates a convenient tabular format for further processing.
+
+**NOTE**
+
+.. class:: warningmark
+
+- Everything goes through the servers of NCBI, so if you have sensitive data that isn't allowed to leave your local network, this isn't the tool to use.
+
+**USAGE**
+
+.. class:: infomark
+
+- This tool uses a free service provided by NCBI, and although there don't seem to be any restrictions on usage, avoid unnecessary usage to lighten the load on NCBI's servers.
+
+
+**INPUT**
+
+This tool accepts FASTA files as input:
+
+::
+
+		>lcl|FLN1FA002RWEZA.1| 
+		ggctggagtgggtttcatacattagtagtaatagtggtgccatatactacgcagactctgtgaagggccgattcaccatc
+		tccagaaacaatgccaaggactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgc
+		gagagcgatcccccggtattactatgatactagtggcccaaacgactactggggccagggaaccctggtcaccgtctcct
+		cag
+		>lcl|FLN1FA001BLION.1| 
+		aggcttgagtggatgggatggatcaacgctggcaatggtaacacaaaatattcacagaagttccagggcagagtcaccat
+		taccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaagacacggctgtgtattactgtg
+		cgagagtgggcagcagctggtctgatgcttttgattatctggggccaagggacaatggtcaccgtctcctcag
+
+**OUTPUT**
+
+The following data is used for ARGalaxy
+
++-----------------+----------------------------------------------+
+| Column name     | Column contents                              |
++-----------------+----------------------------------------------+
+| ID              | The Sequence ID provided by the sequencer.   |
++-----------------+----------------------------------------------+
+| VDJ Frame       | In-frame/Out-frame                           |
++-----------------+----------------------------------------------+
+| Top V Gene      | The best matching V gene found.              |
++-----------------+----------------------------------------------+
+| Top D Gene      | The best matching D gene found.              |
++-----------------+----------------------------------------------+
+| Top J Gene      | The best matching J gene found.              |
++-----------------+----------------------------------------------+
+| CDR3 Seq        | The CDR3 region.                             |
++-----------------+----------------------------------------------+
+| CDR3 Length     | The length of the CDR3 region.               |
++-----------------+----------------------------------------------+
+| CDR3 Seq DNA    | The CDR3 sequence region.                    |
++-----------------+----------------------------------------------+
+| CDR3 Length DNA | The length of the CDR3 sequence region.      |
++-----------------+----------------------------------------------+
+| Functionality   | If sequence is productive/unproductive       |
++-----------------+----------------------------------------------+
+
+
+    
+
diff -r bcec7bb4e089 -r d001d0c05dbe igparse.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/igparse.xml	Fri Dec 16 07:30:32 2016 -0500
@@ -0,0 +1,15 @@
+
+	 
+	
+		igblastparser/igparse.pl $input 0 2>/dev/null | grep -v "D:" | cut -f2- > $output
+	
+	
+		
+	
+	
+		
+	
+	
+		Step 2 of the Immune Repertoire tools: extracts the relevant information needed from the reports generated by igblast (Step 1).
+	
+
diff -r bcec7bb4e089 -r d001d0c05dbe imgt_loader.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/imgt_loader.xml	Fri Dec 16 07:30:32 2016 -0500
@@ -0,0 +1,48 @@
+
+	 
+	
+		imgt_loader/imgt_loader.sh $in_file $out_file "tmp"
+	
+	
+        
+	
+	
+		
+	
+	
+**INPUT**
+
+This tool accepts an IMGT/HighV-QUEST ZIP file.
+
+**OUTPUT**
+
+The following data is used for ARGalaxy
+
++-----------------+----------------------------------------------+
+| Column name     | Column contents                              |
++-----------------+----------------------------------------------+
+| ID              | The Sequence ID provided by the sequencer.   |
++-----------------+----------------------------------------------+
+| VDJ Frame       | In-frame/Out-frame                           |
++-----------------+----------------------------------------------+
+| Top V Gene      | The best matching V gene found.              |
++-----------------+----------------------------------------------+
+| Top D Gene      | The best matching D gene found.              |
++-----------------+----------------------------------------------+
+| Top J Gene      | The best matching J gene found.              |
++-----------------+----------------------------------------------+
+| CDR3 Seq        | The CDR3 region.                             |
++-----------------+----------------------------------------------+
+| CDR3 Length     | The length of the CDR3 region.               |
++-----------------+----------------------------------------------+
+| CDR3 Seq DNA    | The CDR3 sequence region.                    |
++-----------------+----------------------------------------------+
+| CDR3 Length DNA | The length of the CDR3 sequence region.      |
++-----------------+----------------------------------------------+
+| Functionality   | If sequence is productive/unproductive       |
++-----------------+----------------------------------------------+
+
+
+	
+
+
diff -r bcec7bb4e089 -r d001d0c05dbe report_clonality/RScript.r
--- a/report_clonality/RScript.r	Mon Dec 12 05:22:57 2016 -0500
+++ b/report_clonality/RScript.r	Fri Dec 16 07:30:32 2016 -0500
@@ -400,6 +400,10 @@
   maxVD = data.frame(data.table(VandDCount)[, list(max=max(l)), by=c("Sample")])
   VandDCount = merge(VandDCount, maxVD, by.x="Sample", by.y="Sample", all.x=T)
   VandDCount$relLength = VandDCount$l / VandDCount$max
+  check = is.nan(VandDCount$relLength)  # TRUE for rows where l / max produced NaN (e.g. 0 / 0)
+  if(any(check)){
+	VandDCount[check,"relLength"] = 0  # replace NaN ratios with 0; other rows are left as-is
+  }
   
   cartegianProductVD = expand.grid(Top.V.Gene = Vchain$v.name, Top.D.Gene = Dchain$v.name)
   
@@ -446,6 +450,11 @@
 VandJCount = merge(VandJCount, maxVJ, by.x="Sample", by.y="Sample", all.x=T)
 VandJCount$relLength = VandJCount$l / VandJCount$max
 
+check = is.nan(VandJCount$relLength)  # TRUE for rows where l / max produced NaN (e.g. 0 / 0)
+if(any(check)){
+	VandJCount[check,"relLength"] = 0  # replace NaN ratios with 0; other rows are left as-is
+}
+
 cartegianProductVJ = expand.grid(Top.V.Gene = Vchain$v.name, Top.J.Gene = Jchain$v.name)
 
 completeVJ = merge(VandJCount, cartegianProductVJ, all.y=TRUE)
@@ -490,6 +499,11 @@
   DandJCount = merge(DandJCount, maxDJ, by.x="Sample", by.y="Sample", all.x=T)
   DandJCount$relLength = DandJCount$l / DandJCount$max
   
+  check = is.nan(DandJCount$relLength)  # TRUE for rows where l / max produced NaN (e.g. 0 / 0)
+  if(any(check)){
+    DandJCount[check,"relLength"] = 0  # replace NaN ratios with 0; other rows are left as-is
+  }
+  
   cartegianProductDJ = expand.grid(Top.D.Gene = Dchain$v.name, Top.J.Gene = Jchain$v.name)
   
   completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE)
diff -r bcec7bb4e089 -r d001d0c05dbe report_clonality_igg.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/report_clonality_igg.xml	Fri Dec 16 07:30:32 2016 -0500
@@ -0,0 +1,197 @@
+
+	 
+	
+#if $gene_selection.source == "imgtdb"		
+	report_clonality/r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype" "${gene_selection.species}" "${gene_selection.locus}" $filterproductive $clonality_method
+#else
+	report_clonality/r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype" "custom" "${gene_selection.vgenes};${gene_selection.dgenes};${gene_selection.jgenes}" $filterproductive $clonality_method
+#end if
+	
+	
+		
+		
+			
+			
+			
+			
+			
+			
+			
+		
+		
+		
+			
+					
+					
+			
+			
+				
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+					
+				
+			
+				
+					
+					
+					
+					
+					
+					
+					
+					
+				
+			
+			
+				
+				
+				
+				
+			
+		
+		
+		
+			
+			
+		
+		
+		
+			
+			
+		
+		
+	
+	
+		
+	
+	
+		weblogo
+		
+	
+			
+**INPUT**
+
+One or more ARGalaxy proprietary format files combined with the ARGalaxy Experimental Design tool
+
+
+.. class:: warningmark
+
+Custom gene ordering based on position on genome: 
+
+**Human**
+
+IGH::
+
+    V:
+    IGHV7-81,IGHV3-74,IGHV3-73,IGHV3-72,IGHV3-71,IGHV2-70,IGHV1-69,IGHV3-66,IGHV3-64,IGHV4-61,IGHV4-59,IGHV1-58,IGHV3-53,IGHV3-52,IGHV5-a,IGHV5-51,IGHV3-49,IGHV3-48,IGHV3-47,IGHV1-46,IGHV1-45,IGHV3-43,IGHV4-39,IGHV3-35,IGHV4-34,IGHV3-33,IGHV4-31,IGHV4-30-4,IGHV4-30-2,IGHV3-30-3,IGHV3-30,IGHV4-28,IGHV2-26,IGHV1-24,IGHV3-23,IGHV3-22,IGHV3-21,IGHV3-20,IGHV3-19,IGHV1-18,IGHV3-15,IGHV3-13,IGHV3-11,IGHV3-9,IGHV1-8,IGHV3-7,IGHV2-5,IGHV7-4-1,IGHV4-4,IGHV4-b,IGHV1-3,IGHV1-2,IGHV6-1
+    D:
+    IGHD1-1,IGHD2-2,IGHD3-3,IGHD6-6,IGHD1-7,IGHD2-8,IGHD3-9,IGHD3-10,IGHD4-11,IGHD5-12,IGHD6-13,IGHD1-14,IGHD2-15,IGHD3-16,IGHD4-17,IGHD5-18,IGHD6-19,IGHD1-20,IGHD2-21,IGHD3-22,IGHD4-23,IGHD5-24,IGHD6-25,IGHD1-26,IGHD7-27
+    J:
+    IGHJ1,IGHJ2,IGHJ3,IGHJ4,IGHJ5,IGHJ6
+
+
+IGK::
+
+    V:
+    IGKV3D-7,IGKV1D-8,IGKV1D-43,IGKV3D-11,IGKV1D-12,IGKV1D-13,IGKV3D-15,IGKV1D-16,IGKV1D-17,IGKV3D-20,IGKV2D-26,IGKV2D-28,IGKV2D-29,IGKV2D-30,IGKV1D-33,IGKV1D-39,IGKV2D-40,IGKV2-40,IGKV1-39,IGKV1-33,IGKV2-30,IGKV2-29,IGKV2-28,IGKV1-27,IGKV2-24,IGKV3-20,IGKV1-17,IGKV1-16,IGKV3-15,IGKV1-13,IGKV1-12,IGKV3-11,IGKV1-9,IGKV1-8,IGKV1-6,IGKV1-5,IGKV5-2,IGKV4-1
+    J:
+    IGKJ1,IGKJ2,IGKJ3,IGKJ4,IGKJ5
+
+
+IGL::
+
+    V:
+    IGLV4-69,IGLV8-61,IGLV4-60,IGLV6-57,IGLV5-52,IGLV1-51,IGLV9-49,IGLV1-47,IGLV7-46,IGLV5-45,IGLV1-44,IGLV7-43,IGLV1-41,IGLV1-40,IGLV5-39,IGLV5-37,IGLV1-36,IGLV3-27,IGLV3-25,IGLV2-23,IGLV3-22,IGLV3-21,IGLV3-19,IGLV2-18,IGLV3-16,IGLV2-14,IGLV3-12,IGLV2-11,IGLV3-10,IGLV3-9,IGLV2-8,IGLV4-3,IGLV3-1
+    J:
+    IGLJ1,IGLJ2,IGLJ3,IGLJ6,IGLJ7
+
+
+TRB::
+
+    V:
+    TRBV2,TRBV3-1,TRBV4-1,TRBV5-1,TRBV6-1,TRBV4-2,TRBV6-2,TRBV4-3,TRBV6-3,TRBV7-2,TRBV6-4,TRBV7-3,TRBV9,TRBV10-1,TRBV11-1,TRBV10-2,TRBV11-2,TRBV6-5,TRBV7-4,TRBV5-4,TRBV6-6,TRBV5-5,TRBV7-6,TRBV5-6,TRBV6-8,TRBV7-7,TRBV6-9,TRBV7-8,TRBV5-8,TRBV7-9,TRBV13,TRBV10-3,TRBV11-3,TRBV12-3,TRBV12-4,TRBV12-5,TRBV14,TRBV15,TRBV16,TRBV18,TRBV19,TRBV20-1,TRBV24-1,TRBV25-1,TRBV27,TRBV28,TRBV29-1,TRBV30
+    D:
+    TRBD1,TRBD2
+    J:
+    TRBJ1-1,TRBJ1-2,TRBJ1-3,TRBJ1-4,TRBJ1-5,TRBJ1-6,TRBJ2-1,TRBJ2-2,TRBJ2-3,TRBJ2-4,TRBJ2-5,TRBJ2-6,TRBJ2-7
+
+
+TRA::
+
+    V:
+    TRAV1-1,TRAV1-2,TRAV2,TRAV3,TRAV4,TRAV5,TRAV6,TRAV7,TRAV8-1,TRAV9-1,TRAV10,TRAV12-1,TRAV8-2,TRAV8-3,TRAV13-1,TRAV12-2,TRAV8-4,TRAV13-2,TRAV14/DV4,TRAV9-2,TRAV12-3,TRAV8-6,TRAV16,TRAV17,TRAV18,TRAV19,TRAV20,TRAV21,TRAV22,TRAV23/DV6,TRAV24,TRAV25,TRAV26-1,TRAV27,TRAV29/DV5,TRAV30,TRAV26-2,TRAV34,TRAV35,TRAV36/DV7,TRAV38-1,TRAV38-2/DV8,TRAV39,TRAV40,TRAV41
+    J:
+    TRAJ57,TRAJ56,TRAJ54,TRAJ53,TRAJ52,TRAJ50,TRAJ49,TRAJ48,TRAJ47,TRAJ46,TRAJ45,TRAJ44,TRAJ43,TRAJ42,TRAJ41,TRAJ40,TRAJ39,TRAJ38,TRAJ37,TRAJ36,TRAJ34,TRAJ33,TRAJ32,TRAJ31,TRAJ30,TRAJ29,TRAJ28,TRAJ27,TRAJ26,TRAJ24,TRAJ23,TRAJ22,TRAJ21,TRAJ20,TRAJ18,TRAJ17,TRAJ16,TRAJ15,TRAJ14,TRAJ13,TRAJ12,TRAJ11,TRAJ10,TRAJ9,TRAJ8,TRAJ7,TRAJ6,TRAJ5,TRAJ4,TRAJ3
+
+
+TRG::
+
+    V:
+    TRGV9,TRGV8,TRGV5,TRGV4,TRGV3,TRGV2
+    J:
+    TRGJ2,TRGJP2,TRGJ1,TRGJP1
+
+
+TRD::
+
+    V:
+    TRDV1,TRDV2,TRDV3
+    D:
+    TRDD1,TRDD2,TRDD3
+    J:
+    TRDJ1,TRDJ4,TRDJ2,TRDJ3
+
+
+**Mouse**
+
+TRB::
+
+    V:
+    TRBV1,TRBV2,TRBV3,TRBV4,TRBV5,TRBV12-1,TRBV13-1,TRBV12-2,TRBV13-2,TRBV13-3,TRBV14,TRBV15,TRBV16,TRBV17,TRBV19,TRBV20,TRBV23,TRBV24,TRBV26,TRBV29,TRBV30,TRBV31
+    D:
+    TRBD1,TRBD2
+    J:
+    TRBJ1-1,TRBJ1-2,TRBJ1-3,TRBJ1-4,TRBJ1-5,TRBJ2-1,TRBJ2-2,TRBJ2-3,TRBJ2-4,TRBJ2-5,TRBJ2-6,TRBJ2-7
+    
+
+**OUTPUT**
+
+It generates the following result:
+	
+