Repository 'rgrnastar'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/rgrnastar

Changeset 3:318b2a9d54dd (2017-04-21)
Previous changeset 2:ace9f5a2b40f (2016-02-05) Next changeset 4:d681e989ac95 (2017-06-09)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgrnastar commit 0d434bca5083e908114d93e11094e48f49b98ed1
modified:
README.rst
rg_rnaStar.xml
test-data/rnastar_test.log
test-data/rnastar_test2.log
test-data/rnastar_test2_mapped_reads.bam
test-data/rnastar_test_mapped_reads.bam
tool_data_table_conf.xml.sample
added:
macros.xml
test-data/test1.gtf
test-data/test3.fastqsanger.gz
test-data/tophat_test_reads_per_gene.txt
tool-data/rnastar_index2.loc.sample
removed:
tool-data/rnastar_index.loc.sample
tool_dependencies.xml
b
diff -r ace9f5a2b40f -r 318b2a9d54dd README.rst
--- a/README.rst Fri Feb 05 11:56:20 2016 -0500
+++ b/README.rst Fri Apr 21 07:58:59 2017 -0400
b
@@ -3,5 +3,7 @@
 ===================
 
 -  **Memory**: To run efficiently, RNA-STAR requires enough free memory to
-   hold the SA-indexed reference genome in RAM. For Human Genome hg19 this is
-   index about 27GB and running RNA-STAR requires approximately ~30GB of RAM.
+   hold the SA-indexed reference genome in RAM. For Human Genome hg19 this
+   index is about 27GB and running RNA-STAR requires approximately 30GB of RAM.
+   For custom genomes, the rule of thumb is to multiply the size of the
+   reference FASTA file by 9 to estimate the required amount of RAM.
b
diff -r ace9f5a2b40f -r 318b2a9d54dd macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri Apr 21 07:58:59 2017 -0400
b
@@ -0,0 +1,20 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="2.5.2b">star</requirement>
+            <requirement type="package" version="0.1.19">samtools</requirement>
+        </requirements>
+    </xml>
+    <token name="@FASTQ_GZ_OPTION@">
+        --readFilesCommand zcat
+    </token>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1093/bioinformatics/bts635</citation>
+        </citations>
+    </xml>
+    <xml name="@SJDBOPTIONS@">
+         <param argument="--sjdbGTFfile" type="data" format="gff3,gtf" label="Gene model (gff3,gtf) file for splice junctions" optional="true" help="Exon junction information for mapping splices"/>
+         <param argument="--sjdbOverhang" type="integer" min="1" value="100" label="Length of the genomic sequence around annotated junctions" help="Used in constructing the splice junctions database. Ideal value is ReadLength-1"/>
+    </xml>
+</macros>
b
diff -r ace9f5a2b40f -r 318b2a9d54dd rg_rnaStar.xml
--- a/rg_rnaStar.xml Fri Feb 05 11:56:20 2016 -0500
+++ b/rg_rnaStar.xml Fri Apr 21 07:58:59 2017 -0400
[
b'@@ -1,143 +1,175 @@\n-<tool id="rna_star" name="RNA STAR" version="2.4.0d-2">\n+<tool id="rna_star" name="RNA STAR" version="2.5.2b-0" profile="17.01">\n     <description>Gapped-read mapper for RNA-seq data</description>\n-    <requirements>\n-        <requirement type="package" version="2.4.0d">rnastar</requirement>\n-        <requirement type="package" version="0.1.19">samtools</requirement>\n-    </requirements>\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+    <expand macro="requirements"/>\n+    \n     <stdio>\n+        <regex match="FATAL error" source="both" level="fatal"/>\n         <regex match="EXITING: FATAL INPUT ERROR:" source="both" level="fatal"/>\n         <regex match="EXITING: fatal error trying to allocate genome arrays, exception thrown: std::bad_alloc" source="both" level="fatal"/>\n         <regex match="\\[sam_read1\\] missing header\\? Abort!" source="both" level="fatal"/>\n-        <regex match=".*" source="both" level="warning" description="Some stderr/stdout text"/>\n     </stdio>\n \n+<!--\n+    important quote (https://groups.google.com/forum/#!topic/rna-star/q4zGzlPgwXY):\n+    Hi Gary,\n+\n+    if you generate the genome with GTF file, and do not specify the value for  - -sjdbOverhang, it will be set to the default 100.\n+    If you want to be able to set arbitrary value of  - -sjdbOverhang on the fly, you have to generate the genome without annotations (GTF) - then you supply both the  - -sjdbOverhang and GTF file at the mapping step.\n+\n+    Cheers\n+    Alex\n+-->\n     <command><![CDATA[\n     ## Create temporary index for custom reference\n-    #if str($refGenomeSource.genomeSource) == \'history\':\n+    #if str($refGenomeSource.geneSource) == \'history\':\n         mkdir -p tempstargenomedir &&\n         STAR\n             --runMode genomeGenerate\n-            --genomeDir "tempstargenomedir"\n-            --genomeFastaFiles "$refGenomeSource.ownFile"\n-            --runThreadN \\${GALAXY_SLOTS:-4}\n- 
       \n-        #if str($refGenomeSource.geneModel) != \'None\':\n-            --sjdbOverhang "$refGenomeSource.overhang"\n-            --sjdbGTFfile "$refGenomeSource.geneModel"\n-            \n-            #if str($refGenomeSource.geneModel.ext) == \'gff3\':\n-                --sjdbGTFtagExonParentTranscript Parent\n+            --genomeDir \'tempstargenomedir\'\n+            --genomeFastaFiles \'$refGenomeSource.genomeFastaFiles\'\n+            #if $refGenomeSource.sjdbGTFfile:\n+                --sjdbGTFfile \'$refGenomeSource.sjdbGTFfile\'\n+                --sjdbOverhang \'$refGenomeSource.sjdbOverhang\'\n             #end if\n-        #end if\n-        ;\n-    #end if\n-    \n-    \n-    ## Actual alignment\n-    STAR\n-    --runThreadN \\${GALAXY_SLOTS:-4}\n-    --genomeLoad NoSharedMemory    \n-    #if str($refGenomeSource.genomeSource) == \'history\':\n-        --genomeDir "tempstargenomedir"\n-    #else\n-        --genomeDir "$refGenomeSource.index.fields.path"\n-    #end if\n-    \n-    --readFilesIn\n-    #if str($singlePaired.sPaired) == "paired_collection"\n-        "$singlePaired.input.forward" "$singlePaired.input.reverse"\n-    #else\n-        "$singlePaired.input1"\n-        #if str($singlePaired.sPaired) == "paired"\n-            "$singlePaired.input2"\n-        #end if\n-    #end if\n-\n-    ## Output parameters\n-    #if str( $output_params.output_select ) == "yes":\n-        --outSAMattributes $output_params.outSAMattributes\n-        --outSAMstrandField $output_params.outSAMstrandField\n-        --outFilterIntronMotifs $output_params.outFilterIntronMotifs\n-        #if str( $output_params.output_params2.output_select2 ) == "yes":\n-            --outSAMunmapped $output_params.output_params2.unmapped_opt\n-            --outSAMprimaryFlag $output_params.output_params2.primary_opt\n-            --outSAMmapqUnique "$output_params.output_params2.unique"\n-            --outFilterType $output_params.output_params2.sjfilter_opt\n-            
--outFilterMultimapScoreRange "$output_params.output_params2.multiScoreRange"\n-            --outFilterMultimapNmax "$out'..b'e="mapped_reads" file="rnastar_test2_mapped_reads.bam" compare="sim_size" delta="200" />\n         </test>\n-\n         <test>\n             <param name="input1" value="test3.fastqsanger" ftype="fastqsanger" />\n-            <param name="genomeSource" value="history" />\n-            <param name="ownFile" value="test3.ref.fa" />\n+            <param name="geneSource" value="history" />\n+            <param name="genomeFastaFiles" value="test3.ref.fa" />\n             <param name="sPaired" value="single" />\n \n             <param name="output_select" value="yes" />\n             <param name="outSAMattributes" value="All" />\n             <param name="outSAMstrandField" value="intronMotif" />\n             <param name="settingsType" value="star_fusion" />\n-            \n+\n             <output name="chimeric_junctions" file="test3.chimjunc.tabular"/>\n         </test>\n-        \n+        <test><!-- tests fastqsanger.gz -->\n+            <param name="input1" value="test3.fastqsanger.gz" ftype="fastqsanger.gz" />\n+            <param name="geneSource" value="history" />\n+            <param name="genomeFastaFiles" value="test3.ref.fa" />\n+            <param name="sPaired" value="single" />\n+\n+            <param name="output_select" value="yes" />\n+            <param name="outSAMattributes" value="All" />\n+            <param name="outSAMstrandField" value="intronMotif" />\n+            <param name="settingsType" value="star_fusion" />\n+\n+            <output name="chimeric_junctions" file="test3.chimjunc.tabular"/>\n+        </test>\n         <test>\n             <param name="input1" value="tophat_in2.fastqsanger" ftype="fastqsanger" />\n-            <param name="genomeSource" value="history" />\n-            <param name="ownFile" value="tophat_test.fa" />\n+            <param name="geneSource" value="history" />\n+            <param 
name="genomeFastaFiles" value="tophat_test.fa" />\n             <param name="sPaired" value="single" />\n-            \n+\n             <param name="output_select" value="yes" />\n             <param name="outSAMattributes" value="All" />\n             <param name="outSAMstrandField" value="intronMotif" />\n             <param name="outFilterIntronMotifs" value="RemoveNoncanonical" />\n-            \n+\n             <param name="output_select2" value="yes" />\n             <param name="settingsType" value="full" />\n-            <param name="seed_select" value="yes" />\n-            <param name="align_select" value="yes" />\n-            <param name="chim_select" value="yes" />\n+            <param name="chim_select" value="false" />\n             \n-            <!-- Uses default settings, should be similar to test1, but tests the parameters -->\n-            <output name="output_log" file="rnastar_test.log" compare="diff" lines_diff="10"/>\n+            <output name="output_log" file="rnastar_test.log" compare="diff" lines_diff="12"/>\n             <output name="splice_junctions" file="rnastar_test_splicejunctions.bed"/>\n-            <output name="mapped_reads" file="rnastar_test_mapped_reads.bam" compare="sim_size" delta="634" /><!-- header is 434 bytes larger  -->\n+            <output name="mapped_reads" file="rnastar_test_mapped_reads.bam" compare="sim_size" delta="634" />\n         </test>\n-\n     </tests>\n     <help>\n **What it does**\n \n-This tool runs STAR, an ultrafast universal RNA-seq aligner.\n+STAR is an ultrafast universal RNA-seq aligner.\n \n **Extra SAM attributes**\n \n@@ -500,10 +563,6 @@\n \n **Attributions**\n \n-Note that each component has its own license:\n- - RNA STAR: GPLv3\n- - samtools: MIT/Expat License\n-\n rna_star - see the web site at rna_star_\n \n For details, please see the rna_starMS_\n@@ -525,8 +584,6 @@\n .. _rna_star: https://github.com/alexdobin/STAR\n .. 
_rna_starMS: http://bioinformatics.oxfordjournals.org/content/29/1/15.full\n .. _Galaxy: http://getgalaxy.org\n-</help>\n-    <citations>\n-        <citation type="doi">10.1093/bioinformatics/bts635</citation>\n-    </citations>\n+    </help>\n+    <expand macro="citations"/>\n </tool>\n'
b
diff -r ace9f5a2b40f -r 318b2a9d54dd test-data/rnastar_test.log
--- a/test-data/rnastar_test.log Fri Feb 05 11:56:20 2016 -0500
+++ b/test-data/rnastar_test.log Fri Apr 21 07:58:59 2017 -0400
b
@@ -1,7 +1,7 @@
-                                 Started job on | Feb 18 14:00:29
-                             Started mapping on | Feb 18 14:00:32
-                                    Finished on | Feb 18 14:00:33
-       Mapping speed, Million of reads per hour | 0.36
+                                 Started job on | Mar 01 15:54:22
+                             Started mapping on | Mar 01 15:54:25
+                                    Finished on | Mar 01 15:54:25
+       Mapping speed, Million of reads per hour | inf
 
                           Number of input reads | 100
                       Average input read length | 75
@@ -29,3 +29,6 @@
        % of reads unmapped: too many mismatches | 0.00%
                  % of reads unmapped: too short | 0.00%
                      % of reads unmapped: other | 0.00%
+                                  CHIMERIC READS:
+                       Number of chimeric reads | 0
+                            % of chimeric reads | 0.00%
b
diff -r ace9f5a2b40f -r 318b2a9d54dd test-data/rnastar_test2.log
--- a/test-data/rnastar_test2.log Fri Feb 05 11:56:20 2016 -0500
+++ b/test-data/rnastar_test2.log Fri Apr 21 07:58:59 2017 -0400
b
@@ -1,7 +1,7 @@
-                                 Started job on | Jul 23 12:00:42
-                             Started mapping on | Jul 23 12:00:42
-                                    Finished on | Jul 23 12:00:43
-       Mapping speed, Million of reads per hour | 0.36
+                                 Started job on | Mar 01 15:53:05
+                             Started mapping on | Mar 01 15:53:08
+                                    Finished on | Mar 01 15:53:08
+       Mapping speed, Million of reads per hour | inf
 
                           Number of input reads | 100
                       Average input read length | 75
@@ -29,3 +29,6 @@
        % of reads unmapped: too many mismatches | 0.00%
                  % of reads unmapped: too short | 10.00%
                      % of reads unmapped: other | 0.00%
+                                  CHIMERIC READS:
+                       Number of chimeric reads | 0
+                            % of chimeric reads | 0.00%
b
diff -r ace9f5a2b40f -r 318b2a9d54dd test-data/rnastar_test2_mapped_reads.bam
b
Binary file test-data/rnastar_test2_mapped_reads.bam has changed
b
diff -r ace9f5a2b40f -r 318b2a9d54dd test-data/rnastar_test_mapped_reads.bam
b
Binary file test-data/rnastar_test_mapped_reads.bam has changed
b
diff -r ace9f5a2b40f -r 318b2a9d54dd test-data/test1.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1.gtf Fri Apr 21 07:58:59 2017 -0400
b
@@ -0,0 +1,4 @@
+test_chromosome test gene 1 650 . + . gene_id "GENE1"; gene_name "GENE1"; transcript_id "GENE1_t1";
+test_chromosome test transcript 1 650 . + . gene_id "GENE1"; gene_name "GENE1"; transcript_id "GENE1_t1";
+test_chromosome test exon 1 650 . + . gene_id "GENE1"; transcript_id "GENE1_t1"; exon_number "1"; gene_name "GENE1";
+test_chromosome test CDS 100 550 . + . gene_id "GENE1"; transcript_id "GENE1_t1"; exon_number "1"; gene_name "GENE1";
b
diff -r ace9f5a2b40f -r 318b2a9d54dd test-data/test3.fastqsanger.gz
b
Binary file test-data/test3.fastqsanger.gz has changed
b
diff -r ace9f5a2b40f -r 318b2a9d54dd test-data/tophat_test_reads_per_gene.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tophat_test_reads_per_gene.txt Fri Apr 21 07:58:59 2017 -0400
b
@@ -0,0 +1,5 @@
+N_unmapped 0 0 0
+N_multimapping 1 1 1
+N_noFeature 0 51 48
+N_ambiguous 0 0 0
+GENE1 99 48 51
b
diff -r ace9f5a2b40f -r 318b2a9d54dd tool-data/rnastar_index.loc.sample
--- a/tool-data/rnastar_index.loc.sample Fri Feb 05 11:56:20 2016 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,11 +0,0 @@
-#This is a sample file distributed with Galaxy that enables tools
-#to use a directory of rna-star indexed sequences data files. You will
-#need to create these data files and then create a bowtie_indices.loc
-#file similar to this one (store it in this directory) that points to
-#the directories in which those files are stored. The bowtie2_indices.loc
-#file has this format (longer white space characters are TAB characters):
-#
-#<unique_build_id>   <dbkey>   <display_name>   <file_base_path>
-#
-#hg19   hg19    hg19 full   /mnt/galaxyIndices/genomes/hg19/rnastar
-
b
diff -r ace9f5a2b40f -r 318b2a9d54dd tool-data/rnastar_index2.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/rnastar_index2.loc.sample Fri Apr 21 07:58:59 2017 -0400
b
@@ -0,0 +1,23 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of rna-star indexed sequences data files. You will
+#need to create these data files and then create a rnastar_index2.loc
+#file similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The rnastar_index2.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_base_path>   <with_gtf>
+#
+#The <with_gtf> column should be 1 or 0, indicating whether the index was made
+#with an annotation (i.e., --sjdbGTFfile and --sjdbOverhang were used) or not,
+#respectively.
+#
+#Note that STAR indices can become quite large. Consequently, it is only
+#advisable to create indices with annotations if it's known ahead of time that
+#(A) the annotations won't be frequently updated and (B) the read lengths used
+#will also rarely vary. If either of these is not the case, it's advisable to
+#create indices without annotations and then specify an annotation file and
+#maximum read length (minus 1) when running STAR.
+#
+#hg19   hg19    hg19 full   /mnt/galaxyIndices/genomes/hg19/rnastar 0
+#hg19Ensembl   hg19Ensembl    hg19 full with Ensembl annotation   /mnt/galaxyIndices/genomes/hg19Ensembl/rnastar 1
+
b
diff -r ace9f5a2b40f -r 318b2a9d54dd tool_data_table_conf.xml.sample
--- a/tool_data_table_conf.xml.sample Fri Feb 05 11:56:20 2016 -0500
+++ b/tool_data_table_conf.xml.sample Fri Apr 21 07:58:59 2017 -0400
b
@@ -1,7 +1,7 @@
 <!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
 <tables>
-    <table name="rnastar_index" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/rnastar_index.loc" />
+    <table name="rnastar_index2" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path, with-gtf</columns>
+        <file path="tool-data/rnastar_index2.loc" />
     </table>
 </tables>
b
diff -r ace9f5a2b40f -r 318b2a9d54dd tool_dependencies.xml
--- a/tool_dependencies.xml Fri Feb 05 11:56:20 2016 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,13 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="rnastar" version="2.4.0d">
-        <repository changeset_revision="54c96a529c59" name="package_rnastar_2_4_0d" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
-        <readme>
-            Installs the STAR wrapper and dependency packages samtools and star - see https://code.google.com/p/rna-star/
-            STAR is a very fast mapper for rna-seq giving junctions if the indexes are constructed with a junction library
-        </readme>
-    </package>
-    <package name="samtools" version="0.1.19">
-        <repository changeset_revision="96aab723499f" name="package_samtools_0_1_19" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>