Repository 'rgrnastar'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/rgrnastar

Changeset 10:d82339e37e8e (2019-08-15)
Previous changeset 9:0a563fd2f22e (2019-03-14) Next changeset 11:41929aa1e7f3 (2019-08-17)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgrnastar commit 686574b0392e554b75035a9b79bc919dfda9ab97"
modified:
macros.xml
rg_rnaStar.xml
test-data/rnastar_test.log
test-data/rnastar_test2.log
test-data/rnastar_test2_mapped_reads.bam
test-data/rnastar_test_mapped_reads.bam
test-data/rnastar_test_twopass.log
test-data/test3.chimjunc.tabular
tool_data_table_conf.xml.sample
added:
test-data/41737_R1.fastq.sub240k.gz
test-data/41737_R2.fastq.sub240k.gz
test-data/737K-august-2016.small.txt.gz
test-data/SNORD83B.22.fa
test-data/SNORD83B.22.gtf
test-data/rnastar_index2_versioned.loc
test-data/rnastar_test_mapped_reads_twopass.bam
test-data/rnastar_test_transcriptome_mapped_reads.bam
test-data/test-cache/tophat-test/Genome
test-data/test-cache/tophat-test/SA
test-data/test-cache/tophat-test/SAindex
test-data/test-cache/tophat-test/chrLength.txt
test-data/test-cache/tophat-test/chrName.txt
test-data/test-cache/tophat-test/chrNameLength.txt
test-data/test-cache/tophat-test/chrStart.txt
test-data/test-cache/tophat-test/genomeParameters.txt
tool-data/rnastar_index2_versioned.loc.sample
tool_data_table_conf.xml.test
removed:
README.rst
tool-data/rnastar_index2.loc.sample
b
diff -r 0a563fd2f22e -r d82339e37e8e README.rst
--- a/README.rst Thu Mar 14 16:12:38 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,9 +0,0 @@
-
-System Requirements
-===================
-
--  **Memory**: To run efficiently, RNA-STAR requires enough free memory to
-   hold the SA-indexed reference genome in RAM. For Human Genome hg19 this
-   index is about 27GB and running RNA-STAR requires approximately ~30GB of RAM.
-   For custom genomes, the rule of thumb is to multiply the size of the 
-   reference FASTA file by 9 to estimate the required amount of RAM.
b
diff -r 0a563fd2f22e -r d82339e37e8e macros.xml
--- a/macros.xml Thu Mar 14 16:12:38 2019 -0400
+++ b/macros.xml Thu Aug 15 01:54:13 2019 -0400
[
b'@@ -1,10 +1,39 @@\n <macros>\n+    <!-- REMEMBER to bump the version of rna_star_index_builder_data_manager\n+    whenever you make changes to the following two version tokens!\n+    The data manager uses a symlink to this macro file to keep the versions in\n+    sync. -->\n+    <!-- STAR version to be used -->\n+    <token name="@VERSION@">2.7.2a</token>\n+    <!-- STAR index version compatible with this version of STAR\n+    This is the STAR version that introduced the index structure expected\n+    by the current version.\n+    It can be found for any specific version of STAR with:\n+    STAR -h | grep versionGenome\n+    or by looking for the versionGenome parameter in source/parametersDefault\n+    of STAR\'s source code -->\n+    <token name="@IDX_VERSION@">2.7.1a</token>\n+\n     <xml name="requirements">\n         <requirements>\n-            <requirement type="package" version="2.6.0b">star</requirement>\n-            <requirement type="package" version="1.8">samtools</requirement>\n+            <requirement type="package" version="@VERSION@">star</requirement>\n+            <requirement type="package" version="1.9">samtools</requirement>\n         </requirements>\n     </xml>\n+\n+    <xml name="index_selection" token_with_gene_model="1">\n+        <param argument="--genomeDir" name="genomeDir" type="select"\n+        label="Select reference genome"\n+        help="If your genome of interest is not listed, contact the Galaxy team">\n+            <options from_data_table="rnastar_index2_versioned">\n+                <filter type="static_value" column="4" value="@WITH_GENE_MODEL@" />\n+                <filter type="static_value" column="5" value="@IDX_VERSION@" />\n+                <filter type="sort_by" column="2" />\n+                <validator type="no_options" message="No indexes are available for the selected input dataset" />\n+            </options>\n+        </param>\n+    </xml>\n+\n     <token name="@FASTQ_GZ_OPTION@">\n         
--readFilesCommand zcat\n     </token>\n@@ -13,8 +42,8 @@\n             <citation type="doi">10.1093/bioinformatics/bts635</citation>\n         </citations>\n     </xml>\n-    <xml name="@SJDBOPTIONS@">\n-         <param argument="--sjdbGTFfile" type="data" format="gff3,gtf" label="Gene model (gff3,gtf) file for splice junctions" optional="true" help="Exon junction information for mapping splices"/>\n+    <xml name="@SJDBOPTIONS@" token_optional="true">\n+         <param argument="--sjdbGTFfile" type="data" format="gff3,gtf" label="Gene model (gff3,gtf) file for splice junctions" optional="@OPTIONAL@" help="Exon junction information for mapping splices"/>\n          <param argument="--sjdbOverhang" type="integer" min="1" value="100" label="Length of the genomic sequence around annotated junctions" help="Used in constructing the splice junctions database. Ideal value is ReadLength-1"/>\n     </xml>\n     <xml name="dbKeyActions">\n@@ -22,7 +51,7 @@\n             <conditional name="refGenomeSource.geneSource">\n                 <when value="indexed">\n                     <action type="metadata" name="dbkey">\n-                        <option type="from_data_table" name="rnastar_index2" column="1" offset="0">\n+                        <option type="from_data_table" name="rnastar_index2_versioned" column="1" offset="0">\n                             <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>\n                             <filter type="param_value" ref="refGenomeSource.GTFconditional.genomeDir" column="0"/>\n                         </option>\n@@ -36,4 +65,99 @@\n             </conditional>\n         </actions>\n     </xml>\n+    <token name="@TEMPINDEX@"><![CDATA[\n+    ## Create temporary index for custom reference\n+    #if str($refGenomeSource.geneSource) == \'history\':\n+        mkdir -p tempstargenomedir &&\n+        STAR\n+            --runMode genomeGenerate\n+            --genomeDir \'tempstargenomedir\'\n+            
--genomeFastaFiles \'${refGenomeSource.genomeFastaFiles}\'\n+            ## Handle difference betwee'..b'     #if $refGenomeSource.GTFconditional.sjdbGTFfile:\n+                --sjdbOverhang $refGenomeSource.GTFconditional.sjdbOverhang\n+                --sjdbGTFfile \'${refGenomeSource.GTFconditional.sjdbGTFfile}\'\n+                #if str($refGenomeSource.GTFconditional.sjdbGTFfile.ext) == \'gff3\':\n+                    --sjdbGTFtagExonParentTranscript Parent\n+                #end if\n+            #end if\n+        #end if\n+        #end if\n+        ]]></token>\n+    <xml name="stdio" >\n+        <stdio>\n+            <regex match="FATAL error" source="both" level="fatal"/>\n+            <regex match="EXITING: FATAL INPUT ERROR:" source="both" level="fatal"/>\n+            <regex match="EXITING: fatal error trying to allocate genome arrays, exception thrown: std::bad_alloc" source="both" level="fatal"/>\n+            <regex match="\\[sam_read1\\] missing header\\? Abort!" source="both" level="fatal"/>\n+        </stdio>\n+    </xml>\n+    <xml name="refgenomehandling" >\n+        <conditional name="refGenomeSource">\n+            <param name="geneSource" type="select" label="Custom or built-in reference genome" help="Built-ins were indexed using default options">\n+                <option value="indexed" selected="true">Use a built-in index</option>\n+                <option value="history">Use reference genome from history and create temporary index</option>\n+            </param>\n+            <when value="indexed">\n+                <conditional name="GTFconditional">\n+                    <param name="GTFselect" type="select"\n+                           label="Reference genome with or without an annotation"\n+                           help="Select the \'... with builtin gene-model\' option to select from the list of available indexes that were built with splice junction information. Select the \'... 
without builtin gene-model\' option to select from the list of available indexes without annotated splice junctions.">\n+                        <option value="without-gtf">use genome reference with builtin gene-model</option>\n+                        <option value="with-gtf">use genome reference without builtin gene-model</option>\n+                    </param>\n+                    <when value="with-gtf">\n+                        <expand macro="index_selection" with_gene_model="0" />\n+                        <expand macro="@SJDBOPTIONS@" />\n+                    </when>\n+                    <when value="without-gtf">\n+                        <expand macro="index_selection" with_gene_model="1" />\n+                    </when>\n+                </conditional>\n+            </when>\n+            <when value="history">\n+                <param argument="--genomeFastaFiles" type="data" format="fasta" label="Select a reference genome" />\n+                <!-- Currently, this parameter is not exposed in the wrapper,\n+                     but used only in the tests to avoid excessive index sizes for\n+                     the tiny test genomes. 
-->\n+                <param name="genomeSAindexNbases" type="hidden" value="" />\n+                <conditional name="GTFconditional">\n+                    <param name="GTFselect" type="select"\n+                           label="Build index with or without known splice junctions annotation"\n+                           help="To build an index with known splice junctions annotated, you will have to provide a GTF or GFF3 dataset that describes the gene models (the location of genes, transcripts and exons) known for the reference genome.">\n+                        <option value="without-gtf">build index without gene-model</option>\n+                        <option value="with-gtf">build index with gene-model</option>\n+                    </param>\n+                    <when value="with-gtf">\n+                        <expand macro="@SJDBOPTIONS@" optional="false"/>\n+                    </when>\n+                    <when value="without-gtf" />\n+                </conditional>\n+            </when>\n+        </conditional>\n+    </xml>\n </macros>\n'
b
diff -r 0a563fd2f22e -r d82339e37e8e rg_rnaStar.xml
--- a/rg_rnaStar.xml Thu Mar 14 16:12:38 2019 -0400
+++ b/rg_rnaStar.xml Thu Aug 15 01:54:13 2019 -0400
[
b'@@ -1,16 +1,10 @@\n-<tool id="rna_star" name="RNA STAR" version="2.6.0b-2" profile="17.01">\n+<tool id="rna_star" name="RNA STAR" version="@VERSION@" profile="17.01">\n     <description>Gapped-read mapper for RNA-seq data</description>\n     <macros>\n         <import>macros.xml</import>\n     </macros>\n     <expand macro="requirements"/>\n-\n-    <stdio>\n-        <regex match="FATAL error" source="both" level="fatal"/>\n-        <regex match="EXITING: FATAL INPUT ERROR:" source="both" level="fatal"/>\n-        <regex match="EXITING: fatal error trying to allocate genome arrays, exception thrown: std::bad_alloc" source="both" level="fatal"/>\n-        <regex match="\\[sam_read1\\] missing header\\? Abort!" source="both" level="fatal"/>\n-    </stdio>\n+    <expand macro="stdio" />\n \n <!--\n     important quote (https://groups.google.com/forum/#!topic/rna-star/q4zGzlPgwXY):\n@@ -23,176 +17,212 @@\n     Alex\n -->\n     <command><![CDATA[\n-    ## Create temporary index for custom reference\n-    #if str($refGenomeSource.geneSource) == \'history\':\n-        mkdir -p tempstargenomedir &&\n-        STAR\n-            --runMode genomeGenerate\n-            --genomeDir \'tempstargenomedir\'\n-            --genomeFastaFiles \'$refGenomeSource.genomeFastaFiles\'\n-            #if $refGenomeSource.sjdbGTFfile:\n-                --sjdbGTFfile \'$refGenomeSource.sjdbGTFfile\'\n-                --sjdbOverhang \'$refGenomeSource.sjdbOverhang\'\n-            #end if\n-            --runThreadN \\${GALAXY_SLOTS:-4}\n-        &&\n-    #end if\n-\n+    @TEMPINDEX@\n     STAR\n-        --runThreadN \\${GALAXY_SLOTS:-4}\n-        --genomeLoad NoSharedMemory\n-        --genomeDir\n-        #if str($refGenomeSource.geneSource) == \'history\':\n-            \'tempstargenomedir\'\n-            #if $refGenomeSource.sjdbGTFfile:\n-                --sjdbGTFfile \'$refGenomeSource.sjdbGTFfile\'\n-                --sjdbOverhang \'$refGenomeSource.sjdbOverhang\'\n-            #end if\n-  
      #else\n-            \'${refGenomeSource.GTFconditional.genomeDir.fields.path}\'\n-        #end if\n+    @REFGENOMEHANDLING@\n \n         --readFilesIn\n-        #if str($singlePaired.sPaired) == "paired_collection"\n+        #if str($singlePaired.sPaired) == \'paired_collection\':\n             \'$singlePaired.input.forward\' \'$singlePaired.input.reverse\'\n \n-            #if $singlePaired.input.forward.is_of_type("fastq.gz", "fastqsanger.gz"):\n+            #if $singlePaired.input.forward.is_of_type(\'fastq.gz\', \'fastqsanger.gz\'):\n                 @FASTQ_GZ_OPTION@\n             #end if\n         #else\n             \'$singlePaired.input1\'\n-            #if str($singlePaired.sPaired) == "paired"\n+            #if str($singlePaired.sPaired) == \'paired\':\n                 \'$singlePaired.input2\'\n             #end if\n \n-            #if $singlePaired.input1.is_of_type("fastq.gz", "fastqsanger.gz"):\n+            #if $singlePaired.input1.is_of_type(\'fastq.gz\', \'fastqsanger.gz\'):\n                 @FASTQ_GZ_OPTION@\n             #end if\n         #end if\n \n         --outSAMtype BAM SortedByCoordinate\n \n-        ## Handle difference between indices with/without annotations\n-        #if str($refGenomeSource.geneSource) == \'indexed\':\n-            #if str($refGenomeSource.GTFconditional.GTFselect) == \'without-gtf\':\n-                #if $refGenomeSource.GTFconditional.sjdbGTFfile:\n-                    --sjdbOverhang \'$refGenomeSource.GTFconditional.sjdbOverhang\'\n-                    --sjdbGTFfile \'${refGenomeSource.GTFconditional.sjdbGTFfile}\'\n-                    #if str($refGenomeSource.GTFconditional.sjdbGTFfile.ext) == \'gff3\':\n-                        --sjdbGTFtagExonParentTranscript Parent\n-                    #end if\n+        ## Two pass mode\n+        --twopassMode ${twopass.twopassMode} ${twopass.twopass_read_subset}\n+        #for $sj_input in $twopass.sj_precalculated:\n+            \'$sj_input\'\n+        #end for\n+   
     #if str($twopass.twopassMode) != \'None\':\n+            #if str($refGenomeSource.GTFconditional.GTFselect) == \'with-gtf\':\n+                #if not '..b': Number of reported alignments that contain the query in the current record.\n-  HI: Query hit index, indicating the alignment record is the i-th one stored in SAM\n-  AS: Local alignment score (paired for paired-end reads)\n-  nM: Number of mismatches per (paired) alignment\n+STAR-Fusion_ can use the chimeric junctions output of STAR as input, but you\n+need to enable **chimeric alignment detection** by STAR for that dataset to be\n+generated. Hence, be sure to select:\n \n-The All option includes the Standard attributes, plus the following four::\n+**Report chimeric alignments?**: `As separate tabular "Junctions" output (Junctions)`.\n+\n+In addition, for best results it is recommended_ that you\n+\n+- use **2-pass mapping** for more sensitive novel splice junction discovery\n \n-  NM: Edit distance to the reference, including ambiguous bases but excluding clipping\n-  MD: String for mismatching positions\n-  jM: Intron motifs for all junctions\n-  jI: Start and end of introns for all junctions\n+- under *BAM output format specification*,\n+  **Read alignment tags to include in the BAM output**: select `XS` as an\n+  additional tag to generate (this is the equivalent of using\n+  `--outSAMstrandField intronMotif` on the command line)\n \n-**STAR-Fusion**\n+- under *Algorithmic settings*, **Configure seed, alignment and limits options**:\n+  `use parameters suggested for STAR-Fusion`.\n \n-STAR-Fusion_ is used to identify candidate fusion transcripts. The recommended_ parameters for running\n-STAR prior to STAR-Fusion can be pre-selected.\n+*Cufflinks*\n \n-**Attributions**\n-\n-rna_star - see the web site at rna_star_\n+.. class:: infomark\n \n-For details, please see the rna_starMS_\n-"STAR: ultrafast universal RNA-seq aligner"\n-A. 
Dobin et al, Bioinformatics 2012; doi: 10.1093/bioinformatics/bts635\n+   Cufflinks is not considered to be the best tool for use downstream of STAR\n+   anymore. Consider using *Stringtie* instead, which also should pose no\n+   compatibility issues.\n \n-Galaxy_ (that\'s what you are using right now!) for gluing everything together\n-\n-Most of the work for this wrapper XML is Jeremy Goecks\' original STAR wrapper\n+To avoid compatibility issues with Cufflinks you should:\n \n-Minor tweaks to output names to suit our downstream purposes, toolshed automated dependencies\n-and odds and ends of other code and documentation comprising this tool was\n-written by Ross Lazarus and that part is licensed_ the same way as other rgenetics artefacts\n+- select **XS** as a *Read alignment tag to include in the BAM output* if (and\n+  only if) your sequenced reads come from an unstranded library prep\n+- *not* select the *jM* and *jI* tags for inclusion\n+- keep the **HI** tag selected and\n+- select *HI tag values should be* **zero-based**\n+- exclude **All alignments across non-canonical junctions** under *Output\n+  filter criteria -> Exclude the following records from the BAM output*\n+\n+-----\n \n-.. _licensed: http://creativecommons.org/licenses/by-nc-nd/3.0/\n+Attribution\n+\n+Minor tweaks to output names to suit downstream purposes, toolshed automated\n+dependencies and odds and ends of other code and documentation comprising\n+this tool were originally written by Ross Lazarus and have been licensed under\n+the Creative Commons\n+`BY-NC-ND 3.0 license <http://creativecommons.org/licenses/by-nc-nd/3.0/>`__.\n+\n+.. _STAR: https://github.com/alexdobin/STAR\n .. _STAR-Fusion: https://github.com/STAR-Fusion/STAR-Fusion\n-.. _recommended: https://github.com/STAR-Fusion/STAR-Fusion/wiki#alternatively-running-star-yourself-and-then-running-star-fusion-using-the-existing-outputs\n-.. _rna_star: https://github.com/alexdobin/STAR\n-.. 
_rna_starMS: http://bioinformatics.oxfordjournals.org/content/29/1/15.full\n-.. _Galaxy: http://getgalaxy.org\n-    </help>\n+.. _recommended: https://github.com/STAR-Fusion/STAR-Fusion/wiki#alternatively-kickstart-mode-running-star-yourself-and-then-running-star-fusion-using-the-existing-outputs\n+    ]]></help>\n     <expand macro="citations"/>\n </tool>\n'
b
diff -r 0a563fd2f22e -r d82339e37e8e test-data/41737_R1.fastq.sub240k.gz
b
Binary file test-data/41737_R1.fastq.sub240k.gz has changed
b
diff -r 0a563fd2f22e -r d82339e37e8e test-data/41737_R2.fastq.sub240k.gz
b
Binary file test-data/41737_R2.fastq.sub240k.gz has changed
b
diff -r 0a563fd2f22e -r d82339e37e8e test-data/737K-august-2016.small.txt.gz
b
Binary file test-data/737K-august-2016.small.txt.gz has changed
b
diff -r 0a563fd2f22e -r d82339e37e8e test-data/SNORD83B.22.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SNORD83B.22.fa Thu Aug 15 01:54:13 2019 -0400
b
@@ -0,0 +1,17 @@
+>22 dna:chromosome chromosome:GRCh38:22:39313740:50818468:1 REF
+CACACTCAAAGCAGCAAACAGCCCAGCAAGGCCAGACTGGGAATTTCCTCATCTCAGGAC
+TTCAAAGCCAGTGTGAAAGGACTGCCAACACCCTCTCCTTCCTTTCCTCTCCCACCACAG
+GGCCACCAGCGTCTGTGGCCTTGGATCCTCCCTCTACAAGAGCCCCCCCATGACAAGTCA
+GGACCTGCCTCACCTTGCGGAGGGTGAGCACCCGCTTCTTGGTTCCCACCACACAGCCTT
+TCAGCATGACAAAGTCATTGGTCACTTCACCATAGTGGACAAAGCCACCCTGGAAAACGA
+GCATCGGATCAGCACAGGCCCAGGAGGGGATTGTCGTGCAGATGACCCCTCCAGGTTCAG
+GCCCTCCCTGACCACAGGGCTGTTCTCAGAAGGAAGGCAACAAGGAACGGTTCCGCAGTC
+TGTCTCGGGCGCTGTGCCCAGCGCACATTCCAGGCCTCATCACTGAACAGCTGAGCCTGA
+GACCCCACTTCTCACCAGCCAACCCCGACGAGTGGACTCAGATGACAACATGCCACTTAC
+AAGGGACACAGCTAGGTGTTGTGTTGGCTTCAGTTAACGATCCTGCTAGCAGCCCCTAGG
+AAGCAGCCTATCCCCAAAAGCACGAGGCCTGGGATGGCCTCACAGAGCAGAACACCCATT
+ACTTACCAGAGGGTTGATGCTCTTGTCAGATAGGTCATAGTCAGTGGAGGCATTGTTCTT
+GATCAGCTTGCCGTCCTTGATAAGGTAGCCCTGGCCAATCTTATAAATCTGAATGAACAA
+GAAGGGTGTAAGGCTGGGGCATTAGGGACAAATAACCCAGACATGCCAGTGTGCTGACCT
+GCAAAGCACGCTAGAAGGCAGCTGAGGCCTCAGTCCCAGTCACAGCGTATCCCAAGGTCA
+GAGCAAAAAGCTGGCTGGCCCTCCAGGTTCCTTTCTGTAAGGCGGCTGGGCTAAAACTAA
\ No newline at end of file
b
diff -r 0a563fd2f22e -r d82339e37e8e test-data/SNORD83B.22.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SNORD83B.22.gtf Thu Aug 15 01:54:13 2019 -0400
b
@@ -0,0 +1,8 @@
+#!genome-build GRCh38.p12
+#!genome-version GRCh38
+#!genome-date 2013-12
+#!genome-build-accession NCBI:GCA_000001405.27
+#!genebuild-last-updated 2018-11
+22 ensembl gene 39313819 39313911 . - . gene_id "ENSG00000209480"; gene_version "1"; gene_name "SNORD83B"; gene_source "ensembl"; gene_biotype "snoRNA";
+22 ensembl transcript 39313819 39313911 . - . gene_id "ENSG00000209480"; gene_version "1"; transcript_id "ENST00000386745"; transcript_version "1"; gene_name "SNORD83B"; gene_source "ensembl"; gene_biotype "snoRNA"; transcript_name "SNORD83B-201"; transcript_source "ensembl"; transcript_biotype "snoRNA"; tag "basic"; transcript_support_level "NA";
+22 ensembl exon 39313819 39313911 . - . gene_id "ENSG00000209480"; gene_version "1"; transcript_id "ENST00000386745"; transcript_version "1"; exon_number "1"; gene_name "SNORD83B"; gene_source "ensembl"; gene_biotype "snoRNA"; transcript_name "SNORD83B-201"; transcript_source "ensembl"; transcript_biotype "snoRNA"; exon_id "ENSE00001501751"; exon_version "1"; tag "basic"; transcript_support_level "NA";
b
diff -r 0a563fd2f22e -r d82339e37e8e test-data/rnastar_index2_versioned.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rnastar_index2_versioned.loc Thu Aug 15 01:54:13 2019 -0400
b
@@ -0,0 +1,4 @@
+##STAR versioned indexes
+#build_id dbkey display_name path with_gene_model version
+000 ? test_index ${__HERE__}/test-cache/tophat-test 0 2.7.1a
+001 ? test_index2 ${__HERE__}/test-cache/tophat-test 1 2.7.1a
b
diff -r 0a563fd2f22e -r d82339e37e8e test-data/rnastar_test.log
--- a/test-data/rnastar_test.log Thu Mar 14 16:12:38 2019 -0400
+++ b/test-data/rnastar_test.log Thu Aug 15 01:54:13 2019 -0400
b
@@ -1,7 +1,7 @@
-                                 Started job on | Mar 01 15:54:22
-                             Started mapping on | Mar 01 15:54:25
-                                    Finished on | Mar 01 15:54:25
-       Mapping speed, Million of reads per hour | inf
+                                 Started job on | .*
+                             Started mapping on | .*
+                                    Finished on | .*
+       Mapping speed, Million of reads per hour | .*
 
                           Number of input reads | 100
                       Average input read length | 75
b
diff -r 0a563fd2f22e -r d82339e37e8e test-data/rnastar_test2.log
--- a/test-data/rnastar_test2.log Thu Mar 14 16:12:38 2019 -0400
+++ b/test-data/rnastar_test2.log Thu Aug 15 01:54:13 2019 -0400
b
@@ -1,7 +1,7 @@
-                                 Started job on | Mar 01 15:53:05
-                             Started mapping on | Mar 01 15:53:08
-                                    Finished on | Mar 01 15:53:08
-       Mapping speed, Million of reads per hour | inf
+                                 Started job on | .*
+                             Started mapping on | .*
+                                    Finished on | .*
+       Mapping speed, Million of reads per hour | .*
 
                           Number of input reads | 100
                       Average input read length | 75
b
diff -r 0a563fd2f22e -r d82339e37e8e test-data/rnastar_test2_mapped_reads.bam
b
Binary file test-data/rnastar_test2_mapped_reads.bam has changed
b
diff -r 0a563fd2f22e -r d82339e37e8e test-data/rnastar_test_mapped_reads.bam
b
Binary file test-data/rnastar_test_mapped_reads.bam has changed
b
diff -r 0a563fd2f22e -r d82339e37e8e test-data/rnastar_test_mapped_reads_twopass.bam
b
Binary file test-data/rnastar_test_mapped_reads_twopass.bam has changed
b
diff -r 0a563fd2f22e -r d82339e37e8e test-data/rnastar_test_transcriptome_mapped_reads.bam
b
Binary file test-data/rnastar_test_transcriptome_mapped_reads.bam has changed
b
diff -r 0a563fd2f22e -r d82339e37e8e test-data/rnastar_test_twopass.log
--- a/test-data/rnastar_test_twopass.log Thu Mar 14 16:12:38 2019 -0400
+++ b/test-data/rnastar_test_twopass.log Thu Aug 15 01:54:13 2019 -0400
b
@@ -1,7 +1,7 @@
-                                 Started job on | Feb 12 11:19:46
-                             Started mapping on | Feb 12 11:20:03
-                                    Finished on | Feb 12 11:20:03
-       Mapping speed, Million of reads per hour | inf
+                                 Started job on | .*
+                             Started mapping on | .*
+                                    Finished on | .*
+       Mapping speed, Million of reads per hour | .*
 
                           Number of input reads | 100
                       Average input read length | 75
b
diff -r 0a563fd2f22e -r d82339e37e8e test-data/test-cache/tophat-test/Genome
b
Binary file test-data/test-cache/tophat-test/Genome has changed
b
diff -r 0a563fd2f22e -r d82339e37e8e test-data/test-cache/tophat-test/SA
b
Binary file test-data/test-cache/tophat-test/SA has changed
b
diff -r 0a563fd2f22e -r d82339e37e8e test-data/test-cache/tophat-test/SAindex
b
Binary file test-data/test-cache/tophat-test/SAindex has changed
b
diff -r 0a563fd2f22e -r d82339e37e8e test-data/test-cache/tophat-test/chrLength.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/tophat-test/chrLength.txt Thu Aug 15 01:54:13 2019 -0400
b
@@ -0,0 +1,1 @@
+650
b
diff -r 0a563fd2f22e -r d82339e37e8e test-data/test-cache/tophat-test/chrName.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/tophat-test/chrName.txt Thu Aug 15 01:54:13 2019 -0400
b
@@ -0,0 +1,1 @@
+test_chromosome
b
diff -r 0a563fd2f22e -r d82339e37e8e test-data/test-cache/tophat-test/chrNameLength.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/tophat-test/chrNameLength.txt Thu Aug 15 01:54:13 2019 -0400
b
@@ -0,0 +1,1 @@
+test_chromosome 650
b
diff -r 0a563fd2f22e -r d82339e37e8e test-data/test-cache/tophat-test/chrStart.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/tophat-test/chrStart.txt Thu Aug 15 01:54:13 2019 -0400
b
@@ -0,0 +1,2 @@
+0
+262144
b
diff -r 0a563fd2f22e -r d82339e37e8e test-data/test-cache/tophat-test/genomeParameters.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/tophat-test/genomeParameters.txt Thu Aug 15 01:54:13 2019 -0400
b
@@ -0,0 +1,16 @@
+### STAR   --runMode genomeGenerate   --genomeDir test-cache/tophat_test/   --genomeFastaFiles tophat_test.fa      --genomeSAindexNbases 5
+### GstrandBit 32
+versionGenome 2.7.1a
+genomeFastaFiles tophat_test.fa 
+genomeSAindexNbases 5
+genomeChrBinNbits 18
+genomeSAsparseD 1
+sjdbOverhang 0
+sjdbFileChrStartEnd - 
+sjdbGTFfile -
+sjdbGTFchrPrefix -
+sjdbGTFfeatureExon exon
+sjdbGTFtagExonParentTranscript transcript_id
+sjdbGTFtagExonParentGene gene_id
+sjdbInsertSave Basic
+genomeFileSizes 262144 5366
b
diff -r 0a563fd2f22e -r d82339e37e8e test-data/test3.chimjunc.tabular
--- a/test-data/test3.chimjunc.tabular Thu Mar 14 16:12:38 2019 -0400
+++ b/test-data/test3.chimjunc.tabular Thu Aug 15 01:54:13 2019 -0400
b
@@ -1,24 +1,27 @@
-chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_60 181 60M15S 241 60S15M
-chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_62 183 58M17S 241 58S17M
-chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_64 185 56M19S 241 56S19M
-chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_66 187 54M21S 241 54S21M
-chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_68 189 52M23S 241 52S23M
-chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_70 191 50M25S 241 50S25M
-chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_72 193 48M27S 241 48S27M
-chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_74 195 46M29S 241 46S29M
-chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_76 197 44M31S 241 44S31M
-chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_78 199 42M33S 241 42S33M
-chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_80 201 40M35S 241 40S35M
-chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_82 203 38M37S 241 38S37M
-chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_84 205 36M39S 241 36S39M
-chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_86 207 34M41S 241 34S41M
-chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_88 209 32M43S 241 32S43M
-chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_90 211 30M45S 241 30S45M
-chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_92 213 28M47S 241 28S47M
-chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_94 215 26M49S 241 26S49M
-chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_96 217 24M51S 241 24S51M
-chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_98 219 22M53S 241 22S53M
-chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_100 221 20M55S 241 20S55M
-chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_102 223 18M57S 241 18S57M
-chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_104 225 16M59S 241 16S59M
-chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_106 227 14M61S 241 14S61M
+chr_donorA brkpt_donorA strand_donorA chr_acceptorB brkpt_acceptorB strand_acceptorB junction_type repeat_left_lenA repeat_right_lenB read_name start_alnA cigar_alnA start_alnB cigar_alnB num_chim_aln max_poss_aln_score non_chim_aln_score this_chim_aln_score bestall_chim_aln_score PEmerged_bool readgrp
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_60 181 60M15S 241 60S15M 1 75 59 72 72 0
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_62 183 58M17S 241 58S17M 1 75 57 72 72 0
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_64 185 56M19S 241 56S19M 1 75 55 72 72 0
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_66 187 54M21S 241 54S21M 1 75 53 72 72 0
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_68 189 52M23S 241 52S23M 1 75 51 72 72 0
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_70 191 50M25S 241 50S25M 1 75 49 72 72 0
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_72 193 48M27S 241 48S27M 1 75 47 72 72 0
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_74 195 46M29S 241 46S29M 1 75 45 72 72 0
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_76 197 44M31S 241 44S31M 1 75 43 72 72 0
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_78 199 42M33S 241 42S33M 1 75 41 72 72 0
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_80 201 40M35S 241 40S35M 1 75 39 72 72 0
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_82 203 38M37S 241 38S37M 1 75 37 72 72 0
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_84 205 36M39S 241 36S39M 1 75 38 72 72 0
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_86 207 34M41S 241 34S41M 1 75 40 72 72 0
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_88 209 32M43S 241 32S43M 1 75 42 72 72 0
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_90 211 30M45S 241 30S45M 1 75 44 72 72 0
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_92 213 28M47S 241 28S47M 1 75 46 72 72 0
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_94 215 26M49S 241 26S49M 1 75 48 72 72 0
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_96 217 24M51S 241 24S51M 1 75 50 72 72 0
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_98 219 22M53S 241 22S53M 1 75 52 72 72 0
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_100 221 20M55S 241 20S55M 1 75 54 72 72 0
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_102 223 18M57S 241 18S57M 1 75 56 72 72 0
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_104 225 16M59S 241 16S59M 1 75 58 72 72 0
+chr1 241 + chr2 240 + 0 0 0 test_chimeric_mRNA_106 227 14M61S 241 14S61M 1 75 60 72 72 0
+# 2.7.2a   STAR --runThreadN 1 --genomeLoad NoSharedMemory --genomeDir tempstargenomedir @ --readFilesIn /tmp/tmpTNNe38/files/2/7/0/dataset_2700161e-e03d-4a50-9bc8-1508ff059881.dat --readFilesCommand zcat --outSAMtype BAM SortedByCoordinate --twopassMode None  --quantMode - --outSAMstrandField intronMotif --outSAMattrIHstart 1 --outSAMattributes NH HI AS nM NM MD jM jI MC ch --outSAMprimaryFlag OneBestScore --outSAMmapqUnique 255 --outSAMunmapped Within --chimSegmentMin 12 --chimJunctionOverhangMin 12 --alignSJDBoverhangMin 10 --alignMatesGapMax 100000 --alignIntronMax 100000 --chimSegmentReadGapMax 3 --alignSJstitchMismatchNmax 5 -1 5 5 --peOverlapNbasesMin 12 --peOverlapMMp 0.1 --chimMultimapScoreRange 10 --chimMultimapNmax 10 --chimNonchimScoreDropMin 10 --outBAMsortingThreadN 1 --outBAMsortingBinsN 50 --limitBAMsortRAM 0 --chimOutType Junctions --chimOutJunctionFormat 1
+# Nreads 83 NreadsUnique 72 NreadsMulti 0
b
diff -r 0a563fd2f22e -r d82339e37e8e tool-data/rnastar_index2.loc.sample
--- a/tool-data/rnastar_index2.loc.sample Thu Mar 14 16:12:38 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,23 +0,0 @@
-#This is a sample file distributed with Galaxy that enables tools
-#to use a directory of rna-star indexed sequences data files. You will
-#need to create these data files and then create a rnastar_index2.loc
-#file similar to this one (store it in this directory) that points to
-#the directories in which those files are stored. The rnastar_index2.loc
-#file has this format (longer white space characters are TAB characters):
-#
-#<unique_build_id>   <dbkey>   <display_name>   <file_base_path> <withGTF>
-#
-#The <with_gtf> column should be 1 or 0, indicating whether the index was made
-#with an annotation (i.e., --sjdbGTFfile and --sjdbOverhang were used) or not,
-#respecively.
-#
-#Note that STAR indices can become quite large. Consequently, it is only
-#advisable to create indices with annotations if it's known ahead of time that
-#(A) the annotations won't be frequently updated and (B) the read lengths used
-#will also rarely vary. If either of these is not the case, it's advisable to
-#create indices without annotations and then specify an annotation file and
-#maximum read length (minus 1) when running STAR.
-#
-#hg19   hg19    hg19 full   /mnt/galaxyIndices/genomes/hg19/rnastar 0
-#hg19Ensembl   hg19Ensembl    hg19 full with Ensembl annotation   /mnt/galaxyIndices/genomes/hg19Ensembl/rnastar 1
-
b
diff -r 0a563fd2f22e -r d82339e37e8e tool-data/rnastar_index2_versioned.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/rnastar_index2_versioned.loc.sample Thu Aug 15 01:54:13 2019 -0400
b
@@ -0,0 +1,26 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of rna-star indexed sequences data files. You will
+#need to create these data files and then create a rnastar_index2_versioned.loc
+#file similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The rnastar_index2_versioned.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_base_path> <with_gene_model> <version>
+#
+#The <with_gene_model> column should be 1 or 0, indicating whether the index
+#was built with annotations (i.e., --sjdbGTFfile and --sjdbOverhang were used)
+#or not.
+#
+#The <version> column indicates the STAR version that introduced the format of
+#the index, i.e., the oldest STAR version that could make use of the index.
+#
+#Note that STAR indices can become quite large. Consequently, it is only
+#advisable to create indices with annotations if it's known ahead of time that
+#(A) the annotations won't be frequently updated and (B) the read lengths used
+#will also rarely vary. If either of these is not the case, it's advisable to
+#create indices without annotations and then specify an annotation file and
+#maximum read length (minus 1) when running STAR.
+#
+#hg19   hg19    hg19 full   /mnt/galaxyIndices/genomes/hg19/rnastar 0 2.7.1a
+#hg19Ensembl   hg19Ensembl    hg19 full with Ensembl annotation   /mnt/galaxyIndices/genomes/hg19Ensembl/rnastar 1 2.7.1a
+
b
diff -r 0a563fd2f22e -r d82339e37e8e tool_data_table_conf.xml.sample
--- a/tool_data_table_conf.xml.sample Thu Mar 14 16:12:38 2019 -0400
+++ b/tool_data_table_conf.xml.sample Thu Aug 15 01:54:13 2019 -0400
b
@@ -1,7 +1,6 @@
-<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
 <tables>
-    <table name="rnastar_index2" comment_char="#" allow_duplicate_entries="False">
-        <columns>value, dbkey, name, path, with-gtf</columns>
-        <file path="tool-data/rnastar_index2.loc" />
+    <table name="rnastar_index2_versioned" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path, with-gtf, version</columns>
+        <file path="tool-data/rnastar_index2_versioned.loc" />
     </table>
 </tables>
b
diff -r 0a563fd2f22e -r d82339e37e8e tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Thu Aug 15 01:54:13 2019 -0400
b
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Location of rnastar indexes for testing -->
+    <table name="rnastar_index2_versioned" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path, with-gtf, version</columns>
+        <file path="${__HERE__}/test-data/rnastar_index2_versioned.loc" />
+    </table>
+</tables>