Repository 'rna_starsolo'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/rna_starsolo

Changeset 7:e403d27e8f24 (2020-08-05)
Previous changeset 6:178bdbdb6d24 (2019-11-28) Next changeset 8:00fbfac99d39 (2020-12-04)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgrnastar commit a8e319862d723654c372a6d71e5de76e052586a9"
modified:
macros.xml
rg_rnaStarSolo.xml
test-data/test-cache/tophat-test/genomeParameters.txt
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
added:
test-data/filtered.barcodes.txt
test-data/filtered3.Homo_sapiens.GRCh38.100.chr21.gtf
test-data/filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz
test-data/filtered3.bam
test-data/pbmc_1k_v2_L001.R1.10k.fastq.gz
test-data/pbmc_1k_v2_L001.R2.10k.fastq.gz
test-data/rnastar_index2x_versioned.loc
test-data/test-cache/tophat-test/Log.out
tool-data/rnastar_index2x_versioned.loc.sample
removed:
test-data/41737_R1_sub240k.fastq.gz
test-data/41737_R2_sub240k.fastq.gz
test-data/737K-august-2016.small.txt.gz
test-data/SNORD83B.22.fa
test-data/SNORD83B.22.gtf
test-data/rnastar_index2_versioned.loc
tool-data/rnastar_index2_versioned.loc.sample
b
diff -r 178bdbdb6d24 -r e403d27e8f24 macros.xml
--- a/macros.xml Thu Nov 28 15:55:47 2019 -0500
+++ b/macros.xml Wed Aug 05 09:41:22 2020 -0400
b
@@ -5,7 +5,7 @@
     the index versions in sync, but you should manually adjust the +galaxy
     version number. -->
     <!-- STAR version to be used -->
-    <token name="@VERSION@">2.7.2b</token>
+    <token name="@VERSION@">2.7.5b</token>
     <!-- STAR index version compatible with this version of STAR
     This is the STAR version that introduced the index structure expected
     by the current version.
@@ -13,7 +13,8 @@
     STAR -h | grep versionGenome
     or by looking for the versionGenome parameter in source/parametersDefault
     of STAR's source code -->
-    <token name="@IDX_VERSION@">2.7.1a</token>
+    <token name="@IDX_VERSION@">2.7.4a</token>
+    <token name="@IDX_DATA_TABLE@">rnastar_index2x_versioned</token>
 
     <xml name="requirements">
         <requirements>
@@ -27,7 +28,7 @@
         <param argument="--genomeDir" name="genomeDir" type="select"
         label="Select reference genome"
         help="If your genome of interest is not listed, contact the Galaxy team">
-            <options from_data_table="rnastar_index2_versioned">
+            <options from_data_table="@IDX_DATA_TABLE@">
                 <filter type="static_value" column="4" value="@WITH_GENE_MODEL@" />
                 <filter type="static_value" column="5" value="@IDX_VERSION@" />
                 <filter type="sort_by" column="2" />
@@ -53,7 +54,7 @@
             <conditional name="refGenomeSource.geneSource">
                 <when value="indexed">
                     <action type="metadata" name="dbkey">
-                        <option type="from_data_table" name="rnastar_index2_versioned" column="1" offset="0">
+                        <option type="from_data_table" name="@IDX_DATA_TABLE@" column="1" offset="0">
                             <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
                             <filter type="param_value" ref="refGenomeSource.GTFconditional.genomeDir" column="0"/>
                         </option>
@@ -76,10 +77,21 @@
             --genomeDir 'tempstargenomedir'
             --genomeFastaFiles '${refGenomeSource.genomeFastaFiles}'
             ## Handle difference between indices with/without annotations
-            #if str($refGenomeSource.GTFconditional.GTFselect) == 'with-gtf':
-                --sjdbOverhang '${refGenomeSource.GTFconditional.sjdbOverhang}'
-                --sjdbGTFfile '${refGenomeSource.GTFconditional.sjdbGTFfile}'
-                #if str($refGenomeSource.GTFconditional.sjdbGTFfile.ext) == 'gff3':
+            #if 'GTFconditional' in $refGenomeSource:
+                ## GTFconditional exists only in STAR, but not STARsolo
+                #if str($refGenomeSource.GTFconditional.GTFselect) == 'with-gtf':
+                    --sjdbOverhang '${refGenomeSource.GTFconditional.sjdbOverhang}'
+                    --sjdbGTFfile '${refGenomeSource.GTFconditional.sjdbGTFfile}'
+                    #if str($refGenomeSource.GTFconditional.sjdbGTFfile.ext) == 'gff3':
+                        --sjdbGTFtagExonParentTranscript Parent
+                    #end if
+                #end if
+            #else:
+                ## ref genome selection is less complex for STARsolo because
+                ## with-gtf is mandatory there
+                --sjdbOverhang '${refGenomeSource.sjdbOverhang}'
+                --sjdbGTFfile '${refGenomeSource.sjdbGTFfile}'
+                #if str($refGenomeSource.sjdbGTFfile.ext) == 'gff3':
                     --sjdbGTFtagExonParentTranscript Parent
                 #end if
             #end if
@@ -110,6 +122,13 @@
         #end if
         #end if
         ]]></token>
+    <xml name="ref_selection">
+        <param argument="--genomeFastaFiles" type="data" format="fasta" label="Select a reference genome" />
+        <!-- Currently, this parameter is not exposed in the wrapper,
+             but used only in the tests to avoid excessive index sizes for
+             the tiny test genomes. -->
+        <param name="genomeSAindexNbases" type="hidden" value="" />
+    </xml>
     <xml name="stdio" >
         <stdio>
             <regex match="FATAL error" source="both" level="fatal"/>
@@ -119,48 +138,4 @@
             <yield />
         </stdio>
     </xml>
-    <xml name="refgenomehandling" >
-        <conditional name="refGenomeSource">
-            <param name="geneSource" type="select" label="Custom or built-in reference genome" help="Built-ins were indexed using default options">
-                <option value="indexed" selected="true">Use a built-in index</option>
-                <option value="history">Use reference genome from history and create temporary index</option>
-            </param>
-            <when value="indexed">
-                <conditional name="GTFconditional">
-                    <param name="GTFselect" type="select"
-                           label="Reference genome with or without an annotation"
-                           help="Select the '... with builtin gene-model' option to select from the list of available indexes that were built with splice junction information. Select the '... without builtin gene-model' option to select from the list of available indexes without annotated splice junctions.">
-                        <option value="without-gtf" selected='true'>use genome reference without builtin gene-model</option>
-                        <option value="with-gtf">use genome reference with builtin gene-model</option>
-                    </param>
-                    <when value="with-gtf">
-                        <expand macro="index_selection" with_gene_model="1" />
-                    </when>
-                    <when value="without-gtf">
-                        <expand macro="index_selection" with_gene_model="0" />
-                        <expand macro="@SJDBOPTIONS@" />
-                    </when>
-                </conditional>
-            </when>
-            <when value="history">
-                <param argument="--genomeFastaFiles" type="data" format="fasta" label="Select a reference genome" />
-                <!-- Currently, this parameter is not exposed in the wrapper,
-                     but used only in the tests to avoid excessive index sizes for
-                     the tiny test genomes. -->
-                <param name="genomeSAindexNbases" type="hidden" value="" />
-                <conditional name="GTFconditional">
-                    <param name="GTFselect" type="select"
-                           label="Build index with our without known splice junctions annotation"
-                           help="To build an index with known splice junctions annotated, you will have to provide a GTF or GFF3 dataset that describes the gene models (the location of genes, transcripts and exons) known for the reference genome.">
-                        <option value="without-gtf">build index without gene-model</option>
-                        <option value="with-gtf">build index with gene-model</option>
-                    </param>
-                    <when value="with-gtf">
-                        <expand macro="@SJDBOPTIONS@" optional="false"/>
-                    </when>
-                    <when value="without-gtf" />
-                </conditional>
-            </when>
-        </conditional>
-    </xml>
 </macros>
b
diff -r 178bdbdb6d24 -r e403d27e8f24 rg_rnaStarSolo.xml
--- a/rg_rnaStarSolo.xml Thu Nov 28 15:55:47 2019 -0500
+++ b/rg_rnaStarSolo.xml Wed Aug 05 09:41:22 2020 -0400
[
b'@@ -1,8 +1,7 @@\n-<tool id="rna_starsolo" name="RNA STARSolo" version="@VERSION@@WRAPPER@" profile="17.01">\n+<tool id="rna_starsolo" name="RNA STARSolo" version="@VERSION@" profile="17.01">\n     <description>mapping, demultiplexing and gene quantification for single cell RNA-seq</description>\n     <macros>\n         <import>macros.xml</import>\n-        <token name="@WRAPPER@">1</token>\n     </macros>\n     <expand macro="requirements"/>\n     <expand macro="stdio" >\n@@ -14,52 +13,34 @@\n     STAR\n     @REFGENOMEHANDLING@\n \n+    --readFilesIn\n     ## Check that the input pairs are of the same type\n     ## otherwise STARsolo will run for a long time and then error out.\n     ## We consume either repeats of two inputs R1 + R2\n     ## or a collection of paired reads.\n \n-    #try\n-        #set $last = None\n-        #for $x in $input_types.input_repeats:\n-            #if str($input_types.use) == "repeat":\n-                #set $r1 = $x.input1\n-                #set $r2 = $x.input2\n-            #elif str($input_types.use) == "list_paired":\n-                #set $r1 = $x.forward\n-                #set $r2 = $x.reverse\n-            #else\n-                Wrong Type\n-                #stop\n-            #end if\n-\n+    #if str($input_types.use) == "repeat":\n+        #set $reads1 = []\n+        #set $reads2 = []\n+        #for $r1, $r2 in zip($input_types.input1, $input_types.input2):\n             #assert $r1.datatype == $r2.datatype\n-\n-            ## Test that all pairs are of the same type\n-            #if $last:\n-                #assert $last.datatype == $r1.datatype\n-            #end if\n-            #set $last = $r1\n+            #silent $reads1.append(str($r1))\n+            #silent $reads2.append(str($r2))\n         #end for\n-    #except AssertionError\n-        Input types are not the same!\n-        #stop\n-    #end try\n+        #set $reads1 = \',\'.join($reads1)\n+        #set $reads2 = \',\'.join($reads2)\n+    #elif 
str($input_types.use) == "list_paired":\n+        #set $r1 = $input_types.input_collection.forward\n+        #set $r2 = $input_types.input_collection.reverse\n+        #set $reads1 = $r1\n+        #set $reads2 = $r2\n+    #end if\n \n     ## cDNA sequence(s) [R2] always go first, then barcode(s) [R1]\n     ## see: Section 3.1 of STAR manual for multiple inputs, and Section 13 for STARsolo inputs\n-    #if str($input_types.use) == "repeat":\n-        #set $reads2 = \',\'.join([ \'%s\' % $x.input2 for $i,$x in enumerate($input_types.input_repeats)])\n-        #set $reads1 = \',\'.join([ \'%s\' % $x.input1 for $i,$x in enumerate($input_types.input_repeats)])\n-    #else if str($input_types.use) == "list_paired"\n-        #set $reads2 = \',\'.join([ \'%s\' % $x.reverse for $i,$x in enumerate($input_types.input_repeats)])\n-        #set $reads1 = \',\'.join([ \'%s\' % $x.forward for $i,$x in enumerate($input_types.input_repeats)])\n-    #end if\n-\n-    --readFilesIn\n     $reads2 $reads1\n \n-    #if $last.is_of_type(\'fastq.gz\', \'fastqsanger.gz\'):\n+    #if $r1.is_of_type(\'fastq.gz\', \'fastqsanger.gz\'):\n         @FASTQ_GZ_OPTION@\n     #end if\n \n@@ -69,7 +50,7 @@\n     ## 1 - check length of barcode, 0 - do not check\n     ## Good for checking custom chemistries\n     --soloCBwhitelist \'$soloCBwhitelist\'\n-    --soloBarcodeReadLength \'$solo.soloBarcodeReadLength\'\n+    --soloBarcodeReadLength $solo.soloBarcodeReadLength\n \n     #if str($solo.params.chemistry) == "CR2":\n     --soloCBstart 1\n@@ -82,34 +63,80 @@\n     --soloUMIstart 17\n     --soloUMIlen 12\n     #else if str($solo.params.chemistry) == "custom":\n-    --soloCBstart \'$solo.params.soloCBstart\'\n-    --soloCBlen \'$solo.params.soloCBlen\'\n-    --soloUMIstart \'$solo.params.soloUMIstart\'\n-    --soloUMIlen \'$solo.params.soloUMIlen\'\n+    --soloCBstart $solo.params.soloCBstart\n+    --soloCBlen $solo.params.soloCBlen\n+    --soloUMIstart $solo.params.soloUMIstart\n+    --soloUMIlen 
$solo.params.soloUMIlen\n     #end if\n \n-    --soloStrand \'$solo.soloStrand\'\n-    --soloFeatures \'$solo.soloFeatures\'\n-    --soloUMIdedup \'$solo.soloUMIdedup\'\n+  '..b'="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" /> -->\n-                        <!-- </element> -->\n+                <param name="input_collection" >\n+                    <collection type="paired">\n+                        <element name="forward" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />\n+                        <element name="reverse" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />\n                     </collection>\n                 </param>\n             </conditional>\n-            <param name="soloCBwhitelist" value="737K-august-2016.small.txt.gz" />\n+            <param name="soloCBwhitelist" value="filtered.barcodes.txt" />\n             <conditional name="refGenomeSource">\n                 <param name="geneSource" value="history" />\n-                <param name="genomeFastaFiles" value="SNORD83B.22.fa" />\n+                <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />\n                 <param name="genomeSAindexNbases" value="4" />\n-                <conditional name="GTFconditional">\n-                    <param name="GTFselect" value="with-gtf" />\n-                    <param name="sjdbOverhang" value="75" />\n-                    <param name="sjdbGTFfile" value="SNORD83B.22.gtf" ftype="gtf"/>\n-                </conditional>\n+                <param name="sjdbOverhang" value="100" />\n+                <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/>\n             </conditional>\n             <section name="solo" >\n                 <conditional name="params">\n-                    <param name="chemistry" value="custom" />\n-                    <param name="soloCBstart" value="1" />\n-                    <param name="soloCBlen" 
value="16" />\n-                    <param name="soloUMIstart" value="17" />\n-                    <param name="soloUMIlen" value="10" />\n+                    <param name="chemistry" value="CR3" />\n                 </conditional>\n                 <param name="soloStrand" value="Forward" />\n-                <param name="soloFeatures" value="GeneFull" />\n-                <param name="soloUMIdedup" value="1MM_Directional" />\n+                <param name="soloFeatures" value="Gene" />\n+                <param name="soloUMIdedup" value="1MM_All" />\n             </section>\n             <output name="output_barcodes" >\n                 <assert_contents>\n-                    <has_line line="TTTGTCATCTTAGAGC" />\n-                    <has_line line="TTTGTCATCTTTCCTC" />\n+                    <has_line line="ACACCGGTCTAACGGT" />\n+                    <has_line line="TTCTCAATCCACGTTC" />\n                 </assert_contents>\n             </output>\n+            <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" />\n         </test>\n     </tests>\n     <help><![CDATA[\n **What it does**\n \n-**STARSolo** is a turnkey solution for analyzing droplet single cell RNA sequencing data (e.g. 10X Genomics Chromium System) built directly into STAR code. STARsolo inputs the raw FASTQ reads files, and performs the following operations:\n+**STARSolo** is a turnkey solution for analyzing droplet single cell RNA sequencing data (e.g. 10X Genomics Chromium System) built directly into STAR_ code. STARsolo takes raw FASTQ reads files as input, and performs the following operations:\n \n  * Error correction and demultiplexing of cell barcodes using user-input whitelist\n  * Mapping the reads to the reference genome using the standard STAR spliced read alignment algorithm\n@@ -360,6 +388,7 @@\n \n STARsolo output is designed to be a drop-in replacement for 10X CellRanger gene quantification output. 
It follows CellRanger logic for cell barcode whitelisting and UMI deduplication, and produces nearly identical gene counts in the same format. At the same time STARsolo is 10 times faster than CellRanger.\n \n+.. _STAR: https://github.com/alexdobin/STAR\n ]]></help>\n     <expand macro="citations"/>\n </tool>\n'
b
diff -r 178bdbdb6d24 -r e403d27e8f24 test-data/41737_R1_sub240k.fastq.gz
b
Binary file test-data/41737_R1_sub240k.fastq.gz has changed
b
diff -r 178bdbdb6d24 -r e403d27e8f24 test-data/41737_R2_sub240k.fastq.gz
b
Binary file test-data/41737_R2_sub240k.fastq.gz has changed
b
diff -r 178bdbdb6d24 -r e403d27e8f24 test-data/737K-august-2016.small.txt.gz
b
Binary file test-data/737K-august-2016.small.txt.gz has changed
b
diff -r 178bdbdb6d24 -r e403d27e8f24 test-data/SNORD83B.22.fa
--- a/test-data/SNORD83B.22.fa Thu Nov 28 15:55:47 2019 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,17 +0,0 @@
->22 dna:chromosome chromosome:GRCh38:22:39313740:50818468:1 REF
-CACACTCAAAGCAGCAAACAGCCCAGCAAGGCCAGACTGGGAATTTCCTCATCTCAGGAC
-TTCAAAGCCAGTGTGAAAGGACTGCCAACACCCTCTCCTTCCTTTCCTCTCCCACCACAG
-GGCCACCAGCGTCTGTGGCCTTGGATCCTCCCTCTACAAGAGCCCCCCCATGACAAGTCA
-GGACCTGCCTCACCTTGCGGAGGGTGAGCACCCGCTTCTTGGTTCCCACCACACAGCCTT
-TCAGCATGACAAAGTCATTGGTCACTTCACCATAGTGGACAAAGCCACCCTGGAAAACGA
-GCATCGGATCAGCACAGGCCCAGGAGGGGATTGTCGTGCAGATGACCCCTCCAGGTTCAG
-GCCCTCCCTGACCACAGGGCTGTTCTCAGAAGGAAGGCAACAAGGAACGGTTCCGCAGTC
-TGTCTCGGGCGCTGTGCCCAGCGCACATTCCAGGCCTCATCACTGAACAGCTGAGCCTGA
-GACCCCACTTCTCACCAGCCAACCCCGACGAGTGGACTCAGATGACAACATGCCACTTAC
-AAGGGACACAGCTAGGTGTTGTGTTGGCTTCAGTTAACGATCCTGCTAGCAGCCCCTAGG
-AAGCAGCCTATCCCCAAAAGCACGAGGCCTGGGATGGCCTCACAGAGCAGAACACCCATT
-ACTTACCAGAGGGTTGATGCTCTTGTCAGATAGGTCATAGTCAGTGGAGGCATTGTTCTT
-GATCAGCTTGCCGTCCTTGATAAGGTAGCCCTGGCCAATCTTATAAATCTGAATGAACAA
-GAAGGGTGTAAGGCTGGGGCATTAGGGACAAATAACCCAGACATGCCAGTGTGCTGACCT
-GCAAAGCACGCTAGAAGGCAGCTGAGGCCTCAGTCCCAGTCACAGCGTATCCCAAGGTCA
-GAGCAAAAAGCTGGCTGGCCCTCCAGGTTCCTTTCTGTAAGGCGGCTGGGCTAAAACTAA
\ No newline at end of file
b
diff -r 178bdbdb6d24 -r e403d27e8f24 test-data/SNORD83B.22.gtf
--- a/test-data/SNORD83B.22.gtf Thu Nov 28 15:55:47 2019 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,8 +0,0 @@
-#!genome-build GRCh38.p12
-#!genome-version GRCh38
-#!genome-date 2013-12
-#!genome-build-accession NCBI:GCA_000001405.27
-#!genebuild-last-updated 2018-11
-22 ensembl gene 39313819 39313911 . - . gene_id "ENSG00000209480"; gene_version "1"; gene_name "SNORD83B"; gene_source "ensembl"; gene_biotype "snoRNA";
-22 ensembl transcript 39313819 39313911 . - . gene_id "ENSG00000209480"; gene_version "1"; transcript_id "ENST00000386745"; transcript_version "1"; gene_name "SNORD83B"; gene_source "ensembl"; gene_biotype "snoRNA"; transcript_name "SNORD83B-201"; transcript_source "ensembl"; transcript_biotype "snoRNA"; tag "basic"; transcript_support_level "NA";
-22 ensembl exon 39313819 39313911 . - . gene_id "ENSG00000209480"; gene_version "1"; transcript_id "ENST00000386745"; transcript_version "1"; exon_number "1"; gene_name "SNORD83B"; gene_source "ensembl"; gene_biotype "snoRNA"; transcript_name "SNORD83B-201"; transcript_source "ensembl"; transcript_biotype "snoRNA"; exon_id "ENSE00001501751"; exon_version "1"; tag "basic"; transcript_support_level "NA";
b
diff -r 178bdbdb6d24 -r e403d27e8f24 test-data/filtered.barcodes.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtered.barcodes.txt Wed Aug 05 09:41:22 2020 -0400
b
@@ -0,0 +1,394 @@
+AAACCTGAGCGCTCCA
+AAACGGGGTTTGTGTG
+AAAGATGAGTACTTGC
+AAAGCAATCCACGAAT
+AAAGTAGGTAGCAAAT
+AACCATGGTAGTACCT
+AACCGCGCACATCTTT
+AACCGCGCATGCCTAA
+AACGTTGTCACAGTAC
+AACTTTCTCTGTTTGT
+AAGACCTTCAAACGGG
+AAGGAGCAGTATCGAA
+AAGGAGCAGTGGTAGC
+AAGGAGCCAGCTTAAC
+AAGGAGCTCTAAGCCA
+ACACCAACAGCTGTAT
+ACACCGGTCTAACGGT
+ACATACGCACACCGCA
+ACATACGTCTGCCCTA
+ACATACGTCTGGCGAC
+ACATCAGCAAACGCGA
+ACATGGTCAGACTCGC
+ACCAGTAAGCCTCGTG
+ACCGTAACACAACGCC
+ACCGTAAGTTCAGTAC
+ACCGTAATCTTGGGTA
+ACGAGCCCAAGAGTCG
+ACGAGCCGTGCTGTAT
+ACGAGGATCCTATGTT
+ACGAGGATCTTTAGTC
+ACGATACCAAAGTCAA
+ACGCAGCTCACCGTAA
+ACGCCGAAGCGCTTAT
+ACGGAGACATCTATGG
+ACGGCCACACCGAAAG
+ACGGGCTGTCGGCACT
+ACGGGTCCACACCGCA
+ACTGAGTAGACGACGT
+ACTGAGTTCGGAGCAA
+ACTGAGTTCGTCTGAA
+ACTGATGCAAGTTCTG
+ACTGCTCGTGCAGGTA
+ACTGTCCAGAGCTGCA
+ACTGTCCTCCCACTTG
+ACTTACTAGTTAGCGG
+ACTTACTGTCCCTTGT
+ACTTACTGTTACCAGT
+ACTTTCAGTCTCCACT
+ACTTTCAGTTAAAGTG
+AGAATAGCAGCTGTAT
+AGACGTTCAAGGCTCC
+AGACGTTCAGACTCGC
+AGACGTTGTCAAAGCG
+AGACGTTTCAGTTTGG
+AGAGCTTGTGACGCCT
+AGAGTGGCAGATGAGC
+AGAGTGGTCTTTAGGG
+AGCATACCAGGGTATG
+AGCCTAAGTTTAGGAA
+AGCCTAATCCCGACTT
+AGCGGTCAGTACATGA
+AGCGGTCGTTCCACGG
+AGCTCCTGTGTTTGGT
+AGCTCTCAGCAGCCTC
+AGCTCTCAGTGTTAGA
+AGGCCACGTCAGCTAT
+AGGCCACGTGCAGGTA
+AGGCCACTCGTGGTCG
+AGGGAGTAGGGTCTCC
+AGGGAGTTCGATGAGG
+AGGGATGGTTCAGGCC
+AGGTCATAGCTCCTCT
+AGGTCATCATCCGTGG
+AGTAGTCAGGATGTAT
+AGTAGTCCAATGGATA
+AGTCTTTAGCAGGCTA
+AGTGAGGCAATCGGTT
+AGTGAGGTCGAACGGA
+AGTGTCAAGGCGATAC
+AGTGTCAGTTCCATGA
+ATAACGCGTCGGCACT
+ATAAGAGCACCAGGTC
+ATAAGAGCACTTCTGC
+ATAAGAGTCGGGAGTA
+ATAGACCGTCCTAGCG
+ATCACGACATATGCTG
+ATCACGAGTCGACTGC
+ATCATGGTCCGTACAA
+ATCCACCCATTACGAC
+ATCGAGTCAGGTCTCG
+ATCGAGTGTTCCGTCT
+ATCTGCCCACCAGGCT
+ATCTGCCTCGCCATAA
+ATGAGGGGTCTTCAAG
+ATGTGTGAGGCTCAGA
+ATTACTCAGACCCACC
+ATTCTACAGTTTCCTT
+ATTCTACTCATTGCGA
+ATTTCTGTCTAACTCT
+CAACCAAGTCCGTCAG
+CAACTAGTCATGGTCA
+CAAGAAAGTTCCGTCT
+CAAGATCTCGTAGGTT
+CAAGGCCCAATCTACG
+CAAGGCCGTTATTCTC
+CAAGTTGTCGTACGGC
+CACAAACAGCCCAATT
+CACACCTTCCCAACGG
+CACACTCCATATGAGA
+CACAGGCCAGTCCTTC
+CACAGGCGTCATGCCG
+CACAGTAAGCTGTCTA
+CACAGTACAGTCGTGC
+CACATAGTCGTACCGG
+CACCACTAGTGTACCT
+CACCACTCATACGCCG
+CACCTTGAGTGTACGG
+CAGAATCGTGAAGGCT
+CAGAATCGTTACGACT
+CAGAGAGTCTTGTACT
+CAGCAGCGTGCAACGA
+CAGCATAAGTGGCACA
+CAGCCGAGTTCTGAAC
+CAGCTAACACCTCGTT
+CAGCTAAGTCAGTGGA
+CATATTCCACCTCGTT
+CATCAAGAGACAGAGA
+CATCCACAGGCGACAT
+CATCCACTCTGCCAGG
+CATGCCTGTTCTCATT
+CCAATCCAGACCTAGG
+CCACCTACAGAAGCAC
+CCACCTACAGCCTGTG
+CCCAATCTCCTAAGTG
+CCCAGTTAGTCCAGGA
+CCCAGTTCAAGGTTCT
+CCCATACAGCACCGCT
+CCCATACGTACCGCTG
+CCCATACGTGTTTGTG
+CCCTCCTGTGTGCCTG
+CCCTCCTTCGGTCCGA
+CCGGTAGGTGCAGTAG
+CCGGTAGTCTTCCTTC
+CCGTACTCATGGTCTA
+CCGTACTTCACTTCAT
+CCTAAAGGTGAGGGAG
+CCTACCAAGTGGGATC
+CCTACCAGTAAGTGGC
+CCTAGCTAGTCAAGCG
+CCTAGCTGTTTGCATG
+CGACCTTAGCCGTCGT
+CGACCTTAGCGATCCC
+CGACCTTTCTGTCAAG
+CGACTTCGTTCGAATC
+CGACTTCGTTGTGGCC
+CGACTTCTCATATCGG
+CGAGAAGTCTTGCATT
+CGAGCACAGAAGGTTT
+CGAGCACCAACGATGG
+CGAGCACTCAGAAATG
+CGAGCCACACCAGCAC
+CGATCGGCATAACCTG
+CGATGGCGTGCACGAA
+CGATGGCGTGGTTTCA
+CGCGGTACAGTATGCT
+CGCGGTACATGGATGG
+CGCTATCTCATCTGTT
+CGCTGGAAGGAGCGTT
+CGCTTCAAGTACGCGA
+CGCTTCAAGTTCGCGC
+CGCTTCACAATCAGAA
+CGGACACTCTATCCCG
+CGGACGTTCAGATAAG
+CGGACGTTCTCATTCA
+CGGACTGTCTAACTCT
+CGGAGCTCACGAAGCA
+CGGAGTCTCAACACCA
+CGGGTCAGTCGACTAT
+CGGGTCATCATCGCTC
+CGGTTAAGTTCAGTAC
+CGGTTAAGTTGTGGCC
+CGTCACTTCGAATGGG
+CGTGTCTAGACTGTAA
+CGTGTCTAGATAGCAT
+CGTTAGACAAGGGTCA
+CGTTCTGGTACATGTC
+CTAACTTGTATGCTTG
+CTAACTTTCTAACCGA
+CTAAGACGTTTGTGTG
+CTACATTAGAAACGCC
+CTACATTAGGTGACCA
+CTACATTTCTCGAGTA
+CTAGAGTAGACACGAC
+CTAGAGTGTCGCGAAA
+CTCACACCATTAACCG
+CTCACACGTGCTTCTC
+CTCAGAATCGGATGGA
+CTCATTATCTAGAGTC
+CTCCTAGTCACGCATA
+CTCGGAGAGCTGAAAT
+CTCGGAGTCTTAGAGC
+CTCGGGACACACGCTG
+CTCGGGATCGTAGGAG
+CTCGTACAGGTAAACT
+CTCTAATGTCCAGTTA
+CTCTGGTAGAGTGACC
+CTGAAGTCAATTGCTG
+CTGAAGTGTATAATGG
+CTGAAGTGTGTCGCTG
+CTGATCCAGTAGCGGT
+CTGATCCTCGTCACGG
+CTGCCTAGTTCGTGAT
+CTGCGGAAGACTAGAT
+CTGCTGTCAAGGTTCT
+CTGCTGTCATTATCTC
+CTGCTGTTCAACTCTT
+CTGGTCTTCATAAAGG
+CTGTGCTAGCAGGTCA
+CTGTGCTCATATACGC
+CTGTTTAAGGGATGGG
+CTTACCGGTGGACGAT
+CTTAGGACAAACCTAC
+CTTCTCTAGATGGCGT
+CTTGGCTAGTCAAGGC
+CTTTGCGAGATCCTGT
+CTTTGCGGTTTAAGCC
+CTTTGCGGTTTGACAC
+GAAATGATCTCCTATA
+GAATAAGGTCAAAGAT
+GAATGAATCATCACCC
+GACCAATCAGCTGGCT
+GACCAATGTCATGCAT
+GACGCGTTCCGATATG
+GACGGCTCATGATCCA
+GACGTGCCATACGCTA
+GACTGCGCAATGGACG
+GACTGCGCATCTACGA
+GACTGCGGTCGGCACT
+GATCGCGTCGTAGGTT
+GATCTAGGTCTAGCGC
+GATGAAATCACAACGT
+GATGCTAGTATATGGA
+GATTCAGCATGCGCAC
+GCAAACTAGAGGGCTT
+GCAAACTGTGTCAATC
+GCAAACTTCTGCGTAA
+GCACTCTAGCACAGGT
+GCAGCCACAGGACCCT
+GCAGCCATCCTCAATT
+GCATGATGTCAACATC
+GCATGTACACAGCGTC
+GCATGTAGTTCAACCA
+GCATGTAGTTGCCTCT
+GCGACCACAGACAAAT
+GCGAGAACAGATGGGT
+GCGCAACCACCTGGTG
+GCGCAACTCGTCACGG
+GCGCAGTAGGAGTACC
+GCGCAGTGTTCGCTAA
+GCGCGATAGGATGGAA
+GCGCGATGTAGCTTGT
+GCGGGTTCATCCGTGG
+GCGGGTTTCGTTTATC
+GCTCTGTGTGCCTGGT
+GCTGCAGTCTGAGTGT
+GCTTCCAAGCAGGCTA
+GCTTCCAGTGCAGTAG
+GCTTGAAAGAAGGACA
+GGAATAACACCAGGTC
+GGACAAGAGTACGCCC
+GGACATTCACTGCCAG
+GGACATTTCACAGTAC
+GGACGTCAGTGGGATC
+GGATGTTTCCACTGGG
+GGCAATTCACCATCCT
+GGCGACTTCAACACCA
+GGCTGGTTCTCGTTTA
+GGGAATGAGGGTTTCT
+GGGACCTCAGTCGATT
+GGGACCTTCGGAGGTA
+GGGATGAGTGCAGACA
+GGTGCGTTCAGCTCGG
+GGTGCGTTCGCTAGCG
+GTACGTAGTATCAGTC
+GTACTCCCACAGACAG
+GTAGGCCCACTGCCAG
+GTATCTTAGACGCTTT
+GTATCTTTCCTCAACC
+GTATTCTAGGTTCCTA
+GTCACAATCTCAACTT
+GTCACGGAGCACACAG
+GTCATTTGTGCTTCTC
+GTGCAGCTCTTCATGT
+GTGCATAAGGAGCGTT
+GTGCGGTAGCTACCGC
+GTGCGGTAGGCCGAAT
+GTGCGGTCATGCCACG
+GTGCTTCAGATGAGAG
+GTGGGTCCATCGGGTC
+GTGGGTCGTTCGAATC
+GTTAAGCAGCCACCTG
+GTTAAGCAGTACATGA
+GTTAAGCCAATTGCTG
+GTTACAGGTCAGGACA
+GTTACAGTCCAGTAGT
+GTTCTCGTCTAACTGG
+TAAACCGCAAGGACAC
+TAAACCGCATACGCCG
+TAAACCGCATGGTAGG
+TAAGCGTAGCTCCTCT
+TACACGATCGTGGGAA
+TACAGTGAGTGTCCCG
+TACAGTGCAGCTCGCA
+TACGGATCAGCTATTG
+TACGGGCCACAACGTT
+TACGGTAAGGATGCGT
+TACTTACAGGCTATCT
+TACTTGTAGACTTTCG
+TACTTGTCACATAACC
+TAGCCGGCAGGGTTAG
+TATGCCCCAATGTAAG
+TATGCCCGTACTTAGC
+TCAACGAAGCTAGTGG
+TCAATCTAGGATGCGT
+TCACAAGCATCCTTGC
+TCACGAATCACTCCTG
+TCAGATGTCGTTGCCT
+TCAGCAATCAGCACAT
+TCAGGATGTATAGGGC
+TCGCGAGGTTCCAACA
+TCGCGAGGTTGTCTTT
+TCGTACCAGGCTCAGA
+TCGTAGAGTCTTGTCC
+TCTATTGCATGCCTAA
+TCTCTAACAGATCCAT
+TCTTTCCTCCACTCCA
+TGACAACGTTGTGGCC
+TGACTAGGTTCGGGCT
+TGACTAGTCACATACG
+TGACTTTCATTGGCGC
+TGAGAGGCATGTCGAT
+TGAGCCGAGCGAAGGG
+TGAGCCGCAGTCACTA
+TGAGGGAAGAGACTTA
+TGAGGGACATCCAACA
+TGCACCTCATGCAATC
+TGCCAAACAAACCTAC
+TGCCAAATCGAATCCA
+TGCCCTAAGTGTACTC
+TGCGCAGAGGAGTTTA
+TGCGCAGGTGGGTATG
+TGCGTGGGTCTCCACT
+TGCTACCCATGACGGA
+TGCTACCTCGGCTACG
+TGCTGCTGTTATTCTC
+TGCTGCTGTTCGGCAC
+TGCTGCTTCAAAGTAG
+TGGACGCAGACCACGA
+TGGACGCCAGAGTGTG
+TGGCGCACAGATGGGT
+TGGCTGGCACCCATTC
+TGGGAAGCATCCCATC
+TGGTTAGTCAACACAC
+TGGTTAGTCACAATGC
+TGGTTCCAGTATTGGA
+TGGTTCCCAAGCCGCT
+TGGTTCCCATCGTCGG
+TGTATTCGTAGCGTGA
+TGTCCCATCAACACTG
+TGTGGTAAGGGCATGT
+TTAACTCCAACACCTA
+TTAGGACCAGCCTTTC
+TTAGGACGTCTTTCAT
+TTAGTTCCAAGTAGTA
+TTATGCTCACCAACCG
+TTATGCTTCCACGTTC
+TTATGCTTCCCTTGTG
+TTCCCAGTCTTGCATT
+TTCGGTCAGCTAAACA
+TTCTCAAAGCTACCTA
+TTCTCAATCCACGTTC
+TTCTCAATCGCCTGAG
+TTCTCCTTCGCCAAAT
+TTCTTAGTCAGCTCGG
+TTGAACGAGCTTCGCG
+TTGAACGCACGAAAGC
+TTGCGTCGTTATTCTC
+TTGTAGGTCGGATGTT
+TTTACTGAGGCTAGGT
+TTTACTGCAGTGGAGT
+TTTCCTCCACTTGGAT
+TTTCCTCCATCTCGCT
+TTTGGTTAGCACCGTC
+TTTGGTTAGTGGGCTA
b
diff -r 178bdbdb6d24 -r e403d27e8f24 test-data/filtered3.Homo_sapiens.GRCh38.100.chr21.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtered3.Homo_sapiens.GRCh38.100.chr21.gtf Wed Aug 05 09:41:22 2020 -0400
b
b'@@ -0,0 +1,461 @@\n+#!genome-build GRCh38.p13\n+#!genome-version GRCh38\n+#!genome-date 2013-12\n+#!genome-build-accession NCBI:GCA_000001405.28\n+#!genebuild-last-updated 2019-06\n+21\thavana\tgene\t1000\t6346\t.\t+\t.\tgene_id "ENSG00000279493"; gene_version "1"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding";\n+21\thavana\ttranscript\t1000\t6346\t.\t+\t.\tgene_id "ENSG00000279493"; gene_version "1"; transcript_id "ENST00000624081"; transcript_version "1"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding"; transcript_name "FP565260.4-201"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "cds_start_NF"; tag "mRNA_start_NF"; tag "basic"; transcript_support_level "5";\n+21\thavana\texon\t1000\t1075\t.\t+\t.\tgene_id "ENSG00000279493"; gene_version "1"; transcript_id "ENST00000624081"; transcript_version "1"; exon_number "1"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding"; transcript_name "FP565260.4-201"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003760288"; exon_version "1"; tag "cds_start_NF"; tag "mRNA_start_NF"; tag "basic"; transcript_support_level "5";\n+21\thavana\tCDS\t1000\t1075\t.\t+\t0\tgene_id "ENSG00000279493"; gene_version "1"; transcript_id "ENST00000624081"; transcript_version "1"; exon_number "1"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding"; transcript_name "FP565260.4-201"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000485664"; protein_version "1"; tag "cds_start_NF"; tag "mRNA_start_NF"; tag "basic"; transcript_support_level "5";\n+21\thavana\texon\t1749\t1888\t.\t+\t.\tgene_id "ENSG00000279493"; gene_version "1"; transcript_id "ENST00000624081"; transcript_version "1"; exon_number "2"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding"; transcript_name "FP565260.4-201"; transcript_source "havana"; 
transcript_biotype "protein_coding"; exon_id "ENSE00003758404"; exon_version "1"; tag "cds_start_NF"; tag "mRNA_start_NF"; tag "basic"; transcript_support_level "5";\n+21\thavana\tCDS\t1749\t1888\t.\t+\t2\tgene_id "ENSG00000279493"; gene_version "1"; transcript_id "ENST00000624081"; transcript_version "1"; exon_number "2"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding"; transcript_name "FP565260.4-201"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000485664"; protein_version "1"; tag "cds_start_NF"; tag "mRNA_start_NF"; tag "basic"; transcript_support_level "5";\n+21\thavana\texon\t3587\t3672\t.\t+\t.\tgene_id "ENSG00000279493"; gene_version "1"; transcript_id "ENST00000624081"; transcript_version "1"; exon_number "3"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding"; transcript_name "FP565260.4-201"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003755466"; exon_version "1"; tag "cds_start_NF"; tag "mRNA_start_NF"; tag "basic"; transcript_support_level "5";\n+21\thavana\tCDS\t3587\t3672\t.\t+\t0\tgene_id "ENSG00000279493"; gene_version "1"; transcript_id "ENST00000624081"; transcript_version "1"; exon_number "3"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding"; transcript_name "FP565260.4-201"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000485664"; protein_version "1"; tag "cds_start_NF"; tag "mRNA_start_NF"; tag "basic"; transcript_support_level "5";\n+21\thavana\texon\t6136\t6346\t.\t+\t.\tgene_id "ENSG00000279493"; gene_version "1"; transcript_id "ENST00000624081"; transcript_version "1"; exon_number "4"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding"; transcript_name "FP565260.4-201"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003755385"; exon_version "1"; tag "cds_start_NF"; tag "mRNA_start_NF"; tag 
"basic"; transcript_support_level "5";\n+21\thavana\tCDS\t6136\t6298\t.\t+\t1\tgene_id "ENSG00000279493"; gene_'..b'\tgene\t586591\t586691\t.\t+\t.\tgene_id "ENSG00000277777"; gene_version "1"; gene_name "CU633967.2"; gene_source "ensembl"; gene_biotype "misc_RNA";\n+21\tensembl\ttranscript\t586591\t586691\t.\t+\t.\tgene_id "ENSG00000277777"; gene_version "1"; transcript_id "ENST00000610788"; transcript_version "1"; gene_name "CU633967.2"; gene_source "ensembl"; gene_biotype "misc_RNA"; transcript_name "CU633967.2-201"; transcript_source "ensembl"; transcript_biotype "misc_RNA"; tag "basic"; transcript_support_level "NA";\n+21\tensembl\texon\t586591\t586691\t.\t+\t.\tgene_id "ENSG00000277777"; gene_version "1"; transcript_id "ENST00000610788"; transcript_version "1"; exon_number "1"; gene_name "CU633967.2"; gene_source "ensembl"; gene_biotype "misc_RNA"; transcript_name "CU633967.2-201"; transcript_source "ensembl"; transcript_biotype "misc_RNA"; exon_id "ENSE00003748388"; exon_version "1"; tag "basic"; transcript_support_level "NA";\n+21\thavana\tgene\t692383\t694838\t.\t-\t.\tgene_id "ENSG00000279186"; gene_version "1"; gene_name "FP236315.2"; gene_source "havana"; gene_biotype "TEC";\n+21\thavana\ttranscript\t692383\t694838\t.\t-\t.\tgene_id "ENSG00000279186"; gene_version "1"; transcript_id "ENST00000624506"; transcript_version "1"; gene_name "FP236315.2"; gene_source "havana"; gene_biotype "TEC"; transcript_name "FP236315.2-201"; transcript_source "havana"; transcript_biotype "TEC"; tag "basic"; transcript_support_level "NA";\n+21\thavana\texon\t692383\t694838\t.\t-\t.\tgene_id "ENSG00000279186"; gene_version "1"; transcript_id "ENST00000624506"; transcript_version "1"; exon_number "1"; gene_name "FP236315.2"; gene_source "havana"; gene_biotype "TEC"; transcript_name "FP236315.2-201"; transcript_source "havana"; transcript_biotype "TEC"; exon_id "ENSE00003756739"; exon_version "1"; tag "basic"; transcript_support_level 
"NA";\n+21\thavana\tgene\t694546\t696361\t.\t+\t.\tgene_id "ENSG00000279784"; gene_version "1"; gene_name "FP236315.3"; gene_source "havana"; gene_biotype "lncRNA";\n+21\thavana\ttranscript\t694546\t696361\t.\t+\t.\tgene_id "ENSG00000279784"; gene_version "1"; transcript_id "ENST00000623587"; transcript_version "1"; gene_name "FP236315.3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "FP236315.3-201"; transcript_source "havana"; transcript_biotype "lncRNA"; tag "basic"; transcript_support_level "2";\n+21\thavana\texon\t694546\t694654\t.\t+\t.\tgene_id "ENSG00000279784"; gene_version "1"; transcript_id "ENST00000623587"; transcript_version "1"; exon_number "1"; gene_name "FP236315.3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "FP236315.3-201"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00003758868"; exon_version "1"; tag "basic"; transcript_support_level "2";\n+21\thavana\texon\t696164\t696361\t.\t+\t.\tgene_id "ENSG00000279784"; gene_version "1"; transcript_id "ENST00000623587"; transcript_version "1"; exon_number "2"; gene_name "FP236315.3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "FP236315.3-201"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00003757033"; exon_version "1"; tag "basic"; transcript_support_level "2";\n+21\thavana\tgene\t696205\t698657\t.\t-\t.\tgene_id "ENSG00000279064"; gene_version "1"; gene_name "FP236315.1"; gene_source "havana"; gene_biotype "lncRNA";\n+21\thavana\ttranscript\t696205\t698657\t.\t-\t.\tgene_id "ENSG00000279064"; gene_version "1"; transcript_id "ENST00000623723"; transcript_version "1"; gene_name "FP236315.1"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "FP236315.1-201"; transcript_source "havana"; transcript_biotype "lncRNA"; tag "basic"; transcript_support_level "2";\n+21\thavana\texon\t696205\t696358\t.\t-\t.\tgene_id "ENSG00000279064"; gene_version "1"; transcript_id "ENST00000623723"; 
transcript_version "1"; exon_number "2"; gene_name "FP236315.1"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "FP236315.1-201"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00003756092"; exon_version "1"; tag "basic"; transcript_support_level "2";\n'
b
diff -r 178bdbdb6d24 -r e403d27e8f24 test-data/filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz
b
Binary file test-data/filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz has changed
b
diff -r 178bdbdb6d24 -r e403d27e8f24 test-data/filtered3.bam
b
Binary file test-data/filtered3.bam has changed
b
diff -r 178bdbdb6d24 -r e403d27e8f24 test-data/pbmc_1k_v2_L001.R1.10k.fastq.gz
b
Binary file test-data/pbmc_1k_v2_L001.R1.10k.fastq.gz has changed
b
diff -r 178bdbdb6d24 -r e403d27e8f24 test-data/pbmc_1k_v2_L001.R2.10k.fastq.gz
b
Binary file test-data/pbmc_1k_v2_L001.R2.10k.fastq.gz has changed
b
diff -r 178bdbdb6d24 -r e403d27e8f24 test-data/rnastar_index2_versioned.loc
--- a/test-data/rnastar_index2_versioned.loc Thu Nov 28 15:55:47 2019 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,4 +0,0 @@
-##STAR versioned indexes
-#build_id dbkey display_name path with_gene_model version
-000 ? test_index ${__HERE__}/test-cache/tophat-test 0 2.7.1a
-001 ? test_index_with_gene_model ${__HERE__}/test-cache/tophat-test 1 2.7.1a
b
diff -r 178bdbdb6d24 -r e403d27e8f24 test-data/rnastar_index2x_versioned.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rnastar_index2x_versioned.loc Wed Aug 05 09:41:22 2020 -0400
b
@@ -0,0 +1,4 @@
+##STAR versioned indexes
+#build_id dbkey display_name path with_gene_model version
+000 ? test_index ${__HERE__}/test-cache/tophat-test 0 2.7.4a
+001 ? test_index_with_gene_model ${__HERE__}/test-cache/tophat-test 1 2.7.4a
b
diff -r 178bdbdb6d24 -r e403d27e8f24 test-data/test-cache/tophat-test/Log.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/tophat-test/Log.out Wed Aug 05 09:41:22 2020 -0400
b
@@ -0,0 +1,50 @@
+STAR version=2.7.5a
+STAR compilation time,server,dir=Tue Jun 16 12:17:16 EDT 2020 vega:/home/dobin/data/STAR/STARcode/STAR.master/source
+##### Command Line:
+STAR --runMode genomeGenerate --genomeDir tempstargenomedir --genomeFastaFiles tools/rgrnastar/test-data/tophat_test.fa --genomeSAindexNbases 5
+##### Initial USER parameters from Command Line:
+###### All USER parameters from Command Line:
+runMode                       genomeGenerate     ~RE-DEFINED
+genomeDir                     tempstargenomedir     ~RE-DEFINED
+genomeFastaFiles              tools/rgrnastar/test-data/tophat_test.fa        ~RE-DEFINED
+genomeSAindexNbases           5     ~RE-DEFINED
+##### Finished reading parameters from all sources
+
+##### Final user re-defined parameters-----------------:
+runMode                           genomeGenerate
+genomeDir                         tempstargenomedir
+genomeFastaFiles                  tools/rgrnastar/test-data/tophat_test.fa   
+genomeSAindexNbases               5
+
+-------------------------------
+##### Final effective command line:
+STAR   --runMode genomeGenerate   --genomeDir tempstargenomedir   --genomeFastaFiles tools/rgrnastar/test-data/tophat_test.fa      --genomeSAindexNbases 5
+----------------------------------------
+
+Number of fastq files for each mate = 1
+Finished loading and checking parameters
+--genomeDir directory created: tempstargenomedir/
+Jul 29 17:04:16 ... starting to generate Genome files
+tools/rgrnastar/test-data/tophat_test.fa : chr # 0  "test_chromosome" chrStart: 0
+Estimated genome size=201262144 = 262144 + 201000000
+GstrandBit=32
+Number of SA indices: 1300
+Jul 29 17:04:16 ... starting to sort Suffix Array. This may take a long time...
+Number of chunks: 1;   chunks size limit: 18599685304 bytes
+Jul 29 17:04:16 ... sorting Suffix Array chunks and saving them to disk...
+Writing 10400 bytes into tempstargenomedir//SA_0 ; empty space on disk = 66320359424 bytes ... done
+Jul 29 17:04:16 ... loading chunks from disk, packing SA...
+Jul 29 17:04:16 ... finished generating suffix array
+Jul 29 17:04:16 ... generating Suffix Array index
+Jul 29 17:04:16 ... completed Suffix Array index
+Jul 29 17:04:16 ... writing Genome to disk ...
+Writing 262144 bytes into tempstargenomedir//Genome ; empty space on disk = 66320355328 bytes ... done
+SA size in bytes: 5366
+Jul 29 17:04:16 ... writing Suffix Array to disk ...
+Writing 5366 bytes into tempstargenomedir//SA ; empty space on disk = 66320093184 bytes ... done
+Jul 29 17:04:16 ... writing SAindex to disk
+Writing 8 bytes into tempstargenomedir//SAindex ; empty space on disk = 66320084992 bytes ... done
+Writing 48 bytes into tempstargenomedir//SAindex ; empty space on disk = 66320084992 bytes ... done
+Writing 5971 bytes into tempstargenomedir//SAindex ; empty space on disk = 66320084992 bytes ... done
+Jul 29 17:04:16 ..... finished successfully
+DONE: Genome generation, EXITING
b
diff -r 178bdbdb6d24 -r e403d27e8f24 test-data/test-cache/tophat-test/genomeParameters.txt
--- a/test-data/test-cache/tophat-test/genomeParameters.txt Thu Nov 28 15:55:47 2019 -0500
+++ b/test-data/test-cache/tophat-test/genomeParameters.txt Wed Aug 05 09:41:22 2020 -0400
b
@@ -1,10 +1,13 @@
-### STAR   --runMode genomeGenerate   --genomeDir test-cache/tophat_test/   --genomeFastaFiles tophat_test.fa      --genomeSAindexNbases 5
+### STAR   --runMode genomeGenerate   --genomeDir tempstargenomedir   --genomeFastaFiles tools/rgrnastar/test-data/tophat_test.fa      --genomeSAindexNbases 5
 ### GstrandBit 32
-versionGenome 2.7.1a
-genomeFastaFiles tophat_test.fa 
+versionGenome 2.7.4a
+genomeType Full
+genomeFastaFiles tools/rgrnastar/test-data/tophat_test.fa 
 genomeSAindexNbases 5
 genomeChrBinNbits 18
 genomeSAsparseD 1
+genomeTransformType None
+genomeTransformVCF -
 sjdbOverhang 0
 sjdbFileChrStartEnd - 
 sjdbGTFfile -
b
diff -r 178bdbdb6d24 -r e403d27e8f24 tool-data/rnastar_index2_versioned.loc.sample
--- a/tool-data/rnastar_index2_versioned.loc.sample Thu Nov 28 15:55:47 2019 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,26 +0,0 @@
-#This is a sample file distributed with Galaxy that enables tools
-#to use a directory of rna-star indexed sequences data files. You will
-#need to create these data files and then create a rnastar_index2.loc
-#file similar to this one (store it in this directory) that points to
-#the directories in which those files are stored. The rnastar_index2.loc
-#file has this format (longer white space characters are TAB characters):
-#
-#<unique_build_id>   <dbkey>   <display_name>   <file_base_path> <with_gene_model> <version>
-#
-#The <with_gene_model> column should be 1 or 0, indicating whether the index
-#was built with annotations (i.e., --sjdbGTFfile and --sjdbOverhang were used)
-#or not.
-#
-#The <version> column indicates the STAR version that introduced the format of
-#the index, i.e., the oldest STAR version that could make use of the index.
-#
-#Note that STAR indices can become quite large. Consequently, it is only
-#advisable to create indices with annotations if it's known ahead of time that
-#(A) the annotations won't be frequently updated and (B) the read lengths used
-#will also rarely vary. If either of these is not the case, it's advisable to
-#create indices without annotations and then specify an annotation file and
-#maximum read length (minus 1) when running STAR.
-#
-#hg19   hg19    hg19 full   /mnt/galaxyIndices/genomes/hg19/rnastar 0 2.7.1a
-#hg19Ensembl   hg19Ensembl    hg19 full with Ensembl annotation   /mnt/galaxyIndices/genomes/hg19Ensembl/rnastar 1 2.7.1a
-
b
diff -r 178bdbdb6d24 -r e403d27e8f24 tool-data/rnastar_index2x_versioned.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/rnastar_index2x_versioned.loc.sample Wed Aug 05 09:41:22 2020 -0400
b
@@ -0,0 +1,27 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of rna-star indexed sequences data files.
+#You will need to create these data files and then create a
+#rnastar_index2x_versioned.loc file similar to this one (store it in this
+#directory) that points to the directories in which those files are stored.
+#The rnastar_index2x_versioned.loc file has this format (longer white space
+#characters are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_base_path> <with_gene_model> <version>
+#
+#The <with_gene_model> column should be 1 or 0, indicating whether the index
+#was built with annotations (i.e., --sjdbGTFfile and --sjdbOverhang were used)
+#or not.
+#
+#The <version> column indicates the STAR version that introduced the format of
+#the index, i.e., the oldest STAR version that could make use of the index.
+#
+#Note that STAR indices can become quite large. Consequently, it is only
+#advisable to create indices with annotations if it's known ahead of time that
+#(A) the annotations won't be frequently updated and (B) the read lengths used
+#will also rarely vary. If either of these is not the case, it's advisable to
+#create indices without annotations and then specify an annotation file and
+#maximum read length (minus 1) when running STAR.
+#
+#hg19   hg19    hg19 full   /mnt/galaxyIndices/genomes/hg19/rnastar 0 2.7.4a
+#hg19Ensembl   hg19Ensembl    hg19 full with Ensembl annotation   /mnt/galaxyIndices/genomes/hg19Ensembl/rnastar 1 2.7.4a
+
b
diff -r 178bdbdb6d24 -r e403d27e8f24 tool_data_table_conf.xml.sample
--- a/tool_data_table_conf.xml.sample Thu Nov 28 15:55:47 2019 -0500
+++ b/tool_data_table_conf.xml.sample Wed Aug 05 09:41:22 2020 -0400
b
@@ -1,6 +1,6 @@
 <tables>
-    <table name="rnastar_index2_versioned" comment_char="#" allow_duplicate_entries="False">
+    <table name="rnastar_index2x_versioned" comment_char="#" allow_duplicate_entries="False">
         <columns>value, dbkey, name, path, with_gene_model, version</columns>
-        <file path="tool-data/rnastar_index2_versioned.loc" />
+        <file path="tool-data/rnastar_index2x_versioned.loc" />
     </table>
 </tables>
b
diff -r 178bdbdb6d24 -r e403d27e8f24 tool_data_table_conf.xml.test
--- a/tool_data_table_conf.xml.test Thu Nov 28 15:55:47 2019 -0500
+++ b/tool_data_table_conf.xml.test Wed Aug 05 09:41:22 2020 -0400
b
@@ -1,7 +1,7 @@
 <tables>
     <!-- Location of rnastar indexes for testing -->
-    <table name="rnastar_index2_versioned" comment_char="#" allow_duplicate_entries="False">
+    <table name="rnastar_index2x_versioned" comment_char="#" allow_duplicate_entries="False">
         <columns>value, dbkey, name, path, with_gene_model, version</columns>
-        <file path="${__HERE__}/test-data/rnastar_index2_versioned.loc" />
+        <file path="${__HERE__}/test-data/rnastar_index2x_versioned.loc" />
     </table>
 </tables>