Repository 'rna_starsolo'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/rna_starsolo

Changeset 17:5ef7ec16b04f (2023-09-01)
Previous changeset 16:13022c3d3076 (2023-03-27) Next changeset 18:8dc4a444ec04 (2024-02-11)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgrnastar commit 65fa34f035934b4e9c1fbeb5dffe00b12e6a0f32
modified:
macros.xml
rg_rnaStarSolo.xml
added:
test-data/fakexon.Homo_sapiens.GRCh38.100.chr21.gtf
test-data/no_exon.gtf
test-data/rnastar_test_mapped_reads_fakexon.bam
b
diff -r 13022c3d3076 -r 5ef7ec16b04f macros.xml
--- a/macros.xml Mon Mar 27 16:38:01 2023 +0000
+++ b/macros.xml Fri Sep 01 13:18:03 2023 +0000
b
@@ -5,7 +5,7 @@
     the index versions in sync, but you should manually update @IDX_VERSION_SUFFIX@ -->
     <!-- STAR version to be used -->
     <token name="@TOOL_VERSION@">2.7.10b</token>
-    <token name="@VERSION_SUFFIX@">3</token>
+    <token name="@VERSION_SUFFIX@">4</token>
     <token name="@PROFILE@">21.01</token>
     <!-- STAR index version compatible with this version of STAR
     This is the STAR version that introduced the index structure expected
@@ -60,6 +60,7 @@
     </xml>
     <xml name="SJDBOPTIONS">
          <param argument="--sjdbGTFfile" type="data" format="gff3,gtf" label="Gene model (gff3,gtf) file for splice junctions" optional="false" help="Exon junction information for mapping splices"/>
+         <param argument="--sjdbGTFfeatureExon" type="text" value="exon" label="Feature type in the gene model to use for splice junctions" help="Feature type (third column of the gene model file) whose entries are treated as exons when building the splice junction database. The default, 'exon', is appropriate in almost all cases. It can optionally be changed to use other feature types, such as 'gene', 'transcript' or 'CDS'."/>
          <param argument="--sjdbOverhang" type="integer" min="1" value="100" label="Length of the genomic sequence around annotated junctions" help="Used in constructing the splice junctions database. Ideal value is ReadLength-1"/>
     </xml>
     <xml name="dbKeyActions">
@@ -103,15 +104,17 @@
                 #if str($refGenomeSource.GTFconditional.GTFselect) == 'with-gtf':
                     --sjdbOverhang '${refGenomeSource.GTFconditional.sjdbOverhang}'
                     --sjdbGTFfile '${refGenomeSource.GTFconditional.sjdbGTFfile}'
+                    --sjdbGTFfeatureExon '${refGenomeSource.GTFconditional.sjdbGTFfeatureExon}'
                     #if str($refGenomeSource.GTFconditional.sjdbGTFfile.ext) == 'gff3':
                         --sjdbGTFtagExonParentTranscript Parent
                     #end if
                 #end if
             #else:
-                ## ref genome selection is less complex for STARsolo cause
+                ## ref genome selection is less complex for STARsolo because
                 ## with-gtf is mandatory there
                 --sjdbOverhang '${refGenomeSource.sjdbOverhang}'
                 --sjdbGTFfile '${refGenomeSource.sjdbGTFfile}'
+                --sjdbGTFfeatureExon '${refGenomeSource.sjdbGTFfeatureExon}'
                 #if str($refGenomeSource.sjdbGTFfile.ext) == 'gff3':
                     --sjdbGTFtagExonParentTranscript Parent
                 #end if
@@ -137,6 +140,7 @@
         #if str($refGenomeSource.GTFconditional.GTFselect) == 'without-gtf-with-gtf':
             --sjdbOverhang $refGenomeSource.GTFconditional.sjdbOverhang
             --sjdbGTFfile '${refGenomeSource.GTFconditional.sjdbGTFfile}'
+            --sjdbGTFfeatureExon '${refGenomeSource.GTFconditional.sjdbGTFfeatureExon}'
             #if str($refGenomeSource.GTFconditional.sjdbGTFfile.ext) == 'gff3':
                 --sjdbGTFtagExonParentTranscript Parent
             #end if
b
diff -r 13022c3d3076 -r 5ef7ec16b04f rg_rnaStarSolo.xml
--- a/rg_rnaStarSolo.xml Mon Mar 27 16:38:01 2023 +0000
+++ b/rg_rnaStarSolo.xml Fri Sep 01 13:18:03 2023 +0000
[
@@ -1306,6 +1306,71 @@
                 <metadata name="column_names" value="GeneID,Counts_unstrand,Counts_firstStrand,Counts_secondStrand" />
             </output>
         </test>
+        <test expect_num_outputs="6">
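+            <!-- test 13: the gene model labels its exons 'fakexon', so sjdbGTFfeatureExon
+            is set to 'fakexon'. If the splice feature is not set correctly, STAR is expected
+            to fail with "Fatal INPUT FILE error, no exon lines in the GTF file" -->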
+            <conditional name="refGenomeSource">
+                <param name="geneSource" value="history" />
+                <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
+                <param name="genomeSAindexNbases" value="4" />
+                <param name="sjdbOverhang" value="100" />
+                <param name="sjdbGTFfile" value="fakexon.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/>
+                <param name="sjdbGTFfeatureExon" value="fakexon"/>
+            </conditional>
+            <conditional name="sc" >
+                <param name="solo_type" value="CB_UMI_Simple" />
+                <conditional name="input_types">
+                    <param name="use" value="repeat" />
+                    <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
+                    <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
+                </conditional>
+                <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
+                <conditional name="params">
+                    <param name="chemistry" value="Cv3" />
+                </conditional>
+                <conditional name="umidedup">
+                    <param name="soloUMIdedup" value="1MM_All" />
+                </conditional>
+            </conditional>
+            <section name="solo" >
+                <conditional name="filter">
+                    <param name="filter_type" value="no_filter" />
+                </conditional>
+                <param name="soloStrand" value="Forward" />
+                <param name="soloFeatures" value="Gene Velocyto" />
+                <param name="quantModeGene" value="true" />
+            </section>
+            <output name="output_barcodes" >
+                <assert_contents>
+                    <!-- first and last line -->
+                    <has_line line="AAACCTGAGCGCTCCA" />
+                    <has_line line="TTTGGTTAGTGGGCTA" />
+                    <has_n_lines n="394" />
+                </assert_contents>
+            </output>
+            <output name="output_genes">
+                <assert_contents>
+                    <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" />
+                    <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" />
+                    <has_n_lines n="14" />
+                </assert_contents>
+            </output>
+            <output name="output_stats" >
+                <assert_contents>
+                    <has_line_matching expression="\s+noUnmapped\s+0" />
+                    <has_line_matching expression="\s+yesUMIs\s+36" />
+                </assert_contents>
+            </output>
+            <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" />
+            <output name="reads_per_gene" >
+                <assert_contents>
+                    <has_line_matching expression="ENSG00000279493\s+0\s+0\s+0" />
+                    <has_line_matching expression="ENSG00000275464\s+38\s+1\s+40" />
+                </assert_contents>
+                <metadata name="column_names" value="GeneID,Counts_unstrand,Counts_firstStrand,Counts_secondStrand" />
+            </output>
+        </test>
     </tests>
     <help><![CDATA[
 **What it does**
b
diff -r 13022c3d3076 -r 5ef7ec16b04f test-data/fakexon.Homo_sapiens.GRCh38.100.chr21.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fakexon.Homo_sapiens.GRCh38.100.chr21.gtf Fri Sep 01 13:18:03 2023 +0000
b
b'@@ -0,0 +1,461 @@\n+#!genome-build GRCh38.p13\n+#!genome-version GRCh38\n+#!genome-date 2013-12\n+#!genome-build-accession NCBI:GCA_000001405.28\n+#!genebuild-last-updated 2019-06\n+21\thavana\tgene\t1000\t6346\t.\t+\t.\tgene_id "ENSG00000279493"; gene_version "1"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding";\n+21\thavana\ttranscript\t1000\t6346\t.\t+\t.\tgene_id "ENSG00000279493"; gene_version "1"; transcript_id "ENST00000624081"; transcript_version "1"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding"; transcript_name "FP565260.4-201"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "cds_start_NF"; tag "mRNA_start_NF"; tag "basic"; transcript_support_level "5";\n+21\thavana\tfakexon\t1000\t1075\t.\t+\t.\tgene_id "ENSG00000279493"; gene_version "1"; transcript_id "ENST00000624081"; transcript_version "1"; exon_number "1"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding"; transcript_name "FP565260.4-201"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003760288"; exon_version "1"; tag "cds_start_NF"; tag "mRNA_start_NF"; tag "basic"; transcript_support_level "5";\n+21\thavana\tCDS\t1000\t1075\t.\t+\t0\tgene_id "ENSG00000279493"; gene_version "1"; transcript_id "ENST00000624081"; transcript_version "1"; exon_number "1"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding"; transcript_name "FP565260.4-201"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000485664"; protein_version "1"; tag "cds_start_NF"; tag "mRNA_start_NF"; tag "basic"; transcript_support_level "5";\n+21\thavana\tfakexon\t1749\t1888\t.\t+\t.\tgene_id "ENSG00000279493"; gene_version "1"; transcript_id "ENST00000624081"; transcript_version "1"; exon_number "2"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding"; transcript_name "FP565260.4-201"; transcript_source "havana"; 
transcript_biotype "protein_coding"; exon_id "ENSE00003758404"; exon_version "1"; tag "cds_start_NF"; tag "mRNA_start_NF"; tag "basic"; transcript_support_level "5";\n+21\thavana\tCDS\t1749\t1888\t.\t+\t2\tgene_id "ENSG00000279493"; gene_version "1"; transcript_id "ENST00000624081"; transcript_version "1"; exon_number "2"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding"; transcript_name "FP565260.4-201"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000485664"; protein_version "1"; tag "cds_start_NF"; tag "mRNA_start_NF"; tag "basic"; transcript_support_level "5";\n+21\thavana\tfakexon\t3587\t3672\t.\t+\t.\tgene_id "ENSG00000279493"; gene_version "1"; transcript_id "ENST00000624081"; transcript_version "1"; exon_number "3"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding"; transcript_name "FP565260.4-201"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003755466"; exon_version "1"; tag "cds_start_NF"; tag "mRNA_start_NF"; tag "basic"; transcript_support_level "5";\n+21\thavana\tCDS\t3587\t3672\t.\t+\t0\tgene_id "ENSG00000279493"; gene_version "1"; transcript_id "ENST00000624081"; transcript_version "1"; exon_number "3"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding"; transcript_name "FP565260.4-201"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000485664"; protein_version "1"; tag "cds_start_NF"; tag "mRNA_start_NF"; tag "basic"; transcript_support_level "5";\n+21\thavana\tfakexon\t6136\t6346\t.\t+\t.\tgene_id "ENSG00000279493"; gene_version "1"; transcript_id "ENST00000624081"; transcript_version "1"; exon_number "4"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding"; transcript_name "FP565260.4-201"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003755385"; exon_version "1"; tag "cds_start_NF"; tag "mRNA_start_NF"; 
tag "basic"; transcript_support_level "5";\n+21\thavana\tCDS\t6136\t6298\t.\t+\t1\tgene_id "ENSG0000027'..b'6691\t.\t+\t.\tgene_id "ENSG00000277777"; gene_version "1"; gene_name "CU633967.2"; gene_source "ensembl"; gene_biotype "misc_RNA";\n+21\tensembl\ttranscript\t586591\t586691\t.\t+\t.\tgene_id "ENSG00000277777"; gene_version "1"; transcript_id "ENST00000610788"; transcript_version "1"; gene_name "CU633967.2"; gene_source "ensembl"; gene_biotype "misc_RNA"; transcript_name "CU633967.2-201"; transcript_source "ensembl"; transcript_biotype "misc_RNA"; tag "basic"; transcript_support_level "NA";\n+21\tensembl\tfakexon\t586591\t586691\t.\t+\t.\tgene_id "ENSG00000277777"; gene_version "1"; transcript_id "ENST00000610788"; transcript_version "1"; exon_number "1"; gene_name "CU633967.2"; gene_source "ensembl"; gene_biotype "misc_RNA"; transcript_name "CU633967.2-201"; transcript_source "ensembl"; transcript_biotype "misc_RNA"; exon_id "ENSE00003748388"; exon_version "1"; tag "basic"; transcript_support_level "NA";\n+21\thavana\tgene\t692383\t694838\t.\t-\t.\tgene_id "ENSG00000279186"; gene_version "1"; gene_name "FP236315.2"; gene_source "havana"; gene_biotype "TEC";\n+21\thavana\ttranscript\t692383\t694838\t.\t-\t.\tgene_id "ENSG00000279186"; gene_version "1"; transcript_id "ENST00000624506"; transcript_version "1"; gene_name "FP236315.2"; gene_source "havana"; gene_biotype "TEC"; transcript_name "FP236315.2-201"; transcript_source "havana"; transcript_biotype "TEC"; tag "basic"; transcript_support_level "NA";\n+21\thavana\tfakexon\t692383\t694838\t.\t-\t.\tgene_id "ENSG00000279186"; gene_version "1"; transcript_id "ENST00000624506"; transcript_version "1"; exon_number "1"; gene_name "FP236315.2"; gene_source "havana"; gene_biotype "TEC"; transcript_name "FP236315.2-201"; transcript_source "havana"; transcript_biotype "TEC"; exon_id "ENSE00003756739"; exon_version "1"; tag "basic"; transcript_support_level "NA";\n+21\thavana\tgene\t694546\t696361\t.\t+\t.\tgene_id 
"ENSG00000279784"; gene_version "1"; gene_name "FP236315.3"; gene_source "havana"; gene_biotype "lncRNA";\n+21\thavana\ttranscript\t694546\t696361\t.\t+\t.\tgene_id "ENSG00000279784"; gene_version "1"; transcript_id "ENST00000623587"; transcript_version "1"; gene_name "FP236315.3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "FP236315.3-201"; transcript_source "havana"; transcript_biotype "lncRNA"; tag "basic"; transcript_support_level "2";\n+21\thavana\tfakexon\t694546\t694654\t.\t+\t.\tgene_id "ENSG00000279784"; gene_version "1"; transcript_id "ENST00000623587"; transcript_version "1"; exon_number "1"; gene_name "FP236315.3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "FP236315.3-201"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00003758868"; exon_version "1"; tag "basic"; transcript_support_level "2";\n+21\thavana\tfakexon\t696164\t696361\t.\t+\t.\tgene_id "ENSG00000279784"; gene_version "1"; transcript_id "ENST00000623587"; transcript_version "1"; exon_number "2"; gene_name "FP236315.3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "FP236315.3-201"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00003757033"; exon_version "1"; tag "basic"; transcript_support_level "2";\n+21\thavana\tgene\t696205\t698657\t.\t-\t.\tgene_id "ENSG00000279064"; gene_version "1"; gene_name "FP236315.1"; gene_source "havana"; gene_biotype "lncRNA";\n+21\thavana\ttranscript\t696205\t698657\t.\t-\t.\tgene_id "ENSG00000279064"; gene_version "1"; transcript_id "ENST00000623723"; transcript_version "1"; gene_name "FP236315.1"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "FP236315.1-201"; transcript_source "havana"; transcript_biotype "lncRNA"; tag "basic"; transcript_support_level "2";\n+21\thavana\tfakexon\t696205\t696358\t.\t-\t.\tgene_id "ENSG00000279064"; gene_version "1"; transcript_id "ENST00000623723"; transcript_version "1"; exon_number "2"; gene_name "FP236315.1"; 
gene_source "havana"; gene_biotype "lncRNA"; transcript_name "FP236315.1-201"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00003756092"; exon_version "1"; tag "basic"; transcript_support_level "2";\n'
b
diff -r 13022c3d3076 -r 5ef7ec16b04f test-data/no_exon.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/no_exon.gtf Fri Sep 01 13:18:03 2023 +0000
b
@@ -0,0 +1,4 @@
+test_chromosome test gene 1 650 . + . gene_id "GENE1"; gene_name "GENE1"; transcript_id "GENE1_t1";
+test_chromosome test transcript 1 650 . + . gene_id "GENE1"; gene_name "GENE1"; transcript_id "GENE1_t1";
+test_chromosome test fakexon 1 650 . + . gene_id "GENE1"; transcript_id "GENE1_t1"; exon_number "1"; gene_name "GENE1";
+test_chromosome test CDS 100 550 . + . gene_id "GENE1"; transcript_id "GENE1_t1"; exon_number "1"; gene_name "GENE1";
b
diff -r 13022c3d3076 -r 5ef7ec16b04f test-data/rnastar_test_mapped_reads_fakexon.bam
b
Binary file test-data/rnastar_test_mapped_reads_fakexon.bam has changed