Repository 'rgrnastar'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/rgrnastar

Changeset 27:79de45b5069b (2023-09-01)
Previous changeset 26:3ea5a2a63fa2 (2023-03-27) Next changeset 28:3e94726bfa9d (2024-02-11)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgrnastar commit 65fa34f035934b4e9c1fbeb5dffe00b12e6a0f32
modified:
macros.xml
rg_rnaStar.xml
added:
test-data/fakexon.Homo_sapiens.GRCh38.100.chr21.gtf
test-data/no_exon.gtf
test-data/rnastar_test_mapped_reads_fakexon.bam
b
diff -r 3ea5a2a63fa2 -r 79de45b5069b macros.xml
--- a/macros.xml Mon Mar 27 16:37:27 2023 +0000
+++ b/macros.xml Fri Sep 01 13:17:29 2023 +0000
b
@@ -5,7 +5,7 @@
     the index versions in sync, but you should manually update @IDX_VERSION_SUFFIX@ -->
     <!-- STAR version to be used -->
     <token name="@TOOL_VERSION@">2.7.10b</token>
-    <token name="@VERSION_SUFFIX@">3</token>
+    <token name="@VERSION_SUFFIX@">4</token>
     <token name="@PROFILE@">21.01</token>
     <!-- STAR index version compatible with this version of STAR
     This is the STAR version that introduced the index structure expected
@@ -60,6 +60,7 @@
     </xml>
     <xml name="SJDBOPTIONS">
          <param argument="--sjdbGTFfile" type="data" format="gff3,gtf" label="Gene model (gff3,gtf) file for splice junctions" optional="false" help="Exon junction information for mapping splices"/>
+         <param argument="--sjdbGTFfeatureExon" type="text" value="exon" label="Elements from the gene model to use for splice junctions" help="By default and for almost all cases: 'exon', referring to finding junctions at the RNA splice sites. This can optionally be changed to allow splicing at other levels, such as 'gene', 'transcript', 'CDS'."/>
          <param argument="--sjdbOverhang" type="integer" min="1" value="100" label="Length of the genomic sequence around annotated junctions" help="Used in constructing the splice junctions database. Ideal value is ReadLength-1"/>
     </xml>
     <xml name="dbKeyActions">
@@ -103,15 +104,17 @@
                 #if str($refGenomeSource.GTFconditional.GTFselect) == 'with-gtf':
                     --sjdbOverhang '${refGenomeSource.GTFconditional.sjdbOverhang}'
                     --sjdbGTFfile '${refGenomeSource.GTFconditional.sjdbGTFfile}'
+                    --sjdbGTFfeatureExon '${refGenomeSource.GTFconditional.sjdbGTFfeatureExon}'
                     #if str($refGenomeSource.GTFconditional.sjdbGTFfile.ext) == 'gff3':
                         --sjdbGTFtagExonParentTranscript Parent
                     #end if
                 #end if
             #else:
-                ## ref genome selection is less complex for STARsolo cause
+                ## ref genome selection is less complex for STARsolo because
                 ## with-gtf is mandatory there
                 --sjdbOverhang '${refGenomeSource.sjdbOverhang}'
                 --sjdbGTFfile '${refGenomeSource.sjdbGTFfile}'
+                --sjdbGTFfeatureExon '${refGenomeSource.sjdbGTFfeatureExon}'
                 #if str($refGenomeSource.sjdbGTFfile.ext) == 'gff3':
                     --sjdbGTFtagExonParentTranscript Parent
                 #end if
@@ -137,6 +140,7 @@
         #if str($refGenomeSource.GTFconditional.GTFselect) == 'without-gtf-with-gtf':
             --sjdbOverhang $refGenomeSource.GTFconditional.sjdbOverhang
             --sjdbGTFfile '${refGenomeSource.GTFconditional.sjdbGTFfile}'
+            --sjdbGTFfeatureExon '${refGenomeSource.GTFconditional.sjdbGTFfeatureExon}'
             #if str($refGenomeSource.GTFconditional.sjdbGTFfile.ext) == 'gff3':
                 --sjdbGTFtagExonParentTranscript Parent
             #end if
b
diff -r 3ea5a2a63fa2 -r 79de45b5069b rg_rnaStar.xml
--- a/rg_rnaStar.xml Mon Mar 27 16:37:27 2023 +0000
+++ b/rg_rnaStar.xml Fri Sep 01 13:17:29 2023 +0000
b
@@ -628,6 +628,44 @@
                 <metadata name="column_names" value="GeneID,Counts_unstrand,Counts_firstStrand,Counts_secondStrand" />
             </output>
         </test>
+        <!-- Test that an alternative feature type can be used as the splicing unit:
+        no_exon.gtf has 'fakexon' lines instead of 'exon' lines. If sjdbGTFfeatureExon is not passed on
+        correctly, STAR is expected to fail with "Fatal INPUT FILE error, no exon lines in the GTF file" -->
+        <test expect_num_outputs="4">
+            <conditional name="singlePaired">
+                <param name="sPaired" value="single" />
+                <param name="input1" value="tophat_in2.fastqsanger" ftype="fastqsanger" />
+            </conditional>
+            <conditional name="refGenomeSource">
+                <param name="geneSource" value="history" />
+                <param name="genomeFastaFiles" value="tophat_test.fa" />
+                <param name="genomeSAindexNbases" value="5" />
+                <conditional name="GTFconditional">
+                    <param name="GTFselect" value="with-gtf" />
+                    <param name="sjdbOverhang" value="75"/>
+                    <param name="sjdbGTFfile" value="no_exon.gtf" ftype="gtf"/>
+                    <param name="sjdbGTFfeatureExon" value="fakexon"/>
+                    <conditional name="quantmode_output">
+                        <param name="quantMode" value="GeneCounts"/>
+                    </conditional>
+                </conditional>
+            </conditional>
+            <section name="oformat">
+                <param name="outSAMattributes" value="NH,HI,AS,nM,NM,MD,jM,jI,MC,ch" />
+            </section>
+            <section name="algo">
+                <conditional name="params">
+                    <param name="settingsType" value="default" />
+                </conditional>
+            </section>
+
+            <output name="output_log" file="rnastar_test.log" compare="re_match_multiline" />
+            <output name="splice_junctions" file="rnastar_test_splicejunctions.bed"/>
+            <output name="mapped_reads" file="rnastar_test_mapped_reads_fakexon.bam" compare="sim_size" delta="634" />
+            <output name="reads_per_gene" file="tophat_test_reads_per_gene.txt">
+                <metadata name="column_names" value="GeneID,Counts_unstrand,Counts_firstStrand,Counts_secondStrand" />
+            </output>
+        </test>
         <!-- test gtf file and TranscriptomeSAM mode -->
         <test expect_num_outputs="4">
             <conditional name="singlePaired">
b
diff -r 3ea5a2a63fa2 -r 79de45b5069b test-data/fakexon.Homo_sapiens.GRCh38.100.chr21.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fakexon.Homo_sapiens.GRCh38.100.chr21.gtf Fri Sep 01 13:17:29 2023 +0000
b
b'@@ -0,0 +1,461 @@\n+#!genome-build GRCh38.p13\n+#!genome-version GRCh38\n+#!genome-date 2013-12\n+#!genome-build-accession NCBI:GCA_000001405.28\n+#!genebuild-last-updated 2019-06\n+21\thavana\tgene\t1000\t6346\t.\t+\t.\tgene_id "ENSG00000279493"; gene_version "1"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding";\n+21\thavana\ttranscript\t1000\t6346\t.\t+\t.\tgene_id "ENSG00000279493"; gene_version "1"; transcript_id "ENST00000624081"; transcript_version "1"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding"; transcript_name "FP565260.4-201"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "cds_start_NF"; tag "mRNA_start_NF"; tag "basic"; transcript_support_level "5";\n+21\thavana\tfakexon\t1000\t1075\t.\t+\t.\tgene_id "ENSG00000279493"; gene_version "1"; transcript_id "ENST00000624081"; transcript_version "1"; exon_number "1"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding"; transcript_name "FP565260.4-201"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003760288"; exon_version "1"; tag "cds_start_NF"; tag "mRNA_start_NF"; tag "basic"; transcript_support_level "5";\n+21\thavana\tCDS\t1000\t1075\t.\t+\t0\tgene_id "ENSG00000279493"; gene_version "1"; transcript_id "ENST00000624081"; transcript_version "1"; exon_number "1"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding"; transcript_name "FP565260.4-201"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000485664"; protein_version "1"; tag "cds_start_NF"; tag "mRNA_start_NF"; tag "basic"; transcript_support_level "5";\n+21\thavana\tfakexon\t1749\t1888\t.\t+\t.\tgene_id "ENSG00000279493"; gene_version "1"; transcript_id "ENST00000624081"; transcript_version "1"; exon_number "2"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding"; transcript_name "FP565260.4-201"; transcript_source "havana"; 
transcript_biotype "protein_coding"; exon_id "ENSE00003758404"; exon_version "1"; tag "cds_start_NF"; tag "mRNA_start_NF"; tag "basic"; transcript_support_level "5";\n+21\thavana\tCDS\t1749\t1888\t.\t+\t2\tgene_id "ENSG00000279493"; gene_version "1"; transcript_id "ENST00000624081"; transcript_version "1"; exon_number "2"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding"; transcript_name "FP565260.4-201"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000485664"; protein_version "1"; tag "cds_start_NF"; tag "mRNA_start_NF"; tag "basic"; transcript_support_level "5";\n+21\thavana\tfakexon\t3587\t3672\t.\t+\t.\tgene_id "ENSG00000279493"; gene_version "1"; transcript_id "ENST00000624081"; transcript_version "1"; exon_number "3"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding"; transcript_name "FP565260.4-201"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003755466"; exon_version "1"; tag "cds_start_NF"; tag "mRNA_start_NF"; tag "basic"; transcript_support_level "5";\n+21\thavana\tCDS\t3587\t3672\t.\t+\t0\tgene_id "ENSG00000279493"; gene_version "1"; transcript_id "ENST00000624081"; transcript_version "1"; exon_number "3"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding"; transcript_name "FP565260.4-201"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000485664"; protein_version "1"; tag "cds_start_NF"; tag "mRNA_start_NF"; tag "basic"; transcript_support_level "5";\n+21\thavana\tfakexon\t6136\t6346\t.\t+\t.\tgene_id "ENSG00000279493"; gene_version "1"; transcript_id "ENST00000624081"; transcript_version "1"; exon_number "4"; gene_name "FP565260.4"; gene_source "havana"; gene_biotype "protein_coding"; transcript_name "FP565260.4-201"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003755385"; exon_version "1"; tag "cds_start_NF"; tag "mRNA_start_NF"; 
tag "basic"; transcript_support_level "5";\n+21\thavana\tCDS\t6136\t6298\t.\t+\t1\tgene_id "ENSG0000027'..b'6691\t.\t+\t.\tgene_id "ENSG00000277777"; gene_version "1"; gene_name "CU633967.2"; gene_source "ensembl"; gene_biotype "misc_RNA";\n+21\tensembl\ttranscript\t586591\t586691\t.\t+\t.\tgene_id "ENSG00000277777"; gene_version "1"; transcript_id "ENST00000610788"; transcript_version "1"; gene_name "CU633967.2"; gene_source "ensembl"; gene_biotype "misc_RNA"; transcript_name "CU633967.2-201"; transcript_source "ensembl"; transcript_biotype "misc_RNA"; tag "basic"; transcript_support_level "NA";\n+21\tensembl\tfakexon\t586591\t586691\t.\t+\t.\tgene_id "ENSG00000277777"; gene_version "1"; transcript_id "ENST00000610788"; transcript_version "1"; exon_number "1"; gene_name "CU633967.2"; gene_source "ensembl"; gene_biotype "misc_RNA"; transcript_name "CU633967.2-201"; transcript_source "ensembl"; transcript_biotype "misc_RNA"; exon_id "ENSE00003748388"; exon_version "1"; tag "basic"; transcript_support_level "NA";\n+21\thavana\tgene\t692383\t694838\t.\t-\t.\tgene_id "ENSG00000279186"; gene_version "1"; gene_name "FP236315.2"; gene_source "havana"; gene_biotype "TEC";\n+21\thavana\ttranscript\t692383\t694838\t.\t-\t.\tgene_id "ENSG00000279186"; gene_version "1"; transcript_id "ENST00000624506"; transcript_version "1"; gene_name "FP236315.2"; gene_source "havana"; gene_biotype "TEC"; transcript_name "FP236315.2-201"; transcript_source "havana"; transcript_biotype "TEC"; tag "basic"; transcript_support_level "NA";\n+21\thavana\tfakexon\t692383\t694838\t.\t-\t.\tgene_id "ENSG00000279186"; gene_version "1"; transcript_id "ENST00000624506"; transcript_version "1"; exon_number "1"; gene_name "FP236315.2"; gene_source "havana"; gene_biotype "TEC"; transcript_name "FP236315.2-201"; transcript_source "havana"; transcript_biotype "TEC"; exon_id "ENSE00003756739"; exon_version "1"; tag "basic"; transcript_support_level "NA";\n+21\thavana\tgene\t694546\t696361\t.\t+\t.\tgene_id 
"ENSG00000279784"; gene_version "1"; gene_name "FP236315.3"; gene_source "havana"; gene_biotype "lncRNA";\n+21\thavana\ttranscript\t694546\t696361\t.\t+\t.\tgene_id "ENSG00000279784"; gene_version "1"; transcript_id "ENST00000623587"; transcript_version "1"; gene_name "FP236315.3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "FP236315.3-201"; transcript_source "havana"; transcript_biotype "lncRNA"; tag "basic"; transcript_support_level "2";\n+21\thavana\tfakexon\t694546\t694654\t.\t+\t.\tgene_id "ENSG00000279784"; gene_version "1"; transcript_id "ENST00000623587"; transcript_version "1"; exon_number "1"; gene_name "FP236315.3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "FP236315.3-201"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00003758868"; exon_version "1"; tag "basic"; transcript_support_level "2";\n+21\thavana\tfakexon\t696164\t696361\t.\t+\t.\tgene_id "ENSG00000279784"; gene_version "1"; transcript_id "ENST00000623587"; transcript_version "1"; exon_number "2"; gene_name "FP236315.3"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "FP236315.3-201"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00003757033"; exon_version "1"; tag "basic"; transcript_support_level "2";\n+21\thavana\tgene\t696205\t698657\t.\t-\t.\tgene_id "ENSG00000279064"; gene_version "1"; gene_name "FP236315.1"; gene_source "havana"; gene_biotype "lncRNA";\n+21\thavana\ttranscript\t696205\t698657\t.\t-\t.\tgene_id "ENSG00000279064"; gene_version "1"; transcript_id "ENST00000623723"; transcript_version "1"; gene_name "FP236315.1"; gene_source "havana"; gene_biotype "lncRNA"; transcript_name "FP236315.1-201"; transcript_source "havana"; transcript_biotype "lncRNA"; tag "basic"; transcript_support_level "2";\n+21\thavana\tfakexon\t696205\t696358\t.\t-\t.\tgene_id "ENSG00000279064"; gene_version "1"; transcript_id "ENST00000623723"; transcript_version "1"; exon_number "2"; gene_name "FP236315.1"; 
gene_source "havana"; gene_biotype "lncRNA"; transcript_name "FP236315.1-201"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00003756092"; exon_version "1"; tag "basic"; transcript_support_level "2";\n'
b
diff -r 3ea5a2a63fa2 -r 79de45b5069b test-data/no_exon.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/no_exon.gtf Fri Sep 01 13:17:29 2023 +0000
b
@@ -0,0 +1,4 @@
+test_chromosome test gene 1 650 . + . gene_id "GENE1"; gene_name "GENE1"; transcript_id "GENE1_t1";
+test_chromosome test transcript 1 650 . + . gene_id "GENE1"; gene_name "GENE1"; transcript_id "GENE1_t1";
+test_chromosome test fakexon 1 650 . + . gene_id "GENE1"; transcript_id "GENE1_t1"; exon_number "1"; gene_name "GENE1";
+test_chromosome test CDS 100 550 . + . gene_id "GENE1"; transcript_id "GENE1_t1"; exon_number "1"; gene_name "GENE1";
b
diff -r 3ea5a2a63fa2 -r 79de45b5069b test-data/rnastar_test_mapped_reads_fakexon.bam
b
Binary file test-data/rnastar_test_mapped_reads_fakexon.bam has changed