Repository 'gtftobed12'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/gtftobed12

Changeset 0:75a14cc16d4d (2018-05-18)
Next changeset 1:57d45064f114 (2018-08-05)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gtfToBed12 commit adc4f5c431fca0bcdf93188b7065bdc4c9c424c6
added:
gtfToBed12.xml
test-data/gtf2bed_test.bed
test-data/gtf2bed_test.gtf
test-data/gtf2bed_test_havana.bed
test-data/gtf2bed_test_include_version.bed
test-data/gtf2bed_test_missing_exon.bed
test-data/gtf2bed_test_missing_exon.gtf
test-data/gtf2bed_test_transcript_info.txt
b
diff -r 000000000000 -r 75a14cc16d4d gtfToBed12.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gtfToBed12.xml Fri May 18 15:10:34 2018 -0400
[
@@ -0,0 +1,127 @@
+<tool id="gtftobed12" name="Convert GTF to BED12" version="357">
+    <requirements> <!-- Packages providing the two UCSC utilities called in the command section; both are pinned to 357, the same value as the tool's version attribute. -->
+        <requirement type="package" version="357">ucsc-gtftogenepred</requirement>
+        <requirement type="package" version="357">ucsc-genepredtobed</requirement>
+    </requirements>
+    <command detect_errors="aggressive"><![CDATA[
+        gtfToGenePred
+            #if $advanced_options.advanced_options_selector == "advanced":
+                $advanced_options.ignoreGroupsWithoutExons
+                $advanced_options.simple
+                $advanced_options.allErrors
+                $advanced_options.impliedStopAfterCds
+                $advanced_options.includeVersion
+                #if $advanced_options.infoOut:
+                    -infoOut='${transcript_info_file}'
+                #end if
+                #for $prefix in $advanced_options.sourcePrefixes
+                    -sourcePrefix='${prefix.sourcePrefix}'
+                #end for
+            #end if
+            '${gtf_file}'
+            'temp.genePred' &&
+        genePredToBed 'temp.genePred' '${bed_file}'
+        ]]>
+    </command> <!-- Two chained steps: gtfToGenePred converts the input GTF into the intermediate file 'temp.genePred', then genePredToBed converts that file into the bed_file output. The boolean flags, the optional -infoOut target and one -sourcePrefix per repeat entry are emitted only when the "advanced" case is selected. The steps are joined with a shell AND, so the second runs only if the first succeeds. -->
+    <inputs> <!-- Parameters: the GTF dataset to convert, plus a conditional whose "advanced" case exposes gtfToGenePred flags. Each flag boolean's truevalue is the literal option inserted into the command; infoOut has no truevalue and is only tested as a condition there. -->
+        <param name="gtf_file" type="data" format="gtf" label="GTF File to convert" />
+        <conditional name="advanced_options">
+            <param name="advanced_options_selector" type="select" label="Advanced options"
+                help="Advanced options for gtfToGenePred.">
+                <option value="default" selected="true">Use default options</option>
+                <option value="advanced">Set advanced options</option>
+            </param>
+            <when value="default" />
+            <when value="advanced">
+                <repeat name="sourcePrefixes" title="Source Prefixes"
+                    help="Only process entries where the source name has the specified prefixes">
+                    <param argument="-sourcePrefix" label="Source prefix"
+                        type="text" />
+                </repeat>
+                <param argument="-ignoreGroupsWithoutExons" label="Ignore groups without exons"
+                    help="Ignore groups that do not have exons, otherwise they will cause an error."
+                    type="boolean" truevalue="-ignoreGroupsWithoutExons" falsevalue="" checked="false" />
+                <param argument="-simple" label="Skip hierarchy check"
+                    help="Only check column validity, not hierarchy, may result in invalid output."
+                    type="boolean" truevalue="-simple" falsevalue="" checked="false" />
+                <param argument="-allErrors" label="Skip all errors"
+                    help="Skip groups with errors rather than aborting. Useful for getting information
+                    about as many errors as possible."
+                    type="boolean" truevalue="-allErrors" falsevalue="" checked="false" />
+                <param argument="-impliedStopAfterCds" label="Implied stop codon after CDS"
+                    help="Assume there is an implied stop codon after CDS."
+                    type="boolean" truevalue="-impliedStopAfterCds" falsevalue="" checked="false" />
+                <param argument="-includeVersion" label="Include gene and transcript version"
+                    help="If gene_version and/or transcript_version attributes exist, include the version
+                    in the corresponding identifiers."
+                    type="boolean" truevalue="-includeVersion" falsevalue="" checked="false" />
+                <param argument="-infoOut" label="Output transcript information file"
+                    help="Outputs a file with information about each transcript."
+                    type="boolean" checked="false" />
+            </when>
+        </conditional>
+    </inputs>
+    <outputs> <!-- bed_file is always produced. transcript_info_file is produced only when the "advanced" case is selected and infoOut is checked; the selector is tested first because infoOut does not exist in the "default" case, where a bare lookup would raise instead of filtering the output out. -->
+        <data name="bed_file" format="bed12" metadata_source="gtf_file" />
+        <data name="transcript_info_file" format="tabular" metadata_source="gtf_file">
+            <filter>advanced_options['advanced_options_selector'] == 'advanced' and advanced_options['infoOut']</filter>
+        </data>
+    </outputs>
+    <tests> <!-- Six cases: one with default options, then one per advanced feature (ignoreGroupsWithoutExons, includeVersion, infoOut, one source prefix, two source prefixes). -->
+        <test> <!-- Default options: convert the sample GTF and compare with the reference BED12. -->
+            <param name="gtf_file" value="gtf2bed_test.gtf"/>
+            <output name="bed_file" file="gtf2bed_test.bed" ftype="bed12"/>
+        </test>
+        <test> <!-- ignoreGroupsWithoutExons enabled on the "missing exon" GTF, compared with its own reference BED12. -->
+            <param name="gtf_file" value="gtf2bed_test_missing_exon.gtf"/>
+            <param name="advanced_options_selector" value="advanced" />
+            <param name="ignoreGroupsWithoutExons" value="true" />
+            <output name="bed_file" file="gtf2bed_test_missing_exon.bed" ftype="bed12"/>
+        </test>
+        <test> <!-- includeVersion enabled: the reference BED12 carries versioned transcript identifiers. -->
+            <param name="gtf_file" value="gtf2bed_test.gtf"/>
+            <param name="advanced_options_selector" value="advanced" />
+            <param name="includeVersion" value="true" />
+            <output name="bed_file" file="gtf2bed_test_include_version.bed" ftype="bed12"/>
+        </test>
+        <test> <!-- infoOut enabled: checks both the BED12 output and the additional transcript information table. -->
+            <param name="gtf_file" value="gtf2bed_test.gtf"/>
+            <param name="advanced_options_selector" value="advanced" />
+            <param name="infoOut" value="true" />
+            <output name="bed_file" file="gtf2bed_test.bed" ftype="bed12"/>
+            <output name="transcript_info_file" file="gtf2bed_test_transcript_info.txt" ftype="tabular"/>
+        </test>
+        <test> <!-- Single source prefix "hav": compared with the havana reference BED12. -->
+            <param name="gtf_file" value="gtf2bed_test.gtf"/>
+            <param name="advanced_options_selector" value="advanced" />
+            <repeat name="sourcePrefixes">
+                <param name="sourcePrefix" value="hav" />
+            </repeat>
+            <output name="bed_file" file="gtf2bed_test_havana.bed" ftype="bed12"/>
+        </test>
+        <test> <!-- Two source prefixes "hav" and "ens": expected to match the same reference BED12 as the default run. -->
+            <param name="gtf_file" value="gtf2bed_test.gtf"/>
+            <param name="advanced_options_selector" value="advanced" />
+            <repeat name="sourcePrefixes">
+                <param name="sourcePrefix" value="hav" />
+            </repeat>
+            <repeat name="sourcePrefixes">
+                <param name="sourcePrefix" value="ens" />
+            </repeat>
+            <output name="bed_file" file="gtf2bed_test.bed" ftype="bed12"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Converts a GTF_ file to a BED12_ formatted file using UCSC tools from Jim Kent.
+
+The conversion runs ``gtfToGenePred``, followed by ``genePredToBed``.
+
+.. _GTF: https://genome.ucsc.edu/FAQ/FAQformat.html#format4
+.. _BED12: https://genome.ucsc.edu/FAQ/FAQformat.html#format1
+        ]]>
+    </help>
+
+    <citations> <!-- NOTE(review): presumably the publication for the UCSC tools named in the help text; confirm the DOI resolves to the intended paper. -->
+        <citation type="doi">10.1101/gr.229102</citation>
+    </citations>
+</tool>
b
diff -r 000000000000 -r 75a14cc16d4d test-data/gtf2bed_test.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gtf2bed_test.bed Fri May 18 15:10:34 2018 -0400
b
@@ -0,0 +1,21 @@
+1 11868 14409 ENST00000456328 0 + 14409 14409 0 3 359,109,1189, 0,744,1352,
+1 12009 13670 ENST00000450305 0 + 13670 13670 0 6 48,49,85,78,154,218, 0,169,603,965,1211,1443,
+1 14403 29570 ENST00000488147 0 - 29570 29570 0 11 98,34,152,159,198,136,137,147,99,154,37, 0,601,1392,2203,2454,2829,3202,3511,3864,10334,15130,
+1 17368 17436 ENST00000619216 0 - 17436 17436 0 1 68, 0,
+1 29553 31097 ENST00000473358 0 + 31097 31097 0 3 486,104,122, 0,1010,1422,
+1 30266 31109 ENST00000469289 0 + 31109 31109 0 2 401,134, 0,709,
+1 30365 30503 ENST00000607096 0 + 30503 30503 0 1 138, 0,
+1 34553 36081 ENST00000417324 0 - 36081 36081 0 3 621,205,361, 0,723,1167,
+1 35244 36073 ENST00000461467 0 - 36073 36073 0 2 237,353, 0,476,
+1 52472 53312 ENST00000606857 0 + 53312 53312 0 1 840, 0,
+1 62947 63887 ENST00000492842 0 + 63887 63887 0 1 940, 0,
+1 69090 70008 ENST00000335137 0 + 69090 70008 0 1 918, 0,
+1 89294 120932 ENST00000466430 0 - 120932 120932 0 4 2335,150,105,158, 0,2796,23405,31480,
+1 92229 129217 ENST00000477740 0 - 129217 129217 0 4 11,105,212,163, 0,20470,28491,36825,
+1 110952 129173 ENST00000471248 0 - 129173 129173 0 3 405,105,119, 0,1747,18102,
+1 120724 133723 ENST00000610542 0 - 133723 133723 0 4 145,59,169,350, 0,149,8330,12649,
+1 129080 133566 ENST00000453576 0 - 133566 133566 0 2 143,193, 0,4293,
+1 89550 91105 ENST00000495576 0 - 91105 91105 0 2 500,819, 0,736,
+1 131024 134836 ENST00000442987 0 + 134836 134836 0 1 3812, 0,
+1 135140 135895 ENST00000494149 0 - 135895 135895 0 1 755, 0,
+1 137681 137965 ENST00000595919 0 - 137965 137965 0 1 284, 0,
b
diff -r 000000000000 -r 75a14cc16d4d test-data/gtf2bed_test.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gtf2bed_test.gtf Fri May 18 15:10:34 2018 -0400
b
b'@@ -0,0 +1,100 @@\n+#!genome-build GRCh38.p2\n+#!genome-version GRCh38\n+#!genome-date 2013-12\n+#!genome-build-accession NCBI:GCA_000001405.17\n+#!genebuild-last-updated 2015-01\n+1\thavana\tgene\t11869\t14409\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene";\n+1\thavana\ttranscript\t11869\t14409\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-002"; transcript_source "havana"; transcript_biotype "processed_transcript"; tag "basic"; transcript_support_level "1";\n+1\thavana\texon\t11869\t12227\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; exon_number "1"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-002"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00002234944"; exon_version "1"; tag "basic"; transcript_support_level "1";\n+1\thavana\texon\t12613\t12721\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; exon_number "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-002"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00003582793"; exon_version "1"; tag "basic"; transcript_support_level "1";\n+1\thavana\texon\t13221\t14409\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; exon_number "3"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-002"; transcript_source "havana"; transcript_biotype "processed_transcript"; 
exon_id "ENSE00002312635"; exon_version "1"; tag "basic"; transcript_support_level "1";\n+1\thavana\ttranscript\t12010\t13670\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-001"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t12010\t12057\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "1"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-001"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001948541"; exon_version "1"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t12179\t12227\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-001"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001671638"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t12613\t12697\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "3"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-001"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001758273"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t12975\t13052\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; 
transcript_version "2"; exon_number "4"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcri'..b'1"; transcript_id "ENST00000495576"; transcript_version "1"; gene_name "RP11-34P13.8"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "RP11-34P13.8-001"; transcript_source "havana"; transcript_biotype "lincRNA"; tag "basic"; transcript_support_level "5";\n+1\thavana\texon\t90287\t91105\t.\t-\t.\tgene_id "ENSG00000239945"; gene_version "1"; transcript_id "ENST00000495576"; transcript_version "1"; exon_number "1"; gene_name "RP11-34P13.8"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "RP11-34P13.8-001"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001907785"; exon_version "1"; tag "basic"; transcript_support_level "5";\n+1\thavana\texon\t89551\t90050\t.\t-\t.\tgene_id "ENSG00000239945"; gene_version "1"; transcript_id "ENST00000495576"; transcript_version "1"; exon_number "2"; gene_name "RP11-34P13.8"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "RP11-34P13.8-001"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001927725"; exon_version "1"; tag "basic"; transcript_support_level "5";\n+1\thavana\tgene\t131025\t134836\t.\t+\t.\tgene_id "ENSG00000233750"; gene_version "3"; gene_name "CICP27"; gene_source "havana"; gene_biotype "processed_pseudogene";\n+1\thavana\ttranscript\t131025\t134836\t.\t+\t.\tgene_id "ENSG00000233750"; gene_version "3"; transcript_id "ENST00000442987"; transcript_version "3"; gene_name "CICP27"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "CICP27-001"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t131025\t134836\t.\t+\t.\tgene_id "ENSG00000233750"; gene_version "3"; transcript_id "ENST00000442987"; transcript_version "3"; exon_number "1"; gene_name "CICP27"; gene_source "havana"; gene_biotype "processed_pseudogene"; 
transcript_name "CICP27-001"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; exon_id "ENSE00001625118"; exon_version "3"; tag "basic"; transcript_support_level "NA";\n+1\thavana\tgene\t135141\t135895\t.\t-\t.\tgene_id "ENSG00000268903"; gene_version "1"; gene_name "RP11-34P13.15"; gene_source "havana"; gene_biotype "processed_pseudogene";\n+1\thavana\ttranscript\t135141\t135895\t.\t-\t.\tgene_id "ENSG00000268903"; gene_version "1"; transcript_id "ENST00000494149"; transcript_version "2"; gene_name "RP11-34P13.15"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "RP11-34P13.15-001"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t135141\t135895\t.\t-\t.\tgene_id "ENSG00000268903"; gene_version "1"; transcript_id "ENST00000494149"; transcript_version "2"; exon_number "1"; gene_name "RP11-34P13.15"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "RP11-34P13.15-001"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; exon_id "ENSE00001879101"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n+1\thavana\tgene\t137682\t137965\t.\t-\t.\tgene_id "ENSG00000269981"; gene_version "1"; gene_name "RP11-34P13.16"; gene_source "havana"; gene_biotype "processed_pseudogene";\n+1\thavana\ttranscript\t137682\t137965\t.\t-\t.\tgene_id "ENSG00000269981"; gene_version "1"; transcript_id "ENST00000595919"; transcript_version "1"; gene_name "RP11-34P13.16"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "RP11-34P13.16-001"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t137682\t137965\t.\t-\t.\tgene_id "ENSG00000269981"; gene_version "1"; transcript_id "ENST00000595919"; transcript_version "1"; exon_number "1"; gene_name "RP11-34P13.16"; gene_source "havana"; gene_biotype 
"processed_pseudogene"; transcript_name "RP11-34P13.16-001"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; exon_id "ENSE00001936432"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n'
b
diff -r 000000000000 -r 75a14cc16d4d test-data/gtf2bed_test_havana.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gtf2bed_test_havana.bed Fri May 18 15:10:34 2018 -0400
b
@@ -0,0 +1,17 @@
+1 11868 14409 ENST00000456328 0 + 14409 14409 0 3 359,109,1189, 0,744,1352,
+1 12009 13670 ENST00000450305 0 + 13670 13670 0 6 48,49,85,78,154,218, 0,169,603,965,1211,1443,
+1 14403 29570 ENST00000488147 0 - 29570 29570 0 11 98,34,152,159,198,136,137,147,99,154,37, 0,601,1392,2203,2454,2829,3202,3511,3864,10334,15130,
+1 29553 31097 ENST00000473358 0 + 31097 31097 0 3 486,104,122, 0,1010,1422,
+1 30266 31109 ENST00000469289 0 + 31109 31109 0 2 401,134, 0,709,
+1 34553 36081 ENST00000417324 0 - 36081 36081 0 3 621,205,361, 0,723,1167,
+1 35244 36073 ENST00000461467 0 - 36073 36073 0 2 237,353, 0,476,
+1 52472 53312 ENST00000606857 0 + 53312 53312 0 1 840, 0,
+1 62947 63887 ENST00000492842 0 + 63887 63887 0 1 940, 0,
+1 89294 120932 ENST00000466430 0 - 120932 120932 0 4 2335,150,105,158, 0,2796,23405,31480,
+1 92229 129217 ENST00000477740 0 - 129217 129217 0 4 11,105,212,163, 0,20470,28491,36825,
+1 110952 129173 ENST00000471248 0 - 129173 129173 0 3 405,105,119, 0,1747,18102,
+1 129080 133566 ENST00000453576 0 - 133566 133566 0 2 143,193, 0,4293,
+1 89550 91105 ENST00000495576 0 - 91105 91105 0 2 500,819, 0,736,
+1 131024 134836 ENST00000442987 0 + 134836 134836 0 1 3812, 0,
+1 135140 135895 ENST00000494149 0 - 135895 135895 0 1 755, 0,
+1 137681 137965 ENST00000595919 0 - 137965 137965 0 1 284, 0,
b
diff -r 000000000000 -r 75a14cc16d4d test-data/gtf2bed_test_include_version.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gtf2bed_test_include_version.bed Fri May 18 15:10:34 2018 -0400
b
@@ -0,0 +1,21 @@
+1 11868 14409 ENST00000456328.2 0 + 14409 14409 0 3 359,109,1189, 0,744,1352,
+1 12009 13670 ENST00000450305.2 0 + 13670 13670 0 6 48,49,85,78,154,218, 0,169,603,965,1211,1443,
+1 14403 29570 ENST00000488147.1 0 - 29570 29570 0 11 98,34,152,159,198,136,137,147,99,154,37, 0,601,1392,2203,2454,2829,3202,3511,3864,10334,15130,
+1 17368 17436 ENST00000619216.1 0 - 17436 17436 0 1 68, 0,
+1 29553 31097 ENST00000473358.1 0 + 31097 31097 0 3 486,104,122, 0,1010,1422,
+1 30266 31109 ENST00000469289.1 0 + 31109 31109 0 2 401,134, 0,709,
+1 30365 30503 ENST00000607096.1 0 + 30503 30503 0 1 138, 0,
+1 34553 36081 ENST00000417324.1 0 - 36081 36081 0 3 621,205,361, 0,723,1167,
+1 35244 36073 ENST00000461467.1 0 - 36073 36073 0 2 237,353, 0,476,
+1 52472 53312 ENST00000606857.1 0 + 53312 53312 0 1 840, 0,
+1 62947 63887 ENST00000492842.1 0 + 63887 63887 0 1 940, 0,
+1 69090 70008 ENST00000335137.3 0 + 69090 70008 0 1 918, 0,
+1 89294 120932 ENST00000466430.4 0 - 120932 120932 0 4 2335,150,105,158, 0,2796,23405,31480,
+1 92229 129217 ENST00000477740.4 0 - 129217 129217 0 4 11,105,212,163, 0,20470,28491,36825,
+1 110952 129173 ENST00000471248.1 0 - 129173 129173 0 3 405,105,119, 0,1747,18102,
+1 120724 133723 ENST00000610542.1 0 - 133723 133723 0 4 145,59,169,350, 0,149,8330,12649,
+1 129080 133566 ENST00000453576.2 0 - 133566 133566 0 2 143,193, 0,4293,
+1 89550 91105 ENST00000495576.1 0 - 91105 91105 0 2 500,819, 0,736,
+1 131024 134836 ENST00000442987.3 0 + 134836 134836 0 1 3812, 0,
+1 135140 135895 ENST00000494149.2 0 - 135895 135895 0 1 755, 0,
+1 137681 137965 ENST00000595919.1 0 - 137965 137965 0 1 284, 0,
b
diff -r 000000000000 -r 75a14cc16d4d test-data/gtf2bed_test_missing_exon.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gtf2bed_test_missing_exon.bed Fri May 18 15:10:34 2018 -0400
b
@@ -0,0 +1,20 @@
+1 12009 13670 ENST00000450305 0 + 13670 13670 0 6 48,49,85,78,154,218, 0,169,603,965,1211,1443,
+1 14403 29570 ENST00000488147 0 - 29570 29570 0 11 98,34,152,159,198,136,137,147,99,154,37, 0,601,1392,2203,2454,2829,3202,3511,3864,10334,15130,
+1 17368 17436 ENST00000619216 0 - 17436 17436 0 1 68, 0,
+1 29553 31097 ENST00000473358 0 + 31097 31097 0 3 486,104,122, 0,1010,1422,
+1 30266 31109 ENST00000469289 0 + 31109 31109 0 2 401,134, 0,709,
+1 30365 30503 ENST00000607096 0 + 30503 30503 0 1 138, 0,
+1 34553 36081 ENST00000417324 0 - 36081 36081 0 3 621,205,361, 0,723,1167,
+1 35244 36073 ENST00000461467 0 - 36073 36073 0 2 237,353, 0,476,
+1 52472 53312 ENST00000606857 0 + 53312 53312 0 1 840, 0,
+1 62947 63887 ENST00000492842 0 + 63887 63887 0 1 940, 0,
+1 69090 70008 ENST00000335137 0 + 69090 70008 0 1 918, 0,
+1 89294 120932 ENST00000466430 0 - 120932 120932 0 4 2335,150,105,158, 0,2796,23405,31480,
+1 92229 129217 ENST00000477740 0 - 129217 129217 0 4 11,105,212,163, 0,20470,28491,36825,
+1 110952 129173 ENST00000471248 0 - 129173 129173 0 3 405,105,119, 0,1747,18102,
+1 120724 133723 ENST00000610542 0 - 133723 133723 0 4 145,59,169,350, 0,149,8330,12649,
+1 129080 133566 ENST00000453576 0 - 133566 133566 0 2 143,193, 0,4293,
+1 89550 91105 ENST00000495576 0 - 91105 91105 0 2 500,819, 0,736,
+1 131024 134836 ENST00000442987 0 + 134836 134836 0 1 3812, 0,
+1 135140 135895 ENST00000494149 0 - 135895 135895 0 1 755, 0,
+1 137681 137965 ENST00000595919 0 - 137965 137965 0 1 284, 0,
b
diff -r 000000000000 -r 75a14cc16d4d test-data/gtf2bed_test_missing_exon.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gtf2bed_test_missing_exon.gtf Fri May 18 15:10:34 2018 -0400
b
b'@@ -0,0 +1,97 @@\n+#!genome-build GRCh38.p2\n+#!genome-version GRCh38\n+#!genome-date 2013-12\n+#!genome-build-accession NCBI:GCA_000001405.17\n+#!genebuild-last-updated 2015-01\n+1\thavana\tgene\t11869\t14409\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene";\n+1\thavana\ttranscript\t11869\t14409\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-002"; transcript_source "havana"; transcript_biotype "processed_transcript"; tag "basic"; transcript_support_level "1";\n+1\thavana\ttranscript\t12010\t13670\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-001"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t12010\t12057\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "1"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-001"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001948541"; exon_version "1"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t12179\t12227\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-001"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id 
"ENSE00001671638"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t12613\t12697\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "3"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-001"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001758273"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t12975\t13052\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "4"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-001"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001799933"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t13221\t13374\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "5"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-001"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001746346"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t13453\t13670\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "6"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-001"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001863096"; exon_version "1"; tag "basic"; transcript_support_level "NA";\n+1\thavana\tgene\t14404\t29570\t.\t-\t.\tgene_id "ENSG00000227232"; 
gene_version "5"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene";\n+1\thavana\ttran'..b'1"; transcript_id "ENST00000495576"; transcript_version "1"; gene_name "RP11-34P13.8"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "RP11-34P13.8-001"; transcript_source "havana"; transcript_biotype "lincRNA"; tag "basic"; transcript_support_level "5";\n+1\thavana\texon\t90287\t91105\t.\t-\t.\tgene_id "ENSG00000239945"; gene_version "1"; transcript_id "ENST00000495576"; transcript_version "1"; exon_number "1"; gene_name "RP11-34P13.8"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "RP11-34P13.8-001"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001907785"; exon_version "1"; tag "basic"; transcript_support_level "5";\n+1\thavana\texon\t89551\t90050\t.\t-\t.\tgene_id "ENSG00000239945"; gene_version "1"; transcript_id "ENST00000495576"; transcript_version "1"; exon_number "2"; gene_name "RP11-34P13.8"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "RP11-34P13.8-001"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001927725"; exon_version "1"; tag "basic"; transcript_support_level "5";\n+1\thavana\tgene\t131025\t134836\t.\t+\t.\tgene_id "ENSG00000233750"; gene_version "3"; gene_name "CICP27"; gene_source "havana"; gene_biotype "processed_pseudogene";\n+1\thavana\ttranscript\t131025\t134836\t.\t+\t.\tgene_id "ENSG00000233750"; gene_version "3"; transcript_id "ENST00000442987"; transcript_version "3"; gene_name "CICP27"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "CICP27-001"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t131025\t134836\t.\t+\t.\tgene_id "ENSG00000233750"; gene_version "3"; transcript_id "ENST00000442987"; transcript_version "3"; exon_number "1"; gene_name "CICP27"; gene_source "havana"; gene_biotype 
"processed_pseudogene"; transcript_name "CICP27-001"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; exon_id "ENSE00001625118"; exon_version "3"; tag "basic"; transcript_support_level "NA";\n+1\thavana\tgene\t135141\t135895\t.\t-\t.\tgene_id "ENSG00000268903"; gene_version "1"; gene_name "RP11-34P13.15"; gene_source "havana"; gene_biotype "processed_pseudogene";\n+1\thavana\ttranscript\t135141\t135895\t.\t-\t.\tgene_id "ENSG00000268903"; gene_version "1"; transcript_id "ENST00000494149"; transcript_version "2"; gene_name "RP11-34P13.15"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "RP11-34P13.15-001"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t135141\t135895\t.\t-\t.\tgene_id "ENSG00000268903"; gene_version "1"; transcript_id "ENST00000494149"; transcript_version "2"; exon_number "1"; gene_name "RP11-34P13.15"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "RP11-34P13.15-001"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; exon_id "ENSE00001879101"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n+1\thavana\tgene\t137682\t137965\t.\t-\t.\tgene_id "ENSG00000269981"; gene_version "1"; gene_name "RP11-34P13.16"; gene_source "havana"; gene_biotype "processed_pseudogene";\n+1\thavana\ttranscript\t137682\t137965\t.\t-\t.\tgene_id "ENSG00000269981"; gene_version "1"; transcript_id "ENST00000595919"; transcript_version "1"; gene_name "RP11-34P13.16"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "RP11-34P13.16-001"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t137682\t137965\t.\t-\t.\tgene_id "ENSG00000269981"; gene_version "1"; transcript_id "ENST00000595919"; transcript_version "1"; exon_number "1"; gene_name "RP11-34P13.16"; gene_source 
"havana"; gene_biotype "processed_pseudogene"; transcript_name "RP11-34P13.16-001"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; exon_id "ENSE00001936432"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n'
b
diff -r 000000000000 -r 75a14cc16d4d test-data/gtf2bed_test_transcript_info.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gtf2bed_test_transcript_info.txt Fri May 18 15:10:34 2018 -0400
b
@@ -0,0 +1,22 @@
+#transId geneId source chrom start end strand proteinId geneName transcriptName geneType transcriptType
+ENST00000456328 ENSG00000223972 havana 1 11868 14409 + DDX11L1 DDX11L1-002 transcribed_unprocessed_pseudogene processed_transcript
+ENST00000450305 ENSG00000223972 havana 1 12009 13670 + DDX11L1 DDX11L1-001 transcribed_unprocessed_pseudogene transcribed_unprocessed_pseudogene
+ENST00000488147 ENSG00000227232 havana 1 14403 29570 - WASH7P WASH7P-001 unprocessed_pseudogene unprocessed_pseudogene
+ENST00000619216 ENSG00000278267 ensembl 1 17368 17436 - MIR6859-1 MIR6859-1-201 miRNA miRNA
+ENST00000473358 ENSG00000243485 havana 1 29553 31097 + RP11-34P13.3 RP11-34P13.3-001 lincRNA lincRNA
+ENST00000469289 ENSG00000243485 havana 1 30266 31109 + RP11-34P13.3 RP11-34P13.3-002 lincRNA lincRNA
+ENST00000607096 ENSG00000274890 ensembl 1 30365 30503 + MIR1302-2 MIR1302-2-201 miRNA miRNA
+ENST00000417324 ENSG00000237613 havana 1 34553 36081 - FAM138A FAM138A-001 lincRNA lincRNA
+ENST00000461467 ENSG00000237613 havana 1 35244 36073 - FAM138A FAM138A-002 lincRNA lincRNA
+ENST00000606857 ENSG00000268020 havana 1 52472 53312 + OR4G4P OR4G4P-001 unprocessed_pseudogene unprocessed_pseudogene
+ENST00000492842 ENSG00000240361 havana 1 62947 63887 + OR4G11P OR4G11P-001 unprocessed_pseudogene unprocessed_pseudogene
+ENST00000335137 ENSG00000186092 ensembl_havana 1 69090 70008 + ENSP00000334393 OR4F5 OR4F5-001 protein_coding protein_coding
+ENST00000466430 ENSG00000238009 havana 1 89294 120932 - RP11-34P13.7 RP11-34P13.7-001 lincRNA lincRNA
+ENST00000477740 ENSG00000238009 havana 1 92229 129217 - RP11-34P13.7 RP11-34P13.7-003 lincRNA lincRNA
+ENST00000471248 ENSG00000238009 havana 1 110952 129173 - RP11-34P13.7 RP11-34P13.7-002 lincRNA lincRNA
+ENST00000610542 ENSG00000238009 ensembl 1 120724 133723 - RP11-34P13.7 RP11-34P13.7-201 lincRNA lincRNA
+ENST00000453576 ENSG00000238009 havana 1 129080 133566 - RP11-34P13.7 RP11-34P13.7-004 lincRNA lincRNA
+ENST00000495576 ENSG00000239945 havana 1 89550 91105 - RP11-34P13.8 RP11-34P13.8-001 lincRNA lincRNA
+ENST00000442987 ENSG00000233750 havana 1 131024 134836 + CICP27 CICP27-001 processed_pseudogene processed_pseudogene
+ENST00000494149 ENSG00000268903 havana 1 135140 135895 - RP11-34P13.15 RP11-34P13.15-001 processed_pseudogene processed_pseudogene
+ENST00000595919 ENSG00000269981 havana 1 137681 137965 - RP11-34P13.16 RP11-34P13.16-001 processed_pseudogene processed_pseudogene