Next changeset 1:57d45064f114 (2018-08-05) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gtfToBed12 commit adc4f5c431fca0bcdf93188b7065bdc4c9c424c6 |
added:
gtfToBed12.xml test-data/gtf2bed_test.bed test-data/gtf2bed_test.gtf test-data/gtf2bed_test_havana.bed test-data/gtf2bed_test_include_version.bed test-data/gtf2bed_test_missing_exon.bed test-data/gtf2bed_test_missing_exon.gtf test-data/gtf2bed_test_transcript_info.txt |
b |
diff -r 000000000000 -r 75a14cc16d4d gtfToBed12.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gtfToBed12.xml Fri May 18 15:10:34 2018 -0400 |
[ |
@@ -0,0 +1,137 @@
+<tool id="gtftobed12" name="Convert GTF to BED12" version="357">
+    <!-- Galaxy wrapper chaining two UCSC utilities: gtfToGenePred writes an
+         intermediate genePred file that genePredToBed converts to BED12. -->
+    <requirements>
+        <requirement type="package" version="357">ucsc-gtftogenepred</requirement>
+        <requirement type="package" version="357">ucsc-genepredtobed</requirement>
+    </requirements>
+    <command detect_errors="aggressive"><![CDATA[
+        ## Advanced flags are emitted only inside the "advanced" branch; the
+        ## parameters they come from are declared only under that case.
+        gtfToGenePred
+        #if $advanced_options.advanced_options_selector == "advanced":
+            $advanced_options.ignoreGroupsWithoutExons
+            $advanced_options.simple
+            $advanced_options.allErrors
+            $advanced_options.impliedStopAfterCds
+            $advanced_options.includeVersion
+            #if $advanced_options.infoOut:
+                -infoOut='${transcript_info_file}'
+            #end if
+            #for $prefix in $advanced_options.sourcePrefixes
+                -sourcePrefix='${prefix.sourcePrefix}'
+            #end for
+        #end if
+        '${gtf_file}'
+        'temp.genePred' &&
+        genePredToBed 'temp.genePred' '${bed_file}'
+    ]]>
+    </command>
+    <inputs>
+        <param name="gtf_file" type="data" format="gtf" label="GTF File to convert" />
+        <conditional name="advanced_options">
+            <param name="advanced_options_selector" type="select" label="Advanced options"
+                help="Advanced options for gtfToGenePred.">
+                <option value="default" selected="true">Use default options</option>
+                <option value="advanced">Set advanced options</option>
+            </param>
+            <when value="default" />
+            <when value="advanced">
+                <!-- One -sourcePrefix flag is emitted per repeat entry. -->
+                <repeat name="sourcePrefixes" title="Source Prefixes"
+                    help="Only process entries where the source name has the specified prefixes">
+                    <param argument="-sourcePrefix" label="Source prefix"
+                        type="text" />
+                </repeat>
+                <param argument="-ignoreGroupsWithoutExons" label="Ignore groups without exons"
+                    help="Ignore groups that do not have exons, otherwise they will cause an error."
+                    type="boolean" truevalue="-ignoreGroupsWithoutExons" falsevalue="" checked="false" />
+                <param argument="-simple" label="Skip hierarchy check"
+                    help="Only check column validity, not hierarchy, may result in invalid output."
+                    type="boolean" truevalue="-simple" falsevalue="" checked="false" />
+                <param argument="-allErrors" label="Skip all errors"
+                    help="Skip groups with errors rather than aborting. Useful for getting information
+                    about as many errors as possible."
+                    type="boolean" truevalue="-allErrors" falsevalue="" checked="false" />
+                <param argument="-impliedStopAfterCds" label="Implied stop codon after CDS"
+                    help="Assume there is an implied stop codon after CDS."
+                    type="boolean" truevalue="-impliedStopAfterCds" falsevalue="" checked="false" />
+                <param argument="-includeVersion" label="Include gene and transcript version"
+                    help="If gene_version and/or transcript_version attributes exist, include the version
+                    in the corresponding identifiers."
+                    type="boolean" truevalue="-includeVersion" falsevalue="" checked="false" />
+                <!-- No truevalue/falsevalue here: the command template and the output
+                     filter both test this parameter as a plain boolean. -->
+                <param argument="-infoOut" label="Output transcript information file"
+                    help="Outputs a file with information about each transcript."
+                    type="boolean" checked="false" />
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="bed_file" format="bed12" metadata_source="gtf_file" />
+        <data name="transcript_info_file" format="tabular" metadata_source="gtf_file">
+            <!-- 'infoOut' exists only in the "advanced" case of the conditional, so
+                 check the selector first; otherwise the expression fails on a
+                 missing key whenever the default options are used. -->
+            <filter>advanced_options['advanced_options_selector'] == 'advanced' and advanced_options['infoOut']</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="gtf_file" value="gtf2bed_test.gtf"/>
+            <output name="bed_file" file="gtf2bed_test.bed" ftype="bed12"/>
+        </test>
+        <test>
+            <param name="gtf_file" value="gtf2bed_test_missing_exon.gtf"/>
+            <param name="advanced_options_selector" value="advanced" />
+            <param name="ignoreGroupsWithoutExons" value="true" />
+            <output name="bed_file" file="gtf2bed_test_missing_exon.bed" ftype="bed12"/>
+        </test>
+        <test>
+            <param name="gtf_file" value="gtf2bed_test.gtf"/>
+            <param name="advanced_options_selector" value="advanced" />
+            <param name="includeVersion" value="true" />
+            <output name="bed_file" file="gtf2bed_test_include_version.bed" ftype="bed12"/>
+        </test>
+        <test>
+            <param name="gtf_file" value="gtf2bed_test.gtf"/>
+            <param name="advanced_options_selector" value="advanced" />
+            <param name="infoOut" value="true" />
+            <output name="bed_file" file="gtf2bed_test.bed" ftype="bed12"/>
+            <output name="transcript_info_file" file="gtf2bed_test_transcript_info.txt" ftype="tabular"/>
+        </test>
+        <test>
+            <param name="gtf_file" value="gtf2bed_test.gtf"/>
+            <param name="advanced_options_selector" value="advanced" />
+            <repeat name="sourcePrefixes">
+                <param name="sourcePrefix" value="hav" />
+            </repeat>
+            <output name="bed_file" file="gtf2bed_test_havana.bed" ftype="bed12"/>
+        </test>
+        <test>
+            <param name="gtf_file" value="gtf2bed_test.gtf"/>
+            <param name="advanced_options_selector" value="advanced" />
+            <repeat name="sourcePrefixes">
+                <param name="sourcePrefix" value="hav" />
+            </repeat>
+            <repeat name="sourcePrefixes">
+                <param name="sourcePrefix" value="ens" />
+            </repeat>
+            <output name="bed_file" file="gtf2bed_test.bed" ftype="bed12"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Converts a GTF_ file to a BED12_ formatted file using UCSC tools from Jim Kent.
+
+``gtfToGenePred``, followed by ``genePredToBed``
+
+.. _GTF: https://genome.ucsc.edu/FAQ/FAQformat.html#format4
+.. _BED12: https://genome.ucsc.edu/FAQ/FAQformat.html#format1
+    ]]>
+    </help>
+
+    <citations>
+        <citation type="doi">10.1101/gr.229102</citation>
+    </citations>
+</tool> |
b |
diff -r 000000000000 -r 75a14cc16d4d test-data/gtf2bed_test.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gtf2bed_test.bed Fri May 18 15:10:34 2018 -0400 |
b |
@@ -0,0 +1,21 @@ +1 11868 14409 ENST00000456328 0 + 14409 14409 0 3 359,109,1189, 0,744,1352, +1 12009 13670 ENST00000450305 0 + 13670 13670 0 6 48,49,85,78,154,218, 0,169,603,965,1211,1443, +1 14403 29570 ENST00000488147 0 - 29570 29570 0 11 98,34,152,159,198,136,137,147,99,154,37, 0,601,1392,2203,2454,2829,3202,3511,3864,10334,15130, +1 17368 17436 ENST00000619216 0 - 17436 17436 0 1 68, 0, +1 29553 31097 ENST00000473358 0 + 31097 31097 0 3 486,104,122, 0,1010,1422, +1 30266 31109 ENST00000469289 0 + 31109 31109 0 2 401,134, 0,709, +1 30365 30503 ENST00000607096 0 + 30503 30503 0 1 138, 0, +1 34553 36081 ENST00000417324 0 - 36081 36081 0 3 621,205,361, 0,723,1167, +1 35244 36073 ENST00000461467 0 - 36073 36073 0 2 237,353, 0,476, +1 52472 53312 ENST00000606857 0 + 53312 53312 0 1 840, 0, +1 62947 63887 ENST00000492842 0 + 63887 63887 0 1 940, 0, +1 69090 70008 ENST00000335137 0 + 69090 70008 0 1 918, 0, +1 89294 120932 ENST00000466430 0 - 120932 120932 0 4 2335,150,105,158, 0,2796,23405,31480, +1 92229 129217 ENST00000477740 0 - 129217 129217 0 4 11,105,212,163, 0,20470,28491,36825, +1 110952 129173 ENST00000471248 0 - 129173 129173 0 3 405,105,119, 0,1747,18102, +1 120724 133723 ENST00000610542 0 - 133723 133723 0 4 145,59,169,350, 0,149,8330,12649, +1 129080 133566 ENST00000453576 0 - 133566 133566 0 2 143,193, 0,4293, +1 89550 91105 ENST00000495576 0 - 91105 91105 0 2 500,819, 0,736, +1 131024 134836 ENST00000442987 0 + 134836 134836 0 1 3812, 0, +1 135140 135895 ENST00000494149 0 - 135895 135895 0 1 755, 0, +1 137681 137965 ENST00000595919 0 - 137965 137965 0 1 284, 0, |
b |
diff -r 000000000000 -r 75a14cc16d4d test-data/gtf2bed_test.gtf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gtf2bed_test.gtf Fri May 18 15:10:34 2018 -0400 |
b |
b'@@ -0,0 +1,100 @@\n+#!genome-build GRCh38.p2\n+#!genome-version GRCh38\n+#!genome-date 2013-12\n+#!genome-build-accession NCBI:GCA_000001405.17\n+#!genebuild-last-updated 2015-01\n+1\thavana\tgene\t11869\t14409\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene";\n+1\thavana\ttranscript\t11869\t14409\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-002"; transcript_source "havana"; transcript_biotype "processed_transcript"; tag "basic"; transcript_support_level "1";\n+1\thavana\texon\t11869\t12227\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; exon_number "1"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-002"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00002234944"; exon_version "1"; tag "basic"; transcript_support_level "1";\n+1\thavana\texon\t12613\t12721\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; exon_number "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-002"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00003582793"; exon_version "1"; tag "basic"; transcript_support_level "1";\n+1\thavana\texon\t13221\t14409\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; exon_number "3"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-002"; transcript_source "havana"; transcript_biotype "processed_transcript"; 
exon_id "ENSE00002312635"; exon_version "1"; tag "basic"; transcript_support_level "1";\n+1\thavana\ttranscript\t12010\t13670\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-001"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t12010\t12057\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "1"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-001"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001948541"; exon_version "1"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t12179\t12227\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-001"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001671638"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t12613\t12697\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "3"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-001"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001758273"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t12975\t13052\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; 
transcript_version "2"; exon_number "4"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcri'..b'1"; transcript_id "ENST00000495576"; transcript_version "1"; gene_name "RP11-34P13.8"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "RP11-34P13.8-001"; transcript_source "havana"; transcript_biotype "lincRNA"; tag "basic"; transcript_support_level "5";\n+1\thavana\texon\t90287\t91105\t.\t-\t.\tgene_id "ENSG00000239945"; gene_version "1"; transcript_id "ENST00000495576"; transcript_version "1"; exon_number "1"; gene_name "RP11-34P13.8"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "RP11-34P13.8-001"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001907785"; exon_version "1"; tag "basic"; transcript_support_level "5";\n+1\thavana\texon\t89551\t90050\t.\t-\t.\tgene_id "ENSG00000239945"; gene_version "1"; transcript_id "ENST00000495576"; transcript_version "1"; exon_number "2"; gene_name "RP11-34P13.8"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "RP11-34P13.8-001"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001927725"; exon_version "1"; tag "basic"; transcript_support_level "5";\n+1\thavana\tgene\t131025\t134836\t.\t+\t.\tgene_id "ENSG00000233750"; gene_version "3"; gene_name "CICP27"; gene_source "havana"; gene_biotype "processed_pseudogene";\n+1\thavana\ttranscript\t131025\t134836\t.\t+\t.\tgene_id "ENSG00000233750"; gene_version "3"; transcript_id "ENST00000442987"; transcript_version "3"; gene_name "CICP27"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "CICP27-001"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t131025\t134836\t.\t+\t.\tgene_id "ENSG00000233750"; gene_version "3"; transcript_id "ENST00000442987"; transcript_version "3"; exon_number "1"; gene_name "CICP27"; gene_source "havana"; gene_biotype "processed_pseudogene"; 
transcript_name "CICP27-001"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; exon_id "ENSE00001625118"; exon_version "3"; tag "basic"; transcript_support_level "NA";\n+1\thavana\tgene\t135141\t135895\t.\t-\t.\tgene_id "ENSG00000268903"; gene_version "1"; gene_name "RP11-34P13.15"; gene_source "havana"; gene_biotype "processed_pseudogene";\n+1\thavana\ttranscript\t135141\t135895\t.\t-\t.\tgene_id "ENSG00000268903"; gene_version "1"; transcript_id "ENST00000494149"; transcript_version "2"; gene_name "RP11-34P13.15"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "RP11-34P13.15-001"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t135141\t135895\t.\t-\t.\tgene_id "ENSG00000268903"; gene_version "1"; transcript_id "ENST00000494149"; transcript_version "2"; exon_number "1"; gene_name "RP11-34P13.15"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "RP11-34P13.15-001"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; exon_id "ENSE00001879101"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n+1\thavana\tgene\t137682\t137965\t.\t-\t.\tgene_id "ENSG00000269981"; gene_version "1"; gene_name "RP11-34P13.16"; gene_source "havana"; gene_biotype "processed_pseudogene";\n+1\thavana\ttranscript\t137682\t137965\t.\t-\t.\tgene_id "ENSG00000269981"; gene_version "1"; transcript_id "ENST00000595919"; transcript_version "1"; gene_name "RP11-34P13.16"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "RP11-34P13.16-001"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t137682\t137965\t.\t-\t.\tgene_id "ENSG00000269981"; gene_version "1"; transcript_id "ENST00000595919"; transcript_version "1"; exon_number "1"; gene_name "RP11-34P13.16"; gene_source "havana"; gene_biotype 
"processed_pseudogene"; transcript_name "RP11-34P13.16-001"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; exon_id "ENSE00001936432"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n' |
b |
diff -r 000000000000 -r 75a14cc16d4d test-data/gtf2bed_test_havana.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gtf2bed_test_havana.bed Fri May 18 15:10:34 2018 -0400 |
b |
@@ -0,0 +1,17 @@ +1 11868 14409 ENST00000456328 0 + 14409 14409 0 3 359,109,1189, 0,744,1352, +1 12009 13670 ENST00000450305 0 + 13670 13670 0 6 48,49,85,78,154,218, 0,169,603,965,1211,1443, +1 14403 29570 ENST00000488147 0 - 29570 29570 0 11 98,34,152,159,198,136,137,147,99,154,37, 0,601,1392,2203,2454,2829,3202,3511,3864,10334,15130, +1 29553 31097 ENST00000473358 0 + 31097 31097 0 3 486,104,122, 0,1010,1422, +1 30266 31109 ENST00000469289 0 + 31109 31109 0 2 401,134, 0,709, +1 34553 36081 ENST00000417324 0 - 36081 36081 0 3 621,205,361, 0,723,1167, +1 35244 36073 ENST00000461467 0 - 36073 36073 0 2 237,353, 0,476, +1 52472 53312 ENST00000606857 0 + 53312 53312 0 1 840, 0, +1 62947 63887 ENST00000492842 0 + 63887 63887 0 1 940, 0, +1 89294 120932 ENST00000466430 0 - 120932 120932 0 4 2335,150,105,158, 0,2796,23405,31480, +1 92229 129217 ENST00000477740 0 - 129217 129217 0 4 11,105,212,163, 0,20470,28491,36825, +1 110952 129173 ENST00000471248 0 - 129173 129173 0 3 405,105,119, 0,1747,18102, +1 129080 133566 ENST00000453576 0 - 133566 133566 0 2 143,193, 0,4293, +1 89550 91105 ENST00000495576 0 - 91105 91105 0 2 500,819, 0,736, +1 131024 134836 ENST00000442987 0 + 134836 134836 0 1 3812, 0, +1 135140 135895 ENST00000494149 0 - 135895 135895 0 1 755, 0, +1 137681 137965 ENST00000595919 0 - 137965 137965 0 1 284, 0, |
b |
diff -r 000000000000 -r 75a14cc16d4d test-data/gtf2bed_test_include_version.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gtf2bed_test_include_version.bed Fri May 18 15:10:34 2018 -0400 |
b |
@@ -0,0 +1,21 @@ +1 11868 14409 ENST00000456328.2 0 + 14409 14409 0 3 359,109,1189, 0,744,1352, +1 12009 13670 ENST00000450305.2 0 + 13670 13670 0 6 48,49,85,78,154,218, 0,169,603,965,1211,1443, +1 14403 29570 ENST00000488147.1 0 - 29570 29570 0 11 98,34,152,159,198,136,137,147,99,154,37, 0,601,1392,2203,2454,2829,3202,3511,3864,10334,15130, +1 17368 17436 ENST00000619216.1 0 - 17436 17436 0 1 68, 0, +1 29553 31097 ENST00000473358.1 0 + 31097 31097 0 3 486,104,122, 0,1010,1422, +1 30266 31109 ENST00000469289.1 0 + 31109 31109 0 2 401,134, 0,709, +1 30365 30503 ENST00000607096.1 0 + 30503 30503 0 1 138, 0, +1 34553 36081 ENST00000417324.1 0 - 36081 36081 0 3 621,205,361, 0,723,1167, +1 35244 36073 ENST00000461467.1 0 - 36073 36073 0 2 237,353, 0,476, +1 52472 53312 ENST00000606857.1 0 + 53312 53312 0 1 840, 0, +1 62947 63887 ENST00000492842.1 0 + 63887 63887 0 1 940, 0, +1 69090 70008 ENST00000335137.3 0 + 69090 70008 0 1 918, 0, +1 89294 120932 ENST00000466430.4 0 - 120932 120932 0 4 2335,150,105,158, 0,2796,23405,31480, +1 92229 129217 ENST00000477740.4 0 - 129217 129217 0 4 11,105,212,163, 0,20470,28491,36825, +1 110952 129173 ENST00000471248.1 0 - 129173 129173 0 3 405,105,119, 0,1747,18102, +1 120724 133723 ENST00000610542.1 0 - 133723 133723 0 4 145,59,169,350, 0,149,8330,12649, +1 129080 133566 ENST00000453576.2 0 - 133566 133566 0 2 143,193, 0,4293, +1 89550 91105 ENST00000495576.1 0 - 91105 91105 0 2 500,819, 0,736, +1 131024 134836 ENST00000442987.3 0 + 134836 134836 0 1 3812, 0, +1 135140 135895 ENST00000494149.2 0 - 135895 135895 0 1 755, 0, +1 137681 137965 ENST00000595919.1 0 - 137965 137965 0 1 284, 0, |
b |
diff -r 000000000000 -r 75a14cc16d4d test-data/gtf2bed_test_missing_exon.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gtf2bed_test_missing_exon.bed Fri May 18 15:10:34 2018 -0400 |
b |
@@ -0,0 +1,20 @@ +1 12009 13670 ENST00000450305 0 + 13670 13670 0 6 48,49,85,78,154,218, 0,169,603,965,1211,1443, +1 14403 29570 ENST00000488147 0 - 29570 29570 0 11 98,34,152,159,198,136,137,147,99,154,37, 0,601,1392,2203,2454,2829,3202,3511,3864,10334,15130, +1 17368 17436 ENST00000619216 0 - 17436 17436 0 1 68, 0, +1 29553 31097 ENST00000473358 0 + 31097 31097 0 3 486,104,122, 0,1010,1422, +1 30266 31109 ENST00000469289 0 + 31109 31109 0 2 401,134, 0,709, +1 30365 30503 ENST00000607096 0 + 30503 30503 0 1 138, 0, +1 34553 36081 ENST00000417324 0 - 36081 36081 0 3 621,205,361, 0,723,1167, +1 35244 36073 ENST00000461467 0 - 36073 36073 0 2 237,353, 0,476, +1 52472 53312 ENST00000606857 0 + 53312 53312 0 1 840, 0, +1 62947 63887 ENST00000492842 0 + 63887 63887 0 1 940, 0, +1 69090 70008 ENST00000335137 0 + 69090 70008 0 1 918, 0, +1 89294 120932 ENST00000466430 0 - 120932 120932 0 4 2335,150,105,158, 0,2796,23405,31480, +1 92229 129217 ENST00000477740 0 - 129217 129217 0 4 11,105,212,163, 0,20470,28491,36825, +1 110952 129173 ENST00000471248 0 - 129173 129173 0 3 405,105,119, 0,1747,18102, +1 120724 133723 ENST00000610542 0 - 133723 133723 0 4 145,59,169,350, 0,149,8330,12649, +1 129080 133566 ENST00000453576 0 - 133566 133566 0 2 143,193, 0,4293, +1 89550 91105 ENST00000495576 0 - 91105 91105 0 2 500,819, 0,736, +1 131024 134836 ENST00000442987 0 + 134836 134836 0 1 3812, 0, +1 135140 135895 ENST00000494149 0 - 135895 135895 0 1 755, 0, +1 137681 137965 ENST00000595919 0 - 137965 137965 0 1 284, 0, |
b |
diff -r 000000000000 -r 75a14cc16d4d test-data/gtf2bed_test_missing_exon.gtf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gtf2bed_test_missing_exon.gtf Fri May 18 15:10:34 2018 -0400 |
b |
b'@@ -0,0 +1,97 @@\n+#!genome-build GRCh38.p2\n+#!genome-version GRCh38\n+#!genome-date 2013-12\n+#!genome-build-accession NCBI:GCA_000001405.17\n+#!genebuild-last-updated 2015-01\n+1\thavana\tgene\t11869\t14409\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene";\n+1\thavana\ttranscript\t11869\t14409\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-002"; transcript_source "havana"; transcript_biotype "processed_transcript"; tag "basic"; transcript_support_level "1";\n+1\thavana\ttranscript\t12010\t13670\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-001"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t12010\t12057\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "1"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-001"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001948541"; exon_version "1"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t12179\t12227\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-001"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id 
"ENSE00001671638"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t12613\t12697\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "3"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-001"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001758273"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t12975\t13052\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "4"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-001"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001799933"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t13221\t13374\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "5"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-001"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001746346"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t13453\t13670\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "6"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-001"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001863096"; exon_version "1"; tag "basic"; transcript_support_level "NA";\n+1\thavana\tgene\t14404\t29570\t.\t-\t.\tgene_id "ENSG00000227232"; 
gene_version "5"; gene_name "WASH7P"; gene_source "havana"; gene_biotype "unprocessed_pseudogene";\n+1\thavana\ttran'..b'1"; transcript_id "ENST00000495576"; transcript_version "1"; gene_name "RP11-34P13.8"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "RP11-34P13.8-001"; transcript_source "havana"; transcript_biotype "lincRNA"; tag "basic"; transcript_support_level "5";\n+1\thavana\texon\t90287\t91105\t.\t-\t.\tgene_id "ENSG00000239945"; gene_version "1"; transcript_id "ENST00000495576"; transcript_version "1"; exon_number "1"; gene_name "RP11-34P13.8"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "RP11-34P13.8-001"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001907785"; exon_version "1"; tag "basic"; transcript_support_level "5";\n+1\thavana\texon\t89551\t90050\t.\t-\t.\tgene_id "ENSG00000239945"; gene_version "1"; transcript_id "ENST00000495576"; transcript_version "1"; exon_number "2"; gene_name "RP11-34P13.8"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "RP11-34P13.8-001"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001927725"; exon_version "1"; tag "basic"; transcript_support_level "5";\n+1\thavana\tgene\t131025\t134836\t.\t+\t.\tgene_id "ENSG00000233750"; gene_version "3"; gene_name "CICP27"; gene_source "havana"; gene_biotype "processed_pseudogene";\n+1\thavana\ttranscript\t131025\t134836\t.\t+\t.\tgene_id "ENSG00000233750"; gene_version "3"; transcript_id "ENST00000442987"; transcript_version "3"; gene_name "CICP27"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "CICP27-001"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t131025\t134836\t.\t+\t.\tgene_id "ENSG00000233750"; gene_version "3"; transcript_id "ENST00000442987"; transcript_version "3"; exon_number "1"; gene_name "CICP27"; gene_source "havana"; gene_biotype 
"processed_pseudogene"; transcript_name "CICP27-001"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; exon_id "ENSE00001625118"; exon_version "3"; tag "basic"; transcript_support_level "NA";\n+1\thavana\tgene\t135141\t135895\t.\t-\t.\tgene_id "ENSG00000268903"; gene_version "1"; gene_name "RP11-34P13.15"; gene_source "havana"; gene_biotype "processed_pseudogene";\n+1\thavana\ttranscript\t135141\t135895\t.\t-\t.\tgene_id "ENSG00000268903"; gene_version "1"; transcript_id "ENST00000494149"; transcript_version "2"; gene_name "RP11-34P13.15"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "RP11-34P13.15-001"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t135141\t135895\t.\t-\t.\tgene_id "ENSG00000268903"; gene_version "1"; transcript_id "ENST00000494149"; transcript_version "2"; exon_number "1"; gene_name "RP11-34P13.15"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "RP11-34P13.15-001"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; exon_id "ENSE00001879101"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n+1\thavana\tgene\t137682\t137965\t.\t-\t.\tgene_id "ENSG00000269981"; gene_version "1"; gene_name "RP11-34P13.16"; gene_source "havana"; gene_biotype "processed_pseudogene";\n+1\thavana\ttranscript\t137682\t137965\t.\t-\t.\tgene_id "ENSG00000269981"; gene_version "1"; transcript_id "ENST00000595919"; transcript_version "1"; gene_name "RP11-34P13.16"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "RP11-34P13.16-001"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t137682\t137965\t.\t-\t.\tgene_id "ENSG00000269981"; gene_version "1"; transcript_id "ENST00000595919"; transcript_version "1"; exon_number "1"; gene_name "RP11-34P13.16"; gene_source 
"havana"; gene_biotype "processed_pseudogene"; transcript_name "RP11-34P13.16-001"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; exon_id "ENSE00001936432"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n' |
b |
diff -r 000000000000 -r 75a14cc16d4d test-data/gtf2bed_test_transcript_info.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gtf2bed_test_transcript_info.txt Fri May 18 15:10:34 2018 -0400 |
b |
@@ -0,0 +1,22 @@ +#transId geneId source chrom start end strand proteinId geneName transcriptName geneType transcriptType +ENST00000456328 ENSG00000223972 havana 1 11868 14409 + DDX11L1 DDX11L1-002 transcribed_unprocessed_pseudogene processed_transcript +ENST00000450305 ENSG00000223972 havana 1 12009 13670 + DDX11L1 DDX11L1-001 transcribed_unprocessed_pseudogene transcribed_unprocessed_pseudogene +ENST00000488147 ENSG00000227232 havana 1 14403 29570 - WASH7P WASH7P-001 unprocessed_pseudogene unprocessed_pseudogene +ENST00000619216 ENSG00000278267 ensembl 1 17368 17436 - MIR6859-1 MIR6859-1-201 miRNA miRNA +ENST00000473358 ENSG00000243485 havana 1 29553 31097 + RP11-34P13.3 RP11-34P13.3-001 lincRNA lincRNA +ENST00000469289 ENSG00000243485 havana 1 30266 31109 + RP11-34P13.3 RP11-34P13.3-002 lincRNA lincRNA +ENST00000607096 ENSG00000274890 ensembl 1 30365 30503 + MIR1302-2 MIR1302-2-201 miRNA miRNA +ENST00000417324 ENSG00000237613 havana 1 34553 36081 - FAM138A FAM138A-001 lincRNA lincRNA +ENST00000461467 ENSG00000237613 havana 1 35244 36073 - FAM138A FAM138A-002 lincRNA lincRNA +ENST00000606857 ENSG00000268020 havana 1 52472 53312 + OR4G4P OR4G4P-001 unprocessed_pseudogene unprocessed_pseudogene +ENST00000492842 ENSG00000240361 havana 1 62947 63887 + OR4G11P OR4G11P-001 unprocessed_pseudogene unprocessed_pseudogene +ENST00000335137 ENSG00000186092 ensembl_havana 1 69090 70008 + ENSP00000334393 OR4F5 OR4F5-001 protein_coding protein_coding +ENST00000466430 ENSG00000238009 havana 1 89294 120932 - RP11-34P13.7 RP11-34P13.7-001 lincRNA lincRNA +ENST00000477740 ENSG00000238009 havana 1 92229 129217 - RP11-34P13.7 RP11-34P13.7-003 lincRNA lincRNA +ENST00000471248 ENSG00000238009 havana 1 110952 129173 - RP11-34P13.7 RP11-34P13.7-002 lincRNA lincRNA +ENST00000610542 ENSG00000238009 ensembl 1 120724 133723 - RP11-34P13.7 RP11-34P13.7-201 lincRNA lincRNA +ENST00000453576 ENSG00000238009 havana 1 129080 133566 - RP11-34P13.7 RP11-34P13.7-004 lincRNA lincRNA +ENST00000495576 
ENSG00000239945 havana 1 89550 91105 - RP11-34P13.8 RP11-34P13.8-001 lincRNA lincRNA +ENST00000442987 ENSG00000233750 havana 1 131024 134836 + CICP27 CICP27-001 processed_pseudogene processed_pseudogene +ENST00000494149 ENSG00000268903 havana 1 135140 135895 - RP11-34P13.15 RP11-34P13.15-001 processed_pseudogene processed_pseudogene +ENST00000595919 ENSG00000269981 havana 1 137681 137965 - RP11-34P13.16 RP11-34P13.16-001 processed_pseudogene processed_pseudogene |