Galaxy |

Changeset 1:247b439a78f7 (2019-10-18)

Previous changeset 0:040d4b3a19d5 (2019-04-03) Next changeset 2:e255c0e5dfca (2019-10-18)

Commit message:
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/util/.shed.yml commit 194d2e0af16624c9a3d1af92f7b3686d2e0ee552

added:
gtf2featureAnnotation.R
gtf2featureAnnotation.xml
test-data/annotation.txt
test-data/test.gtf

removed:
gtf2gene.R
gtf2gene_list.xml

diff -r 040d4b3a19d5 -r 247b439a78f7 gtf2featureAnnotation.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gtf2featureAnnotation.R Fri Oct 18 10:10:54 2019 -0400

[

@@ -0,0 +1,195 @@
+#!/usr/bin/env Rscript
+
+# This script parses the GTF file to create a feature-wise annotation file with
+# mitochondrial features flagged, to assist in annotation and QC of single-cell
+# expression data analysis.
+
+suppressPackageStartupMessages(require(rtracklayer))
+suppressPackageStartupMessages(require(optparse))
+
+# Upper-case the first character of each string and lower-case the rest,
+# so that e.g. "TRUE" becomes "True".
+ucfirst <- function (str) {
+  leading <- toupper(substring(str, 1, 1))
+  trailing <- tolower(substring(str, 2))
+  paste0(leading, trailing)
+}
+
+# Write a message to standard error, then terminate the script with exit
+# status 1.
+die <- function(message){
+  write(x = message, file = stderr())
+  quit(status = 1)
+}
+
+# Split a comma-separated string into its elements and lower-case each one.
+cleanlist <- function(str){
+  pieces <- strsplit(str, ',')
+  tolower(unlist(pieces))
+}
+
+# Raw trailing command-line arguments (kept for reference; parse_args() below
+# reads the command line itself).
+cl <- commandArgs(trailingOnly = TRUE)
+
+# Command-line interface definition. Flags (store_true / store_false) are
+# declared with type 'logical'; everything else takes a character value.
+# Options whose default is NA are mandatory and are validated after parsing;
+# options whose default is NULL are optional and are tested with is.null().
+option_list <- list(
+  make_option(
+    c("-g", "--gtf-file"),
+    action = "store",
+    default = NA,
+    type = 'character',
+    help = "Path to a valid GTF file"
+  ),
+  make_option(
+    c("-t", "--feature-type"),
+    action = "store",
+    default = 'gene',
+    type = 'character',
+    help = 'Feature type to use (default: gene)'
+  ),
+  make_option(
+    c("-f", "--first-field"),
+    action = "store",
+    default = 'gene_id',
+    type = 'character',
+    help = 'Field to place first in output table (default: gene_id)'
+  ),
+  # NOTE: store_false with default TRUE means opt$no_header is TRUE when a
+  # header SHOULD be written and FALSE when --no-header is passed.
+  make_option(
+    c("-r", "--no-header"),
+    action = "store_false",
+    default = TRUE,
+    type = 'logical',
+    help = 'Suppress header on output'
+  ),
+  make_option(
+    c("-l", "--fields"),
+    action = "store",
+    default = NULL,
+    type = 'character',
+    help = 'Comma-separated list of output fields to retain (default: all)'
+  ),
+  # --mito is a boolean flag, so it is typed 'logical' like the other
+  # store_true options (it was previously declared as 'character').
+  make_option(
+    c("-m", "--mito"),
+    action = "store_true",
+    default = FALSE,
+    type = 'logical',
+    help = 'Mark mitochondrial elements with reference to chromsomes and biotypes'
+  ),
+  make_option(
+    c("-n", "--mito-chr"),
+    action = "store",
+    default = 'mt,mitochondrion_genome,mito,m,chrM,chrMt',
+    type = 'character',
+    help = 'If specified, marks in a column called "mito" features on the specified chromosomes (case insensitive)'
+  ),
+  make_option(
+    c("-p", "--mito-biotypes"),
+    action = "store",
+    default = 'mt_trna,mt_rrna,mt_trna_pseudogene',
+    type = 'character',
+    help = 'If specified,  marks in a column called "mito" features with the specified biotypes (case insensitve)'
+  ),
+  make_option(
+    c("-c", "--filter-cdnas"),
+    action = "store",
+    default = NULL,
+    type = 'character',
+    help = 'If specified, sequences in the provided FASTA-format cDNAs file will be filtered to remove entries not present in the annotation'
+  ),
+  make_option(
+    c("-d", "--filter-cdnas-field"),
+    action = "store",
+    default = 'transcript_id',
+    type = 'character',
+    help = 'Where --filter-cdnas is specified, what field should be used to compare to identfiers from the FASTA?'
+  ),
+  make_option(
+    c("-e", "--filter-cdnas-output"),
+    action = "store",
+    default = 'filtered.fa.gz',
+    type = 'character',
+    help = 'Where --filter-cdnas is specified, what file should the filtered sequences be output to?'
+  ),
+  make_option(
+    c("-u", "--version-transcripts"),
+    action = "store_true",
+    default = FALSE,
+    type = 'logical',
+    help = 'Where the GTF contains transcript versions, should these be appended to transcript identifiers? Useful when generating transcript/gene mappings for use with transcriptomes.'
+  ),
+  make_option(
+    c("-o", "--output-file"),
+    action = "store",
+    default = NA,
+    type = 'character',
+    help = 'Output file path'
+  )
+)
+
+# Parse the command line; option names such as --gtf-file are exposed with
+# underscores (opt$gtf_file).
+opt <- parse_args(
+  OptionParser(option_list = option_list),
+  convert_hyphens_to_underscores = TRUE
+)
+
+# The input GTF and the output path have NA defaults and must be supplied.
+
+if (is.na(opt$gtf_file)){
+  die('ERROR: No input GTF file specified')
+}
+
+if (is.na(opt$output_file)){
+  die('ERROR: No output file specified')
+}
+
+# Read the GTF, restricted to records of the requested feature type
+
+print(paste('Reading', opt$gtf_file, 'elements of type', opt$feature_type))
+gtf <- import(opt$gtf_file, feature.type = opt$feature_type)
+
+# Build one table: chromosome name, range coordinates, then every attribute
+# parsed from the GTF attribute column
+
+anno <- cbind(
+  chromosome = seqnames(gtf),
+  as.data.frame(ranges(gtf)),
+  elementMetadata(gtf)
+)
+print(paste('Found', nrow(anno), 'features'))
+
+# Mark mitochondrial features: a feature is flagged when its biotype or its
+# chromosome matches one of the supplied (case-insensitive) lists. The flag is
+# stored as the strings "True" / "False".
+
+if (opt$mito){
+  on_mito_chr <- tolower(anno$chromosome) %in% cleanlist(opt$mito_chr)
+
+  if ('gene_biotype' %in% colnames(anno)){
+    is_mito <- tolower(anno$gene_biotype) %in% cleanlist(opt$mito_biotypes) | on_mito_chr
+  } else {
+    # Without a gene_biotype attribute the biotype test would be zero-length
+    # and the column assignment below would fail; use the chromosome alone.
+    is_mito <- on_mito_chr
+  }
+
+  anno$mito <- ucfirst(as.character(is_mito))
+}
+
+# If specified, put the desired field first. The field must already be a
+# column of the annotation table; otherwise the script stops with an error.
+
+if (! is.na(opt$first_field)){
+  if (! opt$first_field %in% colnames(anno)){
+    # The message must use opt$first_field: a bare `first_field` is not
+    # defined anywhere and would raise "object not found" instead.
+    die(paste(opt$first_field, 'is not a valid field'))
+  }
+  remaining_fields <- colnames(anno)[colnames(anno) != opt$first_field]
+  anno <- anno[,c(opt$first_field, remaining_fields)]
+}
+
+# Append the transcript version to the transcript identifier ("ID.version").
+# Only applies to transcript-level output, when requested, and when both
+# fields are present in the table.
+
+if (opt$feature_type == 'transcript' && opt$version_transcripts){
+  if (all(c('transcript_id', 'transcript_version') %in% colnames(anno))){
+    anno$transcript_id <- paste(anno$transcript_id, anno$transcript_version, sep='.')
+  }
+}
+
+# Optionally restrict a cDNA FASTA file to sequences present in the annotation
+
+if (! is.null(opt$filter_cdnas)){
+
+  print(paste("Filtering", opt$filter_cdnas, "to match the GTF"))
+
+  suppressPackageStartupMessages(require(Biostrings))
+
+  cdna <- readDNAStringSet(opt$filter_cdnas)
+
+  # The identifier is the text before the first space of each FASTA header
+  cdna_transcript_names <- vapply(
+    names(cdna),
+    function(header) unlist(strsplit(header, ' '))[1],
+    character(1),
+    USE.NAMES = FALSE
+  )
+
+  # Which sequences have a matching entry in the chosen annotation field?
+  in_annotation <- cdna_transcript_names %in% anno[[opt$filter_cdnas_field]]
+
+  if (! any(in_annotation)){
+    die(paste("ERROR: None of the input sequences have matching", opt$filter_cdnas_field, 'values in the GTF file'))
+  }
+
+  cdna <- cdna[which(in_annotation)]
+
+  print(paste('Storing filtered seqeunces to', opt$filter_cdnas_output))
+  writeXStringSet(x = cdna, filepath = opt$filter_cdnas_output, compress = 'gzip')
+}
+
+# If specified, subset to desired fields, then drop any row that has an NA in
+# one of the retained fields.
+
+if (! is.null(opt$fields) && opt$fields != ''){
+  fields <- unlist(strsplit(opt$fields, ','))
+
+  # Report only the offending names, in a single message (pasting the whole
+  # `fields` vector would emit one line per requested field).
+  invalid_fields <- fields[! fields %in% colnames(anno)]
+  if (length(invalid_fields) > 0){
+    die(paste('ERROR:', opt$fields, 'contains invalid field(s):', paste(invalid_fields, collapse = ', ')))
+  }
+
+  anno <- anno[,fields, drop = FALSE]
+
+  # drop = FALSE keeps a one-column table as a table; without it a single
+  # requested field can collapse to a bare vector and lose its column name.
+  anno <- anno[apply(anno, 1, function(x) all(! is.na(x))), , drop = FALSE]
+}
+
+# Write the table as unquoted tab-separated text. opt$no_header is TRUE by
+# default and FALSE when --no-header was passed, so it can be used directly
+# as the col.names switch.
+print(paste('Storing output to', opt$output_file))
+write.table(
+  anno,
+  file = opt$output_file,
+  sep = "\t",
+  quote = FALSE,
+  row.names = FALSE,
+  col.names = opt$no_header
+)

diff -r 040d4b3a19d5 -r 247b439a78f7 gtf2featureAnnotation.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gtf2featureAnnotation.xml Fri Oct 18 10:10:54 2019 -0400

[

@@ -0,0 +1,96 @@
+<tool id="_ensembl_gtf2gene_list" name="GTF2GeneAnnotationTable" version="1.42.1+galaxy1">
+    <description>extracts a complete annotation table or subsets thereof from an Ensembl GTF using rtracklayer</description>
+    <requirements>
+      <requirement type="package" version="1.42.1">bioconductor-rtracklayer</requirement>
+      <requirement type="package">bioconductor-biostrings</requirement>
+      <requirement type="package">r-optparse</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+       ln -s '$gtf_input' input.gtf;
+       $__tool_directory__/gtf2featureAnnotation.R --gtf-file input.gtf
+#if $noheader
+--no-header
+#end if
+#if $version_transcripts
+--version-transcripts
+#end if
+#if $mito.mark_mito
+--mito --mito-chr "${mito.mito_chr}" --mito-biotypes "${mito.mito_biotypes}"
+#end if
+#if $cdnas.filter_cdnas
+--filter-cdnas "${cdnas.fasta_input}" --filter-cdnas-field "${cdnas.cdnas_field}" --filter-cdnas-output "${fasta_output}"
+#end if
+--feature-type "${feature_type}" --first-field "${first_field}" --output-file annotation.txt --fields "${fields}"
+     ]]></command>
+
+    <inputs>
+        <param name="gtf_input" type="data" format="gff" label="Ensembl GTF file" />
+        <param name="feature_type" type="text" optional='true' value="gene" label="Feature type for which to derive annotation"/>
+        <param name="first_field" type="text" optional='true' value="gene_id" label="Field to place first in output table"/>
+        <param name="noheader" type="boolean" checked="false" label="Suppress header line in output?"/>
+        <param name="fields" type="text" optional='true' value="" label="Comma-separated list of field names to extract from the GTF (default: use all fields)"/>
+        <param name="version_transcripts" type="boolean" checked="false" label="Append version to transcript identifiers?" help="For transcript feature type only: where the GTF contains transcript versions, should these be appended to transcript identifiers? Useful when generating transcript/gene mappings for use with transcriptomes"/>
+        <conditional name="mito">
+          <param name="mark_mito" type="boolean" checked="true" label="Flag mitochondrial features?"/>
+          <when value="true">
+            <param name="mito_chr" type="text" optional='true' value="mt,mitochondrion_genome,mito,m,chrM,chrMt" label="Comma-separated list of possible mitochondrial chromosome names (case insensitive)"/>
+            <param name="mito_biotypes" type="text" optional='true' value="mt_trna,mt_rrna,mt_trna_pseudogene" label="Comma-separated list of possible mitochondrial biotypes (case insensitive)"/>
+          </when>
+          <when value="false" />
+        </conditional>
+        <conditional name="cdnas">
+          <param name="filter_cdnas" type="boolean" checked="false" label="Filter a FASTA-format cDNA file to match annotations?" help="For some applications, e.g. transcriptome mappers, its useful to match a cDNAs file to an annotation list (e.g. transcript-to-gene mapping)"/>
+          <when value="true">
+            <param name="fasta_input" type="data" format="fasta" label="FASTA-format cDNA/ transcript file" />
+            <param name="cdnas_field" type="text" optional='true' value="transcript_id" label="Annotation field to match with sequences."/>
+          </when>
+          <when value="false" />
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <!-- Annotation table written by the script to annotation.txt -->
+        <data name="feature_annotation" format="tsv" from_work_dir="annotation.txt" label="${tool.name} on ${on_string}: annotation table"/>
+        <!-- Filtered sequences; only produced when cDNA filtering is enabled.
+             filter_cdnas lives inside the "cdnas" conditional, so the filter
+             has to address it through that conditional.
+             NOTE(review): the script writes this file gzip-compressed while
+             the declared format is plain fasta; confirm the intended datatype. -->
+        <data name="fasta_output" format="fasta" from_work_dir="filtered.fa.gz" label="${tool.name} on ${on_string}: annotation-matched sequences">
+          <filter>cdnas['filter_cdnas']</filter>
+        </data>
+    </outputs>
+    <tests>
+      <test>
+        <param name="gtf_input" ftype="gtf" value="test.gtf"/>
+        <output name="feature_annotation" file="annotation.txt"/>
+      </test>
+    </tests>
+
+
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+Given an Ensembl GTF file, it will extract all information on chromosomes, coordinates, and attributes provided at the specified feature level. Mitochondrial features can also be flagged.
+
+You can also supply a fasta-format file of sequences, which will be filtered by identifier to match annotation. This can be useful for tools such as Alevin which need a transcript-to-gene mapping and a transcriptome file without any missing entries (with respect to annotation).
+
+
+**Inputs**
+
+    * Ensembl GTF file
+
+-----
+
+**Outputs**
+
+    * Gene annotations in tsv.
+]]></help>
+<citations>
+  <citation type="bibtex">
+@misc{github-hinxton-single-cell,
+author = {Jonathan Manning, EBI Gene Expression Team},
+year = {2019},
+title = {Hinxton Single Cell Analysis Environment},
+publisher = {GitHub},
+journal = {GitHub repository},
+url = {https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary},
+  }</citation>
+</citations>
+</tool>

diff -r 040d4b3a19d5 -r 247b439a78f7 gtf2gene.R
--- a/gtf2gene.R Wed Apr 03 12:05:26 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,8 +0,0 @@
-#!/usr/bin/env Rscript
-
-suppressPackageStartupMessages(require(rtracklayer))
-args <- commandArgs(TRUE)
-
-annotation <- elementMetadata(import( args[1] ))
-genes <- unique(annotation[['gene_id']])
-writeLines(genes[ ! is.na(genes)], con = 'genes.txt')

diff -r 040d4b3a19d5 -r 247b439a78f7 gtf2gene_list.xml
--- a/gtf2gene_list.xml Wed Apr 03 12:05:26 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,54 +0,0 @@
-<tool id="_ensembl_gtf2gene_list" name="GTF2GeneList" version="1.42.1+galaxy0">
-    <description>extracts gene ids from Ensembl GTF using rtracklayer</description>
-    <requirements>
-      <requirement type="package" version="1.42.1">bioconductor-rtracklayer</requirement>
-    </requirements>
-    <command detect_errors="exit_code"><![CDATA[
-       ln -s '$gtf_input' input.gtf;
-       Rscript $__tool_directory__/gtf2gene.R input.gtf
-     ]]></command>
-
-    <inputs>
-        <param name="gtf_input" type="data" format="gff" label="Ensembl GTF file" />
-    </inputs>
-
-    <outputs>
-        <data name="gene_list" format="tsv" from_work_dir="genes.txt" label="${tool.name} on ${on_string}"/>
-    </outputs>
-    <tests>
-      <test>
-        <param name="gtf_input" ftype="data" value="short_ensembl_gtf.gtf"/>
-        <output name="gene_list" ftype="data" value="genes.txt" compare="sim_size"/>
-      </test>
-    </tests>
-
-
-    <help><![CDATA[
-.. class:: infomark
-
-**What it does**
-
-Given an Ensembl GTF file, it will extract the list of all gene identifiers in the GTF to a simple tsv file.
-
-**Inputs**
-
-    * Ensembl GTF file
-
------
-
-**Outputs**
-
-    * Gene identifier list in tsv.
-]]></help>
-<citations>
-  <citation type="bibtex">
-@misc{github-hinxton-single-cell,
-author = {Pablo Moreno, EBI Gene Expression Team},
-year = {2019},
-title = {Hinxton Single Cell Anlysis Environment},
-publisher = {GitHub},
-journal = {GitHub repository},
-url = {https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary},
-  }</citation>
-</citations>
-</tool>

diff -r 040d4b3a19d5 -r 247b439a78f7 test-data/annotation.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotation.txt Fri Oct 18 10:10:54 2019 -0400

@@ -0,0 +1,11 @@
+gene_id chromosome start end width source type score phase gene_version gene_name gene_source gene_biotype mito
+ENSG00000223972 1 11869 14409 2541 havana gene NA NA 5 DDX11L1 havana transcribed_unprocessed_pseudogene False
+ENSG00000227232 1 14404 29570 15167 havana gene NA NA 5 WASH7P havana unprocessed_pseudogene False
+ENSG00000278267 1 17369 17436 68 mirbase gene NA NA 1 MIR6859-1 mirbase miRNA False
+ENSG00000243485 1 29554 31109 1556 havana gene NA NA 5 MIR1302-2HG havana lncRNA False
+ENSG00000284332 1 30366 30503 138 mirbase gene NA NA 1 MIR1302-2 mirbase miRNA False
+ENSG00000237613 1 34554 36081 1528 havana gene NA NA 2 FAM138A havana lncRNA False
+ENSG00000268020 1 52473 53312 840 havana gene NA NA 3 OR4G4P havana unprocessed_pseudogene False
+ENSG00000240361 1 57598 64116 6519 havana gene NA NA 2 OR4G11P havana transcribed_unprocessed_pseudogene False
+ENSG00000186092 1 65419 71585 6167 ensembl_havana gene NA NA 6 OR4F5 ensembl_havana protein_coding False
+ENSG00000238009 1 89295 133723 44429 ensembl_havana gene NA NA 6 AL627309.1 ensembl_havana lncRNA False

diff -r 040d4b3a19d5 -r 247b439a78f7 test-data/test.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.gtf Fri Oct 18 10:10:54 2019 -0400

b'@@ -0,0 +1,100 @@\n+#!genome-build GRCh38.p13\n+#!genome-version GRCh38\n+#!genome-date 2013-12\n+#!genome-build-accession NCBI:GCA_000001405.28\n+#!genebuild-last-updated 2019-06\n+1\thavana\tgene\t11869\t14409\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene";\n+1\thavana\ttranscript\t11869\t14409\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-202"; transcript_source "havana"; transcript_biotype "lncRNA"; tag "basic"; transcript_support_level "1";\n+1\thavana\texon\t11869\t12227\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; exon_number "1"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-202"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00002234944"; exon_version "1"; tag "basic"; transcript_support_level "1";\n+1\thavana\texon\t12613\t12721\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; exon_number "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-202"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00003582793"; exon_version "1"; tag "basic"; transcript_support_level "1";\n+1\thavana\texon\t13221\t14409\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000456328"; transcript_version "2"; exon_number "3"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-202"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00002312635"; exon_version "1"; tag 
"basic"; transcript_support_level "1";\n+1\thavana\ttranscript\t12010\t13670\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t12010\t12057\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "1"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001948541"; exon_version "1"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t12179\t12227\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "2"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001671638"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t12613\t12697\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "3"; gene_name "DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-201"; transcript_source "havana"; transcript_biotype "transcribed_unprocessed_pseudogene"; exon_id "ENSE00001758273"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n+1\thavana\texon\t12975\t13052\t.\t+\t.\tgene_id "ENSG00000223972"; gene_version "5"; transcript_id "ENST00000450305"; transcript_version "2"; exon_number "4"; gene_name 
"DDX11L1"; gene_source "havana"; gene_biotype "transcribed_unprocessed_pseudogene"; transcript_name "DDX11L1-2'..b'ncRNA"; exon_id "ENSE00001171005"; exon_version "3"; transcript_support_level "5";\n+1\thavana\texon\t112700\t112804\t.\t-\t.\tgene_id "ENSG00000238009"; gene_version "6"; transcript_id "ENST00000477740"; transcript_version "5"; exon_number "3"; gene_name "AL627309.1"; gene_source "ensembl_havana"; gene_biotype "lncRNA"; transcript_name "AL627309.1-202"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00001957285"; exon_version "1"; transcript_support_level "5";\n+1\thavana\texon\t92230\t92240\t.\t-\t.\tgene_id "ENSG00000238009"; gene_version "6"; transcript_id "ENST00000477740"; transcript_version "5"; exon_number "4"; gene_name "AL627309.1"; gene_source "ensembl_havana"; gene_biotype "lncRNA"; transcript_name "AL627309.1-202"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00001896976"; exon_version "1"; transcript_support_level "5";\n+1\thavana\ttranscript\t110953\t129173\t.\t-\t.\tgene_id "ENSG00000238009"; gene_version "6"; transcript_id "ENST00000471248"; transcript_version "1"; gene_name "AL627309.1"; gene_source "ensembl_havana"; gene_biotype "lncRNA"; transcript_name "AL627309.1-203"; transcript_source "havana"; transcript_biotype "lncRNA"; transcript_support_level "5";\n+1\thavana\texon\t129055\t129173\t.\t-\t.\tgene_id "ENSG00000238009"; gene_version "6"; transcript_id "ENST00000471248"; transcript_version "1"; exon_number "1"; gene_name "AL627309.1"; gene_source "ensembl_havana"; gene_biotype "lncRNA"; transcript_name "AL627309.1-203"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00001934975"; exon_version "1"; transcript_support_level "5";\n+1\thavana\texon\t112700\t112804\t.\t-\t.\tgene_id "ENSG00000238009"; gene_version "6"; transcript_id "ENST00000471248"; transcript_version "1"; exon_number "2"; gene_name "AL627309.1"; gene_source "ensembl_havana"; gene_biotype 
"lncRNA"; transcript_name "AL627309.1-203"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00001957285"; exon_version "1"; transcript_support_level "5";\n+1\thavana\texon\t110953\t111357\t.\t-\t.\tgene_id "ENSG00000238009"; gene_version "6"; transcript_id "ENST00000471248"; transcript_version "1"; exon_number "3"; gene_name "AL627309.1"; gene_source "ensembl_havana"; gene_biotype "lncRNA"; transcript_name "AL627309.1-203"; transcript_source "havana"; transcript_biotype "lncRNA"; exon_id "ENSE00001879696"; exon_version "1"; transcript_support_level "5";\n+1\tensembl\ttranscript\t120725\t133723\t.\t-\t.\tgene_id "ENSG00000238009"; gene_version "6"; transcript_id "ENST00000610542"; transcript_version "1"; gene_name "AL627309.1"; gene_source "ensembl_havana"; gene_biotype "lncRNA"; transcript_name "AL627309.1-205"; transcript_source "ensembl"; transcript_biotype "lncRNA"; tag "basic"; transcript_support_level "5";\n+1\tensembl\texon\t133374\t133723\t.\t-\t.\tgene_id "ENSG00000238009"; gene_version "6"; transcript_id "ENST00000610542"; transcript_version "1"; exon_number "1"; gene_name "AL627309.1"; gene_source "ensembl_havana"; gene_biotype "lncRNA"; transcript_name "AL627309.1-205"; transcript_source "ensembl"; transcript_biotype "lncRNA"; exon_id "ENSE00003748456"; exon_version "1"; tag "basic"; transcript_support_level "5";\n+1\tensembl\texon\t129055\t129223\t.\t-\t.\tgene_id "ENSG00000238009"; gene_version "6"; transcript_id "ENST00000610542"; transcript_version "1"; exon_number "2"; gene_name "AL627309.1"; gene_source "ensembl_havana"; gene_biotype "lncRNA"; transcript_name "AL627309.1-205"; transcript_source "ensembl"; transcript_biotype "lncRNA"; exon_id "ENSE00003734824"; exon_version "1"; tag "basic"; transcript_support_level "5";\n+1\tensembl\texon\t120874\t120932\t.\t-\t.\tgene_id "ENSG00000238009"; gene_version "6"; transcript_id "ENST00000610542"; transcript_version "1"; exon_number "3"; gene_name "AL627309.1"; gene_source 
"ensembl_havana"; gene_biotype "lncRNA"; transcript_name "AL627309.1-205"; transcript_source "ensembl"; transcript_biotype "lncRNA"; exon_id "ENSE00003740919"; exon_version "1"; tag "basic"; transcript_support_level "5";\n'