Repository 'length_and_gc_content'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/length_and_gc_content

Changeset 0:2ca1baabdae0 (2016-11-17)
Next changeset 1:f088370d2a3c (2018-01-28)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/length_and_gc_content commit b7dcd020c6a15fa55f392cc09cbc37580d6e75c4
added:
all_fasta.loc.sample
get_length_and_gc_content.r
get_length_and_gc_content.xml
test-data/gc.tab
test-data/gene_length.tab
test-data/in.fasta
test-data/in.gtf
test-data/length.tab
tool_data_table_conf.xml.sample
b
diff -r 000000000000 -r 2ca1baabdae0 all_fasta.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/all_fasta.loc.sample Thu Nov 17 16:41:06 2016 -0500
b
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id> <dbkey> <display_name> <file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
b
diff -r 000000000000 -r 2ca1baabdae0 get_length_and_gc_content.r
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_length_and_gc_content.r Thu Nov 17 16:41:06 2016 -0500
[
@@ -0,0 +1,59 @@
+# originally by Devon Ryan, https://www.biostars.org/p/84467/
+# Writes two tab-separated tables: gene id / summed exon width, and gene id / GC fraction.
+
+# On any error: print the message to stderr and quit with status 1 without saving.
+options(show.error.messages = FALSE, error = function() { cat(geterrmessage(), file = stderr()); q("no", 1, FALSE) })
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+suppressPackageStartupMessages({
+    library("GenomicRanges")
+    library("rtracklayer")
+    library("Rsamtools")
+    library("optparse")
+    library("data.table")
+})
+
+# No defaults: an option that is not supplied stays NULL and is reported below. GC uses "-c" (a short flag is a dash plus ONE letter).
+option_list <- list(
+    make_option(c("-g", "--gtf"), type = "character", help = "Input GTF file with gene / exon information."),
+    make_option(c("-f", "--fasta"), type = "character", help = "Fasta file that corresponds to the supplied GTF."),
+    make_option(c("-l", "--length"), type = "character", help = "Output file with gene name and length."),
+    make_option(c("-c", "--gc_content"), type = "character", help = "Output file with gene name and GC content.")
+)
+args <- parse_args(OptionParser(usage = "%prog [options] file", option_list = option_list))
+required <- c("gtf", "fasta", "length", "gc_content")
+absent <- required[vapply(required, function(opt) is.null(args[[opt]]), logical(1))]
+if (length(absent) > 0) stop("Missing required option(s): ", paste0("--", absent, collapse = ", "))
+gtf_file <- args[["gtf"]]
+fasta_file <- args[["fasta"]]
+length_file <- args[["length"]]  # not called `length`, so base::length is not masked
+gc_content_file <- args[["gc_content"]]
+
+# Load the exon records only, then reduce() the ranges gene by gene (merges overlapping exons)
+GTF <- import.gff(gtf_file, format = "gtf", genome = NA, feature.type = "exon")
+if (length(GTF) == 0) stop("No exon features found in GTF file: ", gtf_file)
+grl <- reduce(split(GTF, elementMetadata(GTF)$gene_id))
+reducedGTF <- unlist(grl, use.names = TRUE)
+elementMetadata(reducedGTF)$gene_id <- rep(names(grl), elementNROWS(grl))
+
+# Count G+C per reduced range; the fasta handle is closed as soon as the sequences are read
+FASTA <- FaFile(fasta_file)
+open(FASTA)
+elementMetadata(reducedGTF)$nGCs <- letterFrequency(getSeq(FASTA, reducedGTF), "GC")[, 1]
+invisible(close(FASTA))
+elementMetadata(reducedGTF)$widths <- width(reducedGTF)
+
+# For the ranges of one gene, return c(total width, G+C count / total width)
+calc_GC_length <- function(x) {
+    total_width <- sum(elementMetadata(x)$widths)
+    c(total_width, sum(elementMetadata(x)$nGCs) / total_width)
+}
+# vapply pins the result to a 2 x n_genes numeric matrix; t() turns it into one row per gene
+output <- t(vapply(split(reducedGTF, elementMetadata(reducedGTF)$gene_id), calc_GC_length, numeric(2)))
+output <- data.frame(setDT(data.frame(output), keep.rownames = TRUE)[])
+# Columns: 1 = gene id (the former row names), 2 = length, 3 = GC fraction
+write.table(output[, c(1, 2)], file = length_file, col.names = FALSE, row.names = FALSE, quote = FALSE, sep = "\t")
+write.table(output[, c(1, 3)], file = gc_content_file, col.names = FALSE, row.names = FALSE, quote = FALSE, sep = "\t")
+sessionInfo()
b
diff -r 000000000000 -r 2ca1baabdae0 get_length_and_gc_content.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_length_and_gc_content.xml Thu Nov 17 16:41:06 2016 -0500
[
@@ -0,0 +1,89 @@
+<tool id="length_and_gc_content" name="Gene length and gc content" version="0.1.0">
+    <description>from GTF file</description>
+    <!-- NOTE(review): the R script does not load reshape2; it loads GenomicRanges and Rsamtools, presumably pulled in with rtracklayer (confirm). -->
+    <requirements>
+        <requirement type="package" version="1.3.2">r-optparse</requirement>
+        <requirement type="package" version="1.4.1">r-reshape2</requirement>
+        <requirement type="package" version="1.9.6">r-data.table</requirement>
+        <requirement type="package" version="1.34.1">bioconductor-rtracklayer</requirement>
+    </requirements>
+    <!-- Mark the job as failed when one of R's error markers appears on stdout or stderr. -->
+    <stdio>
+        <regex match="Execution halted"
+               source="both"
+               level="fatal"
+               description="Execution halted." />
+        <regex match="Error in"
+               source="both"
+               level="fatal"
+               description="An undefined error occurred, please check your input carefully and contact your administrator." />
+        <regex match="Fatal error"
+               source="both"
+               level="fatal"
+               description="An undefined error occurred, please check your input carefully and contact your administrator." />
+    </stdio>
+    <command><![CDATA[
+        Rscript '$__tool_directory__'/get_length_and_gc_content.r --gtf '$gtf'
+        #if $fastaSource.genomeSource == 'indexed':
+            --fasta '$fastaSource.fasta_pre_installed.fields.path'
+        #else:
+            --fasta '$fastaSource.fasta_history'
+        #end if
+        --length '$length'
+        --gc_content '$gc_content'
+    ]]></command>
+    <inputs>
+        <param name="gtf" type="data" format="gtf" help="The GTF must match the FASTA file" label="GTF file for length and GC calculation"/>
+        <!-- Reference FASTA: an entry of the all_fasta data table or a history dataset; both choices are filtered by the dbkey of the GTF. -->
+        <conditional name="fastaSource">
+            <param help="choose history if you don't see the correct genome fasta" label="Select a reference fasta from your history or use a built-in fasta?" name="genomeSource" type="select">
+                <option value="indexed">Use a built-in fasta</option>
+                <option value="history">Use fasta from history</option>
+            </param>
+            <when value="indexed">
+                <param name="fasta_pre_installed" type="select" help="Select the fasta file from a list of pre-installed genomes" label="Select a fasta sequence">
+                    <options from_data_table="all_fasta">
+                        <filter type="data_meta" key="dbkey" ref="gtf" column="0"/>
+                    </options>
+                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="fasta_history" type="data" format="fasta" label="Select a fasta file that matches the supplied GTF file">
+                    <options>
+                        <filter type="data_meta" key="dbkey" ref="gtf"/>
+                    </options>
+                    <validator type="no_options" message="The current history does not include a fasta dataset with the build associated with the selected input file"/>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="length" format="tabular" label="gene length">
+            <actions>
+                <action name="column_names" type="metadata" default="gene,length" />
+            </actions>
+        </data>
+        <data name="gc_content" format="tabular" label="gene gc content">
+            <actions>
+                <action name="column_names" type="metadata" default="gene,gc_content" />
+            </actions>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="gtf" value="in.gtf" ftype="gtf"></param>
+            <param name="fastaSource|genomeSource" value="history"></param>
+            <param name="fastaSource|fasta_history" value="in.fasta" ftype="fasta"></param>
+            <output name="length" file="length.tab"></output>
+            <output name="gc_content" file="gc.tab"></output>
+        </test>
+    </tests>
+    <help>
+        **What it does**
+
+        Returns a tabular file with gene id and length and a tabular file with gene id and GC content, based on a supplied GTF and a FASTA file.
+    </help>
+    <citations>
+    </citations>
+</tool>
b
diff -r 000000000000 -r 2ca1baabdae0 test-data/gc.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gc.tab Thu Nov 17 16:41:06 2016 -0500
b
@@ -0,0 +1,1 @@
+ENSG00000162526 0.388349514563107
b
diff -r 000000000000 -r 2ca1baabdae0 test-data/gene_length.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gene_length.tab Thu Nov 17 16:41:06 2016 -0500
b
b'@@ -0,0 +1,948 @@\n+ENSG00000003096\t6983\n+ENSG00000004534\t7302\n+ENSG00000006327\t1848\n+ENSG00000006831\t5878\n+ENSG00000006837\t3057\n+ENSG00000007392\t6176\n+ENSG00000008735\t5901\n+ENSG00000009844\t7262\n+ENSG00000010322\t9161\n+ENSG00000010932\t3602\n+ENSG00000011638\t2558\n+ENSG00000012983\t7620\n+ENSG00000013275\t2387\n+ENSG00000014216\t7553\n+ENSG00000018408\t8413\n+ENSG00000018607\t1774\n+ENSG00000018699\t3185\n+ENSG00000022556\t6313\n+ENSG00000023041\t3748\n+ENSG00000023330\t2536\n+ENSG00000023697\t2844\n+ENSG00000023892\t2545\n+ENSG00000027697\t2571\n+ENSG00000029363\t9297\n+ENSG00000032389\t5775\n+ENSG00000033050\t4448\n+ENSG00000035403\t9992\n+ENSG00000042445\t4005\n+ENSG00000049541\t2211\n+ENSG00000057608\t4033\n+ENSG00000057935\t10580\n+ENSG00000059122\t8419\n+ENSG00000059588\t7743\n+ENSG00000063015\t5115\n+ENSG00000063322\t3835\n+ENSG00000064545\t3281\n+ENSG00000065000\t8601\n+ENSG00000065060\t9908\n+ENSG00000066739\t14778\n+ENSG00000066923\t8583\n+ENSG00000068028\t3683\n+ENSG00000068650\t13733\n+ENSG00000069712\t4556\n+ENSG00000070495\t5969\n+ENSG00000070610\t4803\n+ENSG00000070961\t9493\n+ENSG00000071889\t5098\n+ENSG00000072071\t8487\n+ENSG00000072121\t15706\n+ENSG00000072134\t8517\n+ENSG00000072864\t5525\n+ENSG00000072958\t14371\n+ENSG00000073614\t12106\n+ENSG00000074054\t11052\n+ENSG00000074071\t1097\n+ENSG00000074211\t9198\n+ENSG00000074319\t3839\n+ENSG00000074621\t9084\n+ENSG00000075399\t3428\n+ENSG00000076356\t13781\n+ENSG00000079215\t6265\n+ENSG00000079246\t5463\n+ENSG00000079785\t3833\n+ENSG00000079974\t5870\n+ENSG00000080603\t13674\n+ENSG00000080815\t9947\n+ENSG00000081087\t5325\n+ENSG00000082068\t7039\n+ENSG00000083535\t4253\n+ENSG00000083544\t6814\n+ENSG00000083720\t4194\n+ENSG00000084073\t3572\n+ENSG00000085365\t5106\n+ENSG00000085377\t3762\n+ENSG00000085982\t9352\n+ENSG00000085999\t3212\n+ENSG00000086205\t3824\n+ENSG00000086289\t2864\n+ENSG00000087586\t2928\n+ENSG00000088340\t9067\n+ENSG00000088448\t4563\n+ENSG00000089009\t4447\n+E
NSG00000090020\t5974\n+ENSG00000090273\t2856\n+ENSG00000090402\t6138\n+ENSG00000091140\t5299\n+ENSG00000092068\t5991\n+ENSG00000092098\t4982\n+ENSG00000092208\t2532\n+ENSG00000092445\t10945\n+ENSG00000099139\t12705\n+ENSG00000099910\t4646\n+ENSG00000100014\t7324\n+ENSG00000100027\t4713\n+ENSG00000100038\t6641\n+ENSG00000100106\t12238\n+ENSG00000100191\t2030\n+ENSG00000100292\t2405\n+ENSG00000100336\t4377\n+ENSG00000100354\t19998\n+ENSG00000100441\t8015\n+ENSG00000100478\t7223\n+ENSG00000100526\t1836\n+ENSG00000100577\t8477\n+ENSG00000100852\t10888\n+ENSG00000101247\t7217\n+ENSG00000101294\t10000\n+ENSG00000101473\t4213\n+ENSG00000102030\t4742\n+ENSG00000102349\t9022\n+ENSG00000102606\t11768\n+ENSG00000102804\t8586\n+ENSG00000102901\t5539\n+ENSG00000103035\t2740\n+ENSG00000103121\t12781\n+ENSG00000103932\t7046\n+ENSG00000104325\t3507\n+ENSG00000104331\t7594\n+ENSG00000104368\t6618\n+ENSG00000104450\t5055\n+ENSG00000105173\t2550\n+ENSG00000105220\t9112\n+ENSG00000105223\t5184\n+ENSG00000105325\t5802\n+ENSG00000105355\t2813\n+ENSG00000105438\t2251\n+ENSG00000105519\t4428\n+ENSG00000105568\t7437\n+ENSG00000105879\t5481\n+ENSG00000106012\t9240\n+ENSG00000106305\t1838\n+ENSG00000106683\t6902\n+ENSG00000106771\t9484\n+ENSG00000106789\t5717\n+ENSG00000106803\t1190\n+ENSG00000106868\t4183\n+ENSG00000106948\t10601\n+ENSG00000107295\t2682\n+ENSG00000107833\t928\n+ENSG00000108055\t4275\n+ENSG00000108091\t7345\n+ENSG00000108306\t11038\n+ENSG00000108591\t7411\n+ENSG00000108666\t5740\n+ENSG00000108848\t8151\n+ENSG00000108947\t3222\n+ENSG00000108953\t3847\n+ENSG00000108960\t3177\n+ENSG00000109079\t3889\n+ENSG00000109171\t6524\n+ENSG00000109610\t2128\n+ENSG00000109680\t3434\n+ENSG00000109771\t7360\n+ENSG00000109787\t6297\n+ENSG00000109920\t7596\n+ENSG00000109929\t5566\n+ENSG00000110002\t6403\n+ENSG00000110092\t4830\n+ENSG00000110906\t9865\n+ENSG00000111247\t2558\n+ENSG00000111249\t7648\n+ENSG00000111331\t8251\n+ENSG00000111652\t3113\n+ENSG00000111707\t5731\n+ENSG00000111860\t9462\n+
ENSG00000111877\t12496\n+ENSG00000112062\t6860\n+ENSG00000112306\t767\n+ENSG00000112312\t2476\n+ENSG00000112365\t5519\n+ENSG00000112406\t5614\n+ENSG00000112531\t17368\n+ENSG00000112874\t4304\n+ENSG00000113048\t6511\n+ENSG00000113328\t3096\n+ENSG00000113621\t5265\n+ENSG00000113649\t8714\n+ENSG00000113812\t4066\n+ENSG00000113916\t5938\n+ENSG00000114026\t8733\n+E'..b'213148\t464\n+ENSG00000213174\t414\n+ENSG00000213197\t694\n+ENSG00000213318\t783\n+ENSG00000213339\t3430\n+ENSG00000213493\t1451\n+ENSG00000213588\t3014\n+ENSG00000213711\t814\n+ENSG00000213742\t5308\n+ENSG00000213760\t2147\n+ENSG00000213793\t551\n+ENSG00000213864\t676\n+ENSG00000213880\t797\n+ENSG00000213904\t4208\n+ENSG00000213906\t3233\n+ENSG00000213917\t815\n+ENSG00000213971\t5091\n+ENSG00000214029\t15455\n+ENSG00000214174\t3858\n+ENSG00000214389\t784\n+ENSG00000214617\t4479\n+ENSG00000214694\t5490\n+ENSG00000214810\t311\n+ENSG00000214961\t1372\n+ENSG00000214975\t499\n+ENSG00000215286\t754\n+ENSG00000215333\t1283\n+ENSG00000216854\t553\n+ENSG00000216915\t1495\n+ENSG00000217716\t494\n+ENSG00000217801\t2171\n+ENSG00000218965\t609\n+ENSG00000219553\t723\n+ENSG00000220131\t354\n+ENSG00000220157\t961\n+ENSG00000220483\t871\n+ENSG00000221843\t6199\n+ENSG00000221909\t2717\n+ENSG00000222046\t1869\n+ENSG00000223382\t1326\n+ENSG00000223620\t1102\n+ENSG00000223877\t622\n+ENSG00000224016\t291\n+ENSG00000224520\t1447\n+ENSG00000224578\t1377\n+ENSG00000224628\t1519\n+ENSG00000224664\t316\n+ENSG00000224892\t997\n+ENSG00000225405\t390\n+ENSG00000225544\t392\n+ENSG00000225787\t306\n+ENSG00000225806\t1521\n+ENSG00000226067\t2075\n+ENSG00000226086\t822\n+ENSG00000226114\t361\n+ENSG00000226144\t454\n+ENSG00000226232\t1728\n+ENSG00000226268\t959\n+ENSG00000226478\t1126\n+ENSG00000226703\t812\n+ENSG00000226752\t7181\n+ENSG00000226790\t1139\n+ENSG00000226833\t1438\n+ENSG00000227006\t861\n+ENSG00000227057\t3115\n+ENSG00000227343\t600\n+ENSG00000227376\t552\n+ENSG00000227401\t284\n+ENSG00000227543\t3835\n+ENSG00000227666\t316\
n+ENSG00000227742\t946\n+ENSG00000227968\t999\n+ENSG00000228118\t459\n+ENSG00000228195\t881\n+ENSG00000228236\t315\n+ENSG00000228599\t742\n+ENSG00000228612\t2737\n+ENSG00000228981\t843\n+ENSG00000229044\t439\n+ENSG00000229344\t682\n+ENSG00000229503\t477\n+ENSG00000229956\t6794\n+ENSG00000230006\t8042\n+ENSG00000230022\t634\n+ENSG00000230074\t665\n+ENSG00000230118\t258\n+ENSG00000230146\t1176\n+ENSG00000230243\t319\n+ENSG00000230295\t351\n+ENSG00000230406\t421\n+ENSG00000230531\t1798\n+ENSG00000230551\t8636\n+ENSG00000230650\t3130\n+ENSG00000230667\t909\n+ENSG00000230863\t742\n+ENSG00000230869\t2418\n+ENSG00000230913\t744\n+ENSG00000231096\t390\n+ENSG00000231181\t559\n+ENSG00000231245\t402\n+ENSG00000231434\t2167\n+ENSG00000231615\t1337\n+ENSG00000231711\t4947\n+ENSG00000231955\t1411\n+ENSG00000232186\t1228\n+ENSG00000232581\t357\n+ENSG00000232676\t1124\n+ENSG00000232699\t736\n+ENSG00000232905\t946\n+ENSG00000232943\t400\n+ENSG00000233122\t2436\n+ENSG00000233454\t275\n+ENSG00000233503\t1501\n+ENSG00000233602\t619\n+ENSG00000233836\t3242\n+ENSG00000233846\t487\n+ENSG00000234231\t2095\n+ENSG00000234639\t1239\n+ENSG00000234722\t3487\n+ENSG00000234742\t555\n+ENSG00000234981\t792\n+ENSG00000235065\t475\n+ENSG00000235363\t225\n+ENSG00000235424\t288\n+ENSG00000235444\t618\n+ENSG00000235512\t292\n+ENSG00000235623\t574\n+ENSG00000235655\t411\n+ENSG00000235698\t1200\n+ENSG00000235750\t4783\n+ENSG00000235847\t965\n+ENSG00000235859\t1234\n+ENSG00000235892\t1677\n+ENSG00000236086\t262\n+ENSG00000236285\t837\n+ENSG00000236290\t703\n+ENSG00000236330\t886\n+ENSG00000236468\t1335\n+ENSG00000236570\t1227\n+ENSG00000236680\t1238\n+ENSG00000236681\t523\n+ENSG00000236735\t375\n+ENSG00000236739\t535\n+ENSG00000236753\t2715\n+ENSG00000236801\t474\n+ENSG00000236824\t13458\n+ENSG00000236946\t1087\n+ENSG00000237017\t4158\n+ENSG00000237033\t609\n+ENSG00000237054\t3194\n+ENSG00000237101\t1323\n+ENSG00000237357\t2579\n+ENSG00000237517\t7448\n+ENSG00000237939\t652\n+ENSG00000237977\t563\n+ENSG000
00238221\t500\n+ENSG00000238251\t514\n+ENSG00000239377\t420\n+ENSG00000239524\t400\n+ENSG00000239569\t736\n+ENSG00000239791\t1918\n+ENSG00000239887\t4495\n+ENSG00000239926\t747\n+ENSG00000240005\t589\n+ENSG00000240392\t575\n+ENSG00000240418\t893\n+ENSG00000240540\t1183\n+ENSG00000240821\t579\n+ENSG00000241258\t3540\n+ENSG00000241370\t1606\n+ENSG00000241494\t438\n+ENSG00000241680\t375\n+ENSG00000241697\t2611\n+ENSG00000241772\t1051\n+ENSG00000241923\t622\n+ENSG00000242061\t438\n+ENSG00000242140\t231\n+ENSG00000242349\t1427\n+ENSG00000242600\t2616\n+ENSG00000242612\t4046\n+ENSG00000242858\t602\n+ENSG00000243122\t413\n+ENSG00000243396\t402\n+ENSG00000243701\t4206\n+ENSG00000243779\t321\n+ENSG00000244171\t1291\n+ENSG00000244270\t403\n'
b
diff -r 000000000000 -r 2ca1baabdae0 test-data/in.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/in.fasta Thu Nov 17 16:41:06 2016 -0500
b
@@ -0,0 +1,2 @@
+>1
+AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGAAAAAAAAAAAAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGAAAAAAAAAATTTTT
\ No newline at end of file
b
diff -r 000000000000 -r 2ca1baabdae0 test-data/in.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/in.gtf Thu Nov 17 16:41:06 2016 -0500
b
@@ -0,0 +1,6 @@
+1 ensembl_havana gene 1 103 . + . gene_id "ENSG00000162526"; gene_version "4"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1";
+1 ensembl_havana transcript 1 103 . + . gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";
+1 ensembl_havana exon 1 103 . + . gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; exon_id "ENSE00002319515"; exon_version "1"; tag "basic"; transcript_support_level "NA";
+1 ensembl_havana CDS 1 100 . + 0 gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; protein_id "ENSP00000334393"; protein_version "3"; tag "basic"; transcript_support_level "NA";
+1 ensembl_havana start_codon 1 3 . + 0 gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";
+1 ensembl_havana stop_codon 101 103 . + 0 gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";
b
diff -r 000000000000 -r 2ca1baabdae0 test-data/length.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/length.tab Thu Nov 17 16:41:06 2016 -0500
b
@@ -0,0 +1,1 @@
+ENSG00000162526 103
b
diff -r 000000000000 -r 2ca1baabdae0 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Thu Nov 17 16:41:06 2016 -0500
b
@@ -0,0 +1,7 @@
+<tables>
+    <!-- all_fasta data table: one entry per FASTA file, read from tool-data/all_fasta.loc; lines starting with '#' are comments -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+</tables>