Repository 'length_and_gc_content'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/length_and_gc_content

Changeset 1:f088370d2a3c (2018-01-28)
Previous changeset 0:2ca1baabdae0 (2016-11-17) Next changeset 2:e3ba567abdf5 (2022-03-11)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/length_and_gc_content commit 0a56599c36b4968095ec5a3cb589f94fb139466c
modified:
get_length_and_gc_content.r
get_length_and_gc_content.xml
tool_data_table_conf.xml.sample
added:
test-data/cached_locally/all_fasta.loc
test-data/cached_locally/gene_sets.loc
test-data/cached_locally/ref.fasta
test-data/cached_locally/ref.gtf
tool-data/all_fasta.loc.sample
tool-data/gene_sets.loc.sample
tool_data_table_conf.xml.test
removed:
all_fasta.loc.sample
test-data/gene_length.tab
b
diff -r 2ca1baabdae0 -r f088370d2a3c all_fasta.loc.sample
--- a/all_fasta.loc.sample Thu Nov 17 16:41:06 2016 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,18 +0,0 @@
-#This file lists the locations and dbkeys of all the fasta files
-#under the "genome" directory (a directory that contains a directory
-#for each build). The script extract_fasta.py will generate the file
-#all_fasta.loc. This file has the format (white space characters are
-#TAB characters):
-#
-#<unique_build_id> <dbkey> <display_name> <file_path>
-#
-#So, all_fasta.loc could look something like this:
-#
-#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
-#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
-#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
-#
-#Your all_fasta.loc file should contain an entry for each individual
-#fasta file. So there will be multiple fasta files for each build,
-#such as with hg19 above.
-#
b
diff -r 2ca1baabdae0 -r f088370d2a3c get_length_and_gc_content.r
--- a/get_length_and_gc_content.r Thu Nov 17 16:41:06 2016 -0500
+++ b/get_length_and_gc_content.r Sun Jan 28 04:04:58 2018 -0500
[
@@ -15,9 +15,9 @@
 
 option_list <- list(
     make_option(c("-g","--gtf"), type="character", help="Input GTF file with gene / exon information."),
-    make_option(c("-f","--fasta"), type="character", default=FALSE, help="Fasta file that corresponds to the supplied GTF."),
-    make_option(c("-l","--length"), type="character", default=FALSE, help="Output file with gene name and length."),
-    make_option(c("-gc","--gc_content"), type="character", default=FALSE, help="Output file with gene name and GC content.")
+    make_option(c("-f","--fasta"), type="character", default=FALSE, help="FASTA file that corresponds to the supplied GTF."),
+    make_option(c("-l","--length"), type="character", default=FALSE, help="Output file with Gene ID and length."),
+    make_option(c("-gc","--gc_content"), type="character", default=FALSE, help="Output file with Gene ID and GC content.")
   )
 
 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
b
diff -r 2ca1baabdae0 -r f088370d2a3c get_length_and_gc_content.xml
--- a/get_length_and_gc_content.xml Thu Nov 17 16:41:06 2016 -0500
+++ b/get_length_and_gc_content.xml Sun Jan 28 04:04:58 2018 -0500
[
b'@@ -1,10 +1,10 @@\n-<tool id="length_and_gc_content" name="Gene length and gc content" version="0.1.0">\n-    <description>from GTF file</description>\n+<tool id="length_and_gc_content" name="Gene length and GC content" version="0.1.1">\n+    <description>from GTF and FASTA file</description>\n     <requirements>\n         <requirement type="package" version="1.3.2">r-optparse</requirement>\n-        <requirement type="package" version="1.4.1">r-reshape2</requirement>\n-        <requirement type="package" version="1.9.6">r-data.table</requirement>\n-        <requirement type="package" version="1.34.1">bioconductor-rtracklayer</requirement>\n+        <requirement type="package" version="1.4.2">r-reshape2</requirement>\n+        <requirement type="package" version="1.10.4">r-data.table</requirement>\n+        <requirement type="package" version="1.34.2">bioconductor-rtracklayer</requirement>\n     </requirements>\n     <stdio>\n         <regex match="Execution halted"\n@@ -20,70 +20,165 @@\n                level="fatal"\n                description="An undefined error occured, please check your input carefully and contact your administrator." 
/>\n     </stdio>\n+    <version_command><![CDATA[\n+        echo $(R --version | grep version | grep -v GNU)", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\\$otherPkgs\\$optparse\\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", reshape2 version" $(R --vanilla --slave -e "library(reshape2); cat(sessionInfo()\\$otherPkgs\\$reshape2\\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rtracklayer version" $(R --vanilla --slave -e "library(rtracklayer); cat(sessionInfo()\\$otherPkgs\\$rtracklayer\\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", data.table version" $(R --vanilla --slave -e "library(data.table); cat(sessionInfo()\\$otherPkgs\\$data.table\\$Version)" 2> /dev/null | grep -v -i "WARNING: ")\n+    ]]></version_command>\n     <command><![CDATA[\n-        Rscript \'$__tool_directory__\'/get_length_and_gc_content.r --gtf \'$gtf\'\n-        #if $fastaSource.genomeSource == \'indexed\':\n-            --fasta \'$fastaSource.fasta_pre_installed.fields.path\'\n-        #else:\n-            --fasta \'$fastaSource.fasta_history\'\n-        #end if\n-        --length \'$length\'\n-        --gc_content \'$gc_content\'\n+\n+## Get GTF\n+\n+#if $gtf_file.gtfSource == \'cached\':\n+    ln -s \'$gtf_file.gtf_pre_installed.fields.path\' gtf\n+#else:\n+    ln -s \'$gtf_file.gtf_history\' gtf\n+#end if\n+\n+&&\n+\n+## Get FASTA\n+\n+#if $fasta_file.fastaSource == \'indexed\':\n+    ln -s \'$fasta_file.fasta_pre_installed.fields.path\' fasta\n+#else:\n+    ln -s \'$fasta_file.fasta_history\' fasta\n+#end if\n+\n+&&\n+\n+Rscript \'$__tool_directory__/get_length_and_gc_content.r\'\n+\n+--gtf gtf\n+--fasta fasta\n+\n+#if $length_out:\n+    --length \'$length\'\n+#end if\n+\n+#if $gc_out:\n+    --gc_content \'$gc_content\'\n+#end if\n+\n     ]]></command>\n+\n     <inputs>\n-        <param name="gtf" type="data" format="gtf" help="The GTF must match the FASTA file" label="GTF file for length and GC calculation"/>\n-        <conditional 
name="fastaSource">\n-            <param help="choose history if you don\'t see the correct genome fasta" label="Select a reference fasta from your history or use a built-in fasta?" name="genomeSource" type="select">\n-                <option value="indexed">Use a built-in fasta</option>\n-                <option value="history">Use fasta from history</option>\n+        <conditional name="gtf_file">\n+            <param name="gtfSource" type="select" label="Select a built-in GTF file or one from your history"  help="Choose history if you don\'t see the correct GTF" >\n+                <option value="cached" selected="true">Use a built-in GTF</option>\n+                <option value="history">Use a GTF from history</option>\n+            </param>\n+            <when value="cached">\n+                <param name="gtf_pre_installed" type="select" label="Select a GTF file" help="Select the GTF from a list of pre-installed files">\n+                    <option'..b'that matches the supplied GTF file" />\n+            </when>\n         </conditional>\n+\n+\n+        <param name="length_out" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Output length file?" help="Default: Yes" />\n+        <param name="gc_out" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Output GC content file?" 
help="Default: Yes" />\n+\n     </inputs>\n+\n     <outputs>\n-        <data name="length" format="tabular" label="gene length">\n+        <data name="length" format="tabular" label="Gene length">\n+            <filter>length_out is True</filter>\n             <actions>\n-                <action name="column_names" type="metadata" default="gene,length" />\n+                <action name="column_names" type="metadata" default="GeneID,Length" />\n             </actions>\n         </data>\n-        <data name="gc_content" format="tabular" label="gene gc content">\n+        <data name="gc_content" format="tabular" label="Gene GC content">\n+            <filter>gc_out is True</filter>\n              <actions>\n-                <action name="column_names" type="metadata" default="gene,gc_content" />\n+                <action name="column_names" type="metadata" default="GeneID,GC_content" />\n             </actions>\n         </data>\n     </outputs>\n+\n     <tests>\n-        <test>\n-            <param name="gtf" value="in.gtf" ftype="gtf"></param>\n-            <param name="fastaSource|genomeSource" value="history"></param>\n-            <param name="fastaSource|fasta_history" value="in.fasta" ftype="fasta"></param>\n-            <output name="length" file="length.tab"></output>\n-            <output name="gc_content" file="gc.tab"></output>\n+        <!-- Ensure length and GC files are output -->\n+        <test expect_num_outputs="2">\n+            <param name="gtfSource" value="history" />\n+            <param name="gtf_history" ftype="gtf" value="in.gtf" />\n+            <param name="fastaSource" value="history" />\n+            <param name="fasta_history" ftype="fasta" value="in.fasta" />\n+            <output name="length" file="length.tab" />\n+            <output name="gc_content" file="gc.tab" />\n+        </test>\n+        <!-- Ensure built-in fasta and gtf work -->\n+        <test expect_num_outputs="2">\n+            <param name="gtfSource" value="cached" 
/>\n+            <param name="fastaSource" value="indexed" />\n+            <output name="length" file="length.tab" />\n+            <output name="gc_content" file="gc.tab" />\n+        </test>\n+        <!-- Ensure optional gc content works  -->\n+        <test expect_num_outputs="1">\n+            <param name="gtfSource" value="cached" />\n+            <param name="fastaSource" value="indexed" />\n+            <param name="gc_out" value="False" />\n+            <output name="length" file="length.tab" />\n+        </test>\n+        <!-- Ensure optional length works -->\n+        <test expect_num_outputs="1">\n+            <param name="gtfSource" value="cached" />\n+            <param name="fastaSource" value="indexed" />\n+            <param name="length_out" value="False" />\n+            <output name="gc_content" file="gc.tab" />\n         </test>\n     </tests>\n-    <help>\n+    <help><![CDATA[\n+\n+**What it does**\n+\n+.. class:: infomark\n \n-        **What it does**\n+This tool calculates the length and GC content for the genes in a GTF file. It requires a FASTA file that is the same genome version as the GTF.\n+\n+-----\n+\n+**Inputs**\n+\n+- a GTF file\n+- a FASTA file\n \n-        Returns a tabular file with gene id and length and a tabular file with gene id and GC content, based on a supplied GTF and a FASTA file.\n+-----\n+\n+**Outputs**\n+\n+- a tabular file with Gene ID and length\n+- a tabular file with Gene ID and GC content\n \n+-----\n \n-        </help>\n+**More Information**\n+\n+To calculate gene length, this tool counts the number of bases in all exons of a gene, after merging any overlapping exons from different transcripts.\n+\n+    ]]></help>\n     <citations>\n     </citations>\n </tool>\n'
b
diff -r 2ca1baabdae0 -r f088370d2a3c test-data/cached_locally/all_fasta.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/all_fasta.loc Sun Jan 28 04:04:58 2018 -0500
b
@@ -0,0 +1,1 @@
+hg38 hg38 Human (hg38) ${__HERE__}/ref.fasta
b
diff -r 2ca1baabdae0 -r f088370d2a3c test-data/cached_locally/gene_sets.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/gene_sets.loc Sun Jan 28 04:04:58 2018 -0500
b
@@ -0,0 +1,1 @@
+hg38 hg38 hg38GTF ${__HERE__}/ref.gtf
b
diff -r 2ca1baabdae0 -r f088370d2a3c test-data/cached_locally/ref.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/ref.fasta Sun Jan 28 04:04:58 2018 -0500
b
@@ -0,0 +1,2 @@
+>1
+AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGAAAAAAAAAAAAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGAAAAAAAAAATTTTT
\ No newline at end of file
b
diff -r 2ca1baabdae0 -r f088370d2a3c test-data/cached_locally/ref.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/ref.gtf Sun Jan 28 04:04:58 2018 -0500
b
@@ -0,0 +1,6 @@
+1 ensembl_havana gene 1 103 . + . gene_id "ENSG00000162526"; gene_version "4"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1";
+1 ensembl_havana transcript 1 103 . + . gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";
+1 ensembl_havana exon 1 103 . + . gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; exon_id "ENSE00002319515"; exon_version "1"; tag "basic"; transcript_support_level "NA";
+1 ensembl_havana CDS 1 100 . + 0 gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; protein_id "ENSP00000334393"; protein_version "3"; tag "basic"; transcript_support_level "NA";
+1 ensembl_havana start_codon 1 3 . + 0 gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";
+1 ensembl_havana stop_codon 101 103 . + 0 gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";
b
diff -r 2ca1baabdae0 -r f088370d2a3c test-data/gene_length.tab
--- a/test-data/gene_length.tab Thu Nov 17 16:41:06 2016 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,948 +0,0 @@\n-ENSG00000003096\t6983\n-ENSG00000004534\t7302\n-ENSG00000006327\t1848\n-ENSG00000006831\t5878\n-ENSG00000006837\t3057\n-ENSG00000007392\t6176\n-ENSG00000008735\t5901\n-ENSG00000009844\t7262\n-ENSG00000010322\t9161\n-ENSG00000010932\t3602\n-ENSG00000011638\t2558\n-ENSG00000012983\t7620\n-ENSG00000013275\t2387\n-ENSG00000014216\t7553\n-ENSG00000018408\t8413\n-ENSG00000018607\t1774\n-ENSG00000018699\t3185\n-ENSG00000022556\t6313\n-ENSG00000023041\t3748\n-ENSG00000023330\t2536\n-ENSG00000023697\t2844\n-ENSG00000023892\t2545\n-ENSG00000027697\t2571\n-ENSG00000029363\t9297\n-ENSG00000032389\t5775\n-ENSG00000033050\t4448\n-ENSG00000035403\t9992\n-ENSG00000042445\t4005\n-ENSG00000049541\t2211\n-ENSG00000057608\t4033\n-ENSG00000057935\t10580\n-ENSG00000059122\t8419\n-ENSG00000059588\t7743\n-ENSG00000063015\t5115\n-ENSG00000063322\t3835\n-ENSG00000064545\t3281\n-ENSG00000065000\t8601\n-ENSG00000065060\t9908\n-ENSG00000066739\t14778\n-ENSG00000066923\t8583\n-ENSG00000068028\t3683\n-ENSG00000068650\t13733\n-ENSG00000069712\t4556\n-ENSG00000070495\t5969\n-ENSG00000070610\t4803\n-ENSG00000070961\t9493\n-ENSG00000071889\t5098\n-ENSG00000072071\t8487\n-ENSG00000072121\t15706\n-ENSG00000072134\t8517\n-ENSG00000072864\t5525\n-ENSG00000072958\t14371\n-ENSG00000073614\t12106\n-ENSG00000074054\t11052\n-ENSG00000074071\t1097\n-ENSG00000074211\t9198\n-ENSG00000074319\t3839\n-ENSG00000074621\t9084\n-ENSG00000075399\t3428\n-ENSG00000076356\t13781\n-ENSG00000079215\t6265\n-ENSG00000079246\t5463\n-ENSG00000079785\t3833\n-ENSG00000079974\t5870\n-ENSG00000080603\t13674\n-ENSG00000080815\t9947\n-ENSG00000081087\t5325\n-ENSG00000082068\t7039\n-ENSG00000083535\t4253\n-ENSG00000083544\t6814\n-ENSG00000083720\t4194\n-ENSG00000084073\t3572\n-ENSG00000085365\t5106\n-ENSG00000085377\t3762\n-ENSG00000085982\t9352\n-ENSG00000085999\t3212\n-ENSG00000086205\t3824\n-ENSG00000086289\t2864\n-ENSG00000087586\t2928\n-ENSG00000088340\t9067\n-ENSG00000088448\t4563\n-ENSG00000089009\t4447\n-E
NSG00000090020\t5974\n-ENSG00000090273\t2856\n-ENSG00000090402\t6138\n-ENSG00000091140\t5299\n-ENSG00000092068\t5991\n-ENSG00000092098\t4982\n-ENSG00000092208\t2532\n-ENSG00000092445\t10945\n-ENSG00000099139\t12705\n-ENSG00000099910\t4646\n-ENSG00000100014\t7324\n-ENSG00000100027\t4713\n-ENSG00000100038\t6641\n-ENSG00000100106\t12238\n-ENSG00000100191\t2030\n-ENSG00000100292\t2405\n-ENSG00000100336\t4377\n-ENSG00000100354\t19998\n-ENSG00000100441\t8015\n-ENSG00000100478\t7223\n-ENSG00000100526\t1836\n-ENSG00000100577\t8477\n-ENSG00000100852\t10888\n-ENSG00000101247\t7217\n-ENSG00000101294\t10000\n-ENSG00000101473\t4213\n-ENSG00000102030\t4742\n-ENSG00000102349\t9022\n-ENSG00000102606\t11768\n-ENSG00000102804\t8586\n-ENSG00000102901\t5539\n-ENSG00000103035\t2740\n-ENSG00000103121\t12781\n-ENSG00000103932\t7046\n-ENSG00000104325\t3507\n-ENSG00000104331\t7594\n-ENSG00000104368\t6618\n-ENSG00000104450\t5055\n-ENSG00000105173\t2550\n-ENSG00000105220\t9112\n-ENSG00000105223\t5184\n-ENSG00000105325\t5802\n-ENSG00000105355\t2813\n-ENSG00000105438\t2251\n-ENSG00000105519\t4428\n-ENSG00000105568\t7437\n-ENSG00000105879\t5481\n-ENSG00000106012\t9240\n-ENSG00000106305\t1838\n-ENSG00000106683\t6902\n-ENSG00000106771\t9484\n-ENSG00000106789\t5717\n-ENSG00000106803\t1190\n-ENSG00000106868\t4183\n-ENSG00000106948\t10601\n-ENSG00000107295\t2682\n-ENSG00000107833\t928\n-ENSG00000108055\t4275\n-ENSG00000108091\t7345\n-ENSG00000108306\t11038\n-ENSG00000108591\t7411\n-ENSG00000108666\t5740\n-ENSG00000108848\t8151\n-ENSG00000108947\t3222\n-ENSG00000108953\t3847\n-ENSG00000108960\t3177\n-ENSG00000109079\t3889\n-ENSG00000109171\t6524\n-ENSG00000109610\t2128\n-ENSG00000109680\t3434\n-ENSG00000109771\t7360\n-ENSG00000109787\t6297\n-ENSG00000109920\t7596\n-ENSG00000109929\t5566\n-ENSG00000110002\t6403\n-ENSG00000110092\t4830\n-ENSG00000110906\t9865\n-ENSG00000111247\t2558\n-ENSG00000111249\t7648\n-ENSG00000111331\t8251\n-ENSG00000111652\t3113\n-ENSG00000111707\t5731\n-ENSG00000111860\t9462\n-
ENSG00000111877\t12496\n-ENSG00000112062\t6860\n-ENSG00000112306\t767\n-ENSG00000112312\t2476\n-ENSG00000112365\t5519\n-ENSG00000112406\t5614\n-ENSG00000112531\t17368\n-ENSG00000112874\t4304\n-ENSG00000113048\t6511\n-ENSG00000113328\t3096\n-ENSG00000113621\t5265\n-ENSG00000113649\t8714\n-ENSG00000113812\t4066\n-ENSG00000113916\t5938\n-ENSG00000114026\t8733\n-E'..b'213148\t464\n-ENSG00000213174\t414\n-ENSG00000213197\t694\n-ENSG00000213318\t783\n-ENSG00000213339\t3430\n-ENSG00000213493\t1451\n-ENSG00000213588\t3014\n-ENSG00000213711\t814\n-ENSG00000213742\t5308\n-ENSG00000213760\t2147\n-ENSG00000213793\t551\n-ENSG00000213864\t676\n-ENSG00000213880\t797\n-ENSG00000213904\t4208\n-ENSG00000213906\t3233\n-ENSG00000213917\t815\n-ENSG00000213971\t5091\n-ENSG00000214029\t15455\n-ENSG00000214174\t3858\n-ENSG00000214389\t784\n-ENSG00000214617\t4479\n-ENSG00000214694\t5490\n-ENSG00000214810\t311\n-ENSG00000214961\t1372\n-ENSG00000214975\t499\n-ENSG00000215286\t754\n-ENSG00000215333\t1283\n-ENSG00000216854\t553\n-ENSG00000216915\t1495\n-ENSG00000217716\t494\n-ENSG00000217801\t2171\n-ENSG00000218965\t609\n-ENSG00000219553\t723\n-ENSG00000220131\t354\n-ENSG00000220157\t961\n-ENSG00000220483\t871\n-ENSG00000221843\t6199\n-ENSG00000221909\t2717\n-ENSG00000222046\t1869\n-ENSG00000223382\t1326\n-ENSG00000223620\t1102\n-ENSG00000223877\t622\n-ENSG00000224016\t291\n-ENSG00000224520\t1447\n-ENSG00000224578\t1377\n-ENSG00000224628\t1519\n-ENSG00000224664\t316\n-ENSG00000224892\t997\n-ENSG00000225405\t390\n-ENSG00000225544\t392\n-ENSG00000225787\t306\n-ENSG00000225806\t1521\n-ENSG00000226067\t2075\n-ENSG00000226086\t822\n-ENSG00000226114\t361\n-ENSG00000226144\t454\n-ENSG00000226232\t1728\n-ENSG00000226268\t959\n-ENSG00000226478\t1126\n-ENSG00000226703\t812\n-ENSG00000226752\t7181\n-ENSG00000226790\t1139\n-ENSG00000226833\t1438\n-ENSG00000227006\t861\n-ENSG00000227057\t3115\n-ENSG00000227343\t600\n-ENSG00000227376\t552\n-ENSG00000227401\t284\n-ENSG00000227543\t3835\n-ENSG00000227666\t316\
n-ENSG00000227742\t946\n-ENSG00000227968\t999\n-ENSG00000228118\t459\n-ENSG00000228195\t881\n-ENSG00000228236\t315\n-ENSG00000228599\t742\n-ENSG00000228612\t2737\n-ENSG00000228981\t843\n-ENSG00000229044\t439\n-ENSG00000229344\t682\n-ENSG00000229503\t477\n-ENSG00000229956\t6794\n-ENSG00000230006\t8042\n-ENSG00000230022\t634\n-ENSG00000230074\t665\n-ENSG00000230118\t258\n-ENSG00000230146\t1176\n-ENSG00000230243\t319\n-ENSG00000230295\t351\n-ENSG00000230406\t421\n-ENSG00000230531\t1798\n-ENSG00000230551\t8636\n-ENSG00000230650\t3130\n-ENSG00000230667\t909\n-ENSG00000230863\t742\n-ENSG00000230869\t2418\n-ENSG00000230913\t744\n-ENSG00000231096\t390\n-ENSG00000231181\t559\n-ENSG00000231245\t402\n-ENSG00000231434\t2167\n-ENSG00000231615\t1337\n-ENSG00000231711\t4947\n-ENSG00000231955\t1411\n-ENSG00000232186\t1228\n-ENSG00000232581\t357\n-ENSG00000232676\t1124\n-ENSG00000232699\t736\n-ENSG00000232905\t946\n-ENSG00000232943\t400\n-ENSG00000233122\t2436\n-ENSG00000233454\t275\n-ENSG00000233503\t1501\n-ENSG00000233602\t619\n-ENSG00000233836\t3242\n-ENSG00000233846\t487\n-ENSG00000234231\t2095\n-ENSG00000234639\t1239\n-ENSG00000234722\t3487\n-ENSG00000234742\t555\n-ENSG00000234981\t792\n-ENSG00000235065\t475\n-ENSG00000235363\t225\n-ENSG00000235424\t288\n-ENSG00000235444\t618\n-ENSG00000235512\t292\n-ENSG00000235623\t574\n-ENSG00000235655\t411\n-ENSG00000235698\t1200\n-ENSG00000235750\t4783\n-ENSG00000235847\t965\n-ENSG00000235859\t1234\n-ENSG00000235892\t1677\n-ENSG00000236086\t262\n-ENSG00000236285\t837\n-ENSG00000236290\t703\n-ENSG00000236330\t886\n-ENSG00000236468\t1335\n-ENSG00000236570\t1227\n-ENSG00000236680\t1238\n-ENSG00000236681\t523\n-ENSG00000236735\t375\n-ENSG00000236739\t535\n-ENSG00000236753\t2715\n-ENSG00000236801\t474\n-ENSG00000236824\t13458\n-ENSG00000236946\t1087\n-ENSG00000237017\t4158\n-ENSG00000237033\t609\n-ENSG00000237054\t3194\n-ENSG00000237101\t1323\n-ENSG00000237357\t2579\n-ENSG00000237517\t7448\n-ENSG00000237939\t652\n-ENSG00000237977\t563\n-ENSG000
00238221\t500\n-ENSG00000238251\t514\n-ENSG00000239377\t420\n-ENSG00000239524\t400\n-ENSG00000239569\t736\n-ENSG00000239791\t1918\n-ENSG00000239887\t4495\n-ENSG00000239926\t747\n-ENSG00000240005\t589\n-ENSG00000240392\t575\n-ENSG00000240418\t893\n-ENSG00000240540\t1183\n-ENSG00000240821\t579\n-ENSG00000241258\t3540\n-ENSG00000241370\t1606\n-ENSG00000241494\t438\n-ENSG00000241680\t375\n-ENSG00000241697\t2611\n-ENSG00000241772\t1051\n-ENSG00000241923\t622\n-ENSG00000242061\t438\n-ENSG00000242140\t231\n-ENSG00000242349\t1427\n-ENSG00000242600\t2616\n-ENSG00000242612\t4046\n-ENSG00000242858\t602\n-ENSG00000243122\t413\n-ENSG00000243396\t402\n-ENSG00000243701\t4206\n-ENSG00000243779\t321\n-ENSG00000244171\t1291\n-ENSG00000244270\t403\n'
b
diff -r 2ca1baabdae0 -r f088370d2a3c tool-data/all_fasta.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample Sun Jan 28 04:04:58 2018 -0500
b
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the genome and transcriptome fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id> <dbkey> <display_name> <file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel4.5 apiMel4.5 Honeybee (Apis mellifera): apiMel4.5 /path/to/genome/apiMel4.5/apiMel4.5.fa
+#hg38canon hg38 Human (Homo sapiens): hg38 Canonical /path/to/genome/hg38/hg38canon.fa
+#hg38full hg38 Human (Homo sapiens): hg38 Full /path/to/genome/hg38/hg38full.fa
+#hg38full.90 hg38    Human (Homo sapiens): hg38 Full Trans v90 /path/to/genome/hg38/hg38fulltrans.fa
+
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg38 above.
+
b
diff -r 2ca1baabdae0 -r f088370d2a3c tool-data/gene_sets.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/gene_sets.loc.sample Sun Jan 28 04:04:58 2018 -0500
b
@@ -0,0 +1,15 @@
+# This is a sample file distributed with featureCounts that enables it and other tools to use gene/exon annotations in the GFF/GTF format.
+# 
+# The gene_sets.loc file syntax is:
+#<unique_build_id> <dbkey> <display_name> <path>
+# 
+# Please ensure that the above fields are tab separated.
+# 
+# In case you have TWO or MORE providers PER dbkey, the one mentioned
+# first in the file, should have the "default" priority.
+#
+#Example:
+#
+#Homo_sapiens.GRCh38.90 hg38 GRCh38 (hg38) annotation from Ensembl, release 90 /depot/data2/galaxy/hg38/gene_sets/Homo_sapiens.GRCh38.90.gtf
+#Homo_sapiens.GRCh37.87 hg19 GRCh37 (hg19) annotation from Ensembl, release 87 /depot/data2/galaxy/hg19/gene_sets/Homo_sapiens.GRCh37.87.gtf
+
b
diff -r 2ca1baabdae0 -r f088370d2a3c tool_data_table_conf.xml.sample
--- a/tool_data_table_conf.xml.sample Thu Nov 17 16:41:06 2016 -0500
+++ b/tool_data_table_conf.xml.sample Sun Jan 28 04:04:58 2018 -0500
b
@@ -1,7 +1,12 @@
 <tables>
     <!-- Locations of all fasta files under genome directory -->
-    <table name="all_fasta" comment_char="#">
+    <table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
         <columns>value, dbkey, name, path</columns>
         <file path="tool-data/all_fasta.loc" />
     </table>
+    <!-- Locations of all gtf files with annotations of genome builds -->
+    <table name="gene_sets" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/gene_sets.loc" />
+    </table>
 </tables>
b
diff -r 2ca1baabdae0 -r f088370d2a3c tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Sun Jan 28 04:04:58 2018 -0500
b
@@ -0,0 +1,10 @@
+<tables>
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/cached_locally/all_fasta.loc" />
+    </table>
+    <table name="gene_sets" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/cached_locally/gene_sets.loc" />
+    </table>
+</tables>
\ No newline at end of file