changeset 0:fabda887a71f draft

Imported from capsule None
author devteam
date Mon, 28 Jul 2014 11:56:10 -0400
parents
children 188392a0d0a8
files count_gff_features.py count_gff_features.xml test-data/count_gff_features_out1.txt test-data/count_gff_features_out2.txt test-data/gff2bed_in2.gff test-data/gff_filter_by_feature_count_out1.gff tool_dependencies.xml
diffstat 7 files changed, 110 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/count_gff_features.py	Mon Jul 28 11:56:10 2014 -0400
@@ -0,0 +1,18 @@
+#!/usr/bin/env python
+# This tool takes a GFF file as input and counts the number of features (not lines) in it.
+
+import sys, fileinput
+from galaxy import eggs
+from galaxy.datatypes.util.gff_util import GFFReaderWrapper
+from bx.intervals.io import GenomicInterval
+
+# Get args: every command-line argument is treated as an input file path.
+input_file = sys.argv[1:]
+
+# Count features: only items the reader yields as GenomicInterval instances are counted.
+count = 0
+for feature in GFFReaderWrapper( fileinput.FileInput( input_file ), fix_strand=True ):
+    if isinstance( feature, GenomicInterval ):
+        count += 1
+
+print count
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/count_gff_features.xml	Mon Jul 28 11:56:10 2014 -0400
@@ -0,0 +1,29 @@
+<tool id="count_gff_features" name="Count GFF Features" version="0.1">
+    <description></description>
+    <requirements>
+        <requirement type="package" version="0.7.1">bx-python</requirement>
+    </requirements>
+    <command interpreter="python">
+        count_gff_features.py $input &gt; $output
+    </command>
+    <inputs>
+        <param format="gff" name="input" type="data" label="GFF Dataset"/>
+    </inputs>
+    <outputs>
+        <data format="txt" name="output"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="gff2bed_in2.gff"/>
+            <output name="output" file="count_gff_features_out1.txt"/>
+        </test>
+        <test>
+            <param name="input" value="gff_filter_by_feature_count_out1.gff"/>
+            <output name="output" file="count_gff_features_out2.txt"/>
+        </test>
+    </tests>
+    <help>
+        Counts the number of features in a GFF dataset. GFF features are often spread across multiple lines; this tool counts the number of
+        features in the dataset rather than the number of lines.
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/count_gff_features_out1.txt	Mon Jul 28 11:56:10 2014 -0400
@@ -0,0 +1,1 @@
+3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/count_gff_features_out2.txt	Mon Jul 28 11:56:10 2014 -0400
@@ -0,0 +1,1 @@
+14
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gff2bed_in2.gff	Mon Jul 28 11:56:10 2014 -0400
@@ -0,0 +1,13 @@
+chr1	Cufflinks	exon	3204563	3207049	1000	-	.	gene_id "Xkr4"; transcript_id "Xkr4"; exon_number "1"; FPKM "0.3924207844"; frac "1.000000"; conf_lo "0.000000"; conf_hi "0.845549"; cov "0.022289";
+chr1	Cufflinks	exon	3411783	3411982	1000	-	.	gene_id "Xkr4"; transcript_id "Xkr4"; exon_number "2"; FPKM "0.3924207844"; frac "1.000000"; conf_lo "0.000000"; conf_hi "0.845549"; cov "0.022289";
+chr1	Cufflinks	exon	3660633	3661579	1000	-	.	gene_id "Xkr4"; transcript_id "Xkr4"; exon_number "3"; FPKM "0.3924207844"; frac "1.000000"; conf_lo "0.000000"; conf_hi "0.845549"; cov "0.022289";
+chr1	Cufflinks	exon	4481009	4482749	1000	-	.	gene_id "Sox17"; transcript_id "Sox17"; exon_number "1"; FPKM "1.5186976896"; frac "1.000000"; conf_lo "0.558189"; conf_hi "2.479206"; cov "0.086262";
+chr1	Cufflinks	exon	4483181	4483547	1000	-	.	gene_id "Sox17"; transcript_id "Sox17"; exon_number "2"; FPKM "1.5186976896"; frac "1.000000"; conf_lo "0.558189"; conf_hi "2.479206"; cov "0.086262";
+chr1	Cufflinks	exon	4483853	4483944	1000	-	.	gene_id "Sox17"; transcript_id "Sox17"; exon_number "3"; FPKM "1.5186976896"; frac "1.000000"; conf_lo "0.558189"; conf_hi "2.479206"; cov "0.086262";
+chr1	Cufflinks	exon	4485217	4486023	1000	-	.	gene_id "Sox17"; transcript_id "Sox17"; exon_number "4"; FPKM "1.5186976896"; frac "1.000000"; conf_lo "0.558189"; conf_hi "2.479206"; cov "0.086262";
+chr1	Cufflinks	exon	4486372	4486494	1000	-	.	gene_id "Sox17"; transcript_id "Sox17"; exon_number "5"; FPKM "1.5186976896"; frac "1.000000"; conf_lo "0.558189"; conf_hi "2.479206"; cov "0.086262";
+chr1	Cufflinks	exon	4763279	4766882	1000	-	.	gene_id "Mrpl15"; transcript_id "Mrpl15"; exon_number "1"; FPKM "9.1596238357"; frac "1.000000"; conf_lo "7.124003"; conf_hi "11.195245"; cov "0.520266";
+chr1	Cufflinks	exon	4767606	4767729	1000	-	.	gene_id "Mrpl15"; transcript_id "Mrpl15"; exon_number "2"; FPKM "9.1596238357"; frac "1.000000"; conf_lo "7.124003"; conf_hi "11.195245"; cov "0.520266";
+chr1	Cufflinks	exon	4772649	4772814	1000	-	.	gene_id "Mrpl15"; transcript_id "Mrpl15"; exon_number "3"; FPKM "9.1596238357"; frac "1.000000"; conf_lo "7.124003"; conf_hi "11.195245"; cov "0.520266";
+chr1	Cufflinks	exon	4774032	4774186	1000	-	.	gene_id "Mrpl15"; transcript_id "Mrpl15"; exon_number "4"; FPKM "9.1596238357"; frac "1.000000"; conf_lo "7.124003"; conf_hi "11.195245"; cov "0.520266";
+chr1	Cufflinks	exon	4775654	4775807	1000	-	.	gene_id "Mrpl15"; transcript_id "Mrpl15"; exon_number "5"; FPKM "9.1596238357"; frac "1.000000"; conf_lo "7.124003"; conf_hi "11.195245"; cov "0.520266";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gff_filter_by_feature_count_out1.gff	Mon Jul 28 11:56:10 2014 -0400
@@ -0,0 +1,42 @@
+chr13	Cufflinks	transcript	3565855	3566203	1000	-	.	gene_id "CUFF.50195"; transcript_id "CUFF.50195.1"; FPKM "29.8710998584"; frac "1.000000"; conf_lo "7.290671"; conf_hi "52.451529"; cov "1.909091";
+chr13	Cufflinks	exon	3565855	3565913	1000	-	.	gene_id "CUFF.50195"; transcript_id "CUFF.50195.1"; exon_number "1"; FPKM "29.8710998584"; frac "1.000000"; conf_lo "7.290671"; conf_hi "52.451529"; cov "1.909091";
+chr13	Cufflinks	exon	3566164	3566203	1000	-	.	gene_id "CUFF.50195"; transcript_id "CUFF.50195.1"; exon_number "2"; FPKM "29.8710998584"; frac "1.000000"; conf_lo "7.290671"; conf_hi "52.451529"; cov "1.909091";
+chr13	Cufflinks	transcript	3606116	3613028	1000	-	.	gene_id "CUFF.50207"; transcript_id "CUFF.50207.1"; FPKM "19.6171377865"; frac "1.000000"; conf_lo "0.936995"; conf_hi "38.297281"; cov "1.253750";
+chr13	Cufflinks	exon	3606116	3606146	1000	-	.	gene_id "CUFF.50207"; transcript_id "CUFF.50207.1"; exon_number "1"; FPKM "19.6171377865"; frac "1.000000"; conf_lo "0.936995"; conf_hi "38.297281"; cov "1.253750";
+chr13	Cufflinks	exon	3612965	3613028	1000	-	.	gene_id "CUFF.50207"; transcript_id "CUFF.50207.1"; exon_number "2"; FPKM "19.6171377865"; frac "1.000000"; conf_lo "0.936995"; conf_hi "38.297281"; cov "1.253750";
+chr13	Cufflinks	transcript	4594319	4594938	1000	-	.	gene_id "CUFF.50261"; transcript_id "CUFF.50261.1"; FPKM "29.3887094260"; frac "1.000000"; conf_lo "8.607754"; conf_hi "50.169665"; cov "1.878261";
+chr13	Cufflinks	exon	4594319	4594400	1000	-	.	gene_id "CUFF.50261"; transcript_id "CUFF.50261.1"; exon_number "1"; FPKM "29.3887094260"; frac "1.000000"; conf_lo "8.607754"; conf_hi "50.169665"; cov "1.878261";
+chr13	Cufflinks	exon	4594906	4594938	1000	-	.	gene_id "CUFF.50261"; transcript_id "CUFF.50261.1"; exon_number "2"; FPKM "29.3887094260"; frac "1.000000"; conf_lo "8.607754"; conf_hi "50.169665"; cov "1.878261";
+chr13	Cufflinks	transcript	4596799	4598059	1000	-	.	gene_id "CUFF.50263"; transcript_id "CUFF.50263.1"; FPKM "22.8358215134"; frac "1.000000"; conf_lo "0.000000"; conf_hi "45.671643"; cov "1.459459";
+chr13	Cufflinks	exon	4596799	4596828	1000	-	.	gene_id "CUFF.50263"; transcript_id "CUFF.50263.1"; exon_number "1"; FPKM "22.8358215134"; frac "1.000000"; conf_lo "0.000000"; conf_hi "45.671643"; cov "1.459459";
+chr13	Cufflinks	exon	4598016	4598059	1000	-	.	gene_id "CUFF.50263"; transcript_id "CUFF.50263.1"; exon_number "2"; FPKM "22.8358215134"; frac "1.000000"; conf_lo "0.000000"; conf_hi "45.671643"; cov "1.459459";
+chr13	Cufflinks	transcript	5861035	5872268	1000	-	.	gene_id "CUFF.50289"; transcript_id "CUFF.50289.1"; FPKM "7.5439767500"; frac "1.000000"; conf_lo "0.000000"; conf_hi "18.212771"; cov "0.482143";
+chr13	Cufflinks	exon	5861035	5861117	1000	-	.	gene_id "CUFF.50289"; transcript_id "CUFF.50289.1"; exon_number "1"; FPKM "7.5439767500"; frac "1.000000"; conf_lo "0.000000"; conf_hi "18.212771"; cov "0.482143";
+chr13	Cufflinks	exon	5872240	5872268	1000	-	.	gene_id "CUFF.50289"; transcript_id "CUFF.50289.1"; exon_number "2"; FPKM "7.5439767500"; frac "1.000000"; conf_lo "0.000000"; conf_hi "18.212771"; cov "0.482143";
+chr13	Cufflinks	transcript	5865442	5866941	1000	+	.	gene_id "CUFF.50297"; transcript_id "CUFF.50297.1"; FPKM "13.2019593124"; frac "1.000000"; conf_lo "0.000000"; conf_hi "28.446269"; cov "0.843750";
+chr13	Cufflinks	exon	5865442	5865510	1000	+	.	gene_id "CUFF.50297"; transcript_id "CUFF.50297.1"; exon_number "1"; FPKM "13.2019593124"; frac "1.000000"; conf_lo "0.000000"; conf_hi "28.446269"; cov "0.843750";
+chr13	Cufflinks	exon	5866915	5866941	1000	+	.	gene_id "CUFF.50297"; transcript_id "CUFF.50297.1"; exon_number "2"; FPKM "13.2019593124"; frac "1.000000"; conf_lo "0.000000"; conf_hi "28.446269"; cov "0.843750";
+chr13	Cufflinks	transcript	6583845	6585843	1000	-	.	gene_id "CUFF.50339"; transcript_id "CUFF.50339.1"; FPKM "163.2242242265"; frac "1.000000"; conf_lo "127.815919"; conf_hi "198.632530"; cov "10.431818";
+chr13	Cufflinks	exon	6583845	6583946	1000	-	.	gene_id "CUFF.50339"; transcript_id "CUFF.50339.1"; exon_number "1"; FPKM "163.2242242265"; frac "1.000000"; conf_lo "127.815919"; conf_hi "198.632530"; cov "10.431818";
+chr13	Cufflinks	exon	6585726	6585843	1000	-	.	gene_id "CUFF.50339"; transcript_id "CUFF.50339.1"; exon_number "2"; FPKM "163.2242242265"; frac "1.000000"; conf_lo "127.815919"; conf_hi "198.632530"; cov "10.431818";
+chr13	Cufflinks	transcript	6586295	6587966	1000	-	.	gene_id "CUFF.50341"; transcript_id "CUFF.50341.1"; FPKM "82.5011329424"; frac "1.000000"; conf_lo "60.835274"; conf_hi "104.166992"; cov "5.272727";
+chr13	Cufflinks	exon	6586295	6586359	1000	-	.	gene_id "CUFF.50341"; transcript_id "CUFF.50341.1"; exon_number "1"; FPKM "82.5011329424"; frac "1.000000"; conf_lo "60.835274"; conf_hi "104.166992"; cov "5.272727";
+chr13	Cufflinks	exon	6587735	6587966	1000	-	.	gene_id "CUFF.50341"; transcript_id "CUFF.50341.1"; exon_number "2"; FPKM "82.5011329424"; frac "1.000000"; conf_lo "60.835274"; conf_hi "104.166992"; cov "5.272727";
+chr13	Cufflinks	transcript	6580385	6581757	1000	-	.	gene_id "CUFF.50365"; transcript_id "CUFF.50365.1"; FPKM "324.9135847836"; frac "1.000000"; conf_lo "293.684884"; conf_hi "356.142286"; cov "20.765542";
+chr13	Cufflinks	exon	6580385	6580838	1000	-	.	gene_id "CUFF.50365"; transcript_id "CUFF.50365.1"; exon_number "1"; FPKM "324.9135847836"; frac "1.000000"; conf_lo "293.684884"; conf_hi "356.142286"; cov "20.765542";
+chr13	Cufflinks	exon	6581649	6581757	1000	-	.	gene_id "CUFF.50365"; transcript_id "CUFF.50365.1"; exon_number "2"; FPKM "324.9135847836"; frac "1.000000"; conf_lo "293.684884"; conf_hi "356.142286"; cov "20.765542";
+chr13	Cufflinks	transcript	8803760	8819743	1000	+	.	gene_id "CUFF.50481"; transcript_id "CUFF.50481.1"; FPKM "15.1783005269"; frac "1.000000"; conf_lo "2.785270"; conf_hi "27.571331"; cov "0.970060";
+chr13	Cufflinks	exon	8803760	8803879	1000	+	.	gene_id "CUFF.50481"; transcript_id "CUFF.50481.1"; exon_number "1"; FPKM "15.1783005269"; frac "1.000000"; conf_lo "2.785270"; conf_hi "27.571331"; cov "0.970060";
+chr13	Cufflinks	exon	8819697	8819743	1000	+	.	gene_id "CUFF.50481"; transcript_id "CUFF.50481.1"; exon_number "2"; FPKM "15.1783005269"; frac "1.000000"; conf_lo "2.785270"; conf_hi "27.571331"; cov "0.970060";
+chr13	Cufflinks	transcript	8855128	8864773	1000	-	.	gene_id "CUFF.50497"; transcript_id "CUFF.50497.1"; FPKM "6.4009499697"; frac "1.000000"; conf_lo "0.000000"; conf_hi "19.202850"; cov "0.409091";
+chr13	Cufflinks	exon	8855128	8855158	1000	-	.	gene_id "CUFF.50497"; transcript_id "CUFF.50497.1"; exon_number "1"; FPKM "6.4009499697"; frac "1.000000"; conf_lo "0.000000"; conf_hi "19.202850"; cov "0.409091";
+chr13	Cufflinks	exon	8864739	8864773	1000	-	.	gene_id "CUFF.50497"; transcript_id "CUFF.50497.1"; exon_number "2"; FPKM "6.4009499697"; frac "1.000000"; conf_lo "0.000000"; conf_hi "19.202850"; cov "0.409091";
+chr13	Cufflinks	transcript	9169898	9172437	1000	+	.	gene_id "CUFF.50509"; transcript_id "CUFF.50509.1"; FPKM "41.4918721248"; frac "1.000000"; conf_lo "16.471332"; conf_hi "66.512412"; cov "2.651786";
+chr13	Cufflinks	exon	9169898	9169928	1000	+	.	gene_id "CUFF.50509"; transcript_id "CUFF.50509.1"; exon_number "1"; FPKM "41.4918721248"; frac "1.000000"; conf_lo "16.471332"; conf_hi "66.512412"; cov "2.651786";
+chr13	Cufflinks	exon	9172357	9172437	1000	+	.	gene_id "CUFF.50509"; transcript_id "CUFF.50509.1"; exon_number "2"; FPKM "41.4918721248"; frac "1.000000"; conf_lo "16.471332"; conf_hi "66.512412"; cov "2.651786";
+chr13	Cufflinks	transcript	9353602	9373527	1000	-	.	gene_id "CUFF.50527"; transcript_id "CUFF.50527.1"; FPKM "16.2485653076"; frac "1.000000"; conf_lo "0.000000"; conf_hi "35.010792"; cov "1.038462";
+chr13	Cufflinks	exon	9353602	9353648	1000	-	.	gene_id "CUFF.50527"; transcript_id "CUFF.50527.1"; exon_number "1"; FPKM "16.2485653076"; frac "1.000000"; conf_lo "0.000000"; conf_hi "35.010792"; cov "1.038462";
+chr13	Cufflinks	exon	9373497	9373527	1000	-	.	gene_id "CUFF.50527"; transcript_id "CUFF.50527.1"; exon_number "2"; FPKM "16.2485653076"; frac "1.000000"; conf_lo "0.000000"; conf_hi "35.010792"; cov "1.038462";
+chr13	Cufflinks	transcript	9586173	9593034	1000	-	.	gene_id "CUFF.50563"; transcript_id "CUFF.50563.1"; FPKM "10.3039682439"; frac "1.000000"; conf_lo "0.000000"; conf_hi "24.875980"; cov "0.658537";
+chr13	Cufflinks	exon	9586173	9586218	1000	-	.	gene_id "CUFF.50563"; transcript_id "CUFF.50563.1"; exon_number "1"; FPKM "10.3039682439"; frac "1.000000"; conf_lo "0.000000"; conf_hi "24.875980"; cov "0.658537";
+chr13	Cufflinks	exon	9592999	9593034	1000	-	.	gene_id "CUFF.50563"; transcript_id "CUFF.50563.1"; exon_number "2"; FPKM "10.3039682439"; frac "1.000000"; conf_lo "0.000000"; conf_hi "24.875980"; cov "0.658537";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Mon Jul 28 11:56:10 2014 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+  <package name="bx-python" version="0.7.1">
+      <repository changeset_revision="2d0c08728bca" name="package_bx_python_0_7" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>