changeset 47:b6481845eb0d

Uploaded
author m-zytnicki
date Mon, 30 Sep 2013 05:51:28 -0400
parents 169d364ddd91
children 809ed01c8014
files SMART/Java/Python/ncList/.NCList.py.swp SMART/Java/Python/ncList/.NCListCursor.py.swp SMART/galaxy/CompareOverlappingSmallQuery.xml SMART/galaxy/CompareOverlappingSmallRef.xml commons/core/parsing/.BamParser.py.swp tool_conf.xml
diffstat 6 files changed, 415 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
Binary file SMART/Java/Python/ncList/.NCList.py.swp has changed
Binary file SMART/Java/Python/ncList/.NCListCursor.py.swp has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/CompareOverlappingSmallQuery.xml	Mon Sep 30 05:51:28 2013 -0400
@@ -0,0 +1,203 @@
+<tool id="CompareOverlappingSmallQuery" name="compare overlapping small query">
+	<description>Provide the queries that overlap with a reference, when the query data set is small.</description>
+	<requirements>
+		<requirement type="set_environment">PYTHONPATH</requirement>
+	</requirements>
+	<command interpreter="python">
+		../Java/Python/CompareOverlappingSmallQuery.py -i $formatType.inputFileName1
+		#if $formatType.FormatInputFileName1 == 'bed':
+			-f bed
+		#elif $formatType.FormatInputFileName1 == 'gff':
+			-f gff
+		#elif $formatType.FormatInputFileName1 == 'gff2':
+			-f gff2
+		#elif $formatType.FormatInputFileName1 == 'gff3':
+			-f gff3
+		#elif $formatType.FormatInputFileName1 == 'sam':
+			-f sam
+		#elif $formatType.FormatInputFileName1 == 'gtf':
+			-f gtf
+		#end if
+		-j $formatType2.inputFileName2
+		#if $formatType2.FormatInputFileName2 == 'bed':
+			-g bed
+		#elif $formatType2.FormatInputFileName2 == 'gff':
+			-g gff
+		#elif $formatType2.FormatInputFileName2 == 'gff2':
+			-g gff2
+		#elif $formatType2.FormatInputFileName2 == 'gff3':
+			-g gff3
+		#elif $formatType2.FormatInputFileName2 == 'sam':
+			-g sam
+		#elif $formatType2.FormatInputFileName2 == 'gtf':
+			-g gtf
+		#end if
+		-o $outputFileGff
+		#if $OptionDistance.Dist == 'Yes':
+			-d $OptionDistance.distance
+		#end if
+		#if $OptionMinOverlap.present == 'Yes':
+			-m $OptionMinOverlap.minOverlap
+		#end if
+		#if $OptionPcOverlapQuery.present == 'Yes':
+			-p $OptionPcOverlapQuery.minOverlap
+		#end if
+		#if $OptionPcOverlapRef.present == 'Yes':
+			-P $OptionPcOverlapRef.minOverlap
+		#end if
+		#if $OptionCollinearOrAntiSens.OptionCA == 'Collinear':
+			-c
+		#elif $OptionCollinearOrAntiSens.OptionCA == 'AntiSens':
+			-a
+		#end if
+		$InvertMatch
+		$NotOverlapping
+		$OptionInclusionQuery
+		$OptionInclusionRef
+	</command>
+	<!-- The parameters below are referenced by name from the command template above; their option values (bed, Yes/No, Collinear, ...) must stay in sync with its #if tests. -->
+	<inputs>
+		<conditional name="formatType">
+			<param name="FormatInputFileName1" type="select" label="Input Query File Format">
+				<option value="bed">bed</option>
+				<option value="gff">gff</option>
+				<option value="gff2">gff2</option>
+				<option value="gff3">gff3</option>
+				<option value="sam">sam</option>
+				<option value="gtf">gtf</option>
+			</param>
+			<when value="bed">
+				<param name="inputFileName1" format="bed" type="data" label="Input File 1"/>
+			</when>
+			<when value="gff">
+				<param name="inputFileName1" format="gff" type="data" label="Input File 1"/>
+			</when>
+			<when value="gff2">
+				<param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>
+			</when>
+			<when value="gff3">
+				<param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>
+			</when>
+			<when value="sam">
+				<param name="inputFileName1" format="sam" type="data" label="Input File 1"/>
+			</when>
+			<when value="gtf">
+				<param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>
+			</when>
+		</conditional>
+		<!-- Reference data set (passed with -j / -g): offers the same format choices as the query above. -->
+		<conditional name="formatType2">
+			<param name="FormatInputFileName2" type="select" label="Input Reference File Format">
+				<option value="bed">bed</option>
+				<option value="gff">gff</option>
+				<option value="gff2">gff2</option>
+				<option value="gff3">gff3</option>
+				<option value="sam">sam</option>
+				<option value="gtf">gtf</option>
+			</param>
+			<when value="bed">
+				<param name="inputFileName2" format="bed" type="data" label="Input File 2"/>
+			</when>
+			<when value="gff">
+				<param name="inputFileName2" format="gff" type="data" label="Input File 2"/>
+			</when>
+			<when value="gff2">
+				<param name="inputFileName2" format="gff2" type="data" label="Input File 2"/>
+			</when>
+			<when value="gff3">
+				<param name="inputFileName2" format="gff3" type="data" label="Input File 2"/>
+			</when>
+			<when value="sam">
+				<param name="inputFileName2" format="sam" type="data" label="Input File 2"/>
+			</when>
+			<when value="gtf">
+				<param name="inputFileName2" format="gtf" type="data" label="Input File 2"/>
+			</when>
+		</conditional>
+		<conditional name="OptionDistance">
+			<param name="Dist" type="select" label="Maximum Distance between two reads">
+				<option value="Yes">Yes</option>
+				<option value="No" selected="true">No</option>
+			</param>
+			<when value="Yes">
+				<param name="distance" type="integer" value="0"/>
+			</when>
+			<when value="No">
+			</when>
+		</conditional>
+		<conditional name="OptionMinOverlap">
+			<param name="present" type="select" label="Minimum number of common nucleotides to declare an overlap">
+				<option value="Yes">Yes</option>
+				<option value="No" selected="true">No</option>
+			</param>
+			<when value="Yes">
+				<param name="minOverlap" type="integer" value="0"/>
+			</when>
+			<when value="No">
+			</when>
+		</conditional>
+		<conditional name="OptionPcOverlapQuery">
+			<param name="present" type="select" label="N% of the query must overlap">
+				<option value="Yes">Yes</option>
+				<option value="No" selected="true">No</option>
+			</param>
+			<when value="Yes">
+				<param name="minOverlap" type="integer" value="0"/>
+			</when>
+			<when value="No">
+			</when>
+		</conditional>
+		<conditional name="OptionPcOverlapRef">
+			<param name="present" type="select" label="N% of the reference must overlap">
+				<option value="Yes">Yes</option>
+				<option value="No" selected="true">No</option>
+			</param>
+			<when value="Yes">
+				<param name="minOverlap" type="integer" value="0"/>
+			</when>
+			<when value="No">
+			</when>
+		</conditional>
+		<param name="OptionInclusionQuery" type="boolean" truevalue="-k" falsevalue="" checked="false" label="The query must be nested in a reference"/>
+		<param name="OptionInclusionRef" type="boolean" truevalue="-K" falsevalue="" checked="false" label="The reference must be nested in a query"/>
+		<conditional name="OptionCollinearOrAntiSens">
+			<param name="OptionCA" type="select" label="Collinear or anti-sens">
+				<option value="Collinear">Collinear</option>
+				<option value="AntiSens">AntiSens</option>
+				<option value="NONE" selected="true">NONE</option>
+			</param>
+			<when value="Collinear">
+			</when>
+			<when value="AntiSens">
+			</when>
+			<when value="NONE">
+			</when>
+		</conditional>
+		<param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match: the output file will contain all query elements which do NOT overlap"/>
+		<param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="Also report the query data which do not overlap, with the nbOverlaps tag set to 0."/>
+	</inputs>
+	<!-- Single output data set, declared as gff3; its path is handed to the script with -o. -->
+	<outputs>
+		<data name="outputFileGff" format="gff3"/>
+	</outputs>
+	<!-- Help text for the tool form; the modifier list uses "- " bullets so that it renders as a list. -->
+	<help>
+This script may be the most important one. It basically compares two sets of transcripts and keeps those from the first set which overlap with the second one. The first set is considered as the query set (basically, your data) and the second one is the reference set (RefSeq data, for example).
+
+It is vital to understand that it will output the elements of the first file which overlap with the elements of the second one.
+
+Various modifiers are also available:
+
+- Invert selection (report those which do not overlap).
+
+- Restrict to collinear / anti-sense overlapping data.
+
+- Keep the query data even if they do not strictly overlap with the reference data, but are located no further than *n* nucleotides from some reference data.
+
+- Keep the query data which are strictly included in reference data, meaning that a query transcript such that at least 1 nucleotide does not overlap with reference data will not be presented as a solution.
+
+The mechanism of shrinking and extending is also useful to make a fine-grained comparison. For example, if you want to keep those such that the TSS is overlapping the reference set, you just shrink the query set to 1 nucleotide. Now, if you want to keep those which are overlapping your data or located 2kb downstream of it, just extend the query data in the downstream direction, and you will have what you want. You can also extend in the opposite direction to get the possible transcription factor sites which are upstream.
+
+One option reverses the selection. In other words, it performs the comparison as usual, and outputs all those query data which do not overlap.
+	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/CompareOverlappingSmallRef.xml	Mon Sep 30 05:51:28 2013 -0400
@@ -0,0 +1,203 @@
+<tool id="CompareOverlappingSmallRef" name="compare overlapping small reference">
+	<description>Provide the queries that overlap with a reference, when the reference dataset is small.</description>
+	<requirements>
+		<requirement type="set_environment">PYTHONPATH</requirement>
+	</requirements>
+	<command interpreter="python">
+		../Java/Python/CompareOverlappingSmallRef.py -i $formatType.inputFileName1
+		#if $formatType.FormatInputFileName1 == 'bed':
+			-f bed
+		#elif $formatType.FormatInputFileName1 == 'gff':
+			-f gff
+		#elif $formatType.FormatInputFileName1 == 'gff2':
+			-f gff2
+		#elif $formatType.FormatInputFileName1 == 'gff3':
+			-f gff3
+		#elif $formatType.FormatInputFileName1 == 'sam':
+			-f sam
+		#elif $formatType.FormatInputFileName1 == 'gtf':
+			-f gtf
+		#end if
+		-j $formatType2.inputFileName2
+		#if $formatType2.FormatInputFileName2 == 'bed':
+			-g bed
+		#elif $formatType2.FormatInputFileName2 == 'gff':
+			-g gff
+		#elif $formatType2.FormatInputFileName2 == 'gff2':
+			-g gff2
+		#elif $formatType2.FormatInputFileName2 == 'gff3':
+			-g gff3
+		#elif $formatType2.FormatInputFileName2 == 'sam':
+			-g sam
+		#elif $formatType2.FormatInputFileName2 == 'gtf':
+			-g gtf
+		#end if
+		-o $outputFileGff
+		#if $OptionDistance.Dist == 'Yes':
+			-d $OptionDistance.distance
+		#end if
+		#if $OptionMinOverlap.present == 'Yes':
+			-m $OptionMinOverlap.minOverlap
+		#end if
+		#if $OptionPcOverlapQuery.present == 'Yes':
+			-p $OptionPcOverlapQuery.minOverlap
+		#end if
+		#if $OptionPcOverlapRef.present == 'Yes':
+			-P $OptionPcOverlapRef.minOverlap
+		#end if
+		#if $OptionCollinearOrAntiSens.OptionCA == 'Collinear':
+			-c
+		#elif $OptionCollinearOrAntiSens.OptionCA == 'AntiSens':
+			-a
+		#end if
+		$InvertMatch
+		$NotOverlapping
+		$OptionInclusionQuery
+		$OptionInclusionRef
+	</command>
+	<!-- The parameters below are referenced by name from the command template above; their option values (bed, Yes/No, Collinear, ...) must stay in sync with its #if tests. -->
+	<inputs>
+		<conditional name="formatType">
+			<param name="FormatInputFileName1" type="select" label="Input Query File Format">
+				<option value="bed">bed</option>
+				<option value="gff">gff</option>
+				<option value="gff2">gff2</option>
+				<option value="gff3">gff3</option>
+				<option value="sam">sam</option>
+				<option value="gtf">gtf</option>
+			</param>
+			<when value="bed">
+				<param name="inputFileName1" format="bed" type="data" label="Input File 1"/>
+			</when>
+			<when value="gff">
+				<param name="inputFileName1" format="gff" type="data" label="Input File 1"/>
+			</when>
+			<when value="gff2">
+				<param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>
+			</when>
+			<when value="gff3">
+				<param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>
+			</when>
+			<when value="sam">
+				<param name="inputFileName1" format="sam" type="data" label="Input File 1"/>
+			</when>
+			<when value="gtf">
+				<param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>
+			</when>
+		</conditional>
+		<!-- Reference data set (passed with -j / -g): offers the same format choices as the query above. -->
+		<conditional name="formatType2">
+			<param name="FormatInputFileName2" type="select" label="Input Reference File Format">
+				<option value="bed">bed</option>
+				<option value="gff">gff</option>
+				<option value="gff2">gff2</option>
+				<option value="gff3">gff3</option>
+				<option value="sam">sam</option>
+				<option value="gtf">gtf</option>
+			</param>
+			<when value="bed">
+				<param name="inputFileName2" format="bed" type="data" label="Input File 2"/>
+			</when>
+			<when value="gff">
+				<param name="inputFileName2" format="gff" type="data" label="Input File 2"/>
+			</when>
+			<when value="gff2">
+				<param name="inputFileName2" format="gff2" type="data" label="Input File 2"/>
+			</when>
+			<when value="gff3">
+				<param name="inputFileName2" format="gff3" type="data" label="Input File 2"/>
+			</when>
+			<when value="sam">
+				<param name="inputFileName2" format="sam" type="data" label="Input File 2"/>
+			</when>
+			<when value="gtf">
+				<param name="inputFileName2" format="gtf" type="data" label="Input File 2"/>
+			</when>
+		</conditional>
+		<conditional name="OptionDistance">
+			<param name="Dist" type="select" label="Maximum Distance between two reads">
+				<option value="Yes">Yes</option>
+				<option value="No" selected="true">No</option>
+			</param>
+			<when value="Yes">
+				<param name="distance" type="integer" value="0"/>
+			</when>
+			<when value="No">
+			</when>
+		</conditional>
+		<conditional name="OptionMinOverlap">
+			<param name="present" type="select" label="Minimum number of common nucleotides to declare an overlap">
+				<option value="Yes">Yes</option>
+				<option value="No" selected="true">No</option>
+			</param>
+			<when value="Yes">
+				<param name="minOverlap" type="integer" value="0"/>
+			</when>
+			<when value="No">
+			</when>
+		</conditional>
+		<conditional name="OptionPcOverlapQuery">
+			<param name="present" type="select" label="N% of the query must overlap">
+				<option value="Yes">Yes</option>
+				<option value="No" selected="true">No</option>
+			</param>
+			<when value="Yes">
+				<param name="minOverlap" type="integer" value="0"/>
+			</when>
+			<when value="No">
+			</when>
+		</conditional>
+		<conditional name="OptionPcOverlapRef">
+			<param name="present" type="select" label="N% of the reference must overlap">
+				<option value="Yes">Yes</option>
+				<option value="No" selected="true">No</option>
+			</param>
+			<when value="Yes">
+				<param name="minOverlap" type="integer" value="0"/>
+			</when>
+			<when value="No">
+			</when>
+		</conditional>
+		<param name="OptionInclusionQuery" type="boolean" truevalue="-k" falsevalue="" checked="false" label="The query must be nested in a reference"/>
+		<param name="OptionInclusionRef" type="boolean" truevalue="-K" falsevalue="" checked="false" label="The reference must be nested in a query"/>
+		<conditional name="OptionCollinearOrAntiSens">
+			<param name="OptionCA" type="select" label="Collinear or anti-sens">
+				<option value="Collinear">Collinear</option>
+				<option value="AntiSens">AntiSens</option>
+				<option value="NONE" selected="true">NONE</option>
+			</param>
+			<when value="Collinear">
+			</when>
+			<when value="AntiSens">
+			</when>
+			<when value="NONE">
+			</when>
+		</conditional>
+		<param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match: the output file will contain all query elements which do NOT overlap"/>
+		<param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="Also report the query data which do not overlap, with the nbOverlaps tag set to 0."/>
+	</inputs>
+	<!-- Single output data set, declared as gff3; its path is handed to the script with -o. -->
+	<outputs>
+		<data name="outputFileGff" format="gff3"/>
+	</outputs>
+	<!-- Help text for the tool form; the modifier list uses "- " bullets so that it renders as a list. -->
+	<help>
+This script may be the most important one. It basically compares two sets of transcripts and keeps those from the first set which overlap with the second one. The first set is considered as the query set (basically, your data) and the second one is the reference set (RefSeq data, for example).
+
+It is vital to understand that it will output the elements of the first file which overlap with the elements of the second one.
+
+Various modifiers are also available:
+
+- Invert selection (report those which do not overlap).
+
+- Restrict to collinear / anti-sense overlapping data.
+
+- Keep the query data even if they do not strictly overlap with the reference data, but are located no further than *n* nucleotides from some reference data.
+
+- Keep the query data which are strictly included in reference data, meaning that a query transcript such that at least 1 nucleotide does not overlap with reference data will not be presented as a solution.
+
+The mechanism of shrinking and extending is also useful to make a fine-grained comparison. For example, if you want to keep those such that the TSS is overlapping the reference set, you just shrink the query set to 1 nucleotide. Now, if you want to keep those which are overlapping your data or located 2kb downstream of it, just extend the query data in the downstream direction, and you will have what you want. You can also extend in the opposite direction to get the possible transcription factor sites which are upstream.
+
+One option reverses the selection. In other words, it performs the comparison as usual, and outputs all those query data which do not overlap.
+	</help>
+</tool>
Binary file commons/core/parsing/.BamParser.py.swp has changed
--- a/tool_conf.xml	Mon Sep 30 03:19:26 2013 -0400
+++ b/tool_conf.xml	Mon Sep 30 05:51:28 2013 -0400
@@ -1,43 +1,43 @@
   <section id="s_mart" name="S-MART" version="3.0">
-    <label id="Smart_Comparison" text="Comparison Tools" version="3.0"/>
+    <label id="Smart_Comparison" text="Comparison Tools"/>
       <tool file="s_mart/CompareOverlappingAdapt.xml"/>
       <tool file="s_mart/getDifference.xml"/>
       <tool file="s_mart/computeCoverage.xml"/>
       <tool file="s_mart/GetFlanking.xml"/>
       <tool file="s_mart/GetDifferentialExpression.xml"/>
-    <label id="Smart_Merge" text="Merge Tools" version="3.0"/>
+    <label id="Smart_Merge" text="Merge Tools"/>
       <tool file="s_mart/clusterize.xml"/>
       <tool file="s_mart/mergeTranscriptLists.xml"/>
       <tool file="s_mart/CollapseReads.xml"/>
       <tool file="s_mart/clusterizeBySlidingWindows.xml"/>
       <tool file="s_mart/mergeSlidingWindowsClusters.xml"/>
-    <label id="Smart_Visualization" text="Visualization Tools" version="3.0"/>
+    <label id="Smart_Visualization" text="Visualization Tools"/>
       <tool file="s_mart/getDistribution.xml"/>
       <tool file="s_mart/getDistance.xml"/>
       <tool file="s_mart/getSizes.xml"/>
       <tool file="s_mart/plotCoverage.xml"/>
       <tool file="s_mart/WrappGetLetterDistribution1.xml"/>
       <tool file="s_mart/plotTranscriptList.xml"/>
-    <label id="Smart_Sequence" text="Sequence Tools" version="3.0"/>
+    <label id="Smart_Sequence" text="Sequence Tools"/>
       <tool file="s_mart/CountReadGCPercent.xml"/>
-    <label id="Smart_Modification" text="Modification Tools" version="3.0"/>
+    <label id="Smart_Modification" text="Modification Tools"/>
       <tool file="s_mart/modifyGenomicCoordinates.xml"/>
       <tool file="s_mart/modifySequenceList.xml"/>
       <tool file="s_mart/trimSequences.xml"/>
-    <label id="Smart_Selection" text="Selection Tools" version="3.0"/>
+    <label id="Smart_Selection" text="Selection Tools"/>
       <tool file="s_mart/getExons.xml"/>
       <tool file="s_mart/getIntrons.xml"/>
       <tool file="s_mart/restrictFromSize.xml"/>
       <tool file="s_mart/restrictTranscriptList.xml"/>
-    <label id="Smart_Conversion" text="Conversion Tools" version="3.0"/>
+    <label id="Smart_Conversion" text="Conversion Tools"/>
       <tool file="s_mart/ConvertTranscriptFile.xml"/>
       <tool file="s_mart/coordinatesToSequence.xml"/>
       <tool file="s_mart/mapperAnalyzer.xml"/>
-    <label id="Smart_WIG" text="WIG Manipulation Tools" version="3.0"/>
+    <label id="Smart_WIG" text="WIG Manipulation Tools"/>
       <tool file="s_mart/getWigData.xml"/>
       <tool file="s_mart/getWigDistance.xml"/>
       <tool file="s_mart/getWigProfile.xml"/>
-    <label id="Smart_GFF" text="GFF Manipulation Tools" version="3.0"/>
+    <label id="Smart_GFF" text="GFF Manipulation Tools"/>
       <tool file="s_mart/CleanTranscriptFile.xml"/>
       <tool file="s_mart/changeTagName.xml"/>
       <tool file="s_mart/changeGffFeatures.xml"/>