# HG changeset patch # User m-zytnicki # Date 1380534688 14400 # Node ID b6481845eb0d9950f5c542109a0cd7aa8db4d4b3 # Parent 169d364ddd916d691503f5e47a1d619be2b86b5b Uploaded diff -r 169d364ddd91 -r b6481845eb0d SMART/Java/Python/ncList/.NCList.py.swp Binary file SMART/Java/Python/ncList/.NCList.py.swp has changed diff -r 169d364ddd91 -r b6481845eb0d SMART/Java/Python/ncList/.NCListCursor.py.swp Binary file SMART/Java/Python/ncList/.NCListCursor.py.swp has changed diff -r 169d364ddd91 -r b6481845eb0d SMART/galaxy/CompareOverlappingSmallQuery.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SMART/galaxy/CompareOverlappingSmallQuery.xml Mon Sep 30 05:51:28 2013 -0400 @@ -0,0 +1,203 @@ + + Provide the queries that overlap with a reference, when the query data set is small. + + PYTHONPATH + + + ../Java/Python/CompareOverlappingSmallQuery.py -i $formatType.inputFileName1 + #if $formatType.FormatInputFileName1 == 'bed': + -f bed + #elif $formatType.FormatInputFileName1 == 'gff': + -f gff + #elif $formatType.FormatInputFileName1 == 'gff2': + -f gff2 + #elif $formatType.FormatInputFileName1 == 'gff3': + -f gff3 + #elif $formatType.FormatInputFileName1 == 'sam': + -f sam + #elif $formatType.FormatInputFileName1 == 'gtf': + -f gtf + #end if + -j $formatType2.inputFileName2 + #if $formatType2.FormatInputFileName2 == 'bed': + -g bed + #elif $formatType2.FormatInputFileName2 == 'gff': + -g gff + #elif $formatType2.FormatInputFileName2 == 'gff2': + -g gff2 + #elif $formatType2.FormatInputFileName2 == 'gff3': + -g gff3 + #elif $formatType2.FormatInputFileName2 == 'sam': + -g sam + #elif $formatType2.FormatInputFileName2 == 'gtf': + -g gtf + #end if + -o $outputFileGff + #if $OptionDistance.Dist == 'Yes': + -d $OptionDistance.distance + #end if + #if $OptionMinOverlap.present == 'Yes': + -m $OptionMinOverlap.minOverlap + #end if + #if $OptionPcOverlapQuery.present == 'Yes': + -p $OptionPcOverlapQuery.minOverlap + #end if + #if $OptionPcOverlapRef.present == 'Yes': + -P 
$OptionPcOverlapRef.minOverlap + #end if + #if $OptionCollinearOrAntiSens.OptionCA == 'Collinear': + -c + #elif $OptionCollinearOrAntiSens.OptionCA == 'AntiSens': + -a + #end if + $InvertMatch + $NotOverlapping + $OptionInclusionQuery + $OptionInclusionRef + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +This script may be the most important one. It basically compares two sets of transcripts and keeps those from the first set which overlap with the second one. The first set is considered as the query set (basically, your data) and the second one is the reference set (RefSeq data, for example). + +It is vital to understand that it will output the elements of the first file which overlap with the elements of the second one. + +Various modifiers are also available: + +-Invert selection (report those which do not overlap). + +-Restrict to collinear / anti-sense overlapping data. + +-Keep the query data even if they do not strictly overlap with the reference data, but are located no further away than *n* nucleotides from some reference data. + +-Keep the query data which are strictly included in the reference data, meaning that a query transcript with at least 1 nucleotide that does not overlap the reference data will not be reported. + +The mechanism of shrinking and extending is also useful to make a fine-grained comparison. For example, if you want to keep those such that the TSS is overlapping the reference set, you just shrink the query set to 1 nucleotide. Now, if you want to keep those which are overlapping your data or located 2kb downstream of it, just extend the query data in the downstream direction, and you will have what you want. You can also extend in the opposite direction to get the possible transcription factor sites which are upstream.
+ +One option inverts the selection. In other words, it performs the comparison as usual, and outputs all the query data which do not overlap. + + diff -r 169d364ddd91 -r b6481845eb0d SMART/galaxy/CompareOverlappingSmallRef.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SMART/galaxy/CompareOverlappingSmallRef.xml Mon Sep 30 05:51:28 2013 -0400 @@ -0,0 +1,203 @@ + + Provide the queries that overlap with a reference, when the reference dataset is small. + + PYTHONPATH + + + ../Java/Python/CompareOverlappingSmallQuery.py -i $formatType.inputFileName1 + #if $formatType.FormatInputFileName1 == 'bed': + -f bed + #elif $formatType.FormatInputFileName1 == 'gff': + -f gff + #elif $formatType.FormatInputFileName1 == 'gff2': + -f gff2 + #elif $formatType.FormatInputFileName1 == 'gff3': + -f gff3 + #elif $formatType.FormatInputFileName1 == 'sam': + -f sam + #elif $formatType.FormatInputFileName1 == 'gtf': + -f gtf + #end if + -j $formatType2.inputFileName2 + #if $formatType2.FormatInputFileName2 == 'bed': + -g bed + #elif $formatType2.FormatInputFileName2 == 'gff': + -g gff + #elif $formatType2.FormatInputFileName2 == 'gff2': + -g gff2 + #elif $formatType2.FormatInputFileName2 == 'gff3': + -g gff3 + #elif $formatType2.FormatInputFileName2 == 'sam': + -g sam + #elif $formatType2.FormatInputFileName2 == 'gtf': + -g gtf + #end if + -o $outputFileGff + #if $OptionDistance.Dist == 'Yes': + -d $OptionDistance.distance + #end if + #if $OptionMinOverlap.present == 'Yes': + -m $OptionMinOverlap.minOverlap + #end if + #if $OptionPcOverlapQuery.present == 'Yes': + -p $OptionPcOverlapQuery.minOverlap + #end if + #if $OptionPcOverlapRef.present == 'Yes': + -P $OptionPcOverlapRef.minOverlap + #end if + #if $OptionCollinearOrAntiSens.OptionCA == 'Collinear': + -c + #elif $OptionCollinearOrAntiSens.OptionCA == 'AntiSens': + -a + #end if + $InvertMatch + $NotOverlapping + $OptionInclusionQuery + $OptionInclusionRef + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +This script may be the most important one. It basically compares two sets of transcripts and keeps those from the first set which overlap with the second one. The first set is considered as the query set (basically, your data) and the second one is the reference set (RefSeq data, for example). + +It is vital to understand that it will output the elements of the first file which overlap with the elements of the second one. + +Various modifiers are also available: + +-Invert selection (report those which do not overlap). + +-Restrict to collinear / anti-sense overlapping data. + +-Keep the query data even if they do not strictly overlap with the reference data, but are located no further away than *n* nucleotides from some reference data. + +-Keep the query data which are strictly included in the reference data, meaning that a query transcript with at least 1 nucleotide that does not overlap the reference data will not be reported. + +The mechanism of shrinking and extending is also useful to make a fine-grained comparison. For example, if you want to keep those such that the TSS is overlapping the reference set, you just shrink the query set to 1 nucleotide. Now, if you want to keep those which are overlapping your data or located 2kb downstream of it, just extend the query data in the downstream direction, and you will have what you want. You can also extend in the opposite direction to get the possible transcription factor sites which are upstream. + +One option inverts the selection. In other words, it performs the comparison as usual, and outputs all the query data which do not overlap.
+ + diff -r 169d364ddd91 -r b6481845eb0d commons/core/parsing/.BamParser.py.swp Binary file commons/core/parsing/.BamParser.py.swp has changed diff -r 169d364ddd91 -r b6481845eb0d tool_conf.xml --- a/tool_conf.xml Mon Sep 30 03:19:26 2013 -0400 +++ b/tool_conf.xml Mon Sep 30 05:51:28 2013 -0400 @@ -1,43 +1,43 @@
-