# HG changeset patch # User m-zytnicki # Date 1379507558 14400 # Node ID 5f796c5c579fcbcf6980e167c65dbd85b7f0de71 # Parent b3f5a1cb4704970446318e280bb5b5a75f71cb9e Uploaded diff -r b3f5a1cb4704 -r 5f796c5c579f CompareOverlappingAdapt.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CompareOverlappingAdapt.xml Wed Sep 18 08:32:38 2013 -0400 @@ -0,0 +1,153 @@ + + Provide the queries that overlap with a reference. + + PYTHONPATH + + + ../Java/Python/CompareOverlappingAdapt.py -i $formatType.inputFileName1 + #if $formatType.FormatInputFileName1 == 'bed': + -f bed + #elif $formatType.FormatInputFileName1 == 'gff': + -f gff + #elif $formatType.FormatInputFileName1 == 'gff2': + -f gff2 + #elif $formatType.FormatInputFileName1 == 'gff3': + -f gff3 + #elif $formatType.FormatInputFileName1 == 'sam': + -f sam + #elif $formatType.FormatInputFileName1 == 'gtf': + -f gtf + #end if + -j $formatType2.inputFileName2 + #if $formatType2.FormatInputFileName2 == 'bed': + -g bed + #elif $formatType2.FormatInputFileName2 == 'gff': + -g gff + #elif $formatType2.FormatInputFileName2 == 'gff2': + -g gff2 + #elif $formatType2.FormatInputFileName2 == 'gff3': + -g gff3 + #elif $formatType2.FormatInputFileName2 == 'sam': + -g sam + #elif $formatType2.FormatInputFileName2 == 'gtf': + -g gtf + #end if + -o $outputFileGff + #if $OptionDistance.Dist == 'Yes': + -d $OptionDistance.distance + #end if + #if $OptionCollinearOrAntiSens.OptionCA == 'Collinear': + -c + #elif $OptionCollinearOrAntiSens.OptionCA == 'AntiSens': + -a + #end if + $InvertMatch + $NotOverlapping + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +This script may be the most important one. It basically compares two sets of transcripts and keeps those from the first set which overlap with the second one. 
The first set is considered as the query set (basically, your data) and the second one is the reference set (RefSeq data, for example). + +It is vital to understand that it will output the elements of the first file which overlap with the elements of the second one. + +Various modifiers are also available: + +-Invert selection (report those which do not overlap). + +-Restrict to collinear / anti-sense overlapping data. + +-Keep the query data even if they do not strictly overlap with the reference data, but are located no further than *n* nucleotides from some reference data. + +The invert-selection option reverses the selection. In other words, it performs the comparison as usual, and outputs all those query data which do not overlap. + +