changeset 0:229d36377838 draft

Uploaded
author amadeo
date Mon, 05 Sep 2016 05:53:08 -0400
parents
children ed814636753a
files Tools/CREF/Extract_matrix_subset_galaxy.pl Tools/CREF/Extract_motif_codes_galaxy.pl Tools/CREF/extract_motifs_codes.xml Tools/CREF/extract_motifs_subset.xml Tools/CREF/list_motifs.xml Tools/CREF/list_motifs_galaxy.pl Tools/CREF/short-headers.xml Tools/CREF/shorten-headers-galaxy.pl Tools/First_version/remove_motifs_overlaped.xml Tools/First_version/rm_overlap_motifs_galaxy.pl Tools/First_version/sorting_first.xml Tools/First_version/sorting_first_galaxy.pl Tools/Matrix/gene-TF-matrix-csv-galaxy.pl Tools/Matrix/matrix_generator.xml Tools/Motif_search/motif_search_galaxy.pl Tools/Motif_search/motif_search_galaxy.xml Tools/Motif_search/rules.xml Tools/Motif_search/rules_galaxy.pl Tools/Motif_search/sort_genes_galaxy.pl Tools/Motif_search/sort_genes_galaxy.xml Tools/Motif_search/sort_positions_galaxy.pl Tools/Motif_search/sort_positions_galaxy.xml Tools/Motif_search/testrules_galaxy.pl Tools/Motif_search/testrules_galaxy.xml Tools/Second/remove_motifs_galaxy.pl Tools/Second/remove_motifs_v2.xml Tools/Second/step1.xml Tools/Second/step1_galaxy.pl Tools/Second/step2.xml Tools/Second/step2_galaxy.pl
diffstat 30 files changed, 1670 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/CREF/Extract_matrix_subset_galaxy.pl	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,64 @@
+#!/usr/bin/perl -w
+
+# Script to take a list of JASPAR ids and extract a subset of matrix information
+# for each of them from a MEME-format motif database (e.g. JASPAR_CORE_2016.meme).
+#
+# Usage: Extract_matrix_subset.pl list-motifs.txt JASPAR_CORE_2016.meme JASPAR_AME_subset.meme
+#
+# The first $HEADER_LINES lines of the database are copied verbatim to the output,
+# followed by every motif whose "MOTIF" line contains one of the listed ids.
+
+$| = 1;
+use strict;
+use warnings;
+
+# Number of leading database lines that form the MEME file header.
+my $HEADER_LINES = 9;
+
+# All three file names are required; fail loudly so a wrapper sees the error.
+if (@ARGV < 3) {
+    die "\nUsage: Extract_matrix_subset.pl list-motifs.txt JASPAR_CORE_2016.meme JASPAR_AME_subset.meme\n\n";
+}
+my ($list_file, $meme_file, $out_file) = @ARGV;
+
+open(my $motif_fh, '<', $list_file) or die "Cannot open '$list_file': $!\n";
+open(my $meme_fh, '<', $meme_file) or die "Cannot open '$meme_file': $!\n";
+open(my $out_fh, '>', $out_file) or die "Cannot open '$out_file' for writing: $!\n";
+
+my @lines = <$meme_fh>;
+close($meme_fh);
+
+# Split off the database header; splice copes with files shorter than the header.
+my @header_lines = splice(@lines, 0, $HEADER_LINES);
+
+# Group the remaining lines by motif: the "MOTIF ..." line is the key and the
+# lines that follow it (up to the next motif) are the value.
+my %matrix;
+my @motif_order;    # keys of %matrix in file order, for deterministic output
+my $current_motif;
+for my $line (@lines) {
+    if ($line =~ /^MO/) {
+        $current_motif = $line;
+    }
+    elsif (defined $current_motif) {    # ignore stray lines before the first motif
+        push @motif_order, $current_motif unless exists $matrix{$current_motif};
+        push @{$matrix{$current_motif}}, $line;
+    }
+}
+
+# print, not printf: the data must never be interpreted as a format string.
+print {$out_fh} @header_lines;
+
+# Print every stored motif whose MOTIF line contains a requested id.
+while (my $motif_id = <$motif_fh>) {
+    chomp $motif_id;
+    next if $motif_id eq '';    # an empty id would select every motif
+    for my $motif_line (@motif_order) {
+        # Literal substring test: the "." in ids such as MA0004.1 is not a wildcard.
+        next if index($motif_line, $motif_id) < 0;
+        print {$out_fh} "$motif_line @{$matrix{$motif_line}}\n";
+    }
+}
+
+close($motif_fh);
+close($out_fh) or die "Cannot close '$out_file': $!\n";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/CREF/Extract_motif_codes_galaxy.pl	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,30 @@
+#!/usr/bin/perl -w
+
+# Script to take output from AME (part of the MEME suite) and copy the lines
+# that report over-represented motifs (those containing "Ranksum") to a new file.
+#
+# Usage: Extract_motif_codes.pl ame.txt ame-shorted.txt
+
+$| = 1;
+use strict;
+use warnings;
+
+# Both the AME results file and the output file name are required.
+if (@ARGV < 2) {
+    die "\nUsage: Extract_motif_codes.pl ame.txt ame-shorted.txt\n\n";
+}
+my ($ame_file, $out_file) = @ARGV;
+
+open(my $in_fh, '<', $ame_file) or die "Cannot open '$ame_file': $!\n";
+open(my $out_fh, '>', $out_file) or die "Cannot open '$out_file' for writing: $!\n";
+
+while (my $line = <$in_fh>) {
+    next unless $line =~ /Ranksum/;
+    # Strip the input newline first so each match is written as exactly one
+    # line (no blank line after it).
+    chomp $line;
+    print {$out_fh} "$line\n";
+}
+
+close($in_fh);
+close($out_fh) or die "Cannot close '$out_file': $!\n";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/CREF/extract_motifs_codes.xml	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,10 @@
+<tool id="extract_motifs_codes" name="Extract motifs codes" version="1.0.0">
+	<description>Script to take output from AME (part of memesuite-org) and extract a list of the overrepresented motifs</description>
+	<command interpreter="perl">Extract_motif_codes_galaxy.pl $input $output</command>
+	<inputs>
+		<param format="txt" name="input" type="data" label="AME file"/>
+	</inputs>
+	<outputs>
+		<data format="txt" name="output" label="ame-shorted" />
+	</outputs>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/CREF/extract_motifs_subset.xml	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,12 @@
+<tool id="extract_motifs_subset" name="Extract motifs subset" version="1.0.0">
+	<description>Script to take a list of JASPAR ids and extract a subset of matrix information for each of them from a meme file</description>
+	<command interpreter="perl">Extract_matrix_subset_galaxy.pl $input $secondinput $output</command>
+	<inputs>
+		<param format="txt" name="input" type="data" label="Motifs"/>
+		<param format="meme" name="secondinput" type="data" label="MEME file"/>
+	</inputs>
+	<outputs>
+		<data format="txt" name="output" label="list-motifs-JASPAR-data" />
+	</outputs>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/CREF/list_motifs.xml	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,10 @@
+<tool id="list_motifs" name="List motifs" version="1.0.0">
+	<description>Extract the motifs names from ame processed file</description>
+	<command interpreter="perl">list_motifs_galaxy.pl $input $output</command>
+	<inputs>
+		<param format="txt" name="input" type="data" label="AME file processed"/>
+	</inputs>
+	<outputs>
+		<data format="txt" name="output" label="list_motifs" />
+	</outputs>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/CREF/list_motifs_galaxy.pl	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,29 @@
+#!/usr/bin/perl -w
+
+# Extract the motif ids from a filtered AME results file: for every line
+# containing "MA", the sixth whitespace-separated field is written out.
+#
+# Usage: list_motifs.pl ame-shorted.txt list-motifs.txt
+
+$| = 1;
+use strict;
+use warnings;
+
+if (@ARGV < 2) {
+    die "\nUsage: list_motifs.pl ame-shorted.txt list-motifs.txt\n\n";
+}
+my ($ame_file, $out_file) = @ARGV;
+
+open(my $in_fh, '<', $ame_file) or die "Cannot open '$ame_file': $!\n";
+open(my $out_fh, '>', $out_file) or die "Cannot open '$out_file' for writing: $!\n";
+
+while (my $line = <$in_fh>) {
+    next unless $line =~ /MA/;
+    my @cols = split ' ', $line;
+    # Skip lines too short to hold an id instead of emitting an empty entry.
+    next if @cols < 6;
+    print {$out_fh} "$cols[5]\n";
+}
+
+close($in_fh);
+close($out_fh) or die "Cannot close '$out_file': $!\n";
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/CREF/short-headers.xml	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,10 @@
+<tool id="shorten_headers" name="Get shorter headers" version="1.0.0">
+	<description>re-writes a fasta file with compatible headers for MEME suite tool.</description>
+	<command interpreter="perl">shorten-headers-galaxy.pl $input  $output</command>
+	<inputs>
+		<param format="txt" name="input" type="data" label="input"/>
+	</inputs>
+	<outputs>
+		<data format="fasta" name="output" label="promoters_sh" />
+	</outputs>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/CREF/shorten-headers-galaxy.pl	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,38 @@
+#!/usr/bin/perl -w
+
+# Script to take a multiple fasta file and truncate the header lines to
+# GeneIDs so the fasta file can be used as input to AME.
+#
+# Usage: shorten-headers.pl promoters.fasta promoters-sh.fasta
+
+$| = 1;
+use strict;
+use warnings;
+
+# Number of characters (including the leading ">") kept from each header.
+my $HEADER_WIDTH = 21;
+
+# The promoter sequence file and an output file name are both required.
+if (@ARGV < 2) {
+    die "\nUsage:shorten-headers.pl promoters.fasta  promoters-sh.fasta\n\n";
+}
+my ($fasta_file, $out_file) = @ARGV;
+
+open(my $in_fh, '<', $fasta_file) or die "Cannot open '$fasta_file': $!\n";
+open(my $out_fh, '>', $out_file) or die "Cannot open '$out_file' for writing: $!\n";
+
+while (my $line = <$in_fh>) {
+    if ($line =~ /^>/) {
+        # Drop the newline before truncating; otherwise a header shorter than
+        # the limit keeps it and a blank line ends up after the header.
+        chomp $line;
+        print {$out_fh} substr($line, 0, $HEADER_WIDTH), "\n";
+    }
+    else {
+        # Sequence lines are copied unchanged (print, so "%" is never a format).
+        print {$out_fh} $line;
+    }
+}
+
+close($in_fh);
+close($out_fh) or die "Cannot close '$out_file': $!\n";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/First_version/remove_motifs_overlaped.xml	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,12 @@
+<tool id="remove_same_motifs" name="Remove same motifs overlaped" version="1.0.0">
+	<description>Script to compare contiguous hits of the same motif in a gff file and, in case two of them overlap, remove the hit with the highest p value</description>
+	<command interpreter="perl">rm_overlap_motifs_galaxy.pl $input $output $secondoutput $value </command>
+	<inputs>
+		<param format="gff" name="input" type="data" label="FIMO file"/>
+		<param name="value" size="3" type="integer" value="20" label="Overlap value"/>
+	</inputs>
+	<outputs>
+		<data format="gff" name="output" label="positive_strand" />
+		<data format="gff" name="secondoutput" label="negative_strand" />
+	</outputs>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/First_version/rm_overlap_motifs_galaxy.pl	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,122 @@
+#!/usr/bin/perl -w
+
+# Remove overlapping hits of the same motif from a FIMO GFF file.
+#
+# Usage: rm_overlap_motifs_posneg.pl fimo.gff fimo-nol-pos.gff fimo-nol-neg.gff overlap_percentage
+#
+# Hits on the "+" and "-" strands are filtered independently and written to
+# separate files. Each hit is compared with the last hit kept for the same
+# gene and motif on its strand, so the input is expected to list the hits of
+# one gene/motif contiguously. When two hits overlap by more than the given
+# percentage of the motif length, only the one with the lower p value is kept.
+
+$| = 1;
+use warnings;
+use strict;
+
+if (@ARGV < 4) {
+    die "\nUsage: rm_overlap_motifs_posneg.pl fimo-test-sue.gff fimo-nol-pos.gff fimo-nol-neg.gff overlap_percentage\n\n";
+}
+my ($fimo_file, $pos_file, $neg_file, $overlap_pct) = @ARGV;
+
+# The overlap threshold is given on a 0-100 scale and used on a 0-1 scale.
+if (!($overlap_pct > 0.0 && $overlap_pct <= 100)) {
+    die " ERROR: overlap is a value 0-100\n";
+}
+my $max_overlap = $overlap_pct / 100;
+
+open(my $fimo_fh, '<', $fimo_file) or die "Cannot open '$fimo_file': $!\n";
+open(my $pos_fh, '>', $pos_file) or die "Cannot open '$pos_file' for writing: $!\n";
+open(my $neg_fh, '>', $neg_file) or die "Cannot open '$neg_file' for writing: $!\n";
+
+# Filter state kept separately for each strand:
+#   last_hit  {gene}{motif} => [start, end] of the last hit kept
+#   pvalue    p value of the last hit kept
+#   kept      lines to write out, in input order
+my %state_for = (
+    '+' => { last_hit => {}, pvalue => undef, kept => [] },
+    '-' => { last_hit => {}, pvalue => undef, kept => [] },
+);
+
+while (my $line = <$fimo_fh>) {
+    chomp $line;
+
+    # Header/comment lines go to both outputs straight away.
+    if ($line =~ /^#/) {
+        print {$pos_fh} "$line\n";
+        print {$neg_fh} "$line\n";
+        next;
+    }
+
+    my @cols = split ' ', $line;
+    next if @cols < 10;    # not a complete hit record (e.g. blank line)
+    my $state = $state_for{ $cols[6] };
+    next unless $state;    # strand is neither "+" nor "-"
+    filter_hit($state, $line, \@cols, $max_overlap);
+}
+close($fimo_fh);
+
+print {$pos_fh} "$_\n" for @{ $state_for{'+'}{kept} };
+print {$neg_fh} "$_\n" for @{ $state_for{'-'}{kept} };
+close($pos_fh) or die "Cannot close '$pos_file': $!\n";
+close($neg_fh) or die "Cannot close '$neg_file': $!\n";
+
+# Decide whether one hit is kept, replaces the previously kept hit, or is
+# dropped, and update the strand state accordingly.
+#   $state        entry of %state_for for the hit's strand
+#   $line         the chomped GFF line
+#   $cols         array ref of the line's whitespace-separated fields
+#   $max_overlap  overlap threshold as a fraction of the motif length
+sub filter_hit {
+    my ($state, $line, $cols, $max_overlap) = @_;
+
+    my $tf   = substr $cols->[8], 5, 8;
+    my $gene = substr $cols->[0], 0, 21;
+    my ($start, $end) = @{$cols}[3, 4];
+
+    # Motif length: the text after "=" in field 10, minus its last character.
+    my @sequences = split("=", $cols->[9]);
+    my $seq_len = int(length(substr $sequences[1], 0, -1));
+
+    # The p value is read from fixed offsets at the end of field 9: a mantissa
+    # and an exponent that are recombined into a number.
+    my $decimal  = substr $cols->[8], -16, 4;
+    my $exponent = substr $cols->[8], -11, 3;
+    $decimal =~ s/[^.\d]//g;    # remove everything but digits and dots
+    my $pvalue = $decimal * (10**$exponent);
+
+    my $action = 'keep';        # first hit of a gene/motif is always kept
+    my $prev   = $state->{last_hit}{$gene}{$tf};
+    if (defined $prev) {
+        my ($prev_start, $prev_end) = @{$prev};
+        my $start_inside = ($start >= $prev_start && $start <= $prev_end);
+        my $end_inside   = ($end >= $prev_start   && $end <= $prev_end);
+        my $is_better    = ($pvalue < $state->{pvalue});
+
+        if ($start_inside && $end_inside) {
+            # Fully contained in the previous hit: only a better p value wins.
+            $action = $is_better ? 'replace' : 'drop';
+        }
+        elsif ($start_inside || $end_inside) {
+            # Partial overlap: measure it relative to the motif length.
+            my $shared   = $end_inside ? $end - $prev_start : $prev_end - $start;
+            my $fraction = int($shared) / $seq_len;
+            if ($fraction < $max_overlap) {
+                $action = 'keep';
+            }
+            elsif ($fraction > $max_overlap && $is_better) {
+                $action = 'replace';
+            }
+            else {
+                $action = 'drop';
+            }
+        }
+    }
+
+    return if $action eq 'drop';
+    pop @{ $state->{kept} } if $action eq 'replace';
+    $state->{last_hit}{$gene}{$tf} = [$start, $end];
+    $state->{pvalue} = $pvalue;
+    push @{ $state->{kept} }, $line;
+    return;
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/First_version/sorting_first.xml	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,10 @@
+<tool id="sorting_by_position" name="Sort block of genes by first position" version="1.0.0">
+	<description>Script to sort each block of genes in a .gff file by the start position of the motifs</description>
+	<command interpreter="perl">sorting_first_galaxy.pl $input $output</command>
+	<inputs>
+		<param format="gff" name="input" type="data" label="FIMO file"/>
+	</inputs>
+	<outputs>
+		<data format="gff" name="output" label="fimo-position-sorted.gff" />
+	</outputs>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/First_version/sorting_first_galaxy.pl	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,61 @@
+#!/usr/bin/perl -w
+
+# Sort the hits of a FIMO GFF file by start position within each block, where
+# a block is a run of consecutive lines for the same gene and motif.
+#
+# Usage: sorting_promoters_by_first_position.pl fimo.gff fimo-position-sorted.gff
+
+$| = 1;
+use warnings;
+use strict;
+
+if (@ARGV < 2) {
+    die "\nUsage: sorting_promoters_by_first_position.pl fimo.gff fimo-position-sorted.gff\n\n";
+}
+my ($fimo_file, $out_file) = @ARGV;
+
+open(my $fimo_fh, '<', $fimo_file) or die "Cannot open '$fimo_file': $!\n";
+open(my $out_fh, '>', $out_file) or die "Cannot open '$out_file' for writing: $!\n";
+
+my $block_key;    # "gene<NUL>motif" of the block being collected
+my @block;        # [start position, line] pairs of that block
+
+while (my $line = <$fimo_fh>) {
+    chomp $line;
+
+    # Header/comment lines are written as soon as they are read.
+    if ($line =~ /^#/) {
+        print {$out_fh} "$line\n";
+        next;
+    }
+
+    my @cols = split ' ', $line;
+    next if @cols < 9;    # not a GFF record (e.g. blank line)
+
+    my $gene = substr $cols[0], 0, 21;
+    my $tf   = substr $cols[8], 5, 8;
+    my $key  = "$gene\0$tf";
+
+    # A change of gene or motif closes the current block.
+    if (!defined $block_key || $key ne $block_key) {
+        flush_block($out_fh, \@block);
+        $block_key = $key;
+    }
+    push @block, [ $cols[3], $line ];
+}
+flush_block($out_fh, \@block);
+
+close($fimo_fh);
+close($out_fh) or die "Cannot close '$out_file': $!\n";
+
+# Write the collected block ordered by start position and empty it.
+# Every line is kept with its own position, so hits sharing a start position
+# are all written instead of one of them being written twice.
+sub flush_block {
+    my ($fh, $block) = @_;
+    for my $hit (sort { $a->[0] <=> $b->[0] } @{$block}) {
+        print {$fh} "$hit->[1]\n";
+    }
+    @{$block} = ();
+    return;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/Matrix/gene-TF-matrix-csv-galaxy.pl	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,62 @@
+#!/usr/bin/perl -w
+
+# Script to create a csv formatted gene vs TF matrix from a filtered gff
+# file. The GFF file can contain just positive or just negative strand TFBS.
+#
+# Usage: gene-TF-matrix.pl fimo.gff gene-matrix.csv matrix_type
+#
+# matrix_type selects what each cell holds:
+#   0  presence/absence: 1 if the gene has at least one hit of the TF, else 0
+#   1  counts: the number of hits of the TF in the gene
+# Columns follow the order in which TFs first appear; rows are sorted by gene.
+
+$| = 1;
+use strict;
+use warnings;
+
+if (@ARGV < 3) {
+    die "\nUsage: gene-TF-matrix.pl fimo-nol-P.gff/fimo-nol-N.gff gene-matrix-P.csv/gene-matrix-N.csv \n"
+      . "\n Options: Presence/Abscence=0 counts=1\n\n";
+}
+my ($fimo_file, $csv_file, $matrixType) = @ARGV;
+
+open(my $fimo_fh, '<', $fimo_file) or die "Cannot open '$fimo_file': $!\n";
+open(my $csv_fh, '>', $csv_file) or die "Cannot open '$csv_file' for writing: $!\n";
+
+print "MatrixTYpe is $matrixType\n";
+
+# One pass over the hits: remember TFs in order of first appearance and count
+# the hits of every gene/TF pair, using the gene and TF parsed from the line.
+my @tf_order;
+my %seen_tf;
+my %hits_for;    # {gene}{TF} => number of hit lines
+while (my $line = <$fimo_fh>) {
+    next if $line =~ /^##/;    # ignore header lines
+    my @cols = split ' ', $line;
+    next if @cols < 9;         # not a GFF record (e.g. blank line)
+
+    my $tf   = substr $cols[8], 5, 8;
+    my $gene = substr $cols[0], 0, 21;
+    push @tf_order, $tf unless $seen_tf{$tf}++;
+    $hits_for{$gene}{$tf}++;
+}
+close($fimo_fh);
+
+# Header row, then one row per gene. print/join is used so that names are
+# never interpreted as printf formats.
+print {$csv_fh} 'Gene,' . join(',', @tf_order) . "\n";
+for my $gene (sort keys %hits_for) {
+    my @cells = map { cell_value($hits_for{$gene}{$_}, $matrixType) } @tf_order;
+    print {$csv_fh} "$gene," . join(',', @cells) . "\n";
+}
+close($csv_fh) or die "Cannot close '$csv_file': $!\n";
+
+# Value written for one gene/TF cell given its hit count and the matrix type.
+# Any matrix type other than 0 or 1 leaves every cell at 0.
+sub cell_value {
+    my ($hits, $type) = @_;
+    return 0 unless $hits;
+    return 1 if $type == 0;
+    return $hits if $type == 1;
+    return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/Matrix/matrix_generator.xml	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,11 @@
+<tool id="matrix_generator" name="Gene-motif matrix" version="1.0.0">
+	<description>Script to create a csv formatted gene vs TF matrix from a filtered gff file. The GFF file can contain just positive or just negative strand TFBS. Two types of matrix can be produced: (0) presence/absence, with only 1s and 0s (option=0); (1) counts of TFs, with numbers such as 1, 3, 5 etc. (option=1).</description>
+	<command interpreter="perl">gene-TF-matrix-csv-galaxy.pl $input $output $value </command>
+	<inputs>
+		<param format="gff" name="input" type="data" label="FIMO file"/>
+		<param name="value" size="1" type="integer" value="0" label="Type of matrix"/>
+	</inputs>
+	<outputs>
+		<data format="csv" name="output" label="Gene-Motif-matrix" />
+	</outputs>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/Motif_search/motif_search_galaxy.pl	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,40 @@
+#!/usr/bin/perl -w
+
+# Script that returns the lines of a gff file according to the ID motifs that we want.
+#
+# Usage: motif_search.pl fimo.gff output.gff motif1 [motif2 ...]
+#
+# A motif argument equal to "#" stands for "not set" and is ignored.
+
+$| = 1;
+use warnings;
+use strict;
+
+if (@ARGV < 2) {
+    die "\nUsage: motif_search.pl fimo.gff output.gff motif1 [motif2 ...]\n\n";
+}
+my ($fimo_file, $out_file, @requested) = @ARGV;
+
+# Every argument after the two file names is a motif id to search for.
+my %is_wanted = map { $_ => 1 } grep { $_ ne "#" } @requested;
+
+open(my $fimo_fh, '<', $fimo_file) or die "Cannot open '$fimo_file': $!\n";
+open(my $out_fh, '>', $out_file) or die "Cannot open '$out_file' for writing: $!\n";
+
+while (my $line = <$fimo_fh>) {
+    chomp $line;
+
+    # Header/comment lines are replaced by a single-space placeholder line.
+    if ($line =~ /^#/) {
+        print {$out_fh} " \n";
+        next;
+    }
+
+    my @cols = split ' ', $line;
+    next if @cols < 9;    # not a GFF record (e.g. blank line)
+    my $tf = substr($cols[8], 5, 8);
+    print {$out_fh} "$line\n" if defined $tf && $is_wanted{$tf};
+}
+
+close($fimo_fh);
+close($out_fh) or die "Cannot close '$out_file': $!\n";
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/Motif_search/motif_search_galaxy.xml	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,15 @@
+<tool id="motif_searcher" name="Motif search" version="1.0.0">
+	<description>tool that returns the lines of a gff file that belong to the requested motif IDs</description>
+	<command interpreter="perl">motif_search_galaxy.pl $input $output $motif1 $motif2 $motif3 $motif4 $motif5</command>
+	<inputs>
+		<param format="gff" name="input" type="data" label="FIMO file"/>
+		<param name="motif1" size="8" type="text" value="#" label="Motif 1"/>
+		<param name="motif2" size="8" type="text" value="#" label="Motif 2"/>
+		<param name="motif3" size="8" type="text" value="#" label="Motif 3"/>
+		<param name="motif4" size="8" type="text" value="#" label="Motif 4"/>
+		<param name="motif5" size="8" type="text" value="#" label="Motif 5"/>
+	</inputs>
+	<outputs>
+		<data format="gff" name="output" label="motif_search_output" />
+	</outputs>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/Motif_search/rules.xml	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,11 @@
+<tool id="rules1" name="Rules" version="1.0.0">
+	<description>tool that looks for genes that have motifs from a certain rule</description>
+	<command interpreter="perl">rules_galaxy.pl $input $output $value</command>
+	<inputs>
+		<param format="gff" name="input" type="data" label="input"/>
+		<param name="value" size="1" type="integer" value="1" label="Number of motifs of the rule: "/>
+	</inputs>
+	<outputs>
+		<data format="gff" name="output" label="rules" />
+	</outputs>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/Motif_search/rules_galaxy.pl	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,68 @@
+#!/usr/bin/perl -w
+
+$|=1;
+use warnings;
+use strict;
+#Script that looks for genes that have motifs from a certain rule.
+#A gene is selected once the requested number of distinct motifs has been seen
+#on it. Every input line of a selected gene is copied to the output file and
+#the selected gene names are listed on STDOUT.
+
+#Checking to see if the user has provided 3 arguments
+if(@ARGV < 3){
+print "\nUsage: rules_galaxy.pl fimo.gff rules.gff number_of_motifs\n\n";
+exit(0);
+}
+
+my ($input_file, $output_file, $num_motifs) = @ARGV;
+
+#The threshold is compared numerically below, so anything else is rejected here.
+if ($num_motifs !~ /^\d+$/) {
+    die "Number of motifs must be a whole number, got '$num_motifs'\n";
+}
+
+my %seen_motif;   #gene => { motif => 1 } for every distinct motif of the gene
+my %motif_count;  #gene => number of distinct motifs seen so far
+my %selected;     #genes that reached $num_motifs distinct motifs
+
+open(my $fimo, '<', $input_file)
+    or die "Cannot open '$input_file' for reading: $!\n";
+open(my $output, '>', $output_file)
+    or die "Cannot open '$output_file' for writing: $!\n";
+
+#First pass: count the distinct motifs of every gene.
+while (my $line = <$fimo>) {
+    next if $line =~ /^ /;           #lines starting with a space are not motif records
+    my @cols = split ' ', $line;
+    next if @cols < 9;               #gene is read from column 1 and the motif from column 9
+    my $TF   = substr $cols[8],5,8;  #8 characters after a 5-character prefix (presumably "Name=")
+    my $gene = substr $cols[0],0,21; #the gene is identified by the first 21 characters
+    next if exists $seen_motif{$gene}{$TF};
+    $seen_motif{$gene}{$TF} = 1;
+    $motif_count{$gene}++;
+    #The count only grows, so testing for equality marks every gene that has
+    #at least $num_motifs distinct motifs.
+    $selected{$gene} = 1 if $motif_count{$gene} == $num_motifs;
+}
+close $fimo;
+
+#Second pass: copy every line that belongs to a selected gene.
+open($fimo, '<', $input_file)
+    or die "Cannot open '$input_file' for reading: $!\n";
+while (my $line = <$fimo>) {
+    next if $line =~ /^ /;
+    chomp $line;
+    my @cols = split ' ', $line;
+    next unless @cols;               #blank line, nothing to look up
+    my $gene = substr $cols[0],0,21;
+    #A hash lookup replaces a scan over every selected gene for each line.
+    print {$output} $line, "\n" if exists $selected{$gene};
+}
+close $fimo;
+close $output or die "Cannot close '$output_file': $!\n";
+
+#Sorted so that the report is the same on every run.
+print "Genes that have this rule:", "\n";
+foreach my $gene_listed (sort keys %selected){
+    print $gene_listed,"\n";
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/Motif_search/sort_genes_galaxy.pl	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,55 @@
+#!/usr/bin/perl -w
+$|=1;
+use warnings;
+use strict;
+
+#Script that takes a gff format file as input and orders it by genes,
+#so it will create an output with all the information grouped by genes. Motifs will be mixed.
+#Lines that start with a space are copied to the output as soon as they are read.
+#The number of lines written is printed on STDOUT.
+
+if(@ARGV < 2){
+print "\nUsage: sort_genes_galaxy.pl fimo.gff fimo-gene-sorted.gff\n\n";
+exit(0);
+}
+
+my ($input_file, $output_file) = @ARGV;
+my %seen;       #gene => motif => start position, for every record already stored
+my @genes;      #gene of every stored record
+my @records;    #the full line of every stored record, same index as @genes
+my $counter=0;  #number of lines written to the output
+
+open(my $fimo, '<', $input_file)
+    or die "Cannot open '$input_file' for reading: $!\n";
+open(my $output, '>', $output_file)
+    or die "Cannot open '$output_file' for writing: $!\n";
+
+while (my $line = <$fimo>) {
+    chomp $line;
+    if ($line=~/^ /){
+        print {$output} $line, "\n";
+        $counter++;
+        next;
+    }
+    my @cols = split ' ', $line;
+    my $gene = substr $cols[0],0,21; #the gene is identified by the first 21 characters
+    my $pos1 = $cols[3];             #start position of the motif
+    my $TF   = substr $cols[8],5,8;  #8 characters after a 5-character prefix (presumably "Name=")
+    #A record repeating the gene, motif and start position of an earlier one is dropped.
+    next if exists $seen{$gene}{$TF}{$pos1};
+    $seen{$gene}{$TF}{$pos1}=1;
+    push @genes, $gene;
+    push @records, $line;
+}
+close $fimo;
+
+#Sorting the indices instead of the lines keeps @genes and @records in step.
+#Ties are broken by the original index so that records of the same gene keep
+#the order they had in the input, whatever sort algorithm perl uses.
+my @order = sort { $genes[$a] cmp $genes[$b] or $a <=> $b } 0..$#genes;
+foreach my $index (@order){
+    print {$output} $records[$index], "\n";
+    $counter++;
+}
+close $output or die "Cannot close '$output_file': $!\n";
+print $counter;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/Motif_search/sort_genes_galaxy.xml	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,10 @@
+<tool id="sort_genes" name="Sort a gff file by genes ID" version="1.0.0">
+	<description>Script that takes a gff format file from MEME suite as input and orders it by genes, so it will create an output with all the information grouped by genes. Motifs will be mixed.</description>
+	<command interpreter="perl">sort_genes_galaxy.pl $input $output</command>
+	<inputs>
+		<param format="gff" name="input" type="data" label="FIMO file"/>
+	</inputs>
+	<outputs>
+		<data format="gff" name="output" label="fimo-position-sorted.gff" />
+	</outputs>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/Motif_search/sort_positions_galaxy.pl	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,64 @@
+#!/usr/bin/perl -w
+$|=1;
+use warnings;
+use strict;
+#Script that takes a gff format file from step1.pl as input and orders
+#each block of gene data by the start position of the motif.
+#The input must already be grouped by gene: a block ends as soon as a line
+#with a different gene is read. Lines that start with a space are copied to
+#the output as soon as they are read. The number of lines written is printed
+#on STDOUT.
+
+if(@ARGV < 2){
+print "\nUsage: sort_positions_galaxy.pl fimo-gene-sorted.gff fimo-gene-and-position-sorted.gff\n\n";
+exit(0);
+}
+
+my ($input_file, $output_file) = @ARGV;
+my @positions;     #start position of every line of the current gene block
+my @records;       #the lines of the current gene block, same index as @positions
+my $current_gene;  #gene of the block being collected, undef before the first one
+my $counter=0;     #number of lines written to the output
+
+open(my $fimo, '<', $input_file)
+    or die "Cannot open '$input_file' for reading: $!\n";
+open(my $output, '>', $output_file)
+    or die "Cannot open '$output_file' for writing: $!\n";
+
+#Writes the collected block ordered by start position and empties it.
+#Every block goes through this routine, the last one included, so two motifs
+#of a gene that share a start position are both written.
+sub print_block {
+    #Ties are broken by the original index to keep the input order.
+    my @order = sort { $positions[$a] <=> $positions[$b] or $a <=> $b } 0..$#positions;
+    foreach my $index (@order){
+        print {$output} $records[$index], "\n";
+        $counter++;
+    }
+    @positions=();
+    @records=();
+    return;
+}
+
+while (my $line = <$fimo>) {
+    chomp $line;
+    if ($line=~/^ /){
+        print {$output} $line, "\n";
+        $counter++;
+        next;
+    }
+    my @cols = split ' ', $line;
+    my $gene = substr $cols[0],0,21; #the gene is identified by the first 21 characters
+    #A different gene closes the current block.
+    if (!defined $current_gene || $gene ne $current_gene) {
+        print_block();
+        $current_gene = $gene;
+    }
+    push @positions, $cols[3];       #start position of the motif
+    push @records, $line;
+}
+close $fimo;
+
+print_block();                       #the last block of the file
+close $output or die "Cannot close '$output_file': $!\n";
+print $counter;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/Motif_search/sort_positions_galaxy.xml	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,10 @@
+<tool id="sort_positions" name="Sort block of genes by first position" version="1.0.0">
+	<description>Script that takes a gff format file from step1.pl as input and orders each block of gene data by the start position of the motif.</description>
+	<command interpreter="perl">sort_positions_galaxy.pl $input $output</command>
+	<inputs>
+		<param format="gff" name="input" type="data" label="FIMO file"/>
+	</inputs>
+	<outputs>
+		<data format="gff" name="output" label="fimo-gene-and-position-sorted.gff" />
+	</outputs>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/Motif_search/testrules_galaxy.pl	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,43 @@
+#!/usr/bin/perl -w
+
+$|=1;
+use warnings;
+use strict;
+#Script that prints out, for every gene of a gff file, the motifs found on it
+#together with their start positions. One line is written per gene:
+#    gene => motif(position)<TAB>motif(position)...
+
+if(@ARGV < 2){
+print "\nUsage: testrules_galaxy.pl fimo.gff testrules.gff \n\n";
+exit(0);
+}
+
+my ($input_file, $output_file) = @ARGV;
+my @gene_order;  #genes in the order they first appear in the input
+my %motifs_of;   #gene => list of "motif(position)" entries
+
+#The first argument is the file to read and the second the file to write.
+open(my $fimo, '<', $input_file)
+    or die "Cannot open '$input_file' for reading: $!\n";
+open(my $output, '>', $output_file)
+    or die "Cannot open '$output_file' for writing: $!\n";
+
+while (my $line = <$fimo>) {
+    my @cols = split ' ', $line;
+    next if @cols < 9;               #gene, position and motif are read from columns 1, 4 and 9
+    my $TF   = substr $cols[8],5,8;  #8 characters after a 5-character prefix (presumably "Name=")
+    my $gene = substr $cols[0],0,21; #the gene is identified by the first 21 characters
+    my $pos1 = $cols[3];             #start position of the motif
+    #Grouping by gene in a hash keeps the motifs of a gene together even if
+    #its lines are not next to each other in the input.
+    push @gene_order, $gene if not exists $motifs_of{$gene};
+    push @{ $motifs_of{$gene} }, "$TF($pos1)";
+}
+close $fimo;
+
+#print is used instead of printf "%s\n", which would only write the first
+#item of the list and drop the rest.
+foreach my $gene (@gene_order){
+    print {$output} $gene, " ", "=>", " ", join("\t", @{ $motifs_of{$gene} }), "\n";
+}
+close $output or die "Cannot close '$output_file': $!\n";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/Motif_search/testrules_galaxy.xml	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,10 @@
+<tool id="test_rules" name="Print out motifs of a certain rule that are present in a gene" version="1.0.0">
+	<description>tool prints out motifs of a certain rule that are present in a gene</description>
+	<command interpreter="perl">testrules_galaxy.pl $input $output</command>
+	<inputs>
+		<param format="gff" name="input" type="data" label="FIMO file"/>
+	</inputs>
+	<outputs>
+		<data format="gff" name="output" label="testrules.gff" />
+	</outputs>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/Second/remove_motifs_galaxy.pl	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,129 @@
+#!/usr/bin/perl -w
+
+$|=1;
+use warnings;
+use strict;
+#Script that takes a gff format file from step2.pl as input and compares contiguous motifs listed in the gff file.
+#If motifs overlap and surpass the threshold, then it will remove that motif with the highest p value.
+#Motifs of the plus and of the minus strand are filtered separately and written to separate files.
+
+if(@ARGV < 4){
+print "\nUsage: rm_overlap_motifs_posneg.pl fimo-test-sue.gff fimo-nol-pos.gff fimo-nol-neg.gff overlap_percentage\n\n";
+exit(0);
+}
+
+my ($input_file, $positive_file, $negative_file, $overlap_percentage) = @ARGV;
+
+open(my $fimo, '<', $input_file)
+    or die "Cannot open '$input_file' for reading: $!\n";
+open(my $positive, '>', $positive_file)
+    or die "Cannot open '$positive_file' for writing: $!\n";
+open(my $negative, '>', $negative_file)
+    or die "Cannot open '$negative_file' for writing: $!\n";
+
+# Getting overlap value from user and testing to see if it's 0-100 and
+# converting to 0-1 scale.
+my $OL;
+if ($overlap_percentage >0.0 && $overlap_percentage <=100){
+   $OL=$overlap_percentage/100;
+}
+else{
+  print" ERROR: overlap is a value 0-100\n";
+    exit(0);
+}
+
+#Each strand keeps its own state, so a decision taken on one strand never
+#touches the p value or the output of the other one:
+#  last   => gene => [start, end] of the last motif kept for that gene
+#  pvalue => p value of the last motif kept on the strand
+#  kept   => lines that will be written for the strand
+my %state_of = (
+    '+' => { last => {}, pvalue => undef, kept => [] },
+    '-' => { last => {}, pvalue => undef, kept => [] },
+);
+
+#Decides whether a motif is kept, replaces the last kept motif or is dropped.
+#  $state    state of the strand of the motif (see %state_of)
+#  $gene     gene the motif was found on
+#  $pos1     start position of the motif
+#  $pos2     end position of the motif
+#  $seq_len  length of the motif sequence
+#  $pvalue   p value of the motif
+#  $line     gff line of the motif
+#Nothing is returned; the state is updated in place.
+sub filter_motif {
+    my ($state, $gene, $pos1, $pos2, $seq_len, $pvalue, $line) = @_;
+    my $previous = $state->{last}{$gene};
+    my $replace  = 0; #true when this motif takes the place of the last kept one
+
+    if (defined $previous) {
+        my ($prev_start, $prev_end) = @{$previous};
+        my $start_inside = ($pos1 >= $prev_start && $pos1 <= $prev_end);
+        my $end_inside   = ($pos2 >= $prev_start && $pos2 <= $prev_end);
+
+        if ($start_inside && $end_inside) {
+            #Completely inside the previous motif: only a lower p value wins.
+            return unless $pvalue < $state->{pvalue};
+            $replace = 1;
+        }
+        elsif ($start_inside || $end_inside) {
+            #Partial overlap: shared stretch relative to the length of this motif.
+            my $shared = $end_inside ? int($pos2 - $prev_start)
+                                     : int($prev_end - $pos1);
+            my $fraction = $shared / $seq_len;
+            if ($fraction > $OL) {
+                #Over the threshold: only a lower p value wins.
+                return unless $pvalue < $state->{pvalue};
+                $replace = 1;
+            }
+            elsif ($fraction == $OL) {
+                #NOTE(review): an overlap exactly equal to the threshold is dropped
+                #whatever its p value, as this script has always done - confirm.
+                return;
+            }
+        }
+    }
+
+    pop @{ $state->{kept} } if $replace;
+    $state->{last}{$gene} = [$pos1, $pos2];
+    $state->{pvalue}      = $pvalue;
+    push @{ $state->{kept} }, $line;
+    return;
+}
+
+while (my $line = <$fimo>) {
+    chomp $line; #avoid \n on last field
+    if ($line=~/^#/){
+        print {$positive} $line, "\n";
+        print {$negative} $line, "\n";
+        next;
+    }
+    my @cols = split ' ', $line;
+    #The strand is read from column 7; lines without a known strand are ignored.
+    next unless defined $cols[6] and exists $state_of{$cols[6]};
+
+    my $gene = substr $cols[0],0,21;
+    my $pos1 = $cols[3]; #start position of the motif
+    my $pos2 = $cols[4]; #end position of the motif
+    my @sequences = split( "=", $cols[9]);
+    my $seq_len = int(length (substr $sequences[1],0,-1)); #returns the length of the sequence
+    ####These variables consider the p value####
+    #NOTE(review): the p value is cut out of column 9 by fixed offsets from its
+    #end (4 characters of mantissa, 3 of exponent) - confirm against the gff files in use.
+    my $decimal = substr $cols[8],-16,4;
+    my $e = substr $cols[8],-11,3;
+    $decimal =~ s/[^.\d]//g; #This removes everything but digits and the dot.
+    my $actual_pvalue = $decimal*(10**$e);
+
+    filter_motif($state_of{$cols[6]}, $gene, $pos1, $pos2, $seq_len, $actual_pvalue, $line);
+}
+close $fimo;
+
+foreach my $lines_pos (@{ $state_of{'+'}{kept} }){
+    print {$positive} $lines_pos, "\n";
+}
+foreach my $lines_neg (@{ $state_of{'-'}{kept} }){
+    print {$negative} $lines_neg, "\n";
+}
+close $positive or die "Cannot close '$positive_file': $!\n";
+close $negative or die "Cannot close '$negative_file': $!\n";
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/Second/remove_motifs_v2.xml	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,12 @@
+<tool id="remove_same_dif_motifs" name="Remove motifs overlaped" version="1.0.0">
+	<description>Script to compare contiguous motifs in gff file and, in case that two overlap, remove the motif with the highest p value</description>
+	<command interpreter="perl">remove_motifs_galaxy.pl $input $output $secondoutput $value </command>
+	<inputs>
+		<param format="gff" name="input" type="data" label="FIMO file"/>
+		<param name="value" size="3" type="integer" value="20" label="Overlap value"/>
+	</inputs>
+	<outputs>
+		<data format="gff" name="output" label="positive_strand" />
+		<data format="gff" name="secondoutput" label="negative_strand" />
+	</outputs>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/Second/step1.xml	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,10 @@
+<tool id="step1" name="Sort a gff file by genes ID" version="1.0.0">
+	<description>Script that takes a gff format file from MEME suite as input and orders it by genes, so it will create an output with all the information grouped by genes. Motifs will be mixed.</description>
+	<command interpreter="perl">step1_galaxy.pl $input $output</command>
+	<inputs>
+		<param format="gff" name="input" type="data" label="FIMO file"/>
+	</inputs>
+	<outputs>
+		<data format="gff" name="output" label="fimo-position-sorted.gff" />
+	</outputs>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/Second/step1_galaxy.pl	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,55 @@
+#!/usr/bin/perl -w
+$|=1;
+use warnings;
+use strict;
+
+#Script that takes a gff format file from MEME suite as input and orders it by genes,
+#so it will create an output with all the information grouped by genes. Motifs will be mixed.
+#Lines that start with # are copied to the output as soon as they are read.
+#The number of lines written is printed on STDOUT.
+
+if(@ARGV < 2){
+print "\nUsage: step1_galaxy.pl fimo.gff fimo-gene-sorted.gff\n\n";
+exit(0);
+}
+
+my ($input_file, $output_file) = @ARGV;
+my %seen;       #gene => motif => start position, for every record already stored
+my @genes;      #gene of every stored record
+my @records;    #the full line of every stored record, same index as @genes
+my $counter=0;  #number of lines written to the output
+
+open(my $fimo, '<', $input_file)
+    or die "Cannot open '$input_file' for reading: $!\n";
+open(my $output, '>', $output_file)
+    or die "Cannot open '$output_file' for writing: $!\n";
+
+while (my $line = <$fimo>) {
+    chomp $line;  #avoid \n on last field
+    if ($line=~/^#/){ #header lines of the gff file are different from the rest
+        print {$output} $line, "\n";
+        $counter++;
+        next;
+    }
+    my @cols = split ' ', $line;
+    my $gene = substr $cols[0],0,21; #the gene is identified by the first 21 characters
+    my $pos1 = $cols[3];             #motif's start position on the gene
+    my $TF   = substr $cols[8],5,8;  #8 characters after a 5-character prefix (presumably "Name=")
+    #A record repeating the gene, motif and start position of an earlier one is dropped.
+    next if exists $seen{$gene}{$TF}{$pos1};
+    $seen{$gene}{$TF}{$pos1}=1;
+    push @genes, $gene;
+    push @records, $line;
+}
+close $fimo;
+
+#Sorting the indices instead of the lines keeps @genes and @records in step.
+#Ties are broken by the original index so that records of the same gene keep
+#the order they had in the input, whatever sort algorithm perl uses.
+my @order = sort { $genes[$a] cmp $genes[$b] or $a <=> $b } 0..$#genes;
+foreach my $index (@order){
+    print {$output} $records[$index], "\n";
+    $counter++;
+}
+close $output or die "Cannot close '$output_file': $!\n";
+print $counter;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/Second/step2.xml	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,10 @@
+<tool id="step2" name="Sort block of genes by first position" version="1.0.0">
+	<description>Script that takes a gff format file from step1.pl as input and orders each block of gene data by the start position of the motif.</description>
+	<command interpreter="perl">step2_galaxy.pl $input $output</command>
+	<inputs>
+		<param format="gff" name="input" type="data" label="FIMO file"/>
+	</inputs>
+	<outputs>
+		<data format="gff" name="output" label="fimo-gene-and-position-sorted.gff" />
+	</outputs>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Tools/Second/step2_galaxy.pl	Mon Sep 05 05:53:08 2016 -0400
@@ -0,0 +1,65 @@
+#!/usr/bin/perl -w
+$|=1;
+use warnings;
+use strict;
+
+#Script that takes a gff format file from step1.pl as input and orders
+#each block of gene data by the start position of the motif.
+#The input must already be grouped by gene: a block ends as soon as a line
+#with a different gene is read. Lines that start with # are copied to the
+#output as soon as they are read. The number of lines written is printed on
+#STDOUT.
+
+if(@ARGV < 2){
+print "\nUsage: step2_galaxy.pl fimo-gene-sorted.gff fimo-gene-and-position-sorted.gff\n\n";
+exit(0);
+}
+
+my ($input_file, $output_file) = @ARGV;
+my @positions;     #start position of every line of the current gene block
+my @records;       #the lines of the current gene block, same index as @positions
+my $current_gene;  #gene of the block being collected, undef before the first one
+my $counter=0;     #number of lines written to the output
+
+open(my $fimo, '<', $input_file)
+    or die "Cannot open '$input_file' for reading: $!\n";
+open(my $output, '>', $output_file)
+    or die "Cannot open '$output_file' for writing: $!\n";
+
+#Writes the collected block ordered by start position and empties it.
+#Every block goes through this routine, the last one included, so two motifs
+#of a gene that share a start position are both written.
+sub print_block {
+    #Ties are broken by the original index to keep the input order.
+    my @order = sort { $positions[$a] <=> $positions[$b] or $a <=> $b } 0..$#positions;
+    foreach my $index (@order){
+        print {$output} $records[$index], "\n";
+        $counter++;
+    }
+    @positions=();
+    @records=();
+    return;
+}
+
+while (my $line = <$fimo>) {
+    chomp $line;  #avoid \n on last field
+    if ($line=~/^#/){ #header lines of the gff file are different from the rest
+        print {$output} $line, "\n";
+        $counter++;
+        next;
+    }
+    my @cols = split ' ', $line;
+    my $gene = substr $cols[0],0,21; #the gene is identified by the first 21 characters
+    #A different gene closes the current block.
+    if (!defined $current_gene || $gene ne $current_gene) {
+        print_block();
+        $current_gene = $gene;
+    }
+    push @positions, $cols[3];       #motif's first position on the gene
+    push @records, $line;
+}
+close $fimo;
+
+print_block();                       #the last block of the file
+close $output or die "Cannot close '$output_file': $!\n";
+print $counter;