Galaxy |

Changeset 15:440ceca58672 (2013-04-22)

Previous changeset 14:c79b9ae3f65f (2013-04-19) Next changeset 16:6135c3075bc5 (2013-04-22)

Commit message:
Uploaded

removed:
SMART/galaxy/WrappGetLetterDistribution.py
SMART/galaxy/__init__.py
SMART/galaxy/removeExonLines.sh
SMART/galaxy/test/CollapseReads.xml
SMART/galaxy/test/Test_F_WrappGetLetterDistribution.py
SMART/galaxy/test/__init__.py

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/CleanTranscriptFile.xml
--- a/SMART/galaxy/CleanTranscriptFile.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/CleanTranscriptFile.xml Mon Apr 22 11:08:07 2013 -0400

@@ -1,5 +1,5 @@
-<tool id="CleanTranscriptFile" name="Clean Transcript File">
- <description> Clean a transcript file so that it is useable for S-MART.</description>
+<tool id="CleanTranscriptFile" name="clean Transcript File">
+ <description>Clean a transcript file so that it is useable for S-MART.</description>
<command interpreter="python"> ../Java/Python/CleanTranscriptFile.py -i $formatType.inputFileName
#if $formatType.FormatInputFileName == 'gff':
-f gff
@@ -57,4 +57,17 @@
</data>

</outputs>
+<tests>
+    <test>
+      <param name="FormatInputFileName" value="gtf" />
+      <param name="inputFileName" value="genes.gtf" />
+      <param name="type" value="No" />
+      <output name="outputFile" file="exp_cleantranscriptfile_genes.gtf" />
+    </test>
+  </tests>
+
+ <help>
+ A GFF/GTF file (please consult http://www.sequenceontology.org/gff3.shtml to know more about the GFF3 format, and http://mblab.wustl.edu/GTF22.html for the GTF format) may contain different sources of information: chromosome size, genes, transcripts, etc. S-MART mostly works on transcripts. This script filters the input file to keep the information you really want, based on the feature (3rd column).
+ </help>
+
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/Clusterize.xml
--- a/SMART/galaxy/Clusterize.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/Clusterize.xml Mon Apr 22 11:08:07 2013 -0400

@@ -1,5 +1,5 @@
-<tool id="MergingDataClusterize" name="Clusterize">
- <description>Clusterizes the reads when their genomic intervals overlap.</description>
+<tool id="MergingDataClusterize" name="clusterize">
+ <description>Clusterize features when their genomic intervals overlap.</description>
<command interpreter="python">
../Java/Python/clusterize.py -i $formatType.inputFileName
#if $formatType.FormatInputFileName == 'bed':
@@ -10,8 +10,6 @@
-f gff2
#elif $formatType.FormatInputFileName == 'gff3':
-f gff3
- #elif $formatType.FormatInputFileName == 'csv':
- -f csv
#elif $formatType.FormatInputFileName == 'sam':
-f sam
#elif $formatType.FormatInputFileName == 'gtf':
@@ -21,7 +19,6 @@
$colinear
$normalize
-d $distance
- $log $outputFileLog
</command>

<inputs>
@@ -31,7 +28,6 @@
<option value="gff">gff</option>
<option value="gff2">gff2</option>
<option value="gff3">gff3</option>
- <option value="csv">csv</option>
<option value="sam">sam</option>
<option value="gtf">gtf</option>
</param>
@@ -47,9 +43,6 @@
<when value="gff3">
<param name="inputFileName" format="gff3" type="data" label="Input File"/>
</when>
- <when value="csv">
- <param name="inputFileName" format="csv" type="data" label="Input File"/>
- </when>
<when value="sam">
<param name="inputFileName" format="sam" type="data" label="Input File"/>
</when>
@@ -58,16 +51,20 @@
</when>
</conditional>

- <param name="colinear" type="boolean" truevalue="-c" falsevalue="" checked="false" label="colinear option" help="This option clusterizes only the same strand reads"/>
- <param name="normalize" type="boolean" truevalue="-n" falsevalue="" checked="false" label="normalize option for only GFF3 file format" help="This option normalize (attention!! Only for GFF3 file!!!!!)"/>
- <param name="log" type="boolean" truevalue="-l" falsevalue="" checked="false" label="log option" help="This option create a log file"/>
- <param name="distance" type="text" value="0" label="distance option" help="Limit the maximum distance between two reads"/>
+ <param name="colinear" type="boolean" truevalue="-c" falsevalue="" checked="false" label="Only merge collinear features"/>
+ <param name="normalize" type="boolean" truevalue="-n" falsevalue="" checked="false" label="Normalize counts" help="Only works if the nbOccurrences tag is set."/>
+ <param name="distance" type="text" value="0" label="merge features if their relative distance is within N nt"/>
</inputs>

<outputs>
<data name="outputFileGff" format="gff3"/>
- <data name="outputFileLog" format="txt">
- <filter>log</filter>
- </data>
</outputs>
+
+ <help>
+The script clusterizes the input genomic data. Two features are clusterized when their genomic intervals overlap. The output is a GFF3 file, where each element is a cluster. The number of elements in the cluster is given by the tag **nbElements**. The name of a cluster is the concatenation of the names of its reads (like **read1--read2--read3**). Note that if the size of the name of the cluster exceeds 100 characters, it is truncated to the first 100 characters.
+
+Some options may clusterize the features which are closer than a given distance.
+
+By default, the tool clusterizes all features which overlap (or nearly overlap), even if they are on different strands. If you want to clusterize the features which are on the same strand only, you can specify it.
+ </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/CollapseReads.xml
--- a/SMART/galaxy/CollapseReads.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/CollapseReads.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -1,5 +1,5 @@
<tool id="collapseReads" name="collapse reads">
- <description>Merges two reads if they have exactly the same genomic coordinates.</description>
+ <description>Merges two genomic features if they have exactly the same genomic coordinates.</description>
<command interpreter="python">
../Java/Python/CollapseReads.py -i $formatType.inputFileName
#if $formatType.FormatInputFileName == 'bed':
@@ -49,11 +49,16 @@
</when>
</conditional>

- <param name="strand" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Strand option merges 2 different strands[default:False]."/>
+ <param name="strand" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Merges features even if they are on different strands."/>
</inputs>

<outputs>
<data name="outputFileGff" format="gff3"/>
</outputs>

+ <help>
+Merge two input genomic coordinates iff they are exactly the same. If two or more genomic coordinates are merged, the tag **nbElements** is updated accordingly. As a consequence, all the reads which are exactly the same appear as one genomic coordinate.
+
+This is especially useful for short RNA sequencing (where you want to count the number of reads per miRNA, siRNA, etc.) or 5' capped short reads.
+ </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/CompareOverlappingSmallQuery.xml
--- a/SMART/galaxy/CompareOverlappingSmallQuery.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/CompareOverlappingSmallQuery.xml Mon Apr 22 11:08:07 2013 -0400

@@ -1,5 +1,5 @@
-<tool id="CompareOverlappingSmallQuery" name="Compare Overlapping Small Query">
- <description>Provide the queries that overlap with a reference, when the query is small.</description>
+<tool id="CompareOverlappingSmallQuery" name="compare Overlapping Small Query">
+ <description>Provide the queries that overlap with a reference, when the query data set is small.</description>
<command interpreter="python">
../Java/Python/CompareOverlappingSmallQuery.py -i $formatType.inputFileName1
#if $formatType.FormatInputFileName1 == 'bed':
@@ -36,13 +36,21 @@
#if $OptionMinOverlap.present == 'Yes':
-m $OptionMinOverlap.minOverlap
#end if
+ #if $OptionPcOverlapQuery.present == 'Yes':
+ -p $OptionPcOverlapQuery.minOverlap
+ #end if
+ #if $OptionPcOverlapRef.present == 'Yes':
+ -P $OptionPcOverlapRef.minOverlap
+ #end if
#if $OptionCollinearOrAntiSens.OptionCA == 'Collinear':
-c
- #elif $OptionCollinearOrAntiSens.OptionCA == 'Antisense':
+ #elif $OptionCollinearOrAntiSens.OptionCA == 'AntiSens':
-a
#end if
$InvertMatch
$NotOverlapping
+ $OptionInclusionQuery
+ $OptionInclusionRef
</command>

<inputs>
@@ -104,7 +112,7 @@
</when>
</conditional>
<conditional name="OptionDistance">
- <param name="Dist" type="select" label="Maximum Distance between two regions">
+ <param name="Dist" type="select" label="Maximum Distance between two reads">
<option value="Yes">Yes</option>
<option value="No" selected="true">No</option>
</param>
@@ -125,24 +133,68 @@
<when value="No">
</when>
</conditional>
+ <conditional name="OptionPcOverlapQuery">
+ <param name="present" type="select" label="N% of the query must overlap">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="minOverlap" type="integer" value="0"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+ <conditional name="OptionPcOverlapRef">
+ <param name="present" type="select" label="N% of the reference must overlap">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="minOverlap" type="integer" value="0"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+ <param name="OptionInclusionQuery" type="boolean" truevalue="-k" falsevalue="" checked="false" label="The query must be nested in a reference"/>
+ <param name="OptionInclusionRef" type="boolean" truevalue="-K" falsevalue="" checked="false" label="The reference must be nested in a query"/>
<conditional name="OptionCollinearOrAntiSens">
- <param name="OptionCA" type="select" label="Collinear or Antisense">
+ <param name="OptionCA" type="select" label="Collinear or anti-sens">
<option value="Collinear">Collinear</option>
- <option value="Antisense">Antisense</option>
+ <option value="AntiSens">AntiSens</option>
<option value="NONE" selected="true">NONE</option>
</param>
<when value="Collinear">
</when>
- <when value="Antisense">
+ <when value="AntiSens">
</when>
<when value="NONE">
</when>
</conditional>
- <param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match"/>
- <param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="When there is no overlapping, the number of Overlapping will be set to 0 by default."/>
+ <param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match: the output file will contain all query elements which do NOT overlap"/>
+ <param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="Also report the query data which do not overlap, with the nbOverlaps tag set to 0."/>
</inputs>

<outputs>
<data name="outputFileGff" format="gff3"/>
</outputs>
+
+ <help>
+This script may be the most important one. It basically compares two sets of transcripts and keeps those from the first set which overlap with the second one. The first set is considered as the query set (basically, your data) and the second one is the reference set (RefSeq data, for example).
+
+It is vital to understand that it will output the elements of the first file which overlap with the elements of the second one.
+
+Various modifiers are also available:
+
+-Invert selection (report those which do not overlap).
+
+-Restrict to colinear / anti-sense overlapping data.
+
+-Keep the query data even if they do not strictly overlap with the reference data, but are located no further than *n* nucleotides from some reference data.
+
+-Keep the query data which are strictly included in the reference data, meaning that a query transcript with at least 1 nucleotide that does not overlap the reference data will not be presented as a solution.
+
+The mechanism of shrinking and extending is also useful to make a fine-grained comparison. For example, if you want to keep those such that the TSS is overlapping the reference set, you just shrink the query set to 1 nucleotide. Now, if you want to keep those which are overlapping your data or located 2kb downstream of it, just extend the query data in the downstream direction, and you will have what you want. You can also extend in the opposite direction to get the possible transcription factor sites which are upstream.
+
+Some option reverses the selection. Put in other words, it performs the comparison as usual, and outputs all those query data which do not overlap.
+ </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/CompareOverlappingSmallRef.xml
--- a/SMART/galaxy/CompareOverlappingSmallRef.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/CompareOverlappingSmallRef.xml Mon Apr 22 11:08:07 2013 -0400

@@ -1,5 +1,5 @@
-<tool id="CompareOverlappingSmallRef" name="Compare Overlapping Small Reference">
- <description>Provide the queries that overlap with a reference, when the reference is small.</description>
+<tool id="CompareOverlappingSmallRef" name="compare Overlapping Small Reference">
+ <description>Provide the queries that overlap with a reference, when the reference dataset is small.</description>
<command interpreter="python">
../Java/Python/CompareOverlappingSmallQuery.py -i $formatType.inputFileName1
#if $formatType.FormatInputFileName1 == 'bed':
@@ -155,8 +155,8 @@
<when value="No">
</when>
</conditional>
- <param name="OptionInclusionQuery" type="boolean" truevalue="-k" falsevalue="" checked="false" label="The query must nested in a query"/>
- <param name="OptionInclusionRef" type="boolean" truevalue="-K" falsevalue="" checked="false" label="The reference must nested in a query"/>
+ <param name="OptionInclusionQuery" type="boolean" truevalue="-k" falsevalue="" checked="false" label="The query must be nested in a query"/>
+ <param name="OptionInclusionQuery" type="boolean" truevalue="-k" falsevalue="" checked="false" label="The query must be nested in a reference"/>
<conditional name="OptionCollinearOrAntiSens">
<param name="OptionCA" type="select" label="Collinear or anti-sens">
<option value="Collinear">Collinear</option>
@@ -170,11 +170,31 @@
<when value="NONE">
</when>
</conditional>
- <param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match"/>
- <param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="When there is no overlapping, the number of Overlapping will be set to 0 by defalt."/>
+ <param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match: the output file will contain all query elements which do NOT overlap"/>
+ <param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="Also report the query data which do not overlap, with the nbOverlaps tag set to 0."/>
</inputs>

<outputs>
<data name="outputFileGff" format="gff3"/>
</outputs>
+
+ <help>
+This script may be the most important one. It basically compares two sets of transcripts and keeps those from the first set which overlap with the second one. The first set is considered as the query set (basically, your data) and the second one is the reference set (RefSeq data, for example).
+
+It is vital to understand that it will output the elements of the first file which overlap with the elements of the second one.
+
+Various modifiers are also available:
+
+-Invert selection (report those which do not overlap).
+
+-Restrict to colinear / anti-sense overlapping data.
+
+-Keep the query data even if they do not strictly overlap with the reference data, but are located no further than *n* nucleotides from some reference data.
+
+-Keep the query data which are strictly included in the reference data, meaning that a query transcript with at least 1 nucleotide that does not overlap the reference data will not be presented as a solution.
+
+The mechanism of shrinking and extending is also useful to make a fine-grained comparison. For example, if you want to keep those such that the TSS is overlapping the reference set, you just shrink the query set to 1 nucleotide. Now, if you want to keep those which are overlapping your data or located 2kb downstream of it, just extend the query data in the downstream direction, and you will have what you want. You can also extend in the opposite direction to get the possible transcription factor sites which are upstream.
+
+Some option reverses the selection. Put in other words, it performs the comparison as usual, and outputs all those query data which do not overlap.
+ </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/ConvertTranscriptFile.xml
--- a/SMART/galaxy/ConvertTranscriptFile.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/ConvertTranscriptFile.xml Mon Apr 22 11:08:07 2013 -0400

@@ -1,12 +1,10 @@
-<tool id="ConvertTranscriptFile" name="Convert transcript file">
+<tool id="ConvertTranscriptFile" name="convert transcript file">
   <description>Convert a file from a format to another.</description>
   <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFormatType.inputFileName
  #if $inputFormatType.FormatInputFileName == 'gff3':
   -f gff3
  #elif $inputFormatType.FormatInputFileName == 'bed':
-f bed
-   #elif $inputFormatType.FormatInputFileName == 'gff2':
- -f gff2
  #elif $inputFormatType.FormatInputFileName == 'bam':
-f blast
  #elif $inputFormatType.FormatInputFileName == 'sam':
@@ -16,10 +14,6 @@
           #end if

-g $outputFormatType.outFormat
-     #if $optionSequence.choose == 'Yes':
-    -s $optionSequence.value
- #end if
-

  -n $name
  $strand
@@ -31,7 +25,6 @@
  <param name="FormatInputFileName"  type="select" label="Input File Format">
  <option value="gff3">GFF3</option>
  <option value="bed">BED</option>
-   <option value="gff2">GFF2</option>
  <option value="bam">BAM</option>
  <option value="sam">SAM</option>
  <option value="gtf">GTF</option>
@@ -42,9 +35,6 @@
  <when value="bed">
  <param name="inputFileName" format="bed" type="data" label="Input File"/>
  </when>
-   <when value="gff2">
-   <param name="inputFileName" format="gff2" type="data" label="Input File"/>
-   </when>
  <when value="bam">
  <param name="inputFileName" format="bam" type="data" label="Input File"/>
  </when>
@@ -61,7 +51,6 @@
  <param name="outFormat"  type="select" label="Please choose the format that you want to convert to (corresponding to your input file format).">
  <option value="gff3">GFF3</option>
  <option value="bed">BED</option>
-   <option value="gff2">GFF2</option>
  <option value="wig">WIG</option>
  <option value="sam">SAM</option>
  <option value="csv">CSV</option>
@@ -71,8 +60,6 @@
  </when>
  <when value="bed">
  </when>
-     <when value="gff2">
-   </when>
  <when value="wig">
  </when>
      <when value="sam">
@@ -85,18 +72,6 @@

   <param name="name" type="text" value="SMART" label="name for the transcripts"/>

- <conditional name="optionSequence">
- <param name="choose" type="select" label="give the corresponding Multi-Fasta file (useful for EMBL format)">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="value" type="data" format="mfa" />
- </when>
- <when value="No">
- </when>
- </conditional>
-
   <param name="strand" type="boolean" truevalue="-t" falsevalue="" checked="false" label="consider the 2 strands as different (only useful for writing WIG files)"/>

   </inputs>
@@ -105,7 +80,7 @@
    <data name="outputFile" format="gff3" label="$inputFormatType.FormatInputFileName to $outputFormatType.outFormat">
<change_format>
<when input="outputFormatType.outFormat" value="bed" format="bed" />
- <when input="outputFormatType.outFormat" value="gff2" format="gff2" />
+ <when input="outputFormatType.outFormat" value="gff" format="gff" />
<when input="outputFormatType.outFormat" value="wig" format="wig" />
<when input="outputFormatType.outFormat" value="sam" format="sam" />
<when input="outputFormatType.outFormat" value="csv" format="csv" />
@@ -115,5 +90,6 @@
   </outputs>

   <help>
+Simple conversion tool.
   </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/ConvertTranscriptFile_BedToCsv.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_BedToCsv.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_BedToCsv" name="Bed -> Csv">
+  <description>Convert Bed File to Csv File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f bed -o $outputFile -g csv yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="bed"/>
+  </inputs>
+
+  <outputs>
+    <data format="csv" name="outputFile" label="[bed -> csv] Output File"/>
+    <data format="txt" name="logFile" label="[bed -> csv] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/ConvertTranscriptFile_BedToGff2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_BedToGff2.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_BedToGff2" name="Bed -> Gff2">
+  <description>Convert Bed File to Gff2 File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f bed -o $outputFile -g gff yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="bed"/>
+  </inputs>
+
+  <outputs>
+    <data format="gff" name="outputFile" label="[bed -> gff2] Output File"/>
+    <data format="txt" name="logFile" label="[bed -> gff2] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/ConvertTranscriptFile_BedToGff3.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_BedToGff3.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_BedToGff3" name="Bed -> Gff3">
+  <description>Convert Bed File to Gff3 File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f bed -o $outputFile -g gff3 yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="bed"/>
+  </inputs>
+
+  <outputs>
+    <data format="gff3" name="outputFile" label="[bed -> gff3] Output File"/>
+    <data format="txt" name="logFile" label="[bed -> gff3] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/ConvertTranscriptFile_BedToSam.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_BedToSam.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_BedToSam" name="Bed -> Sam">
+  <description>Convert Bed File to Sam File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f bed -o $outputFile -g sam yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="bed"/>
+  </inputs>
+
+  <outputs>
+    <data format="sam" name="outputFile" label="[bed -> sam] Output File"/>
+    <data format="txt" name="logFile" label="[bed -> sam] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/ConvertTranscriptFile_BlastToCsv.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_BlastToCsv.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_BlastToCsv" name="Blast (-m 8) -> Csv">
+  <description>Convert Blast (-m 8) File to Csv File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f blast -o $outputFile -g csv yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="tabular"/>
+  </inputs>
+
+  <outputs>
+    <data format="csv" name="outputFile" label="[blast -> csv] Output File"/>
+    <data format="txt" name="logFile" label="[blast -> csv] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/ConvertTranscriptFile_BlastToGff2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_BlastToGff2.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_BlastToGff2" name="Blast (-m 8) -> Gff2">
+  <description>Convert Blast (-m 8) File to Gff2 File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f blast -o $outputFile -g gff2 yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="tabular"/>
+  </inputs>
+
+  <outputs>
+    <data format="gff" name="outputFile" label="[blast -> gff2] Output File"/>
+    <data format="txt" name="logFile" label="[blast -> gff2] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/ConvertTranscriptFile_BlastToGff3.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_BlastToGff3.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_BlastToGff3" name="Blast (-m 8) -> Gff3">
+  <description>Convert Blast (-m 8) File to Gff3 File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f blast -o $outputFile -g gff3 yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="tabular"/>
+  </inputs>
+
+  <outputs>
+    <data format="gff3" name="outputFile" label="[blast -> gff3] Output File"/>
+    <data format="txt" name="logFile" label="[blast -> gff3] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/ConvertTranscriptFile_BlastToSam.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_BlastToSam.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_BlastToSam" name="Blast (-m 8) -> Sam">
+  <description>Convert Blast (-m 8) File to Sam File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f blast -o $outputFile -g sam yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="tabular"/>
+  </inputs>
+
+  <outputs>
+    <data format="sam" name="outputFile" label="[blast -> sam] Output File"/>
+    <data format="txt" name="logFile" label="[blast -> sam] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/ConvertTranscriptFile_FastqToFasta.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_FastqToFasta.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_FastqToFasta" name="Fastq -> Fasta">
+  <description>Convert Fastq File to Fasta File.</description>
+  <command interpreter="python"> ../Java/Python/fastqToFasta.py -i $inputFile -o $outputFile 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="fastq"/>
+  </inputs>
+
+  <outputs>
+    <data format="fasta" name="outputFile" label="[fastq -> fasta] Output File"/>
+    <data format="txt" name="logFile" label="[fastq -> fasta] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/ConvertTranscriptFile_Gff2ToCsv.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_Gff2ToCsv.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_Gff2ToCsv" name="Gff2 -> Csv">
+  <description>Convert Gff2 File to Csv File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff2 -o $outputFile -g csv yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="gff"/>
+  </inputs>
+
+  <outputs>
+    <data format="csv" name="outputFile" label="[gff2 -> csv] Output File"/>
+    <data format="txt" name="logFile" label="[gff2 -> csv] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/ConvertTranscriptFile_Gff2ToGff3.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_Gff2ToGff3.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_Gff2ToGff3" name="Gff2 -> Gff3">
+  <description>Convert Gff2 File to Gff3 File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff2 -o $outputFile -g gff3 yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="gff"/>
+  </inputs>
+
+  <outputs>
+    <data format="gff3" name="outputFile" label="[gff2 -> gff3] Output File"/>
+    <data format="txt" name="logFile" label="[gff2 -> gff3] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/ConvertTranscriptFile_Gff2ToSam.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_Gff2ToSam.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_Gff2ToSam" name="Gff2 -> Sam">
+  <description>Convert Gff2 File to Sam File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff2 -o $outputFile -g sam yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="gff"/>
+  </inputs>
+
+  <outputs>
+    <data format="sam" name="outputFile" label="[gff2 -> sam] Output File"/>
+    <data format="txt" name="logFile" label="[gff2 -> sam] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/ConvertTranscriptFile_Gff3ToCsv.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_Gff3ToCsv.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_Gff3ToCsv" name="Gff3 -> Csv">
+  <description>Convert Gff3 File to Csv File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff3 -o $outputFile -g csv yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="gff3"/>
+  </inputs>
+
+  <outputs>
+    <data format="csv" name="outputFile" label="[gff3 -> csv] Output File"/>
+    <data format="txt" name="logFile" label="[gff3 -> csv] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/ConvertTranscriptFile_Gff3ToGff2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_Gff3ToGff2.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_Gff3ToGff2" name="Gff3 -> Gff2">
+  <description>Convert Gff3 File to Gff2 File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff3 -o $outputFile -g gff2 yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="gff3"/>
+  </inputs>
+
+  <outputs>
+    <data format="gff" name="outputFile" label="[gff3 -> gff2] Output File"/>
+    <data format="txt" name="logFile" label="[gff3 -> gff2] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/ConvertTranscriptFile_Gff3ToSam.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_Gff3ToSam.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_Gff3ToSam" name="Gff3 -> Sam">
+  <description>Convert Gff3 File to Sam File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff3 -o $outputFile -g sam yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="gff3"/>
+  </inputs>
+  <!-- Both outputs are filled by the command above: outputFile is passed as -o, logFile receives the script's stderr (2> redirect). NOTE(review): the trailing "yes" argument is not explained in this wrapper; confirm its meaning against convertTranscriptFile.py. -->
+  <outputs>
+    <data format="sam" name="outputFile" label="[gff3 -> sam] Output File"/>
+    <data format="txt" name="logFile" label="[gff3 -> sam] Log File"/>
+  </outputs>
+  <!-- The help section below is still empty. -->
+  <help>
+  </help>
+</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/ConvertTranscriptFile_Gff3ToWig.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_Gff3ToWig.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_Gff3ToWig" name="Gff3 -> Wig">
+  <description>Convert Gff3 File to Wig File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff3 -o $outputFile -g wig yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="gff3"/>
+  </inputs>
+  <!-- Both outputs are filled by the command above: outputFile is passed as -o, logFile receives the script's stderr (2> redirect). NOTE(review): the trailing "yes" argument is not explained in this wrapper; confirm its meaning against convertTranscriptFile.py. -->
+  <outputs>
+    <data format="wig" name="outputFile" label="[gff3 -> wig] Output File"/>
+    <data format="txt" name="logFile" label="[gff3 -> wig] Log File"/>
+  </outputs>
+  <!-- The help section below is still empty. -->
+  <help>
+  </help>
+</tool>
\ No newline at end of file

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/ConvertTranscriptFile_SamToCsv.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_SamToCsv.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_SamToCsv" name="Sam -> Csv">
+  <description>Convert Sam File to Csv File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f sam -o $outputFile -g csv yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="sam"/>
+  </inputs>
+  <!-- Both outputs are filled by the command above: outputFile is passed as -o, logFile receives the script's stderr (2> redirect). NOTE(review): the trailing "yes" argument is not explained in this wrapper; confirm its meaning against convertTranscriptFile.py. -->
+  <outputs>
+    <data format="csv" name="outputFile" label="[sam -> csv] Output File"/>
+    <data format="txt" name="logFile" label="[sam -> csv] Log File"/>
+  </outputs>
+  <!-- The help section below is still empty. -->
+  <help>
+  </help>
+</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/ConvertTranscriptFile_SamToGff2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_SamToGff2.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_SamToGff2" name="Sam -> Gff2">
+  <description>Convert Sam File to Gff2 File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f sam -o $outputFile -g gff2 yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="sam"/>
+  </inputs>
+  <!-- Both outputs are filled by the command above: outputFile is passed as -o (the gff2 result is declared with the dataset format "gff"), logFile receives the script's stderr (2> redirect). NOTE(review): the trailing "yes" argument is not explained in this wrapper; confirm its meaning against convertTranscriptFile.py. -->
+  <outputs>
+    <data format="gff" name="outputFile" label="[sam -> gff2] Output File"/>
+    <data format="txt" name="logFile" label="[sam -> gff2] Log File"/>
+  </outputs>
+  <!-- The help section below is still empty. -->
+  <help>
+  </help>
+</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/ConvertTranscriptFile_SamToGff3.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/ConvertTranscriptFile_SamToGff3.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -0,0 +1,15 @@
+<tool id="ConvertTranscriptFile_SamToGff3" name="Sam -> Gff3">
+  <description>Convert Sam File to Gff3 File.</description>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f sam -o $outputFile -g gff3 yes 2>$logFile </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="sam"/>
+  </inputs>
+  <!-- Both outputs are filled by the command above: outputFile is passed as -o, logFile receives the script's stderr (2> redirect). NOTE(review): the trailing "yes" argument is not explained in this wrapper; confirm its meaning against convertTranscriptFile.py. -->
+  <outputs>
+    <data format="gff3" name="outputFile" label="[sam -> gff3] Output File"/>
+    <data format="txt" name="logFile" label="[sam -> gff3] Log File"/>
+  </outputs>
+  <!-- The help section below is still empty. -->
+  <help>
+  </help>
+</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/CountReadGCPercent.xml
--- a/SMART/galaxy/CountReadGCPercent.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/CountReadGCPercent.xml Mon Apr 22 11:08:07 2013 -0400

@@ -11,6 +11,7 @@
    </outputs>

   <help>
+Count the GC% of a FASTA file.
   </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/GetDifferentialExpression.xml
--- a/SMART/galaxy/GetDifferentialExpression.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/GetDifferentialExpression.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -51,7 +51,6 @@
$simple
$adjusted

-
#if $optionSimplePara.simplePara == 'Yes':
-S $optionSimplePara.paraValue
#end if
@@ -63,7 +62,6 @@
#if $optionFDR.FDR == 'Yes':
-d $optionFDR.FDRValue
#end if
- $plot $outputFilePNG
</command>

<inputs>
@@ -154,8 +152,8 @@
</when>
</conditional>

- <param name="simple" type="boolean" truevalue="-s" falsevalue="" checked="false" label="normalize using the number of reads in each condition"/>
- <param name="adjusted" type="boolean" truevalue="-a" falsevalue="" checked="false" label="normalize using the number of reads of 'mean' regions"/>
+ <param name="simple" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Normalize using the number of reads in each condition"/>
+ <param name="adjusted" type="boolean" truevalue="-a" falsevalue="" checked="false" label="Normalize using the number of reads of interquartile expression region"/>

<conditional name="optionSimplePara">
<param name="simplePara" type="select" label="provide the number of reads" >
@@ -170,7 +168,7 @@
</conditional>

<conditional name="optionFixedSizeFactor">
- <param name="FSF" type="select" label="give the magnification factor for the normalization using fixed size sliding windows in reference regions (leave empty for no such normalization)">
+ <param name="FSF" type="select" label="Give the magnification factor for the normalization using fixed size sliding windows in reference regions (leave empty for no such normalization)">
<option value="Yes">Yes</option>
<option value="No" selected="true">No</option>
</param>
@@ -193,18 +191,21 @@
</when>
</conditional>

- <param name="plot" type="boolean" truevalue="-p" falsevalue="" checked="false" label="plot option" help="plot cloud plot"/>
-
</inputs>

<outputs>
<data name="outputFileGff" format="gff3" label="[GetDifferentialExpression]out file"/>
- <data name="outputFilePNG" format="PNG" label="[GetDifferentialExpression]PNG file">
- <filter>plot</filter>
- </data>
</outputs>

<help>
- example: python GetDifferentialExpression.py -i input1 -f gff3 -j input2 -g gff3 -k ref -l gff3 -o output.gff3
+This tool compares two sets of data and finds the differential expression. One very important component of the tool is the reference set. Actually, to use the tool, you need the two input sets of data, of course, and the reference set. The reference set is a set of genomic coordinates and, for each interval, the tool will count the number of features in each sample and compute the differential expression. For each reference interval, it will output the direction of the regulation (up or down, with respect to the first input set), and a *p*-value from a Fisher exact test.
+
+This reference set seems boring. Why not compute the differential expression without this set? The answer is: the differential expression of what? I cannot guess it. Actually, you might want to compare the expression of genes, of small RNAs, of transposable elements, of anything... So the reference set can be a list of genes, and in this case, you can compute the differential expression of genes. But you can also compute many other things.
+
+Suppose that you cluster the data of your two input samples (you can do it with the *clusterize* and the *mergeTranscriptLists* tools). You now have a list of all the regions which are transcribed in at least one of the input samples. This can be your reference set. This reference set is interesting since you can detect the differential expression of data which is outside any annotation.
+
+Suppose now that you clusterize the two input samples using a sliding window (you can do it with the *clusterizeBySlidingWindows* and the *mergeSlidingWindowsClusters* tools). You can now select all the regions of a given size which contain at least one read in one of the two input samples (do it with *selectByTag* and the tag **nbElements**). Again, this can be another interesting reference set.
+
+In most cases, the sizes of the two input samples will be different, so you should probably normalize the data, which is an available option. The ---rather crude--- normalization increases the number of data in the least populated sample and decreases the number of data in the most populated sample to the average number of data.
</help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/GetFlanking.xml
--- a/SMART/galaxy/GetFlanking.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/GetFlanking.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -52,9 +52,6 @@
#end if

    -o $outputFile
-
-
-
</command>

<inputs>
@@ -117,7 +114,7 @@
</conditional>

<conditional name="OptionUpDownStream">
- <param name="OptionUD" type="select" label="UpStream or DownStream">
+ <param name="OptionUD" type="select" label="Only provide upstream/dowstream features">
<option value="UpStream">UpStream</option>
<option value="DownStream">DownStream</option>
<option value="NONE" selected="true">NONE</option>
@@ -131,8 +128,8 @@
</conditional>

<conditional name="OptionColinearOrAntiSens">
- <param name="OptionCA" type="select" label="Colinear or anti-sens">
- <option value="Colinear">Colinear</option>
+ <param name="OptionCA" type="select" label="Only provide collinear/antisens features">
+ <option value="Colinear">Collinear</option>
<option value="AntiSens">AntiSens</option>
<option value="NONE" selected="true">NONE</option>
</param>
@@ -175,5 +172,20 @@
     <data format="gff3" name="outputFile" label="[GetFlanking] Output File"/>
    </outputs>

+ <help>
+This tool prints the elements from the second set of genomic intervals which are closest to (in other words, are flanking) the elements from the first set. You can also adjust different parameters:
+
+- restrict the search to downstream or upstream elements, or print downstream and upstream elements,
+
+- only consider collinear flanking elements,
+
+- only consider anti-sense flanking elements,
+
+- only consider elements which are close enough (using some given distance),
+
+- only consider flanking elements which do not overlap with the reference element.
+
+Note that each element from the second set is printed at most once, even if it is the flanking element of several elements from the first set.
+ </help>

</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/SelectByTag.xml
--- a/SMART/galaxy/SelectByTag.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/SelectByTag.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -1,17 +1,13 @@
<tool id="SelectByTag" name="select by tag">
- <description>Keeps the genomic coordinates such that a value of a given tag.</description>
+ <description>Keep the genomic coordinates such that a value of a given tag.</description>
<command interpreter="python">
../Java/Python/SelectByTag.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'bed':
- -f bed
#elif $formatType.FormatInputFileName == 'gff':
-f gff
#elif $formatType.FormatInputFileName == 'gff2':
-f gff2
#elif $formatType.FormatInputFileName == 'gff3':
-f gff3
- #elif $formatType.FormatInputFileName == 'sam':
- -f sam
#elif $formatType.FormatInputFileName == 'gtf':
-f gtf
#end if
@@ -37,16 +33,11 @@
<inputs>
<conditional name="formatType">
<param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="bed">bed</option>
<option value="gff">gff</option>
<option value="gff2">gff2</option>
<option value="gff3">gff3</option>
- <option value="sam">sam</option>
<option value="gtf">gtf</option>
</param>
- <when value="bed">
- <param name="inputFileName" format="bed" type="data" label="Input File"/>
- </when>
<when value="gff">
<param name="inputFileName" format="gff" type="data" label="Input File"/>
</when>
@@ -56,9 +47,6 @@
<when value="gff3">
<param name="inputFileName" format="gff3" type="data" label="Input File"/>
</when>
- <when value="sam">
- <param name="inputFileName" format="sam" type="data" label="Input File"/>
- </when>
<when value="gtf">
<param name="inputFileName" format="gtf" type="data" label="Input File"/>
</when>
@@ -67,12 +55,12 @@
<param name="Tag" type="text" value="None" label="tag option" help="A given tag, you must choose a tag."/>

<conditional name="OptionValue">
- <param name="Value" type="select" label="value of tag">
+ <param name="Value" type="select" label="given value for the tag">
<option value="Yes">Yes</option>
<option value="No" selected="true">No</option>
</param>
<when value="Yes">
- <param name="valeur" type="integer" value="1" help="Be Careful! The value must be upper than 0"/>
+ <param name="valeur" type="integer" value="1"/>
</when>
<when value="No">
</when>
@@ -84,7 +72,7 @@
<option value="No" selected="true">No</option>
</param>
<when value="Yes">
- <param name="max" type="integer" value="1" help="Be Careful! The value must be upper than 0"/>
+ <param name="max" type="integer" value="1"/>
</when>
<when value="No">
</when>
@@ -96,14 +84,14 @@
<option value="No" selected="true">No</option>
</param>
<when value="Yes">
- <param name="min" type="integer" value="1" help="Be Careful! The value must be upper than 0"/>
+ <param name="min" type="integer" value="1"/>
</when>
<when value="No">
</when>
</conditional>

<conditional name="OptionDefault">
- <param name="default" type="select" label="gives this value if tag is not present">
+ <param name="default" type="select" label="give this value if tag is not present">
<option value="Yes">Yes</option>
<option value="No" selected="true">No</option>
</param>
@@ -119,4 +107,13 @@
<data name="outputFileGff" format="gff3" label="[SelectByTag] Output File"/>
</outputs>

+ <help>
+The script reads a list of genomic coordinates and outputs all the features with specific tag values. If you want to know more about tags, please consult the GFF format page: http://www.sequenceontology.org/gff3.shtml
+
+The tool reads the input file, and more specifically the tag that you specified. You can mention a lower and an upper bound for its value, or a specific value, and the tool will print all the features such that the tags are between the specified bounds or match the string.
+
+A tag has to be present for each feature. If not, you can specify a default value which will be used if the tag is absent.
+
+This tool can be used to select the clusters with a minimum number of elements (the tag **nbElements** counts the number of elements per clusters) or to select the reads which have mapped less than *n* times (the tag **nbOccurrences** counts the number of mappings per read).
+ </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/WrappGetLetterDistribution.py
--- a/SMART/galaxy/WrappGetLetterDistribution.py Fri Apr 19 10:13:11 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,97 +0,0 @@
-#! /usr/bin/env python
-
-import os
-import sys
-import getopt
-from pyRepetUnit.commons.checker.CheckerException import CheckerException
-
-SMART_PATH = "%s/SMART" % os.environ["REPET_PATH"]
-
-class WrappGetLetterDistribution(object):
-
-    def __init__(self):
-        self._inputFileName = ""
-        self._inputFileFormat = ""
-        self._outputFileName = "tmpOutputFile"
-        self._csv = False
-
-    def help( self ):
-        print
-        print "usage: %s [ options ]" % ( sys.argv[0] )
-        print "options:"
-        print "     -h: this help"
-        print "     -i: input file"
-        print "     -f: 'fasta' or 'fastq'"
-        print "     -c: CSV output file"
-        print "     -a: first PNG output file"
-        print "     -b: second PNG output file"
-        print
-        print "Exemple:"
-        print
-        print "1:\n\tpython WrappGetLetterDistribution.py -i inputFile.fasta -f fasta -c outputFile1.csv -a outputFile2.png -b outputFile3.png"
-        print
-        print "2:\n\tpython WrappGetLetterDistribution.py -i inputFile.fastq -f fastq -c outputFile1.csv -a outputFile2.png -b outputFile3.png"
-        print
-        print
-
-
-    def setAttributesFromCommandLine(self):
-        try:
-            opts, args = getopt.getopt( sys.argv[1:], "hi:f:a:b:c:" )
-        except getopt.GetoptError, err:
-            print str(err); sys.exit(1)
-        for o, a in opts:
-            if o == "-h":
-                self.help()
-                sys.exit(0)
-            if o == "-i":
-                self._inputFileName = a
-            elif o == "-f":
-                self._inputFileFormat = a
-            elif o == "-c":
-                self._outputFileNameCSV = a
-                self._csv = True
-            elif o == "-a":
-                self._outputFileNamePNG = a
-            elif o == "-b":
-                self._outputFileNamePerNtPNG = a
-
-    def checkAttributes(self):
-        lMsg = []
-        if self._inputFileName == "" and not os.path.exists(self._inputFileName):
-            lMsg.append("ERROR: This input file doesn't exist!")
-        if self._inputFileFormat == "":
-            lMsg.append("ERROR: No input file format specified in option!")
-        if self._outputFileNamePNG == "":
-            lMsg.append("ERROR: No output file.png specified in option!")
-        if self._outputFileNamePerNtPNG == "":
-            lMsg.append("ERROR: No output filePerNt.png specified in option!")
-        if self._outputFileNameCSV == "" and self._csv == True :
-            lMsg.append("ERROR: No output file.csv specified in option!")
-
-        print ">>> lMsg " + str(lMsg)
-        if lMsg != []:
-            exp = CheckerException()
-            exp.setMessages(lMsg)
-            raise (exp)
-
-    def _cleanWorkingDir(self, cDir):
-        os.system("rm %s/tmpData* %s/tmpScript*" % (cDir, cDir))
-
-    def wrapp(self):
-        self.checkAttributes()
- cDir = os.getcwd()
-
-        if self._csv == True:
-            os.system("python %s/Java/Python/getLetterDistribution.py -i %s -f %s -o %s/%s -c" % (SMART_PATH, self._inputFileName, self._inputFileFormat, cDir, self._outputFileName))
-            os.system("mv %s/%s.csv %s" % (cDir, self._outputFileName, self._outputFileNameCSV))
-            os.system("mv %s/%s.png %s" % (cDir, self._outputFileName, self._outputFileNamePNG))
-            os.system("mv %s/%sPerNt.png %s" % (cDir, self._outputFileName, self._outputFileNamePerNtPNG))
-
-        self._cleanWorkingDir(cDir)
-
-if __name__ == '__main__':
-    launcher = WrappGetLetterDistribution()
-    launcher.setAttributesFromCommandLine()
-    launcher.wrapp()
-

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/WrappGetLetterDistribution.xml
--- a/SMART/galaxy/WrappGetLetterDistribution.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/WrappGetLetterDistribution.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -1,5 +1,5 @@
<tool id="getLetterDistribution1" name="Get Letter Distribution">
-    <description>Calculate distribution for each nucleotide per position for all short reads (S-MART)</description>
+    <description>Calculate distribution for each nucleotide per position for all short reads</description>
     <command interpreter="python">
    WrappGetLetterDistribution.py -i $inputFileName
#if $formatType.FormatInputFileName == 'fasta':
@@ -28,6 +28,18 @@
                 <data name="ouputFileNameCSV" format="tabular" label="[getLetterDistribution] CSV File"/>
                 <data name="ouputFileNamePNG1" format="png" label="[getLetterDistribution] PNG File 1"/>
                 <data name="ouputFileNamePNG2" format="png" label="[getLetterDistribution] PNG File 2"/>
-    </outputs>
+    </outputs>
+    <tests>
+     <test>
+            <param name="FormatInputFileName" value="fastq" />
+            <param name="inputFileName" value="short_fastq.fastq" />
+            <output name="outputFileNameCSV" file="exp_getletterdistribution_short_fastq.csv" />
+        </test>
+    </tests>
+
+ <help>
+The script gets the nucleotide distribution of the input sequence list. It outputs two files. The first file shows the nucleotide distribution of the data. More precisely, a point (*x*, *y*) on the curve **A** shows that *y* sequences have *x*% of **A**.
+
+The second plot shows the average nucleotide distribution for each position of the read. You can use it to detect a bias in the first nucleotides, for instance. A point (*x*, *y*) on the curve **A** shows that at the position *x*, there are *y*% of **A**. A point (*x*, *y*) on the curve **#** tells you that *y*% of the sequences contain not less than *x* nucleotides. By definition, this latter line is a decreasing function. It usually explains why the tails of the other curves are sometimes erratic: there are few sequences of that length.
+ </help>
</tool>
-

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/changeGffFeatures.xml
--- a/SMART/galaxy/changeGffFeatures.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/changeGffFeatures.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -1,16 +1,20 @@
<tool id="changeGffFeatures" name="change gff Features">
- <description>Changes one feature name by an other name (the feature name can be found on the 3rd column).</description>
+ <description>Change a feature in a GFF file (the feature is the 3rd column).</description>
<command interpreter="bash">
../Java/Python/changeGffFeatures.sh $inputFile $inputFeature $outputFeature >$outputFile
</command>
       <inputs>
      <param name="inputFile" type="data" label="Input File" format="gff"/>
-       <param name="inputFeature" type="text" value="exon" label="A given feature, you must choose a feature name(on the 3rd column)."/>
-       <param name="outputFeature" type="text" value="exon" label="You must choose an other feature name(on the 3rd column)."/>
+       <param name="inputFeature" type="text" value="exon" label="The feature you want to change"/>
+       <param name="outputFeature" type="text" value="exon" label="The new feature"/>
       </inputs>

       <outputs>
              <data name="outputFile" format="gff" label="[changeGffFeatures] Output File"/>
       </outputs>
+
+   <help>
+ This script changes the third column of a GFF3 file (please refer to http://www.sequenceontology.org/gff3.shtml to know more about this format).
+   </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/changeTagName.xml
--- a/SMART/galaxy/changeTagName.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/changeTagName.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -1,9 +1,7 @@
<tool id="changeTagName" name="change tag name">
- <description>Changes the name of tag of a list of transcripts.</description>
+ <description>Change the name of a tag in a GFF file.</description>
<command interpreter="python">
../Java/Python/changeTagName.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'bed':
- -f bed
#elif $formatType.FormatInputFileName == 'gff':
-f gff
#elif $formatType.FormatInputFileName == 'gff2':
@@ -21,14 +19,10 @@
<inputs>
<conditional name="formatType">
<param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="bed">bed</option>
<option value="gff">gff</option>
<option value="gff2">gff2</option>
<option value="gff3">gff3</option>
</param>
- <when value="bed">
- <param name="inputFileName" format="bed" type="data" label="Input File"/>
- </when>
<when value="gff">
<param name="inputFileName" format="gff" type="data" label="Input File"/>
</when>
@@ -40,15 +34,15 @@
</when>
</conditional>

- <param name="Tag" type="text" value="None" label="tag option" help="A given tag, you must choose a tag."/>
- <param name="name" type="text" value="None" label="name option" help="new name for the tag, you must choose a new name."/>
-
-
-
+ <param name="Tag" type="text" label="tag option" help="The tag you want to change"/>
+ <param name="name" type="text" label="name option" help="A new name for the tag"/>
</inputs>

<outputs>
<data name="outputFileGff" format="gff3" label="[changeTagName] Output File"/>
</outputs>

+ <help>
+ Change the name of a tag in the 9th field of a GFF3 file (please consult http://www.sequenceontology.org/gff3.shtml to know more about this format).
+ </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/cleanGff.xml
--- a/SMART/galaxy/cleanGff.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/cleanGff.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -1,5 +1,5 @@
<tool id="cleanGff" name="clean Gff">
-   <description>Cleans a GFF file as given by NCBI and outpus a Gff3 file.</description>
+   <description>Clean a GFF file (e.g. as given by NCBI) and produces a new GFF3 file, understood by S-MART.</description>
  <command interpreter="python"> ../Java/Python/cleanGff.py -i $inputFile
  -t $type
  -o $outputFile
@@ -7,12 +7,15 @@

      <inputs>
      <param name="inputFile" type="data" label="Input File" format="gff"/>
-       <param name="type" type="text" value="tRNA,rRNA,ncRNA,CDS" label="tag option, compulsory option" help="lists of comma separated types that you want to keep.EX: ncRNA,tRNA,rRNA,CDS"/>
+       <param name="type" type="text" value="tRNA,rRNA,ncRNA,CDS" label="Tags you keep" help="lists of comma separated types that you want to keep, e.g. ncRNA,tRNA,rRNA,CDS"/>
      </inputs>

      <outputs>
          <data format="gff3" name="outputFile" label="[cleanGff] Output File"/>
      </outputs>

+ <help>
+ A GFF file (please consult http://www.sequenceontology.org/gff3.shtml to know more about it) may contain different sources of information: chromosome size, genes, transcripts, etc. S-MART mostly works on transcripts. This script filters the input GFF3 to keep the information you really want, based on the feature (3rd column).
+ </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/clusterize.xml
--- a/SMART/galaxy/clusterize.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/clusterize.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -1,5 +1,5 @@
<tool id="MergingDataClusterize" name="Clusterize">
- <description>Clusterizes the reads when their genomic intervals overlap.</description>
+ <description>Clusterize features when their genomic intervals overlap.</description>
<command interpreter="python">
../Java/Python/clusterize.py -i $formatType.inputFileName
#if $formatType.FormatInputFileName == 'bed':
@@ -10,8 +10,6 @@
-f gff2
#elif $formatType.FormatInputFileName == 'gff3':
-f gff3
- #elif $formatType.FormatInputFileName == 'csv':
- -f csv
#elif $formatType.FormatInputFileName == 'sam':
-f sam
#end if
@@ -29,7 +27,6 @@
<option value="gff">gff</option>
<option value="gff2">gff2</option>
<option value="gff3">gff3</option>
- <option value="csv">csv</option>
<option value="sam">sam</option>
</param>
<when value="bed">
@@ -44,24 +41,17 @@
<when value="gff3">
<param name="inputFileName" format="gff3" type="data" label="Input File"/>
</when>
- <when value="csv">
- <param name="inputFileName" format="csv" type="data" label="Input File"/>
- </when>
<when value="sam">
<param name="inputFileName" format="sam" type="data" label="Input File"/>
</when>
</conditional>

- <param name="colinear" type="boolean" truevalue="-c" falsevalue="" checked="false" label="colinear option" help="This option clusterizes only the same strand reads"/>
- <param name="normalize" type="boolean" truevalue="-n" falsevalue="" checked="false" label="normalize option for only GFF3 file format" help="This option normalize (attention!! Only for GFF3 file!!!!!)"/>
- <param name="log" type="boolean" truevalue="-l" falsevalue="" checked="false" label="log option" help="This option create a log file"/>
- <param name="distance" type="integer" value="0" label="distance option" help="Limit the maximum distance between two reads"/>
+ <param name="colinear" type="boolean" truevalue="-c" falsevalue="" checked="false" label="Only merge collinear data"/>
+ <param name="normalize" type="boolean" truevalue="-n" falsevalue="" checked="false" label="Normalize data." help="This only works if the tag nbOccurrences is set."/>
+ <param name="distance" type="integer" value="0" label="Merge features if their relative distance is withing N nt"/>
</inputs>

<outputs>
<data name="outputFileGff" format="gff3" label="[clusterize]output file"/>
- <data name="outputFileLog" format="txt" label="[clusterize]output file">
- <filter>log</filter>
- </data>
</outputs>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/clusterizeBySlidingWindows.xml
--- a/SMART/galaxy/clusterizeBySlidingWindows.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/clusterizeBySlidingWindows.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -1,5 +1,5 @@
<tool id="clusterizeBySlidingWindows" name="clusterize By SlidingWindows">
- <description>Produces a GFF3 file that clusters a list of transcripts using a sliding window. Cluster the data into regions (defined by size and overlap with next region) and keep only highest peaks.</description>
+ <description>Produces a GFF3 file that clusters a list of transcripts using a sliding window. Cluster the data into regions (defined by size and overlap with next region).</description>
<command interpreter="python">
../Java/Python/clusterizeBySlidingWindows.py -i $formatType.inputFileName
#if $formatType.FormatInputFileName == 'bed':
@@ -73,16 +73,16 @@

<param name="size" type="text" value="50000" label="Size option" help="Size of the regions."/>
<param name="overlap" type="text" value="50" label="Overlap option" help="Overlap between two consecutive regions."/>
- <param name="normalize" type="boolean" truevalue="-m" falsevalue="" checked="false" label="Normalize option for only GFF3 file format" help="This option normalizes (Warning!! Only for GFF3 file!)"/>
- <param name="strands" type="boolean" truevalue="-2" falsevalue="" checked="false" label="strands option" help="Consider the two strands separately."/>
+ <param name="normalize" type="boolean" truevalue="-m" falsevalue="" checked="false" label="Normalize option for only GFF3 file format" help="(only work if the tag nbOccurrences is set)"/>
+ <param name="strands" type="boolean" truevalue="-2" falsevalue="" checked="false" label="Consider the two strands separately"/>

<conditional name="OptionTag">
- <param name="tag" type="select" label="use a given tag as input (instead of summing number of features)">
+ <param name="tag" type="select" label="Use a given tag as input (instead of summing number of features)">
<option value="Yes">Yes</option>
<option value="No" selected="true">No</option>
</param>
<when value="Yes">
- <param name="value" type="text" value="None" label="tag option" help="write a tag name you want to observe."/>
+ <param name="value" type="select" label="tag name"/>
</when>
<when value="No">
</when>
@@ -90,12 +90,18 @@

<conditional name="OptionsOperation">
- <param name="operation" type="select" label="combine tag value with given operation [choice (sum, avg, med, min, max)]">
+ <param name="operation" type="select" label="combine tag value with given operation">
<option value="Yes">Yes</option>
<option value="No" selected="true">No</option>
</param>
<when value="Yes">
- <param name="value" type="text" value="None" label="operation option" help="You can ONLY choose one of fowlling operation : sum, avg, med, min, max."/>
+ <param name="value" type="select" label="operation" help="You can ONLY choose one of following operation : sum, avg, med, min, max.">
+ <option value="sum">sum</option>
+ <option value="avg">average</option>
+ <option value="med">median</option>
+ <option value="min">minimum</option>
+ <option value="max">maximum</option>
+ </param>
</when>
<when value="No">
</when>
@@ -114,19 +120,19 @@
</when>
</conditional>

- <param name="strand" type="boolean" truevalue="-2" falsevalue="" checked="false" label="strand option" help="This option considers the two strands separately."/>
- <param name="plot" type="boolean" truevalue="-p" falsevalue="" checked="false" label="plot option" help="This option creates a png file."/>
- <param name="excel" type="boolean" truevalue="-x" falsevalue="" checked="false" label="excel option" help="This option creates a csv file."/>
-
</inputs>

<outputs>
<data name="outputFileGff" format="gff3"/>
- <data name="excelOutput" format="csv">
- <filter>excel</filter>
- </data>
- <data name="plotPng" format="png">
- <filter>plot</filter>
- </data>
</outputs>
+
+ <help>
+Sliding windows are a convenient way to clusterize data mapped on the genome. There are two important parameters of a sliding window: the size of the window and the size of the overlap.
+
+By default, sliding windows count the number of reads in each window. However, you can basically merge any information which is contained in the tags. You can compute the average, sum, median, max or min of the tags for each window. For instance, every window can contain the average cluster size, if you merge clusters instead of reads.
+
+The output file is a GFF3 file, where each element is a window. There is a special tag for each window, whose name is **nbElements** if you counted the number of transcripts per sliding window. However, if you performed a **min** (resp. **max**, **sum**, **median**, **average**) operation on the tags **value** of the transcripts, then the tag of the window will be **minValue** (resp. **maxValue**, **sumValue**, **medValue**, **avgValue**). You can also specify the name of your tag (which is actually advised: **nbReadsInSample1** will always be more informative than **nbElements**).
+
+You also have different options, which can select the *n* % highest regions, or the regions with at least *n* features in them, or even the regions with at least *n* unique features. This last option is useful when you want to cluster the reads which have mapped only once, for instance.
+ </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/compareOverlapping.xml
--- a/SMART/galaxy/compareOverlapping.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/compareOverlapping.xml Mon Apr 22 11:08:07 2013 -0400

b'@@ -1,4 +1,4 @@\n-<tool id="CompareOverlapping" name="Compare Overlapping">\n+<tool id="CompareOverlapping" name="compare Overlapping">\n \t<description>Print all the transcripts from a first file which overlap with the transcripts from a second file.</description>\n \t<command interpreter="python">\n \t\t../Java/Python/CompareOverlapping.py -i $formatType.inputFileName1\n@@ -107,7 +107,7 @@\n \t\t\t</when>\n \t\t\t<when value="gtf">\n \t\t\t\t<param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>\n-\t\t\t\t\t\t\t\t </when>\n+\t\t\t</when>\n \t\t</conditional>\n \n \t\t<conditional name="formatType2">\n@@ -139,58 +139,54 @@\n \t\t\t</when>\n \t\t</conditional>\n \n-\n-\n \t\t<conditional name="optionNFirstFile1">\n-\t\t\t<param name="NFirstForFile1" type="select" label="NFirst for file 1" help="only consider the n first nucleotides of the transcripts in file 1">\n+\t\t\t<param name="NFirstForFile1" type="select" label="Shrink the queries to their first N nt.">\n \t\t\t\t\t<option value="Yes">Yes</option>\n \t\t\t\t\t<option value="No" selected="true">No</option>\n \t\t\t</param>\n \t\t\t<when value="Yes">\n-\t\t\t\t<param name="firstNtFile1" type="integer" value="1" label="n first nucleotides for input file 1" />\n+\t\t\t\t<param name="firstNtFile1" type="integer" value="1" label="size" />\n \t\t\t</when>\n \t\t\t<when value="No">\n \t\t\t</when>\n \t\t</conditional>\n \t\t<conditional name="optionNFirstFile2">\n-\t\t\t<param name="NFirstForFile2" type="select" label="NFirst for file 2" help="only consider the n first nucleotides of the transcripts in file 2">\n+\t\t\t<param name="NFirstForFile2" type="select" label="Shrink the references to their first N nt.">\n \t\t\t\t<option value="Yes">Yes</option>\n \t\t\t\t<option value="No" selected="true">No</option>\n \t\t\t</param>\n \t\t\t<when value="Yes">\n-\t\t\t\t<param name="firstNtFile2" type="integer" value="1" label="n first nucleotides for input file 1" />\n+\t\t\t\t<param 
name="firstNtFile2" type="integer" value="1" label="size" />\n \t\t\t</when>\n \t\t\t<when value="No">\n \t\t\t</when>\n \t\t</conditional>\n \n \t\t<conditional name="optionNLastFile1">\n-\t\t\t<param name="NLastForFile1" type="select" label="NLast for file 1">\n+\t\t\t<param name="NLastForFile1" type="select" label="Shrink the queries to their last N nt.">\n \t\t\t\t\t<option value="Yes">Yes</option>\n \t\t\t\t\t<option value="No" selected="true">No</option>\n \t\t\t</param>\n \t\t\t<when value="Yes">\n-\t\t\t\t<param name="lastNtFile1" type="integer" value="1" label="n last nucleotides for input file 1" help="only consider the n last nucleotides of the transcripts in file 1"/>\n+\t\t\t\t<param name="lastNtFile1" type="integer" value="1" label="size"/>\n \t\t\t</when>\n \t\t\t<when value="No">\n \t\t\t</when>\n \t\t</conditional>\n \t\t<conditional name="optionNLastFile2">\n-\t\t\t<param name="NLastForFile2" type="select" label="NLast for file 2">\n+\t\t\t<param name="NLastForFile2" type="select" label="Shrink the references to their last N nt.">\n \t\t\t\t<option value="Yes">Yes</option>\n \t\t\t\t<option value="No" selected="true">No</option>\n \t\t\t</param>\n \t\t\t<when value="Yes">\n-\t\t\t\t<param name="lastNtFile2" type="integer" value="1" label="n last nucleotides for input file 2" help="only consider the n last nucleotides of the transcripts in file 2"/>\n+\t\t\t\t<param name="lastNtFile2" type="integer" value="1" label="size"/>\n \t\t\t</when>\n \t\t\t<when value="No">\n \t\t\t</when>\n \t\t</conditional>\n \n-\n-\n \t\t<conditional name="optionExtentionCinqFile1">\n-\t\t\t<param name="extentionFile1" type="select" label="Extension towards 5 for file 1">\n+\t\t\t<param name="extentionFile1" type="select" label="Extend the query features towards the 5\' end">\n \t\t\t\t\t<option value="Yes">Yes</option>\n \t\t\t\t\t<option value="No" selected="true">No</option>\n \t\t\t</param>\n@@ -201,9 +197,8 @@\n \t\t\t</when>\n \t\t</conditional>\n \n-\n 
\t\t<conditional name="optionExtentionCinqFile2">\n-\t\t\t<param name="extentionFile2" type="select" label="Extension towards 5 for file 2">\n+\t\t\t<param name="extentionFile2" type="select" label="Extend the reference features towards 5\' end">\n \t\t\t\t<option value="Yes">Yes</option>\n \t\t\t\t<option value="No" selected="true">No</opt'..b'lue="No" selected="true">No</option>\n \t\t\t</param>\n@@ -227,7 +222,7 @@\n \t\t</conditional>\n \n \t\t<conditional name="optionExtentionTroisFile2">\n-\t\t\t<param name="extentionFile2" type="select" label="Extension towards 3 for file 2">\n+\t\t\t<param name="extentionFile2" type="select" label="Extend the reference features towards 3\' end">\n \t\t\t\t<option value="Yes">Yes</option>\n \t\t\t\t<option value="No" selected="true">No</option>\n \t\t\t</param>\n@@ -239,7 +234,7 @@\n \t\t</conditional>\n \n \t\t<conditional name="OptionColinearOrAntiSens">\n-\t\t\t<param name="OptionCA" type="select" label="Colinear or anti-sens">\n+\t\t\t<param name="OptionCA" type="select" label="Report queries which are collinear/antisens w.r.t. 
a reference">\n \t\t\t\t<option value="Colinear">Colinear</option>\n \t\t\t\t<option value="AntiSens">AntiSens</option>\n \t\t\t\t<option value="NONE" selected="true">NONE</option>\n@@ -275,14 +270,38 @@\n \t\t\t<when value="No">\n \t\t\t</when>\n \t\t</conditional>\n-\t\t<param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match"/>\n-\t\t<param name="ReportIntron" type="boolean" truevalue="-t" falsevalue="" checked="false" label="Report intron"/>\n-\t\t<param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="When there is no overlapping, the number of Overlapping will be set to 0 by defalt."/>\n+\t\t<param name="ReportIntron" type="boolean" truevalue="-t" falsevalue="" checked="false" label="Also report queries which overlap with the introns of references, or queries such that a reference is in one of its intron"/>\n+\t\t<param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match: the output file will contain all query elements which do NOT overlap"/>\n+\t\t<param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="Also report the query data which do not overlap, with the nbOverlaps tag set to 0."/>\n+\t</inputs>\n \t\t\n-\t</inputs>\n-\n \t<outputs>\n \t\t<data name="outputFileGff" format="gff3"/>\n \t</outputs> \n \t\n+\t<help>\n+This script may be the most important one. It basically compares two sets of transcripts and keeps those from the first set which overlap with the second one. The first set is considered as the query set (basically, your data) and the second one is the reference set (RefSeq data, for example).\n+ \n+It is vital to understand that it will output the elements of the first file which overlap with the elements of the second one.\n+\n+Various modifiers are also available:\n+\n+-Restrict query / reference set to the first nucleotide. 
Useful to check if the TSS of one set overlap with the other one.\n+\n+-Extend query / reference set on the 5\' / 3\' direction. Useful to check if one set is located upstream / downstream the other one.\n+\n+-Include introns in the comparison.\n+\n+-Invert selection (report those which do not overlap).\n+\n+-Restrict to colinear / anti-sense overlapping data.\n+\n+-Keep the query data even if they do not strictly overlap with the reference data, but are located not further away than *n* nucleotide from some reference data.\n+\n+-Keep the query data with are strictly included into reference data, meaning that a query transcript such that at least 1 nucleotide does not overlap with reference data will not be presented as a solution.\n+\n+The mechanism of shrinking and extending is also useful to make a fine grain comparison. For example, if you want to keep those such that the TSS is overlapping the reference set, you just shrink the query set to 1 nucleotide. Now, if you want to keep those which are overlapping you data or located 2kb downstream of it, just extend the query data in the downstream direction, and you will have what you want. You can also extend in the opposite direction to get the possible transcript factor sites which are upstream.\n+\n+Some option reverses the selection. Put in other words, it performs the comparison as usual, and outputs all those query data which do not overlap.\n+\t</help>\n </tool>\n'

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/computeCoverage.xml
--- a/SMART/galaxy/computeCoverage.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/computeCoverage.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -1,4 +1,4 @@
-<tool id="ComputeCoverage" name="Compute coverage">
+<tool id="ComputeCoverage" name="compute coverage">
     <description>Compute the coverage of a set with respect to another set.</description>
     <command interpreter="python">
         ../Java/Python/ComputeCoverage.py -i $formatType.inputFileName1
@@ -103,5 +103,8 @@
<data name="outputFileGff" format="gff3" label="[computeCoverage] OUTPUT file"/>
</outputs>

+ <help>
+This tool considers a query file and a reference file, and gives the coverage of the query file by the reference. The output file is similar to the query file, where a tag **coverage** has been added.
+ </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/coordinatesToSequence.xml
--- a/SMART/galaxy/coordinatesToSequence.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/coordinatesToSequence.xml Mon Apr 22 11:08:07 2013 -0400

@@ -59,4 +59,7 @@
<data name="outputFileFasta" format="fasta" label="coordinates to sequences output"/>
</outputs>

+ <help>
+You can use this tool, if you just want to convert your mapping data to genomic coordinates, without any filtering. It requires a genomic coordinates file together with its format, an output format (GFF3, BED, etc...), the genome, and prints you the corresponding file.
+ </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/getDifference.xml
--- a/SMART/galaxy/getDifference.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/getDifference.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -1,5 +1,5 @@
<tool id="getDifference" name="get Difference">
- <description>Gets all the regions of the genome, except the one given or get all the elements from the first set which does not ovelap with the second set (at the nucleotide level).</description>
+ <description>Gets all the regions of the genome, except the ones given in an annotation file. Alternatively, it may also give all the elements from the first set which do not overlap with the second set (at the nucleotide level).</description>
<command interpreter="python">
../Java/Python/getDifference.py -i $formatType.inputFileName1
#if $formatType.FormatInputFileName1 == 'bed':
@@ -31,7 +31,6 @@
-g gtf
#end if

-
$split

#if $OptionSequence.option == "Yes":
@@ -102,8 +101,7 @@
</when>
</conditional>

- <param name="split" type="boolean" truevalue="-p" falsevalue="" checked="false" label="split option" help="When comparing to a set of genomic coordinates, do not join."/>
-
+ <param name="split" type="boolean" truevalue="-p" falsevalue="" checked="false" label="When comparing to a set of genomic coordinates, do not join into exons."/>

<conditional name="OptionSequence">
<param name="option" type="select" label="Compare with a reference fasta file.">
@@ -124,4 +122,9 @@
<data name="outputFileGff" format="gff3" label="[getDifference]output File."/>
</outputs>

+ <help>
+This tool has two different (but similar) uses. When given two sets of transcripts, it trims the elements of the first set so that they do not overlap with the second set.
+
+When only one set of transcripts is given, together with a reference genome, it produces a list of transcripts which complements the first set.
+ </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/getDistance.xml
--- a/SMART/galaxy/getDistance.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/getDistance.xml Mon Apr 22 11:08:07 2013 -0400

@@ -1,5 +1,5 @@
<tool id="GetDistance" name="get distance">
- <description>Give the distances between every data from the first input set and the data from the second input set</description>
+ <description>Give the distances between every data from the first input set with respect to the data from the second input set.</description>
<command interpreter="python">
../Java/Python/getDistance.py -i $formatType.inputFileName1
#if $formatType.FormatInputFileName1 == 'bed':
@@ -40,16 +40,6 @@
-a
#end if

- #if $OptionFirstNucl5.FirstNu5 == "Yes":
- -s $OptionFirstNucl5.first5File1
- -S $OptionFirstNucl5.first5File2
- #end if
-
- #if $OptionFirstNucl3.FirstNu3 == "Yes":
- -e $OptionFirstNucl3.first3File1
- -E $OptionFirstNucl3.first3File2
- #end if
-
#if $OptionMinDistance.MinD == "Yes":
-m $OptionMinDistance.minDistance
#end if
@@ -77,8 +67,6 @@
#end if

-o $outputFilePng
- $outputDistance $outputFileDistance
-
</command>

<inputs>
@@ -140,14 +128,12 @@
</when>
</conditional>

- <param name="outputDistance" type="boolean" truevalue="-O" falsevalue="" checked="false" label="distance option" help="This option create a GFF3 output file containing the distance for each element of the query."/>
-
<param name="absolute" type="boolean" truevalue="-b" falsevalue="" checked="false" label="absolute value option" help="This option gives the absolute value of the distance."/>
<param name="proportion" type="boolean" truevalue="-p" falsevalue="" checked="false" label="proportion option" help="This option gives the proportion on the y-axis instead of the number of distances."/>

<conditional name="OptionColinearOrAntiSens">
- <param name="OptionCA" type="select" label="Colinear or anti-sens">
- <option value="Colinear">Colinear</option>
+ <param name="OptionCA" type="select" label="Provide distribution of distances between collinear/antisense pairs of features">
+ <option value="Colinear">Collinear</option>
<option value="AntiSens">AntiSens</option>
<option value="NONE" selected="true">NONE</option>
</param>
@@ -159,34 +145,8 @@
</when>
</conditional>

- <conditional name="OptionFirstNucl5">
- <param name="FirstNu5" type="select" label="only consider the n first 5' nucleotides for input files">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="first5File1" type="integer" value="1" label="in file 1" help="Be Careful! The value must be upper than 0"/>
- <param name="first5File2" type="integer" value="1" label="in file 2" help="Be Careful! The value must be upper than 0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="OptionFirstNucl3">
- <param name="FirstNu3" type="select" label="only consider the n first 3' nucleotides for input files">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="first3File1" type="integer" value="1" label="in file 1" help="Be Careful! The value must be upper than 0"/>
- <param name="first3File2" type="integer" value="1" label="in file 2" help="Be Careful! The value must be upper than 0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
<conditional name="OptionMinDistance">
- <param name="MinD" type="select" label="minimum distance considered between two transcripts">
+ <param name="MinD" type="select" label="Minimum distance between two features">
<option value="Yes">Yes</option>
<option value="No" selected="true">No</option>
</param>
@@ -198,7 +158,7 @@
</conditional>

<conditional name="OptionMaxDistance">
- <param name="MaxD" type="select" label="maximum distance considered between two transcripts">
+ <param name="MaxD" type="select" label="Maximum distance between two features">
<option value="Yes">Yes</option>
<option value="No" selected="true">No</option>
</param>
@@ -267,9 +227,18 @@

<outputs>
<data name="outputFilePng" format="png"/>
- <data name="outputFileDistance" format="gff3">
- <filter>outputDistance</filter>
- </data>
</outputs>

+ <help>
+Give the distances between every data from the first input set and the data from the second input set. It outputs the size distribution. Each point (*x*, *y*) tells you that there exist *y* pairs of elements which are separated by *x* nucleotides.
+
+The general algorithm is the following. For each element of the first input set, it finds the closest element of the second set and computes the distance between the two elements. The distance is zero if the two elements overlap. This distance may not exist if the element of the first input set is alone on its chromosome (or contig).
+
+Actually, considering an element from the first input set, the algorithm will look at the vicinity of this element (1kb by default). You can increase the size of the vicinity using the appropriate option.
+
+As in *compare overlapping*, you can shrink or extend your sets of genomic coordinates, so that you can get the distance between starts of reads and starts of genes, for instance. You can also compute the distance from elements which are on the same strand only (which is not the case by default) or on the opposite strand only.
+
+You have several options for the output plot. You can first choose the region on the *x*-axis you want to plot. You can also display histograms instead of line plot. In this case, the data are summed into buckets, whose sizes are given as an option. For instance, a bucket of size *s* at the point (*x*, *y*) means that there are *y* pairs of elements which are separated by *x* to *x + s* nucleotides.
+ </help>
+
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/getDistribution.xml
--- a/SMART/galaxy/getDistribution.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/getDistribution.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -1,5 +1,5 @@
<tool id="getDistribution" name="get distribution">
- <description>Get Distribution: Get the distribution of the genomic coordinates on a genome.</description>
+ <description>Get Distribution: Get the distribution of the genomic coordinates along a genome.</description>
<command interpreter="python">
../Java/Python/GetDistribution.py -i $formatType.inputFileName
#if $formatType.FormatInputFileName == 'bed':
@@ -58,7 +58,6 @@

$bothStrands
$average
- -n $names
$normalize
$csv $outputCSV
$gff $outputGFF
@@ -74,7 +73,6 @@
<option value="gff">gff</option>
<option value="gff2">gff2</option>
<option value="gff3">gff3</option>
- <option value="csv">csv</option>
<option value="sam">sam</option>
<option value="gtf">gtf</option>
</param>
@@ -90,9 +88,6 @@
<when value="gff3">
<param name="inputFileName" format="gff3" type="data" label="Input File"/>
</when>
- <when value="csv">
- <param name="inputFileName" format="csv" type="data" label="Input File"/>
- </when>
<when value="sam">
<param name="inputFileName" format="sam" type="data" label="Input File"/>
</when>
@@ -104,7 +99,7 @@
<param name="refFile" format="fasta" type="data" label="reference genome file"/>

<conditional name="optionNbBin">
- <param name="Nb" type="select" label="number of bins">
+ <param name="Nb" type="select" label="number of points">
<option value="Yes">Yes</option>
<option value="No" selected="true">No</option>
</param>
@@ -115,8 +110,20 @@
</when>
</conditional>

+ <conditional name="optionChrom">
+ <param name="chrom" type="select" label="if you wish to plot only one chromosome, mention the chromosome name">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="chromValue" type="text" value="chromName" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
<conditional name="optionStart">
- <param name="start" type="select" label="start from a given region">
+ <param name="start" type="select" label="if you wish to plot only one locus, mention its start position">
<option value="Yes">Yes</option>
<option value="No" selected="true">No</option>
</param>
@@ -128,7 +135,7 @@
</conditional>

<conditional name="optionEnd">
- <param name="end" type="select" label="end from a given region">
+ <param name="end" type="select" label="if you wish to plot only one locus, mention its end position">
<option value="Yes">Yes</option>
<option value="No" selected="true">No</option>
</param>
@@ -140,7 +147,7 @@
</conditional>

<conditional name="optionHeight">
- <param name="height" type="select" label="height of the graphics">
+ <param name="height" type="select" label="height of the figure">
<option value="Yes">Yes</option>
<option value="No" selected="true">No</option>
</param>
@@ -152,7 +159,7 @@
</conditional>

<conditional name="optionWidth">
- <param name="width" type="select" label="width of the graphics">
+ <param name="width" type="select" label="width of the figure">
<option value="Yes">Yes</option>
<option value="No" selected="true">No</option>
</param>
@@ -187,18 +194,6 @@
</when>
</conditional>

- <conditional name="optionChrom">
- <param name="chrom" type="select" label="plot only one given chromosome">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="chromValue" type="text" value="chromName" />
- </when>
- <when value="No">
- </when>
- </conditional>
-
<conditional name="optionColor">
<param name="color" type="select" label="color of the lines (separated by commas and no space)">
<option value="Yes">Yes</option>
@@ -211,27 +206,35 @@
</when>
</conditional>

+ <param name="bothStrands" type="boolean" truevalue="-2" falsevalue="" checked="false" label="plot one curve per strand"/>
+ <param name="average" type="boolean" truevalue="-a" falsevalue="" checked="false" label="plot the number of element per bin (instead of sum)"/>

- <param name="bothStrands" type="boolean" truevalue="-2" falsevalue="" checked="false" label="plot one curve per strand"/>
- <param name="average" type="boolean" truevalue="-a" falsevalue="" checked="false" label="plot plot average (instead of sum)"/>
- <param name="names" type="text" value="nbElements" label="name for the tags (separated by commas and no space)"/>
- <param name="normalize" type="boolean" truevalue="-z" falsevalue="" checked="false" label="normalize data (when panels are different)"/>
- <param name="csv" type="boolean" truevalue="-x" falsevalue="" checked="false" label="write a .csv file."/>
- <param name="gff" type="boolean" truevalue="-g" falsevalue="" checked="false" label="write a .gff file."/>
+ <conditional name="optionNames">
+ <param name="names" type="select" label="name for the tags (separated by commas and no space)">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="namesValue" type="text" value="nbElements" />
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+
+ <param name="normalize" type="boolean" truevalue="-z" falsevalue="" checked="false" label="normalize data (when panel sizes are different)"/>
</inputs>

<outputs>
<data name="outputFile" format="png" label="[getDistribution] out png file"/>
- <data name="outputCSV" format="csv" label="[getDistribution] output csv file">
- <filter>csv</filter>
- </data>
-
- <data name="outputGFF" format="gff" label="[getDistribution] output gff file">
- <filter>gff</filter>
- </data>
</outputs>

     <help>
-        This script gives a .tar out file, if you want to take look at the results, you have to download it.
+This script gives a .tar out file; if you want to take a look at the results, you have to download it.
+
+Print a density profile of the data for each chromosome. You have to provide the reference genome, to know the sizes of the chromosomes. You can also provide the number of points (called *bins*) you want per chromosome.
+
+By default, only one curve is plotted per chromosome, but you can plot one curve per strand and per chromosome (the minus strand will be plotted with non-positive values on the *y*-axis).
+
+If you want, you can also plot a specific region, by mentioning the chromosome, the start and the end positions of the region.
     </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/getExons.xml
--- a/SMART/galaxy/getExons.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/getExons.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -69,4 +69,16 @@
     <outputs>
         <data format="gff3" name="outputFileGff" label="[getExons -> gff3] Output File"/>
     </outputs>
+<tests>
+    <test>
+      <param name="FormatInputFileName" value="gtf" />
+      <param name="inputFileName" value="genes.gtf" />
+      <param name="Value" value="No"/>
+      <output name="outputFileGff" file="exp_getExons.gff3" />
+    </test>
+</tests>
+
+ <help>
+Provide all the exons of an annotation file.
+ </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/getIntrons.xml
--- a/SMART/galaxy/getIntrons.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/getIntrons.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -53,4 +53,16 @@
     <outputs>
         <data format="gff3" name="outputFileGff" label="[getIntrons -> gff3] Output File"/>
     </outputs>
+<tests>
+    <test>
+      <param name="FormatInputFileName" value="gtf" />
+      <param name="inputFileName" value="genes.gtf" />
+      <output name="outputFileGff" file="exp_getIntrons.gff3" />
+    </test>
+  </tests>
+
+ <help>
+Provide all the introns of an annotation file.
+ </help>
+
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/getSizes.xml
--- a/SMART/galaxy/getSizes.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/getSizes.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -21,7 +21,6 @@
-b $OptionY.yLabValue
#end if
$barPlot
- $excel $excelOutput
</command>

<inputs>
@@ -84,7 +83,7 @@
</conditional>

<conditional name="OptionXMax">
- <param name="xMax" type="select" label="maximum value on the x-axis to plot [format: int]">
+ <param name="xMax" type="select" label="maximum x-value to plot">
<option value="Yes">Yes</option>
<option value="No" selected="true">No</option>
</param>
@@ -119,17 +118,16 @@
</when>
</conditional>

-
-
<param name="barPlot" type="boolean" truevalue="-B" falsevalue="" checked="false" label="use barplot representation"/>
-
- <param name="excel" type="boolean" truevalue="-c" falsevalue="" checked="false" label="excel option" help="This option creates a csv file."/>
</inputs>

<outputs>
- <data name="outputFile" format="png" label="[Get size] Output file"/>
- <data name="excelOutput" format="csv">
- <filter>excel</filter>
- </data>
+ <data name="outputFile" format="png" label="[Get sizes] output file"/>
</outputs>
+
+ <help>
+Get the sequence/annotation size distribution. A point (*x*, *y*) means that *y* elements have a size of *x* nucleotides.
+
+When your mapping includes exon/intron structures, you can decide to count the size of the introns, the sizes of the exons or the size of the first exons.
+ </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/getWigData.xml
--- a/SMART/galaxy/getWigData.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/getWigData.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -5,13 +5,21 @@
</command>

     <inputs>
-     <param name="inputGff3File" type="data" label="Input Gff3 File (compulsory option)" format="gff3"/>
-    <param name="inputWigFile" type="data" label="Input Wig File (compulsory option)" format="wig"/>
- <param name="tagName" type="text" value="None" label="tag option (compulsory option)" help="choose a tag name to write the wig information to output file."/>
+     <param name="inputGff3File" type="data" label="Input Gff3 File" format="gff3"/>
+    <param name="inputWigFile" type="data" label="Input Wig File" format="wig"/>
+ <param name="tagName" type="text" value="None" label="tag option" help="choose a tag name to write the wig information to output file."/>
<param name="strand" type="boolean" truevalue="-s" falsevalue="" checked="false" label="consider both strands separately."/>
     </inputs>

     <outputs>
         <data format="gff3" name="outputFile" label="[getWigData -> gff3] Output File"/>
     </outputs>
+
+ <help>
+Reads a transcript list, computes the average value of some WIG data (please consult http://genome.ucsc.edu/goldenPath/help/wiggle.html to know more about this format) for each transcript and adds a tag corresponding to this average value to the transcript.
+
+The script finds all the data which correspond to the genomic coordinates of a transcript, averages these data and stores the result into a tag. Then, the transcripts are written in an output file, together with the tag.
+
+You can then plot your data using *plotTranscriptList.py*.
+ </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/getWigDistance.xml
--- a/SMART/galaxy/getWigDistance.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/getWigDistance.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -5,13 +5,21 @@
</command>

     <inputs>
-     <param name="inputGff3File" type="data" label="Input Gff3 File (compulsory option)" format="gff3"/>
-    <param name="inputWigFile" type="data" label="Input Wig File (compulsory option)" format="wig"/>
- <param name="distance" type="integer" value="1000" label="distance option (compulsory option)" help="Distance around position.Be Careful! The value must be upper than 0"/>
- <param name="strand" type="boolean" truevalue="-s" falsevalue="" checked="false" label="consider both strands separately."/>
+     <param name="inputGff3File" type="data" label="Input Gff3 File" format="gff3"/>
+    <param name="inputWigFile" type="data" label="Input Wig File" format="wig"/>
+ <param name="distance" type="integer" value="1000" label="Distance around positions."/>
+ <param name="strand" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Consider both strands separately."/>
     </inputs>

     <outputs>
         <data name="outputFile" format="png" label="[getWigDistance] PNG output File"/>
     </outputs>
+
+ <help>
+Plots the average data contained in a set of WIG files (please consult http://genome.ucsc.edu/goldenPath/help/wiggle.html to know more about this format) around the first nucleotides of an annotation file.
+
+The tool needs a transcript list, some WIG files, and a distance. For each transcript, it collects all the values around its first nucleotide, the radius being given by the distance. Then, it computes the average value for each position. A point (*x*, *y*) means that the average value in the WIG file for a nucleotide distant by *x* nucleotides from the first nucleotide of an input transcript is *y*.
+
+You can possibly use a log scale for the *y*-axis.
+ </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/getWigProfile.xml
--- a/SMART/galaxy/getWigProfile.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/getWigProfile.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -64,7 +64,12 @@
</inputs>

<outputs>
- <data name="outputFilePNG" format="png" label="[getWigProfile]out file"/>
+ <data name="outputFilePNG" format="png" label="[getWigProfile] out file"/>
</outputs>

+ <help>
+Computes the average distribution of the WIG data (please consult http://genome.ucsc.edu/goldenPath/help/wiggle.html to know more about this format) along the transcripts given in input, and possibly before and after the transcripts.
+
+The main inputs of the function are a file containing a list of transcripts (or any set of genomic intervals) and a directory containing a set of WIG files (one file per chromosome, or one file per chromosome and per strand). The function then computes the WIG profile of each transcript. The user can also define a region around the transcripts that should also be plotted (in this case, the profile will include the WIG values which overlap with the transcript as well as the 5' and 3' regions). Since the transcripts do not necessarily have the same sizes, all profiles will be extended or shrunk to fit in a size which is given by the user. If the resulting profile is a bit bumpy, the user can also smooth the curve by using a linear smoothing function (the size of the smoothing window is given by the user). Finally, the user may want to plot the WIG data for the opposite strand too (if the strand-specific WIG data are available).
+ </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/mapperAnalyzer.xml
--- a/SMART/galaxy/mapperAnalyzer.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/mapperAnalyzer.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -1,5 +1,5 @@
<tool id="mapperAnalyzer" name="mapper analyzer">
- <description>Read the output of an aligner, print statistics and possibly translate into BED or GBrowse formats. </description>
+ <description>Read the output of an aligner, print statistics and possibly translate into GFF, BED or GBrowse formats. </description>
<command interpreter="python">
../Java/Python/mapperAnalyzer.py -i $formatType.inputFileName1
#if $formatType.FormatInputFileName1 == 'bed':
@@ -89,7 +89,6 @@
</when>
</conditional>

-
<conditional name="optionnumber">
<param name="number" type="select" label="max. number of occurrences of a sequence">
<option value="Yes">Yes</option>
@@ -183,4 +182,21 @@
<data name="outputFileGFF" format="gff3" label="[mapperAnalyzer] out file"/>
</outputs>

+ <help>
+Maybe the first program you may use. It reads a set of mappings given by the tool you have used to map your data on the reference genome and translates them into a set of genomic coordinates. You also have the possibility to extract only those that you are interested in (few matches in the genome, few errors in the mapping, etc.). You can also select those reads which map fewer than a given number of times in the genome. Moreover, you can output the data in various formats, which you can use to visualize them *via* UCSC genome browser or GBrowse. Unmatched reads can be written to another file, in case you would like to try to map them with another tool (may sometimes work!).
+
+You can filter your data according to:
+
+- number of errors in the mapping
+
+- number of occurrences of the mapping in the genome
+
+- size of the read mapped
+
+- number of gaps in the mapping
+
+The script needs an input file (your mapped reads) together with its format and the read sequences file together with its format (FASTA or FASTQ). If you want, you can also append the results of this script to another GFF3 file. This is useful when the GFF3 file is the result of the mapping using another tool.
+
+By default, any gap in the alignment to the reference sequence is treated like an exon. You can decide to remove this feature by merging short introns (actually, gaps).
+ </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/mergeSlidingWindowsClusters.xml
--- a/SMART/galaxy/mergeSlidingWindowsClusters.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/mergeSlidingWindowsClusters.xml Mon Apr 22 11:08:07 2013 -0400

@@ -99,5 +99,9 @@
<outputs>
<data name="outputFileGff" format="gff3"/>
</outputs>
+
+ <help>
+Sliding windows are also useful to compare two (or more!) sets of data. This can be very valuable when you want to compare differential expression in two different conditions. When you have two different sliding windows sets, this function merges them into one, where each window contains the two pieces of information. You may want to plot the data afterwards using the *plot transcript list* function.
+ </help>

</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/mergeTranscriptLists.xml
--- a/SMART/galaxy/mergeTranscriptLists.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/mergeTranscriptLists.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -45,9 +45,6 @@
#end if

-o $outputFileGff
-
-
-
</command>

<inputs>
@@ -145,4 +142,9 @@
<data name="outputFileGff" format="gff3" label="[mergeTranscriptLists]out file"/>
</outputs>

+ <help>
+The script is similar to *compare overlapping*, except that when data of two different sets overlap, they are merged. You can use the same parameters as *compare overlapping* and use them to look for transcription on both strands, for example.
+
+Optionally, you can also add to the output all the elements from the first set which do not overlap with the second set.
+ </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/modifyGenomicCoordinates.xml
--- a/SMART/galaxy/modifyGenomicCoordinates.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/modifyGenomicCoordinates.xml Mon Apr 22 11:08:07 2013 -0400

@@ -66,7 +66,7 @@
</conditional>

<conditional name="OptionStart">
- <param name="start" type="select" label="restrict to the start of the transcript">
+ <param name="start" type="select" label="shrink to the start of the feature">
<option value="Yes">Yes</option>
<option value="No" selected="true">No</option>
</param>
@@ -78,7 +78,7 @@
</conditional>

<conditional name="OptionEnd">
- <param name="end" type="select" label="restrict to the end of the transcript">
+ <param name="end" type="select" label="shrink to the end of the feature">
<option value="Yes">Yes</option>
<option value="No" selected="true">No</option>
</param>
@@ -122,5 +122,14 @@
   </outputs>

   <help>
+This tool reads a list of transcripts and modifies each feature by:
+
+- shrinking it to the *n* first nucleotides or the *n* last nucleotides, or
+
+- extending it by *n* nucleotides towards the 5' direction (upstream) or the 3' direction (downstream).
+
+Note that the 5' or 3' direction depends on the orientation of the feature (the 5' end of a transcript located on the minus strand is on the right hand of this transcript!).
+
+The tool needs a transcript file, its format, and outputs a new transcript file.
   </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/modifySequenceList.xml
--- a/SMART/galaxy/modifySequenceList.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/modifySequenceList.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -43,4 +43,7 @@
      <data format="fasta" name="outputFile" label="[modifySequenceList] Output File"/>
   </outputs>

+  <help>
+  This tool reads a list of sequences (in multi-FASTA/Q format) that you provide and shrinks each sequence to the *n* first nucleotides or the *n* last nucleotides.
+  </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/plotCoverage.xml
--- a/SMART/galaxy/plotCoverage.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/plotCoverage.xml Mon Apr 22 11:08:07 2013 -0400

@@ -169,7 +169,7 @@
</conditional>

<conditional name="optiontitle">
- <param name="title" type="select" label="title of the plots ">
+ <param name="title" type="select" label="title for the figure">
<option value="Yes">Yes</option>
<option value="No" selected="true">No</option>
</param>
@@ -260,6 +260,12 @@
</outputs>

     <help>
-        This script gives a .tar out file, if you want to take look at the results, you have to download it.
+Plot the coverage of the first set of genomic coordinates with respect to the second set of genomic coordinates. For each element of the second set (we will suppose that they are annotated genes), it computes the number of elements of the first set (reads, for instance) which overlap it.
+
+Alternatively, if the first file is in GFF format, and contains the **Target** tag, you can omit the second file. However, a fasta file corresponding to the second file should be given (to compute the size of the reference elements).
+
+The tool produces two plots per gene. The first plot gives the coverage: a point (*x*, *y*) means that *y* reads cover the *x*th nucleotide of the gene. The second figure displays the (possibly spliced) gene in black, and the overlapping reads (blue is colinear, red is anti-sense).
+
+This script produces a .tar output file; if you want to take a look at the results, you have to download it.
     </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/plotTranscriptList.xml
--- a/SMART/galaxy/plotTranscriptList.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/plotTranscriptList.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -2,16 +2,12 @@
<description>Plot some information from a list of transcripts. </description>
<command interpreter="python">
../Java/Python/plotTranscriptList.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName == 'gff':
+ #if $formatType.FormatInputFileName == 'gff':
-f gff
#elif $formatType.FormatInputFileName == 'gff2':
-f gff2
#elif $formatType.FormatInputFileName == 'gff3':
-f gff3
- #elif $formatType.FormatInputFileName == 'sam':
- -f sam
#elif $formatType.FormatInputFileName == 'gtf':
-f gtf
#end if
@@ -33,10 +29,7 @@
-m $optionyLab.labVal
#end if

- #if $optionyLog.log == 'Yes':
- -l $optionyLog.logVal
- #end if
-
+ $optionLog.log
-s $shape
-b $bucket

@@ -46,16 +39,11 @@
<inputs>
<conditional name="formatType">
<param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="bed">bed</option>
<option value="gff">gff</option>
<option value="gff2">gff2</option>
<option value="gff3">gff3</option>
- <option value="sam">sam</option>
<option value="gtf">gtf</option>
</param>
- <when value="bed">
- <param name="inputFileName" format="bed" type="data" label="Input File"/>
- </when>
<when value="gff">
<param name="inputFileName" format="gff" type="data" label="Input File"/>
</when>
@@ -65,16 +53,13 @@
<when value="gff3">
<param name="inputFileName" format="gff3" type="data" label="Input File"/>
</when>
- <when value="sam">
- <param name="inputFileName" format="sam" type="data" label="Input File"/>
- </when>
<when value="gtf">
<param name="inputFileName" format="gtf" type="data" label="Input File"/>
</when>
</conditional>

- <param name="xVal" type="text" value="None" label="tag for the x value [compulsory option]"/>
- <param name="yVal" type="text" value="None" label="tag for the y value [compulsory option]"/>
+ <param name="xVal" type="text" value="None" label="tag for the x value"/>
+ <param name="yVal" type="text" value="None" label="tag for the y value"/>

<conditional name="optionz">
<param name="z" type="select" label="tag for the z value ">
@@ -92,7 +77,7 @@

<param name="YVal" type="float" value="0.0" label="value for y when tag is not present"/>

- <param name="ZVal" type="float" value="0.0" label="value for z when tag is not present"/>
+ <param name="ZVal" type="float" value="0.0" label="value for z when tag is not present (if applicable)"/>

<conditional name="optionxLab">
<param name="xLab" type="select" label="label on the x-axis ">
@@ -117,16 +102,13 @@
</when>
</conditional>

- <conditional name="optionyLog">
+ <conditional name="optionLog">
<param name="log" type="select" label="use log on x- or y-axis (write 'x', 'y' or 'xy')">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
+ <option value="" selected="true">No</option>
+ <option value="-l x">log on the x-axis</option>
+ <option value="-l y">log on the y-axis</option>
+ <option value="-l xy">log on the x- and y-axis</option>
</param>
- <when value="Yes">
- <param name="logVal" type="text" value=" "/>
- </when>
- <when value="No">
- </when>
</conditional>

<param name="shape" type="text" value="barplot" label="shape of the plot [format: choice (barplot, line, points, heatPoints)]"/>
@@ -138,4 +120,15 @@
<data name="outputFilePNG" format="png" label="[plotTranscriptList]out file"/>
</outputs>

+ <help>
+Plot the data attached as tags in a transcript list. This can be used for displaying the comparison of different sets of sliding windows, for instance.
+
+The tool reads the tags of a transcript file (actually, a GFF3 file). It considers more specifically the tag names that you specify as parameters. If you use only one tag name, you can display a line plot. In this case, you have to specify a bucket size *s* (which is by default 1) and a point (*x*, *y*) tells you that there are *y* transcripts with tag values *x* to *x + s*.
+
+You can display cloud plots if you use two tag names. Each point represents the values of the two tags of a transcript. If you use three variables, the third variable will be the color of the point. You can also use a log scale and name the axes of the plot.
+
+Each transcript must contain the tags which are specified. If not, you should provide a default value, which is used when the tag is not present.
+
+If you use a cloud plot, you can compute the Spearman's rho to quantify a correlation between your two tag values.
+ </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/removeExonLines.sh
--- a/SMART/galaxy/removeExonLines.sh Fri Apr 19 10:13:11 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

@@ -1,2 +0,0 @@
-#!/bin/bash
-sed '/exon/d' $1

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/restrictFromSize.xml
--- a/SMART/galaxy/restrictFromSize.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/restrictFromSize.xml Mon Apr 22 11:08:07 2013 -0400

@@ -2,7 +2,9 @@
<description>Select the elements of a list of sequences or transcripts with a given size.</description>
<command interpreter="python">
../Java/Python/restrictFromSize.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'bed':
+ #if $formatType.FormatInputFileName == 'fasta':
+ -f fasta
+ #elif $formatType.FormatInputFileName == 'bed':
-f bed
#elif $formatType.FormatInputFileName == 'gff':
-f gff
@@ -29,6 +31,7 @@
<inputs>
<conditional name="formatType">
<param name="FormatInputFileName" type="select" label="Input File Format">
+ <option value="fasta">fasta</option>
<option value="bed">bed</option>
<option value="gff">gff</option>
<option value="gff2">gff2</option>
@@ -36,6 +39,9 @@
<option value="sam">sam</option>
<option value="gtf">gtf</option>
</param>
+ <when value="fasta">
+ <param name="inputFileName" format="fasta" type="data" label="Input File"/>
+ </when>
<when value="bed">
<param name="inputFileName" format="bed" type="data" label="Input File"/>
</when>
@@ -87,7 +93,7 @@
</outputs>

<help>
- command example: restrictFromSize.py -i cis_e10_cluster20InSeed2515_nbEUp10.gff3 -f gff -o cis_e10_cluster20InSeed2515_nbEUp10_lgUp50 -m 50
+Reads a list of sequences or genomic coordinates and outputs those which are longer and / or shorter than a given size ---which you provide.
</help>

</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/restrictTranscriptList.xml
--- a/SMART/galaxy/restrictTranscriptList.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/restrictTranscriptList.xml Mon Apr 22 11:08:07 2013 -0400

@@ -1,5 +1,5 @@
<tool id="restrictTranscriptList" name="restrict transcript list">
-  <description>Keep the coordinates which are located in a given position.</description>
+  <description>Select the features which are located in a given locus.</description>
   <command interpreter="python"> ../Java/Python/restrictTranscriptList.py -i $formatType.inputFileName
#if $formatType.FormatInputFileName == 'bed':
-f bed
@@ -75,7 +75,7 @@
</conditional>

<conditional name="OptionStart">
- <param name="start" type="select" label="restrict to the start of the transcript">
+ <param name="start" type="select" label="start region of the locus">
<option value="Yes">Yes</option>
<option value="No" selected="true">No</option>
</param>
@@ -87,7 +87,7 @@
</conditional>

<conditional name="OptionEnd">
- <param name="end" type="select" label="restrict to the end of the transcript">
+ <param name="end" type="select" label="end region of the locus">
<option value="Yes">Yes</option>
<option value="No" selected="true">No</option>
</param>
@@ -105,4 +105,20 @@

   <help>
   </help>
+<tests>
+  <test>
+    <param name="FormatInputFileName" value="gtf"/>
+    <param name="inputFileName" value="genes.gtf"/>
+    <param name="Chrom" value="Yes"/>
+    <param name="ChromName" value="I"/>
+    <param name="start" value="No"/>
+    <param name="end" value="No"/>
+    <output name="outputFile" file="exp_restrictTranscriptList.gff3"/>
+  </test>
+</tests>
+
+ <help>
+Reads a list of genomic coordinates and outputs those which are on a given chromosome and / or between two given positions.
+ </help>
+
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/test/CollapseReads.xml
--- a/SMART/galaxy/test/CollapseReads.xml Fri Apr 19 10:13:11 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,49 +0,0 @@
-<tool id="collapseReads" name="collapseReads">
- <description>Merges two reads if they have exactly the same genomic coordinates.</description>
- <command interpreter="python">
- ../Java/Python/CollapseReads.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName == 'gff3':
- -f gff3
- #end if
-
- -$strand
- -o $outputFileGff
- --galaxy
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- </param>
- <when value="bed">
- <param name="inputFileName" format="bed" type="data" label="Input File"/>
- </when>
- <when value="gff">
- <param name="inputFileName" format="gff" type="data" label="Input File"/>
- </when>
- <when value="gff2">
- <param name="inputFileName" format="gff2" type="data" label="Input File"/>
- </when>
- <when value="gff3">
- <param name="inputFileName" format="gff3" type="data" label="Input File"/>
- </when>
- </conditional>
-
- <param name="strand" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Strand option merges 2 different strands[default:False]."/>
- </inputs>
-
- <outputs>
- <data name="outputFileGff" format="gff3"/>
- </outputs>
-
-</tool>
\ No newline at end of file

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/test/Test_F_WrappGetLetterDistribution.py
--- a/SMART/galaxy/test/Test_F_WrappGetLetterDistribution.py Fri Apr 19 10:13:11 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,91 +0,0 @@
-import unittest
-import os
-from commons.core.utils.FileUtils import FileUtils
-from SMART.galaxy.WrappGetLetterDistribution import WrappGetLetterDistribution
-
-SMART_PATH = "%s/SMART" % os.environ["REPET_PATH"]
-SMART_DATA = SMART_PATH + "/data"
-
-class Test_F_WrappGetLetterDistribution(unittest.TestCase):
-
-
-    def setUp(self):
-        self._dirTest = "%s/galaxy/test" % SMART_PATH
-        self._iwrappFastq = WrappGetLetterDistribution()
-        self._iwrappFasta = WrappGetLetterDistribution()
-        self._expOutputCSV = "expOutputTomate.csv"
-
-    def test_wrappFasta(self):
-        self._iwrappFasta._inputFileName = "%s/SR1.fasta" % SMART_DATA
-        self._iwrappFasta._outputFileNamePrefix = "%s/galaxy/test/TomateFasta_res" % SMART_PATH
-        self._iwrappFasta._outputFileNamePNG = "%s/galaxy/test/TomateFasta_res.png" % SMART_PATH
-        self._iwrappFasta._outputFileNamePerNtPNG = "%s/galaxy/test/TomateFasta_resPerNt.png" % SMART_PATH
-        self._iwrappFasta._outputFileNameCSV = "%s/galaxy/test/TomateFasta_res.csv" % SMART_PATH
-        self._iwrappFasta._inputFileFormat = "fasta"
-        self._iwrappFasta._csv = True
-        if not(FileUtils.isEmpty(self._iwrappFasta._inputFileName)):
-            self._iwrappFasta.wrapp()
-            self.assertTrue(os.path.exists(self._iwrappFasta._outputFileNamePNG))
-            self.assertTrue (os.path.exists(self._iwrappFasta._outputFileNamePerNtPNG))
-            self.assertTrue (os.path.exists(self._iwrappFasta._outputFileNameCSV))
-            self.assertTrue(FileUtils.are2FilesIdentical(self._iwrappFasta._outputFileNameCSV,self._expOutputCSV))
-        else:
-            print "Problem : the input fasta file %s is empty!" % self._inputFileFasta
-
-
-#    def test_wrappFasta_withoutCSV_Opt(self):
-#        self._iwrappFasta._inputFileName = "%s/SR1.fasta" % SMART_DATA
-#        self._iwrappFasta._outputFileNamePrefix = "%s/galaxy/test/TomateFasta_res" % SMART_PATH
-#        self._iwrappFasta._outputFileNamePNG = "%s/galaxy/test/TomateFasta_res.png" % SMART_PATH
-#        self._iwrappFasta._outputFileNamePerNtPNG = "%s/galaxy/test/TomateFasta_resPerNt.png" % SMART_PATH
-#        self._iwrappFasta._outputFileNameCSV = "%s/galaxy/test/TomateFasta_res.csv" % SMART_PATH
-#        self._iwrappFasta._inputFileFormat = "fasta"
-#        self._iwrappFasta._csv = False
-#        if not(FileUtils.isEmpty(self._iwrappFasta._inputFileName)):
-#            self._iwrappFasta.wrapp()
-#            self.assertTrue(os.path.exists(self._iwrappFasta._outputFileNamePNG))
-#            self.assertTrue (os.path.exists(self._iwrappFasta._outputFileNamePerNtPNG))
-#        else:
-#            print "Problem : the input fasta file %s is empty!" % self._inputFileFasta
-#        os.system("rm %s/galaxy/test/*_res*.png" %SMART_PATH)
-#        os.system("rm %s/galaxy/test/*_res.csv" %SMART_PATH)
-#
-#
-#    def test_wrappFastq(self):
-#        self._iwrappFastq._inputFileName = "%s/SR1.fastq" % SMART_DATA
-#        self._iwrappFastq._outputFileNamePrefix = "%s/galaxy/test/TomateFastq_res" % SMART_PATH
-#        self._iwrappFastq._outputFileNamePNG = "%s/galaxy/test/TomateFastq_res.png" % SMART_PATH
-#        self._iwrappFastq._outputFileNamePerNtPNG = "%s/galaxy/test/TomateFastq_resPerNt.png" % SMART_PATH
-#        self._iwrappFastq._outputFileNameCSV = "%s/galaxy/test/TomateFastq_res.csv" % SMART_PATH
-#        self._iwrappFastq._inputFileFormat = "fastq"
-#        self._iwrappFastq._csv = True
-#        if not(FileUtils.isEmpty(self._iwrappFastq._inputFileName)):
-#            self._iwrappFastq.wrapp()
-#            self.assertTrue(os.path.exists(self._iwrappFastq._outputFileNamePNG))
-#            self.assertTrue (os.path.exists(self._iwrappFastq._outputFileNamePerNtPNG))
-#            self.assertTrue (os.path.exists(self._iwrappFastq._outputFileNameCSV))
-#            self.assertTrue(FileUtils.are2FilesIdentical(self._iwrappFastq._outputFileNameCSV,self._expOutputCSV))
-#        else:
-#            print "Problem : the input fastq file %s is empty!" % self._inputFileFastq
-#
-#
-#    def test_wrappFastq_withoutCSV_Opt(self):
-#        self._iwrappFastq._inputFileName = "%s/SR1.fastq" % SMART_DATA
-#        self._iwrappFastq._outputFileNamePrefix = "%s/galaxy/test/TomateFastq_res" % SMART_PATH
-#        self._iwrappFastq._outputFileNamePNG = "%s/galaxy/test/TomateFastq_res.png" % SMART_PATH
-#        self._iwrappFastq._outputFileNamePerNtPNG = "%s/galaxy/test/TomateFastq_resPerNt.png" % SMART_PATH
-#        self._iwrappFastq._outputFileNameCSV = "%s/galaxy/test/TomateFastq_res.csv" % SMART_PATH
-#        self._iwrappFastq._inputFileFormat = "fastq"
-#        self._iwrappFastq._csv = False
-#        if not(FileUtils.isEmpty(self._iwrappFastq._inputFileName)):
-#            self._iwrappFastq.wrapp()
-#            self.assertTrue(os.path.exists(self._iwrappFastq._outputFileNamePNG))
-#            self.assertTrue (os.path.exists(self._iwrappFastq._outputFileNamePerNtPNG))
-#        else:
-#            print "Problem : the input fastq file %s is empty!" % self._inputFileFastq
-#        os.system("rm %s/galaxy/test/*_res*.png" %SMART_PATH)
-#        os.system("rm %s/galaxy/test/*_res.csv" %SMART_PATH)
-
-
-if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/testArgum.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/testArgum.xml Mon Apr 22 11:08:07 2013 -0400

@@ -0,0 +1,24 @@
+<tool id="test_argument" name="test_argu" version="1.0.0">
+  <description>To test the arguments from shell.</description>
+  <command>
+../testArgu.sh $test_out
+#for $i in $replicate_groups
+#for $j in $i.replicates
+$j.bam_alignment:#slurp
+#end for
+#end for
+    >> $Log_File </command>
+  <inputs>
+ <param format="gff3" name="anno_input_selected" type="data" label="Genome annotation in GFF3 file" help="A tab delimited format for storing sequence features and annotations"/>
+   <repeat name="replicate_groups" title="Replicate group" min="2">
+     <repeat name="replicates" title="Replicate">
+      <param format="fastq" name="bam_alignment" type="data" label="BAM alignment file" help="BAM alignment file. Can be generated from SAM files using the SAM Tools."/>
+     </repeat>
+   </repeat>
+  </inputs>
+
+  <outputs>
+    <data format="txt" name="test_out" label="DESeq result"/>
+ <data format="txt" name="Log_File" label="DESeq result"/>
+  </outputs>
+</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/testR.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/testR.xml Mon Apr 22 11:08:07 2013 -0400

[

@@ -0,0 +1,19 @@
+<tool id="testDiffExpAnal" name="Differential Expression Analysis">
+  <description>Differential expression analysis for sequence count data (DESeq)</description>
+  <command interpreter="sh"> ../DiffExpAnal/testR.sh $inputFile $columnsOfGeneName $columnsOfCondition1 $columnsOfCondition2 $outputFileCSV $outputFilePNG 2>$outputLog </command>
+  <inputs>
+    <param name="inputFile" type="data" label="Input File" format="tabular"/>
+ <param name="columnsOfGeneName" type="text" value="0" label="Please indicate the column numbers of gene names with ',' separator. If There are not gene names, default value is 0."/>
+ <param name="columnsOfCondition1" type="text" value="1,2" label="Please indicate the column numbers of condition1 with ',' separator."/>
+ <param name="columnsOfCondition2" type="text" value="3,4" label="Please indicate the column numbers of condition2 with ',' separator."/>
+  </inputs>
+
+  <outputs>
+    <data format="tabular" name="outputFileCSV" label="[DiffExpAnal] Output CSV File"/>
+ <data format="png" name="outputFilePNG" label="[DiffExpAnal] Output PNG File"/>
+    <data format="tabular" name="outputLog" label="[DiffExpAnal] Log File"/>
+  </outputs>
+
+  <help>
+  </help>
+</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/trimAdaptor.xml
--- a/SMART/galaxy/trimAdaptor.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/trimAdaptor.xml Mon Apr 22 11:08:07 2013 -0400

@@ -33,7 +33,15 @@
<filter>noAdaptor</filter>
</data>
   </outputs>
-
+  <tests>
+    <test>
+      <param name="inputFile" value="short_fastq.fastq"/>
+      <param name="adaptor" value="AAAA"/>
+      <param name="Error" value="No"/>
+      <param name="noAdaptor" value="False"/>
+      <output name="outputFile" file="exp_trimadaptator_short_fastq.fastq"/>
+    </test>
+  </tests>
   <help>
   </help>
</tool>

diff -r c79b9ae3f65f -r 440ceca58672 SMART/galaxy/trimSequences.xml
--- a/SMART/galaxy/trimSequences.xml Fri Apr 19 10:13:11 2013 -0400
+++ b/SMART/galaxy/trimSequences.xml Mon Apr 22 11:08:07 2013 -0400

@@ -1,5 +1,5 @@
<tool id="trimSequences" name="trim sequences">
-  <description>Remove the 5' and/or 3' adaptors of a list of reads.</description>
+  <description>Remove the 5' and/or 3' adapters of a list of reads.</description>
   <command interpreter="python"> ../Java/Python/trimSequences.py -i $inputFile -f fastq
    #if $OptionFPADP.FPADP == "Yes":
-5 $OptionFPADP.fivePAdaptor
@@ -7,10 +7,7 @@
#if $OptionTPADP.TPADP == "Yes":
-3 $OptionTPADP.threePAdaptor
#end if
-   #if $OptionError.Error == "Yes":
- -e $OptionError.ErrorVal
- #end if
-
+ -e $errors
$indels
    $noAdaptor5p $noAdaptorFile5p
    $noAdaptor3p $noAdaptorFile3p
@@ -23,7 +20,7 @@
     <param name="inputFile" type="data" label="Input fastq File" format="fastq"/>

<conditional name="OptionFPADP">
- <param name="FPADP" type="select" label="5'adaptor">
+ <param name="FPADP" type="select" label="5' adapter">
<option value="Yes">Yes</option>
<option value="No" selected="true">No</option>
</param>
@@ -35,7 +32,7 @@
</conditional>

<conditional name="OptionTPADP">
- <param name="TPADP" type="select" label="3'adaptor">
+ <param name="TPADP" type="select" label="3' adapter">
<option value="Yes">Yes</option>
<option value="No" selected="true">No</option>
</param>
@@ -46,23 +43,10 @@
</when>
</conditional>

- <conditional name="OptionError">
- <param name="Error" type="select" label="number of errors in percent">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="ErrorVal" type="integer" value="0" />
- </when>
- <when value="No">
- </when>
- </conditional>
-
+ <param name="errors" type="integer" label="number of errors in percent" value="0" />
<param name="indels" type="boolean" truevalue="-d" falsevalue="" checked="false" label="indels option" help="also accept indels"/>
- <param name="noAdaptor5p" type="boolean" truevalue="-n" falsevalue="" checked="false" label="noAdaptor 5' option" help="file name where to print sequences with no 5' adaptor "/>
- <param name="noAdaptor3p" type="boolean" truevalue="-m" falsevalue="" checked="false" label="noAdaptor 3' option" help="file name where to print sequences with no 3' adaptor "/>
-
-
+ <param name="noAdaptor5p" type="boolean" truevalue="-n" falsevalue="" checked="false" label="noAdaptor 5' option" help="file name where to print sequences with no 5' adapter "/>
+ <param name="noAdaptor3p" type="boolean" truevalue="-m" falsevalue="" checked="false" label="noAdaptor 3' option" help="file name where to print sequences with no 3' adapter "/>

   </inputs>

@@ -78,4 +62,21 @@

   <help>
   </help>
+  <tests>
+    <test> <!-- trims the 5' adapter AAAA only; 0% errors, no indels, no "no adapter" side files -->
+      <param name="inputFile" value="short_fastq.fastq"/>
+      <param name="FPADP" value="Yes"/>
+      <param name="fivePAdaptor" value="AAAA"/>
+      <param name="TPADP" value="No"/>
+      <param name="errors" value="0"/>
+      <param name="indels" value="False"/>
+      <param name="noAdaptor5p" value="False"/>
+      <param name="noAdaptor3p" value="False"/>
+      <output name="outputFile" file="exp_trimsequences_short_fastq.fastq"/>
+    </test>
+  </tests>
+
+  <help>
+This function removes the adapter from the 5' or 3' end of your reads. It can even recognize adapters which are only partially present. You can specify whether you are ready to accept indels or not.
+  </help>
</tool>