diff overlapping_reads.xml @ 3:4d9682bd3a6b draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_signatures commit 96ed5824190aff281cc3aa47dc60fc66aac41db3
author artbio
date Sat, 02 Sep 2017 06:35:15 -0400
parents 320e06bf99b9
children 20d28cfdeefe
line wrap: on
line diff
--- a/overlapping_reads.xml	Wed Aug 30 05:40:18 2017 -0400
+++ b/overlapping_reads.xml	Sat Sep 02 06:35:15 2017 -0400
@@ -1,4 +1,4 @@
-<tool id="overlapping_reads" name="Get overlapping reads" version="0.9.1">
+<tool id="overlapping_reads" name="Get overlapping reads" version="0.9.2">
     <description />
     <requirements>
         <requirement type="package" version="0.11.2.1=py27_0">pysam</requirement>
@@ -38,6 +38,15 @@
             <param name="overlap" value="10" />
             <output file="paired.fa" ftype="fasta" name="output" />
         </test>
+        <test>
+            <param ftype="bam" name="input" value="sr_bowtie.bam" />
+            <param name="minquery" value="20" />
+            <param name="maxquery" value="22" />
+            <param name="mintarget" value="23" />
+            <param name="maxtarget" value="29" />
+            <param name="overlap" value="10" />
+            <output file="paired_2.fa" ftype="fasta" name="output" />
+        </test>
     </tests>
     <help>
 
@@ -52,24 +61,43 @@
 
 **Input**
 
-A **sorted** BAM alignment file.
+*A* **sorted** *BAM alignment file.*
+
+*Query and target sizes:*
+
+For each *query* read (of the specified size) in the BAM alignment, the algorithm searches
+for *target* reads (of the specified size) that align on the opposite strand with a 10 nt
+overlap.
+
+Searching query reads of 20-22 nt that overlap by 10 nt with target
+reads of 23-29 nt is different from searching query reads of 23-29 nt that overlap by 10 nt
+with target reads of 20-22 nt, i.e., searching for siRNAs that pair with piRNAs is distinct
+from searching for piRNAs that pair with siRNAs, although of course the number of possible
+piRNA/siRNA pairs is the same as the number of possible siRNA/piRNA pairs.
+
+*Overlap:*
+
+The number of nucleotides by which the pairs of sequences will overlap.
+
 
 **Outputs**
 
 a fasta file of pairable reads such as :
 
->FBgn0000004_17.6|5839|R|26
+>FBgn0000004_17.6|5855|F|23|n=1
+
+TTGACGAAAATGATCGAGTGGAT
+
+>FBgn0000004_17.6|5839|R|26|n=1
 
 TTTTCGTCAATTGTGCCAAATAGGTA
 
->FBgn0000004_17.6|5855|F|23
-
-TTGACGAAAATGATCGAGTGGAT
+where FBgn0000004_17.6 stands for the chromosome, 5839 stands for the 1-based read position,
+R stands for the reverse strand (F for the forward strand), 26 stands for the size of the sequence and
+n=1 stands for the number of reads of the sequence in the dataset.
 
-where FBgn0000004_17.6 stands for the chromosome, 5839 stands for the 1-based read position, 
-R stand for reverse strand (F forward strand) and 26 stands for the size of the read.
-
-the second sequence in this example is a read that overlap by 10 nt with the first read.
+The second sequence in this example corresponds to 1 read that overlaps by 10 nt with
+1 read of the first sequence.
 
         </help>
     <citations>