Galaxy |

Changeset 0:19b20927172d (2013-06-18)

Next changeset 1:7c9574213c0a (2013-06-20)

Commit message:
Uploaded

added:
pyCRAC/pyAlignment2Tab.xml
pyCRAC/pyBarcodeFilter.pl
pyCRAC/pyBarcodeFilter.xml
pyCRAC/pyBinCollector.pl
pyCRAC/pyBinCollector.xml
pyCRAC/pyCalculateChromosomeLengths.xml
pyCRAC/pyCalculateFDRs.xml
pyCRAC/pyCalculateMutationFrequencies.xml
pyCRAC/pyCheckGTFfile.xml
pyCRAC/pyClusterReads.pl
pyCRAC/pyClusterReads.xml
pyCRAC/pyExtractLinesFromGTF.xml
pyCRAC/pyFasta2tab.xml
pyCRAC/pyFastqDuplicateRemover.pl
pyCRAC/pyFastqDuplicateRemover.xml
pyCRAC/pyFastqJoiner.xml
pyCRAC/pyFastqSplitter.pl
pyCRAC/pyFastqSplitter.xml
pyCRAC/pyGTF2bed.xml
pyCRAC/pyGTF2bedGraph.pl
pyCRAC/pyGTF2bedGraph.xml
pyCRAC/pyGetGTFSources.xml
pyCRAC/pyGetGeneNamesFromGTF.xml
pyCRAC/pyMotif.pl
pyCRAC/pyMotif.xml
pyCRAC/pyPileup.xml
pyCRAC/pyReadAligner.xml
pyCRAC/pyReadCounters.pl
pyCRAC/pyReadCounters.xml
pyCRAC/pySelectMotifsFromGTF.xml
pyCRAC/pycrac.chr.loc.sample
pyCRAC/pycrac.fasta.loc.sample
pyCRAC/pycrac.gtf.loc.sample
pyCRAC/pycrac.tab.loc.sample
pyCRAC/tool_data_table_conf.xml.sample

diff -r 000000000000 -r 19b20927172d pyCRAC/pyAlignment2Tab.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyAlignment2Tab.xml Tue Jun 18 09:11:00 2013 -0400

b'@@ -0,0 +1,139 @@\n+<tool id="pyAlignment2Tab" name="pyAlignment2Tab">\n+\t <description>converter</description>\n+\t <requirements>\n+ \t<requirement type="package">pyCRAC</requirement>\n+ \t </requirements>\n+ \t <command interpreter="python">/usr/local/bin/pyAlignment2Tab.py -f $input --limit $limit -o $output --singlefile\n+ \t </command>\n+\t <version_command>/usr/local/bin/pyAlignment2Tab.py --version</version_command>\n+ \t <inputs>\n+\t \t<param name="input" type="data" format="fasta" label="pyReadAligner output file -f" help="Fasta file"/>\n+\t \t<param name="limit" type="integer" format="integer" value="90" size="4" label="Set the column width of alignment" help="Enter a value > 50">\n+\t\t\t<validator type="in_range" min="50" message="Please enter a value greater than 50"/>\n+\t \t</param>\n+\t\t<param name="label" type="text" format="txt" size="30" value="pyAlignment2Tab" label="Enter output file label -o" />\n+ \t </inputs>\n+ \t <outputs>\n+\t \t<data name="output" format="txt" label="${label.value}.tab"/>\n+ \t </outputs>\n+\t <help>\n+\n+.. class:: infomark\n+\n+**pyAlignment2Tab**\n+\n+pyAlignment2Tab is part of the pyCRAC_ package. 
Converts pyReadAligner fasta output to a tabular alignment output.\n+\n+Example::\n+\n+ The tool expects a standard pyReadAligner fasta-formatted output file:\n+ \n+ >GeneX\n+ ATGTCTCGTACTAACATGGATACAAGACACGCACATTCTGCTTTACTGGCAGCACCACAGAGTGCTACTGCAAATAGCAGGAGCAGCAACAGCAGCAGCGAGAGTAGTAGTAACAAAAACAATATCAATGTCGGCGTCGGTGACGATAGCGGTAA\n+ >257930-10\n+ ---TCTCGTACcAACATGGATACAAGACACGCACATTCTGCTT----------------------------------------------------------------------------------------------------------------\n+ >3664964-1\n+ ---TCTCGcACcAACATGGATACAAGACACGCACATTtTGCTT----------------------------------------------------------------------------------------------------------------\n+ >4033560-1\n+ ---TCTCGTACcAACATGGATACAAGACACGCACATTCTGtTT----------------------------------------------------------------------------------------------------------------\n+ >8571880-1\n+ ---TCTCGTACcAACATGGATACAAGACACGCAgATTCTGCTT----------------------------------------------------------------------------------------------------------------\n+ >9617396-1\n+ ---TCTCGTACcAACATGGATACAAGACACGCcCATTCTGCTT----------------------------------------------------------------------------------------------------------------\n+ >843368-5\n+ ------------AACAcGGATACAAGACACGCACATTCTG-------------------------------------------------------------------------------------------------------------------\n+ >854553-5\n+ ------------AACATGGATACAAGACACGCAC--TCTG-------------------------------------------------------------------------------------------------------------------\n+ >1522401-2\n+ --------------CATGGATACAAGACACGCACATTCTGCTTTACTGGCAGgA-----------------------------------------------------------------------------------------------------\n+ >5981234-1\n+ --------------CATGGATACAAGACACGCACAcTCTGCTTTACTGGCAGCA-----------------------------------------------------------------------------------------------------\n+ >997684-4\n+ 
--------------CATGGATACAAGACACGCACATTCTGCTTTACTGGCAGCA-----------------------------------------------------------------------------------------------------\n+ >1046653-4\n+ ---------------ATGGATACAAGACACGCACATTCTGCTTTACTGGCAGgAC----------------------------------------------------------------------------------------------------\n+ >1103730-4\n+ ---------------ATGGATACAAGACACGCACAcTCTG-------------------------------------------------------------------------------------------------------------------\n+ >1603913-2\n+ ---------------ATGGATACAAGAaACGCACAcTCTG-------------------------------------------------------------------------------------------------------------------\n+ >180349-12\n+ ---------------ATGGATACAAGACACGCACATTCcGCTTTACTG------------------'..b'---------------\n+ >1987775-1\t---------------ATGGATACccGACACGCACATTCTGCTTTACTGcCAGCAC-----------------------------------\n+ >2258725-1\t---------------ATGGATACAAGACACGCACATTCTGCTTTgCTGGCAGCAC-----------------------------------\n+ >2631987-1\t---------------ATGGATACAAGACACGCACATTCTGCTTTACcGGCAGgAC-----------------------------------\n+ >337206-9\t---------------ATGGATACAAGACACGCACATTCTGCTTTACTGGCAGCAC-----------------------------------\n+ >4616761-1\t---------------ATGGATAgAAGACACGCACATTCTGCTTTACTGGtAGCAC-----------------------------------\n+ >4756312-1\t---------------ATGGATACAAcACACGCACAcTCTG--------------------------------------------------\n+ >4763682-1\t---------------ATGGATACAAGACACGCACATTCcGCTTTcCTG------------------------------------------\n+ >5971268-1\t---------------ATGGATACAAGACACGCACATTCcGCTcTACTc------------------------------------------\n+ >6644790-1\t---------------ATGGATACAAGACACGCACATTCTGCTTTACTcGCAGCAC-----------------------------------\n+ >7112423-1\t---------------ATGGATACAAGACACGCACATTCTGCTTTACTGtCAGCAC-----------------------------------\n+ >7559990-1\t---------------ATGGATACAAGACACGCACATTCTGCTTTACTGGCcGgAC-----------------------------------\n+ 
>8007281-1\t---------------ATGGATAtAAGACACGCACAcTCTG--------------------------------------------------\n+ >9150255-1\t---------------ATGGATACAcGACACGCACATTCcGCTTTcCTG------------------------------------------\n+ >9180814-1\t---------------ATGGATACAAGACACGCACATTCTGCTTTACTGGCAGgcC-----------------------------------\n+ >963117-4\t---------------ATGGATACAAGACACGCACATTCTGCTTTACcGGCAGCAC-----------------------------------\n+ >9672073-1\t---------------ATGGATACAAGACACGCACATTCTGCTTTACTGGCAGCcC-----------------------------------\n+ >971218-4\t---------------ATGGATACAAGACACGCACATcCTGCTTTACTGG-AGCACC----------------------------------\n+ >10040274-1\t-------------------ATACAAGACACGCACATTCTGCTTTACTGGCAGgACCACA-------------------------------\n+ >1063072-4\t-------------------ATACAAGACACGCACATTCTGCTTcACTGGCAGCACCACA-------------------------------\n+ >1430188-2\t-------------------ATACAAGACACGCACATTCTGCTTTACTGGCAGCACCACA-------------------------------\n+ >5196741-1\t-------------------ATACAAGACACGCACATTCTGCTTcACTGGCcGCACCACA-------------------------------\n+ >6017337-1\t-------------------ATACAAGACACGCACATTCTGCTTcACTGtCAGaACCcCA-------------------------------\n+ >7159053-1\t-------------------ATACAAGACACGCACATTCTGCTTTACTGGCAGCACCcaA-------------------------------\n+ >7528336-1\t-------------------ATACAAGACACGCACATTCTGCTTcACTGGCAGCAaCACA-------------------------------\n+ >735584-6\t--------------------------------------------------------ACAGAGTGCTACTGCAAAcAGCAGGAGCAGCAAC\n+ >8551047-1\t--------------------------------------------------------ACAGAGTGCTAtTGCAAAcAGCAGGAGtAGtAAC\n+ >3000121-1\t------------------------------------------------------------AGTcCTACcGCAAATAGCAGcAGCAGCAAC\n+ >928481-5\t------------------------------------------------------------AGTGCTACcGCAAATAGCAGGAGCAGCAAC\n+ >126987-15\t----------------------------------------------------------------------CAAATAGCAGGAGCAGCAAC\n+ 
>3122797-1\t----------------------------------------------------------------------CAAATAGCAGGcGCAGCAAC\n+ >6684686-1\t----------------------------------------------------------------------CAAATAGCAGGAGCAGCAAC\n+ \n+ Note that the column width here was set to 90 characters\n+ \n+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html\n+ \n+------\n+\n+**Parameter list**\n+\n+Options::\n+\n+ \t-f data.fasta \n+ Type the path to the fasta file that you want to use.\n+ \t--limit=90 \n+ Allows the user to set the column width of the alignment. Default=90 characters\n+\t-o output.fasta \n+ Provide the name of your output file\n+\t\t\n+ \t</help>\n+</tool>\n'

diff -r 000000000000 -r 19b20927172d pyCRAC/pyBarcodeFilter.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyBarcodeFilter.pl Tue Jun 18 09:11:00 2013 -0400

[

@@ -0,0 +1,73 @@
+#!/usr/bin/perl -w
+# Galaxy wrapper around pyBarcodeFilter.py.
+#
+# Runs pyBarcodeFilter.py in the current working directory, moves its
+# barcode_statistics.txt report to --out, and moves every per-barcode
+# sequence file into --output_path, renamed to
+# primary_<id>_<sample>-<1|2>_visible_<file_type>.
+use strict;
+use Getopt::Long;
+use File::Basename qw(basename);
+use File::Copy qw(move);
+
+my $script = "/usr/local/bin/pyBarcodeFilter.py";
+
+# -m and --file_type fall back to the defaults documented for pyBarcodeFilter.py.
+my %opt = (m => 0, file_type => "fastq");
+
+GetOptions(\%opt, "f=s", "b=s", "out=s", "output_path=s","id=s","m=i", "file_type=s", "both", "r=s", "version", "i")
+    or die "Invalid command line options\n";
+
+# --version only prints the version; there is nothing to collect afterwards.
+if (exists $opt{version}){
+    system("python", $script, "--version");
+    exit($? == 0 ? 0 : 1);
+}
+
+foreach my $required (qw(f b out output_path id)){
+    die "Missing required option: $required\n" unless defined $opt{$required};
+}
+
+# Read the barcode table (barcode <TAB> sample name) before running the
+# filter, so an unreadable file is reported before any work is done.
+my %bc;
+open(my $bc_fh, "<", $opt{b}) || die "Cannot open barcode file: $!\n";
+while(my $line = <$bc_fh>){
+    chomp($line);
+    my ($barcode,$sample) = (split(/\t/,$line))[0,1];
+    next unless defined $barcode && length $barcode;   # skip blank lines
+    $bc{$barcode} = defined $sample ? $sample : "";
+}
+close $bc_fh;
+
+# Build the command as a list: system() then bypasses the shell, so file
+# names are passed to the script verbatim.
+my @cmnd = ("python", $script, "-f", $opt{f}, "-b", $opt{b}, "-m", $opt{m}, "--file_type", $opt{file_type});
+if(defined $opt{r}){
+    push @cmnd, "-r", $opt{r};
+    push @cmnd, "--both" if exists $opt{both};
+}
+push @cmnd, "-i" if exists $opt{i};
+
+system(@cmnd) == 0 or die "pyBarcodeFilter.py failed (status $?)\n";
+
+move("barcode_statistics.txt", $opt{out})
+    or warn "Cannot move barcode_statistics.txt to $opt{out}: $!\n";
+
+my $ft = lc($opt{file_type});
+
+# Forward reads become Galaxy output "-1", reverse reads (if any) "-2".
+my @inputs = ([$opt{f}, 1]);
+push @inputs, [$opt{r}, 2] if defined $opt{r};
+
+foreach my $key (keys %bc){
+    foreach my $input (@inputs){
+        my ($path, $mate) = @$input;
+        # Expected file: <input name up to the first dot>_<barcode>_<sample>.<type>
+        my $stem = (split(/\./, basename($path)))[0];
+        my $output = "${stem}_${key}_$bc{$key}.$ft";
+        my $rename = "$opt{output_path}/primary_$opt{id}_$bc{$key}-${mate}_visible_$ft";
+        move($output, $rename) or warn "Cannot move $output to $rename: $!\n";
+    }
+}
+

diff -r 000000000000 -r 19b20927172d pyCRAC/pyBarcodeFilter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyBarcodeFilter.xml Tue Jun 18 09:11:00 2013 -0400

@@ -0,0 +1,125 @@
+ <tool id ="pyBarcodeFilter" name="pyBarcodeFilter" force_history_refresh="True">
+        <requirements>
+            <requirement type="package">pyCRAC</requirement>
+        </requirements>
+ <command interpreter="perl">
+ /usr/local/bin/pyBarcodeFilter.pl
+ --file_type $ftype.type
+ -f $ftype.f
+ -b $barcode
+ -m $mismatch
+ $index
+ --out $out
+ --id $out.id
+ --output_path $__new_file_path__
+ #if $ftype.reverse.rev == "yes":
+        -r=$ftype.reverse.r
+ $ftype.reverse.both
+    #end if#
+ </command>
+ <version_command>/usr/local/bin/pyBarcodeFilter.py --version</version_command>
+ <inputs>
+ <conditional name="ftype">
+ <param name="type" type="select" label="File type">
+ <option value="fastq" selected="true">FASTQ</option>
+ <option value="fasta">FASTA</option>
+ </param>
+ <when value="fastq">
+ <param format="fastq" name="f" type="data" label="FastQ File -f" help="FastQ format" />
+ <conditional name="reverse">
+                <param name="rev" type="select"  label="Add a reverse or paired FastQ file">
+                    <option value="no" selected="true">NO</option>
+                    <option value="yes">YES</option>
+                </param>
+                <when value="yes">
+                    <param format="fastq" name="r" type="data" label="Reverse FastQ File -r" help="FastQ format" />
+                    <param name="both" type="select"  label="Search for barcode in both reads">
+                        <option value="" selected="true">NO</option>
+                        <option value="--both">YES</option>
+                    </param>
+ </when>
+ <when value="no">
+ </when>
+ </conditional>
+ </when>
+ <when value="fasta">
+ <param format="fasta" name="f" type="data" label="FastA File -f" help="FastA format" />
+ <conditional name="reverse">
+                <param name="rev" type="select"  label="Add a reverse or paired FastA file">
+                    <option value="no" selected="true">NO</option>
+                    <option value="yes">YES</option>
+                </param>
+                <when value="yes">
+                    <param format="fasta" name="r" type="data" label="Reverse FastA File -r" help="FastA format" />
+                    <param name="both" type="select"  label="Search for barcode in both reads">
+                        <option value="" selected="true">NO</option>
+                        <option value="--both">YES</option>
+     </param>
+ </when>
+ <when value="no">
+ </when>
+ </conditional>
+ </when>
+ </conditional>
+ <param format="tabular" name="barcode" type="data" label="Barcode File -b" help="Tab delimited file with barcodes and barcode names" />
+ <param format="integer" name="mismatch" type="integer" label="Mismatches -m" value="0" size="3" help="Set the number of allowed mismatches in a barcode">
+ <validator type="in_range" min="0" max="100" message="Please enter a value between 0 and 100"/>
+ </param>
+ <param name="index" type="select"  label="Split data using Illumina indexing barcode information -i">
+            <option value="" selected="true">NO</option>
+            <option value="-i">YES</option>
+ </param>
+ </inputs>
+ <outputs>
+ <data format="txt" name="out" label="pyBarcodeFilter"/>
+ </outputs>
+ <help>
+
+.. class:: infomark
+
+**pyBarcodeFilter**
+
+pyBarcodeFilter is part of the pyCRAC_ package. Filters sequence files by barcodes.
+
+This tool requires FASTA or FASTQ input files containing the raw data and a text file containing barcode information.
+To process paired end data, use the -f and -r flags to indicate the path to the forward and reverse sequencing reactions, respectively.
+The barcodes file should contain two columns separated by a tab (see the table below). The first column should contain the barcode nucleotide sequences.
+The second column should contain an identifier, for example, the name of the barcode or the name of the experiment.
+The 'N' in the barcode sequence indicates a random nucleotide. Make sure to use a simple text editor like TextEdit (MacOS X), gedit (Linux/Unix) or use a text editor in the terminal.
+The program is case sensitive: all the nucleotide sequences should be upper case.
+You can freely combine different barcodes, including samples with random nucleotide barcodes and samples with normal barcodes.
+**NOTE!** If you mix them, make sure to place the regular barcode sequences below the sequences with random nucleotides and make sure the shortest sequence is ALWAYS at the bottom of the column (see below)
+
+Example of a barcode text file::
+
+    NNNCGCTTAGC mutant2
+    NNNGCGCAGC  mutant1
+    NNNATTAG    control
+    NNNTAAGC    myfavprotein
+    AGC         oldcontrol
+    AC          veryfirstbarcodedsample
+
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+
+------
+
+**Parameter list**
+
+Options::
+
+  -f FILE, --input_file=FILE
+                            name of the FASTQ or FASTA input file
+  -r FILE, --reverse_input_file=FILE
+                            name of the paired (or reverse) FASTQ or FASTA input file
+  --file_type=FASTQ
+                            type of file, uncompressed (fasta or fastq) or compressed (fasta.gz or fastq.gz, gzip/gunzip
+         compressed). Default is fastq
+  -b FILE, --barcode_list=FILE
+                            name of tab-delimited file containing barcodes and barcode names
+  -m 1, --mismatches=1
+                            to set the number of allowed mismatches in a barcode. A maximum of one mismatch is allowed. Default = 0
+  -i, --index
+                            use this option if you want to split the data using the Illumina indexing barcode information
+
+ </help>
+</tool>

diff -r 000000000000 -r 19b20927172d pyCRAC/pyBinCollector.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyBinCollector.pl Tue Jun 18 09:11:00 2013 -0400

@@ -0,0 +1,84 @@
+#!/usr/bin/perl -w
+# Galaxy wrapper around pyBinCollector.py.
+#
+# Runs pyBinCollector.py with the output prefix bc_<id> and moves the files
+# the wrapper expects it to leave in the working directory to the paths
+# supplied on the command line (--out, or --sd/--ssub/--sdel/--asd/--assub/
+# --asdel when --outputall is used).
+# -o is accepted so the tool XML can pass it, but it is not used: the output
+# prefix is always derived from --id.
+use strict;
+use Getopt::Long;
+use File::Copy qw(move);
+
+my $script = "/usr/local/bin/pyBinCollector.py";
+
+# Values used when -s / --numberofbins are not forwarded to the script; they
+# are also part of the cumulative output file name.
+my %defaults = (s=>"genomic",numberofbins=>20);
+my %opt = %defaults;
+
+GetOptions(\%opt, "f=s","version","gtf=s","range=i","annotation=s", "numberofbins=i","min_length=i","max_length=i","s=s","o=s","ignorestrand","outputall","sd=s","ssub=s","sdel=s","asd=s","assub=s","asdel=s","out=s","options","bins1=i","bins2=i","id=s")
+    or die "Invalid command line options\n";
+
+# --version only prints the version; there are no output files to move.
+if (exists $opt{version}){
+    system("python", $script, "--version");
+    exit($? == 0 ? 0 : 1);
+}
+
+foreach my $required (qw(f gtf annotation id)){
+    die "Missing required option: $required\n" unless defined $opt{$required};
+}
+
+my $prefix = "bc_$opt{id}";
+
+# Build the command as a list: system() then bypasses the shell, so the
+# free-text annotation and the file names are passed verbatim.
+my @cmnd = ("python", $script, "-f", $opt{f}, "--gtf", $opt{gtf}, "--annotation", $opt{annotation}, "-o", $prefix);
+push @cmnd, "--outputall" if exists $opt{outputall};
+
+if(exists $opt{options}){
+    push @cmnd, "--range=$opt{range}" if defined $opt{range};
+    push @cmnd, "--numberofbins", $opt{numberofbins};
+    push @cmnd, "--min_length", $opt{min_length} if defined $opt{min_length};
+    push @cmnd, "--max_length", $opt{max_length} if defined $opt{max_length};
+    push @cmnd, "-s", $opt{s};
+    push @cmnd, "--ignorestrand" if exists $opt{ignorestrand};
+    if(exists $opt{bins1}){
+        die "--bins1 requires --bins2\n" unless defined $opt{bins2};
+        push @cmnd, "--binselect", $opt{bins1}, $opt{bins2};
+    }
+}
+else{
+    # -s and --numberofbins are only forwarded together with --options, so
+    # the output file name has to be built from the defaults otherwise.
+    @opt{keys %defaults} = values %defaults;
+}
+
+system(@cmnd) == 0 or die "pyBinCollector.py failed (status $?)\n";
+
+if(exists $opt{outputall}){
+    # Per-gene tables: <kind>_<prefix>.txt => destination given on the command line.
+    my %dest = (
+        sense_data => $opt{sd}, sense_subs => $opt{ssub}, sense_dels => $opt{sdel},
+        anti_sense_data => $opt{asd}, anti_sense_subs => $opt{assub}, anti_sense_dels => $opt{asdel},
+    );
+    foreach my $kind (sort keys %dest){
+        relocate("${kind}_$prefix.txt", $dest{$kind});
+    }
+}
+else{
+    relocate("${prefix}_cumulative_densities_$opt{annotation}_$opt{s}_$opt{numberofbins}_bins.pileup", $opt{out});
+}
+
+# Move one output file into place; failures are reported but do not stop the
+# remaining moves.
+sub relocate {
+    my ($from, $to) = @_;
+    if(!defined $to){
+        warn "No destination given for $from\n";
+        return;
+    }
+    move($from, $to) or warn "Cannot move $from to $to: $!\n";
+}

diff -r 000000000000 -r 19b20927172d pyCRAC/pyBinCollector.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyBinCollector.xml Tue Jun 18 09:11:00 2013 -0400

[

b'@@ -0,0 +1,290 @@\n+ <tool id ="pyBinCollector" name="pyBinCollector">\n+\t<requirements>\n+ \t<requirement type="package">pyCRAC</requirement>\n+ \t</requirements>\n+\t<command interpreter="perl"> \n+\tpyBinCollector.pl\n+\t-f $input\n+\t--gtf $addGTF.gtf\n+ #if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto":\n+\t\t--annotation $addGTF.annotate.scan.annotation\n+\t#else:\n+\t\t--annotation $addGTF.annotate.annotation\n+\t#end if#\n+\t#if $addOpt.options == "edit":\n+\t --options\n+\t --range $addOpt.range\n+\t\t--min_length $addOpt.min_length\n+\t\t--max_length $addOpt.max_length\n+\t\t--numberofbins $addOpt.numberofbins\n+\t\t-s $addOpt.sequence\n+\t\t#if $addOpt.limitBins.binselect == "yes":\n+\t\t\t--bins1 $addOpt.limitBins.bs_first \n+\t\t\t--bins2 $addOpt.limitBins.bs_last\n+\t\t#end if#\n+\t\t$addOpt.ignore\n+\t\t$addOpt.oall.outputall\n+\t#end if#\n+\t-o "$input.name"\t\n+\t#if $addOpt.options == "edit" and $addOpt.oall.outputall == "--outputall":\n+\t --id $sd.id\n+\t --sd $sd\n+\t --ssub $ssub\n+\t --sdel $sdel\n+\t --asd $asd\n+\t --assub $assub\n+\t --asdel $asdel\n+\t#else:\n+\t --out $out\n+\t --id $out.id\n+\t#end if#\n+\t</command>\n+\t<version_command>/usr/local/bin/pyBinCollector.py --version</version_command>\n+\t<inputs>\n+\t\t<param format="gtf" name="input" type="data" label="Input File -f" help="pyReadCounters or pyMotif gtf output files" />\n+\n+\t\t<conditional name="addGTF">\n+\t\t\t<param name="gtfFile" type="select" label="Choose GTF File from">\n+\t\t\t\t<option value="default" selected="true">Defaults</option>\n+\t\t\t\t<option value="other">History</option>\n+\t\t\t</param>\t\n+\t\t\t<when value="default">\n+\t\t\t\t<param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">\n+\t\t\t\t\t<options from_data_table="pycrac_gtf"/>\n+\t\t\t\t</param>\n+\n+\t\t\t\t<conditional name="annotate">\n+\t\t\t\t<param name="annotations" type="select" label="Select 
annotation">\n+\t\t\t\t\t<option value="all" selected="true">All</option>\n+\t\t\t\t\t<option value="manual">Enter in text box</option>\n+\t\t\t\t\t<option value="auto">Scan pyGetGTFSources file</option>\n+\t\t\t\t</param>\t\n+\t\t\t\t<when value="all">\n+\t\t\t\t\t<param name="annotation" type="hidden" format="txt" size="10" value="all"/>\n+\t\t\t\t</when>\n+\t\t\t\t<when value="manual">\n+\t\t\t\t\t<param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">\n+\t\t\t\t\t\t<validator type="empty_field" message="Please enter a value"/>\t\t\t\n+\t\t\t\t\t</param>\n+\t\t\t\t</when>\n+\t\t\t\t<when value="auto">\n+\t\t\t\t\t<param format="tabular" name="gtf_annotation" type="data" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>\t\n+\t\t\t\t\t\t<conditional name="scan">\n+\t\t\t\t\t\t<param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">\n+\t\t\t\t\t\t\t<option value="wait" selected="true">Waiting</option>\n+\t\t\t\t\t\t\t<option value="scanning">Go</option>\n+\t\t\t\t\t\t</param>\t\n+\t\t\t\t\t\t<when value="wait">\n+\t\t\t\t\t\t</when>\n+\t\t\t\t\t\t<when value="scanning">\n+\t\t\t\t\t\t<param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">\n+\t\t\t\t\t\t\t <options from_dataset="gtf_annotation">\n+\t\t\t\t\t\t\t <column name="name" index="0"/>\n+\t\t\t\t\t\t\t <column name="value" index="0"/>\n+\t\t\t\t\t\t\t </options>\n+\t\t\t\t\t\t</param> \n+\t\t\t\t\t\t</when>\n+\t\t\t\t\t\t</conditional>\n+\t\t\t\t</when>\n+\t\t\t\t</conditional>\n+\t\t\n+\t\t\t</when>\n+\t\t\t<when value="other">\n+\t\t\t\t<param format="gtf" name="gtf" type="data" 
label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>\n+\t\t\t\t<conditional name="annotate">\n+\t\t\t\t<param name="annotations" type="select" label="Select annotation">\n+\t\t\t\t\t<option value="all" selected="true">All</option>\n+\t\t\t\t\t<option value="manual">Enter in text box</option>\n+\t\t\t\t\t<option value="auto">Scan selected file</optio'..b'all"</filter>\n+ </data>\n+ <data format="txt" name="asdel" label="anti_sense_dels_${label.value}.txt">\n+ <filter>addOpt[\'options\'] == "edit" and addOpt[\'oall\'][\'outputall\'] == "--outputall"</filter>\n+ </data>\n+\t</outputs>\n+\t<help>\n+\n+\n+.. class:: infomark\n+\n+**pyBinCollector**\n+\n+pyBinCollector is part of the pyCRAC_ package. Allows the user to generate genome-wide coverage plots. Normalises gene lengths by dividing genes into a \n+fixed number of bins and then calculates the hit density in each bin. The program also allows the user to input specific bin numbers to extract \n+blocks/clusters present in these bins.\n+\n+ \n+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html\n+ \n+------\n+\n+**Parameter list**\n+\n+File input options::\n+\n+ -f FILE, --input_file=FILE\n+ Provide the path and name of the pyReadCounters.py or\n+ pyMotif.py GTF file. By default the program expects\n+ data from the standard input.\n+ -o OUTPUT_FILE, --output_file=OUTPUT_FILE\n+ To set an output file name. Do not add a file\n+ extension. By default, if the --outputall flag is not\n+ used, the program writes to the standard output.\n+ --gtf=yeast.gtf \n+ type the path to the gtf annotation file that you want\n+ to use. Default is /usr/local/pyCRAC/db/Saccharomyces_\n+ cerevisiae.EF2.59.1.2.gtf\n+\n+pyBinCollector.py specific options::\n+\n+ -a protein_coding, --annotation=protein_coding\n+ select which annotation (i.e. protein_coding, ncRNA,\n+ sRNA, rRNA, tRNA, snoRNA, all) you would like to focus\n+ your search on. 
Default = all\n+ --min_length=20 \n+ to set a minimum length threshold for genes. Genes\n+ shorter than the minimal length will be discarded.\n+ Default = 1\n+ --max_length=10000 \n+ to set a maximum length threshold for genes. Genes\n+ larger than the maximum length will be discarded.\n+ Default = 100000000\n+ -n 20, --numberofbins=20\n+ select the number of bins you want to generate.\n+ Default=20\n+ --binselect=2 4 \n+ allows selection of sequences that were mapped to\n+ specific bins. This option expects two numbers, one\n+ for each bin, separated by a space. For example:\n+ --binselect 20 30.\n+ --outputall \n+ use this flag to output the normalized distribution\n+ for each individual gene, rather than making a\n+ cumulative coverage plot. Useful for making box plots\n+ or for making heat maps.\n+\n+Common options::\n+\n+ -r 100, --range=100\n+ allows you to set the length of the UTR regions. If\n+ you set \'-r 50\' or \'--range=50\', then the program will\n+ set a fixed length (50 bp) regardless of whether the\n+ GTF file has genes with annotated UTRs.\n+ -s intron, --sequence=intron\n+ with this option you can select whether you want to\n+ generate bins from the coding or genomic sequence or\n+ introns,exon,CDS, or UTR coordinates. Default =\n+ genomic\n+ --ignorestrand \n+ To ignore strand information and all reads overlapping\n+ with genomic features will be considered sense reads.\n+ Useful for analysing ChIP or RIP data\n+\n+\n+\n+ \n+\n+\t</help>\n+</tool>\t\n'

diff -r 000000000000 -r 19b20927172d pyCRAC/pyCalculateChromosomeLengths.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyCalculateChromosomeLengths.xml Tue Jun 18 09:11:00 2013 -0400

@@ -0,0 +1,53 @@
+<tool id="pyCalculateChromosomeLengths" name="pyCalculateChromosomeLengths">
+   <requirements>
+         <requirement type="package">pyCRAC</requirement>
+       </requirements>
+   <command interpreter="python">
+   /usr/local/bin/pyCalculateChromosomeLengths.py
+   -f $ftype.input
+   --file_type $ftype.filetype
+   -o $output </command>
+   <version_command>/usr/local/bin/pyCalculateChromosomeLengths.py --version</version_command>
+   <inputs>
+       <conditional name="ftype">
+       <param name="filetype" type="select"  label="File type">
+                                        <option value="fasta" selected="true">Fasta</option>
+                                        <option value="tab">Tab</option>
+       </param>
+       <when value="fasta">
+               <param name="input" type="data" format="fasta" label="Input file" help="Fasta or Tab file"/>
+       </when>
+       <when value="tab">
+                              <param name="input" type="data" format="tabular" label="Input file" help="Fasta or Tab file"/>
+       </when>
+       </conditional>
+       <param name="label" type="text" format="txt" size="30" value="pyCalculateChromosomeLengths" label="Enter output file label -o" />
+   </inputs>
+   <outputs>
+     <data name="output" format="txt" label="${label.value}.len"/>
+   </outputs>
+   <help>
+
+.. class:: infomark
+
+**pyCalculateChromosomeLengths**
+
+pyCalculateChromosomeLengths is part of the pyCRAC_ package. Takes a genome sequence in fasta or tab format and generates a tab-delimited file showing chromosome name and chromosome length.
+
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+
+-------
+
+**Parameter list**
+
+Options::
+
+  -f chromosomes.fasta, --input_file=chromosomes.fasta
+                        provide the name and path of your fasta or tab genomic
+                        sequence file. Default is standard input.
+  --file_type=fasta
+                        provide the file type (fasta or tab). Default is fasta
+
+   </help>
+</tool>
+

diff -r 000000000000 -r 19b20927172d pyCRAC/pyCalculateFDRs.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyCalculateFDRs.xml Tue Jun 18 09:11:00 2013 -0400

b'@@ -0,0 +1,247 @@\n+ <tool id ="pyCalculateFDRs" name="pyCalculateFDRs">\n+\t<requirements>\n+ \t<requirement type="package">pyCRAC</requirement>\n+ \t</requirements>\n+\t<command interpreter="python"> \n+\t/usr/local/bin/pyCalculateFDRs.py\n+\t-f $ftype.input\n+ --file_type $ftype.file_type\n+\t--gtf=$addGTF.gtf\n+\n+\t#if $addGTF.annotate.annotations != "all":\n+\t #if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto":\n+\t --annotation $addGTF.annotate.scan.annotation\n+\t #else:\n+\t\t--annotation $addGTF.annotate.annotation\n+\t #end if#\n+\t#end if#\n+\t--chromfile=$addChr.chr\n+\t#if $addOpt.options == "edit"\n+ -s $addOpt.sequence\n+\t --min $addOpt.min \n+ --minfdr $addOpt.minfdr \n+ --iterations=$addOpt.iterations \n+ --range $addOpt.range\n+\t#end if#\n+\t-o $output\n+\n+\t</command>\n+\t<version_command>/usr/local/bin/pyCalculateFDRs.py --version</version_command>\n+\t<inputs>\n+ <conditional name="ftype">\n+ <param name="file_type" type="select" label="Input File Type --file_type" help="Use bed6, gff or gtf input files containing read/cDNA co-ordinates">\n+ <option value="gff" selected="true">GFF</option>\n+ <option value="bed">Bed6</option>\n+ <option value="gtf">GTF</option>\n+ </param>\n+ <when value="gff">\n+ <param format="gff" name="input" type="data" label="Input File --readdatafile" help="GFF format containing read/cDNA co-ordinates" />\n+ </when>\n+ <when value="gtf">\n+ <param format="gtf" name="input" type="data" label="Input File --readdatafile" help="GTF format containing read/cDNA co-ordinates" />\n+ </when>\n+ <when value="bed">\n+ <param format="bed6" name="input" type="data" label="Input File --readdatafile" help="Bed 6 column format containing read/cDNA co-ordinates" />\n+ </when>\n+ </conditional>\n+\t \n+ <conditional name="addChr">\n+ <param name="chrfile" type="select" label="Choose Chromosome length file from">\n+ <option value="default" selected="true">Defaults</option>\n+ <option 
value="other">History</option>\n+ </param>\n+ <when value="default">\n+ <param name="chr" type="select" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes">\n+ <options from_data_table="pycrac_chr"/>\n+ </param>\n+ </when>\n+ <when value="other">\n+ <param format="tabular" name="chr" type="data" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes. Use pyCrac utility pyCalculateChromosomeLengths to create."/>\n+ </when>\n+ </conditional>\n+\n+\t <conditional name="addGTF">\n+\t\t <param name="gtfFile" type="select" label="Choose GTF File from">\n+\t\t <option value="default" selected="true">Defaults</option>\n+\t\t <option value="other">History</option>\n+\t\t </param>\t\n+\t\t <when value="default">\n+\t\t <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">\n+\t\t <options from_data_table="pycrac_gtf"/>\n+\t\t </param>\n+\t\t <conditional name="annotate">\n+\t\t <param name="annotations" type="select" label="Select annotation">\n+ <option value="all" selected="true">All</option>\n+ <option value="manual">Enter in text box</option>\n+ <option value="auto">Scan pyGetGTFSources file</option>\n+\t\t </param>\t\n+\t\t <when value="all">\n+\t\t\t <param'..b' "exon" but this has no meaning! 
It may overlap with an intron.\n+Use bedtools to extract those intervals that overlap with introns or other features\n+\n+Example of an output file::\n+\n+ ##gff-version 2\n+ # generated by pyCalculateFDRs version 0.0.3, Sat Jun 1 21:16:23 2013\n+ # pyCalculateFDRs.py -f test_count_output_reads.gtf -r 200 -o test_count_output_FDRs_005.gtf -v -m 0.05\n+ # chromosome\tfeature\tsource\tstart\tend\tminimal_coverage\tstrand\t.\tattributes\n+ chrI\tprotein_coding\texon\t140846\t140860\t5\t-\t.\tgene_id "YAL005C"; gene_name "SSA1"; \n+ chrI\tintergenic_region\texon\t223118\t223164\t4\t-\t.\tgene_id "INT_0_179"; gene_name "INT_0_179"; \n+ chrI\tintergenic_region\texon\t71889\t71922\t3\t+\t.\tgene_id "INT_0_94"; gene_name "INT_0_94"; \n+ chrII\tintergenic_region\texon\t296127\t296158\t3\t-\t.\tgene_id "INT_0_365"; gene_name "INT_0_365"; \n+ chrII\tintergenic_region\texon\t680697\t680722\t4\t-\t.\tgene_id "INT_0_626"; gene_name "INT_0_626"; \n+ chrII\tintergenic_region\texon\t680827\t680846\t4\t-\t.\tgene_id "INT_0_626"; gene_name "INT_0_626"; \n+ chrII\tsnRNA\texon\t680827\t680838\t5\t-\t.\tgene_id "LSR1"; gene_name "LSR1"; \n+ chrII\tsnRNA\texon\t680951\t681001\t5\t-\t.\tgene_id "LSR1"; gene_name "LSR1"; \n+ chrII\tintergenic_region\texon\t577985\t577996\t3\t-\t.\tgene_id "INT_0_556"; gene_name "INT_0_556"; \n+ chrII\tprotein_coding\texon\t203838\t203887\t3\t+\t.\tgene_id "YBL011W"; gene_name "SCT1"; \n+ chrII\tprotein_coding\texon\t296127\t296158\t3\t-\t.\tgene_id "YBR028C"; gene_name "YBR028C"; \n+\n+ \n+pyCalculateFDRs is part of the pyCRAC_ package. Takes interval information in GTF or bed format and calculates False Discovery Rates (FDRs).\n+\n+ \n+.. 
_pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html\n+ \n+------\n+\n+**Parameter list**\n+\n+Options::\n+\n+ -f read_file, --readdatafile=read_file\n+ Name of the bed/gff/gtf file containing the read/cDNA\n+ coordinates\n+ --file_type=FILE_TYPE\n+ this tool supports bed6, gtf and gff input files.\n+ Please select from \'bed\',\'gtf\' or \'gff\'. Default=gtf\n+ -o outfile.gtf, --outfile=outfile.gtf\n+ Optional. Provide the name of the output file. Default\n+ is \'selected_intervals.gtf\'\n+ -r 100, --range=100 \n+ allows you to set the length of the UTR regions. If\n+ you set \'-r 50\' or \'--range=50\', then the program will\n+ set a fixed length (50 bp) regardless of whether the\n+ GTF file has genes with annotated UTRs.\n+ -a protein_coding, --annotation=protein_coding\n+ select which annotation (i.e. protein_coding, ncRNA,\n+ sRNA, rRNA,snoRNA,snRNA, depending on the source of\n+ your GTF file) you would like to focus your analysis\n+ on. Default = all annotations\n+ -c yeast.txt, --chromfile=yeast.txt\n+ Location of the chromosome info file. This file should\n+ have two columns: first column is the names of the\n+ chromosomes, second column is length of the\n+ chromosomes. Default is yeast\n+ --gtf=yeast.gtf \n+ Name of the annotation file. Default is /usr/local/pyC\n+ RAC/db/Saccharomyces_cerevisiae.EF2.59.1.2.gtf\n+ -m MINFDR, --minfdr=MINFDR\n+ To set a minimal FDR threshold for filtering interval\n+ data. Default is 0.05\n+ --min=MIN \n+ to set a minimal read coverages for a region. Regions\n+ with coverage less than minimum will be ignoredve an\n+ FDR of zero\n+ --iterations=ITERATIONS\n+ to set the number of iterations for randomization of\n+ read coordinates. Default=100\n+\t</help>\n+</tool>\t\n'

diff -r 000000000000 -r 19b20927172d pyCRAC/pyCalculateMutationFrequencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyCalculateMutationFrequencies.xml Tue Jun 18 09:11:00 2013 -0400

@@ -0,0 +1,126 @@
+<tool id ="pyCalculateMutationFrequencies" name="pyCalculateMutationFrequencies">
+ <requirements>
+         <requirement type="package">pyCRAC</requirement>
+     </requirements>
+ <command interpreter="python">
+ /usr/local/bin/pyCalculateMutationFrequencies.py
+ -r $readdatafile
+ -i $intervaldatafile
+ -c $addChr.chr
+ -o $output
+ --mutsfreq $mutsfreq
+ </command>
+ <version_command>/usr/local/bin/pyCalculateMutationFrequencies.py --version</version_command>
+ <inputs>
+       <param format="gff" name="readdatafile" type="data" label="GFF Reads File --readdatafile" help="GFF file containing read data" />
+       <param format="gtf" name="intervaldatafile" type="data" label="GFF Interval File --intervaldatafile" help="GFF file containing interval co-ordinates"/>
+       <conditional name="addChr">
+ <param name="chrfile" type="select"  label="Choose Chromosome length file from">
+   <option value="default" selected="true">Defaults</option>
+   <option value="other">History</option>
+ </param>
+ <when value="default">
+   <param name="chr" type="select"  label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes.Use pyCrac utility pyCalculateChromosomeLengths to create.">
+     <options from_data_table="pycrac_chr"/>
+   </param>
+ </when>
+ <when value="other">
+   <param format="tabular" name="chr" type="data" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes"/>
+ </when>
+       </conditional>
+
+       <param format="integer" name="mutsfreq" type="integer" label="Minimum mutation frequency --mutsfreq " value="0" size="10" help="sets the minimal mutations frequency for an interval that you want to have written to our output file">
+ <validator type="in_range" min="0" message="Please enter a value >= 0"/>
+       </param>
+        <param name="label" type="text" format="txt" size="30" value="pyCalculateMutationFrequencies" label="Enter output file label -o" />
+ </inputs>
+ <outputs>
+ <data format="gtf" name="output" label="${label.value}.gtf"/>
+ </outputs>
+ <help>
+
+.. class:: infomark
+
+**pyCalculateMutationFrequencies**
+
+pyCalculateMutationFrequencies is part of the pyCRAC_ package. Takes an interval file and a pyReadCounters GTF file and calculates (cross-linking induced) mutation frequencies for each interval.
+This tool can be used to calculate mutation frequencies for significant intervals (pyCalculateFDRs output file) or over-represented motifs (pyMotif GTF output file).
+It expects a pyCRAC GTF count_output_reads.gtf file and a GTF file with the intervals.
+
+For example::
+
+    This pyCalculateFDRs GTF output file::
+
+        ##gff-version 2
+        # generated by pyCalculateFDRs version 0.0.3, Sat Jun  1 21:16:23 2013
+        # pyCalculateFDRs.py -f test_count_output_reads.gtf -r 200 -o test_count_output_FDRs_005.gtf -v -m 0.05
+        # chromosome feature source start end minimal_coverage strand . attributes
+        chrII protein_coding exon 203838 203887 3 + . gene_id "YBL011W"; gene_name "SCT1";
+        chrII intergenic_region exon 407669 407708 3 + . gene_id "INT_0_445"; gene_name "INT_0_445";
+        chrII intergenic_region exon 585158 585195 2 + . gene_id "INT_0_562"; gene_name "INT_0_562";
+        chrII protein_coding exon 372390 372433 4 - . gene_id "YBR067C"; gene_name "TIP1";
+        chrII intergenic_region exon 380754 380815 6 - . gene_id "INT_0_431"; gene_name "INT_0_431";
+        chrIII protein_coding exon 138001 138044 5 + . gene_id "YCR012W"; gene_name "PGK1";
+        chrIII intergenic_region exon 227997 228036 5 + . gene_id "INT_0_885"; gene_name "INT_0_885";
+        chrIII intergenic_region exon 227997 228037 4 + . gene_id "INT_0_887"; gene_name "INT_0_887";
+        chrIII tRNA exon 227997 228037 4 + . gene_id "tS(CGA)C"; gene_name "SUP61";
+
+    Will be converted into::
+
+        ##gff-version 2
+        # generated by pyCalculateFDRs version 0.0.3, Sat Jun  1 21:16:23 2013
+        # /Library/Frameworks/EPD64.framework/Versions/Current/bin/pyCalculateFDRs.py -f test_count_output_reads.gtf -r 200 -o test_count_output_FDRs_005.gtf -v -m 0.05
+        # chromosome feature source start end minimal_coverage strand . attributes
+        chrII protein_coding exon 203838 203887 3 + . gene_id "YBL011W"; gene_name "SCT1"; # 203882D33.3,203883D33.3,203884D33.3;
+        chrII intergenic_region exon 407669 407708 3 + . gene_id "INT_0_445"; gene_name "INT_0_445"; # 407680D33.3,407681D33.3;
+        chrII intergenic_region exon 585158 585195 2 + . gene_id "INT_0_562"; gene_name "INT_0_562"; # 585171D100.0,585172D100.0,585173D100.0;
+        chrII protein_coding exon 372390 372433 4 - . gene_id "YBR067C"; gene_name "TIP1"; # 372412D50.0,372413D50.0;
+        chrII intergenic_region exon 380754 380815 6 - . gene_id "INT_0_431"; gene_name "INT_0_431"; # 380786D90.2,380787D90.2;
+        chrIII protein_coding exon 138001 138044 5 + . gene_id "YCR012W"; gene_name "PGK1"; # 138025D40.0,138026D30.0,138027D40.0;
+        chrIII intergenic_region exon 227997 228036 5 + . gene_id "INT_0_885"; gene_name "INT_0_885"; # 228006D85.7,228007D100.0;
+        chrIII intergenic_region exon 227997 228037 4 + . gene_id "INT_0_887"; gene_name "INT_0_887"; # 228006D85.7,228007D100.0;
+        chrIII tRNA exon 227997 228037 4 + . gene_id "tS(CGA)C"; gene_name "SUP61"; # 228006D85.7,228007D100.0;
+
+
+The text after the hash character (#) at the end of each line lists the chromosomal coordinates of mutated nucleotides within the cluster interval and their mutation frequencies.
+
+For example::
+
+    # 228007D100.0
+
+indicates that 100% of the nucleotides in position 228007 were deleted in the interval.
+
+By setting the --mutsfreq flag you can set a limit for the lowest mutation frequency that you want to have reported.
+This makes it relatively easy to select those significant regions that have nucleotides with high mutation frequencies.
+
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+
+------
+
+**Parameter list**
+
+Options::
+
+  -i intervals.gtf, --intervaldatafile=intervals.gtf
+                        provide the path to your GTF interval data file.
+  -r reads.gtf, --readdatafile=reads.gtf
+                        provide the path to your GTF read data file.
+  -c yeast.txt, --chromfile=yeast.txt
+                        Location of the chromosome info file. This file should
+                        have two columns: first column is the names of the
+                        chromosomes, second column is length of the
+                        chromosomes. Default is yeast
+  -o intervals_with_muts.gtf, --output_file=intervals_with_muts.gtf
+                        provide a name for an output file. By default it
+                        writes to the standard output
+  --mutsfreq=10, --mutationfrequency=10
+                        sets the minimal mutations frequency for an interval
+                        that you want to have written to our output file.
+                        Default = 0%. Example: if the mutsfrequency is set at
+                        10 and an interval position is mutated in less than
+                        10% of the reads, then the mutation will not be
+                        reported.
+
+
+ </help>
+ </tool>
\ No newline at end of file

diff -r 000000000000 -r 19b20927172d pyCRAC/pyCheckGTFfile.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyCheckGTFfile.xml Tue Jun 18 09:11:00 2013 -0400

@@ -0,0 +1,53 @@
+ <tool id ="pyCheckGTFfile" name="pyCheckGTFfile">
+ <requirements>
+         <requirement type="package">pyCRAC</requirement>
+     </requirements>
+ <command interpreter="python">
+ /usr/local/bin/pyCheckGTFfile.py --gtf $addGTF.gtf -o $out
+ </command>
+ <version_command>/usr/local/bin/pyCheckGTFfile.py --version</version_command>
+ <inputs>
+ <conditional name="addGTF">
+ <param name="gtfFile" type="select"  label="Choose GTF File from">
+ <option value="default" selected="true">Defaults</option>
+ <option value="other">History</option>
+ </param>
+ <when value="default">
+ <param name="gtf" type="select"  label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
+ <options from_data_table="pycrac_gtf"/>
+ </param>
+ </when>
+ <when value="other">
+ <param format="GTF" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
+ </when>
+ </conditional>
+ <param name="label" type="text" format="txt" size="30" value="pyCheckGTFfile" label="Enter output file label -o" />
+ </inputs>
+
+ <outputs>
+ <data format="gtf" name="out" label="${label.value}.gtf"/>
+ </outputs>
+ <help>
+.. class:: infomark
+
+**pyCheckGTFfile**
+
+pyCheckGTFfile is part of the pyCRAC_ package. Renames duplicated gene names in your GTF annotation file.
+
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+
+------
+
+**Parameter list**
+
+Options::
+
+  --gtf=gtf input file
+                        type the path to the gtf file that you want to use.
+  -o FILE, --output=FILE
+                        Optional. Specify the name of the output file. Default
+                        is standard output. Make sure it has the .gtf
+                        extension!
+
+ </help>
+</tool>

diff -r 000000000000 -r 19b20927172d pyCRAC/pyClusterReads.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyClusterReads.pl Tue Jun 18 09:11:00 2013 -0400

@@ -0,0 +1,10 @@
+#!/usr/bin/perl -w
+# Placeholder wrapper: echoes the command-line arguments it was given,
+# separated by single spaces and terminated by a newline.
+use strict;
+use Getopt::Long;
+
+# The newline is joined as a final list element, exactly like the arguments.
+my @fields = (@ARGV, "\n");
+print join(" ", @fields);
+
+
+
+
+

diff -r 000000000000 -r 19b20927172d pyCRAC/pyClusterReads.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyClusterReads.xml Tue Jun 18 09:11:00 2013 -0400

b'@@ -0,0 +1,230 @@\n+<tool id="pyClusterReads" name="pyClusterReads" force_history_refresh="True">\n+\t<requirements>\n+ \t<requirement type="package">pyCRAC</requirement>\n+ \t</requirements>\n+\t<command interpreter="python">\n+\t/usr/local/bin/pyClusterReads.py\n+\t-f $input\n+\t--gtf=$addGTF.gtf\n+ #if $addGTF.annotate.annotations != "all":\n+ #if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto":\n+ --annotation=$addGTF.annotate.scan.annotation\n+ #else:\n+ --annotation=$addGTF.annotate.annotation\n+ #end if#\n+\t#end if#\n+\t-o $output\n+\t#if $addOpt.options == "edit":\n+\t\t--range=$addOpt.range\n+\t\t--cic=$addOpt.cic\n+\t\t--co=$addOpt.co\n+\t\t--ch=$addOpt.ch\n+\t\t--cl=$addOpt.cl\n+\t\t--mutsfreq=$addOpt.mutsfreq\n+\t#end if#\n+\t</command>\n+\t<version_command>/usr/local/bin/pyClusterReads.py --version</version_command>\n+\t<inputs>\n+\t <param format="gtf" name="input" type="data" label="Input Read Data File -f" help="GTF format sorted by position i.e. 
pyReadCounters output file."/> \n+\t <conditional name="addGTF">\n+ <param name="gtfFile" type="select" label="Choose GTF File from">\n+ <option value="default" selected="true">Defaults</option>\n+ <option value="other">History</option>\n+ </param> \n+ <when value="default">\n+ <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">\n+ <options from_data_table="pycrac_gtf"/>\n+ </param>\n+ <conditional name="annotate">\n+ <param name="annotations" type="select" label="Select annotation">\n+ <option value="all" selected="true">All</option>\n+ <option value="manual">Enter in text box</option>\n+ <option value="auto">Scan pyGetGTFSources file</option>\n+ </param> \n+ <when value="all">\n+ <param name="annotation" type="hidden" format="txt" size="10" value="all"/>\n+ </when>\n+ <when value="manual">\n+ <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">\n+ <validator type="empty_field" message="Please enter a value"/> \n+ </param>\n+ </when>\n+ <when value="auto">\n+ <param format="tabular" name="gtf_annotation" type="data" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. 
Refer to pyGetGTFSources"/> \n+ <conditional name="scan">\n+ <param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">\n+ <option value="wait" selected="true">Waiting</option>\n+ <option value="scanning">Go</option>\n+ </param> \n+ <when value="wait">\n+ </when>\n+ <when value="scanning">\n+ <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">\n+ <options from_dataset="gtf_annotation">\n+ <column name="name" index="0"/>\n+ <column name="value" index="0"/>\n+ </options>\n+ </param> \n+ </when>\n+ </conditional>\n+ </when>\n+ </conditional>\n+\n+ </when>\n+ <when value='..b'sterReads.py version 0.0.1, Fri Jan 18 11:59:42 2013\n+ # pyClusterReads.py -f count_output_reads.gtf -o count_output_clusters.gtf -v\n+ # chromosome feature source start end cDNAs strand height attributes\n+ chrI cluster exon 112583 112643 6 - 5 gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 112612S75.0;\n+ chrI cluster exon 113176 113232 3 - 3 gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 113184S100.0;\n+ chrI cluster exon 113334 113386 2 - 2 gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 113349S50.0,113379S100.0;\n+ chrI cluster exon 113534 113564 3 - 3 gene_id "INT_0_119,INT_0_114"; gene_name "INT_0_119,INT_0_114"; # 113554S33.3,113556S33.3,113557S33.3;\n+ chrI cluster exon 113644 113691 5 - 4 gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113649S50.0,113657S33.3,113679S25.0\n+ chrI cluster exon 113912 113958 2 - 2 gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113932S50.0,113946S50.0;\n+ chrI cluster exon 113966 114066 5 - 3 gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113987S50.0,114033S33.3,114039S33.3;\n+ chrI cluster exon 114067 114130 3 - 3 gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 114099S100.0;\n+\n+.. 
_pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html \n+\n+------\n+\n+**Parameter list** \n+\n+\n+File input options::\n+\n+ -f reads.gtf, --input_file=reads.gtf\n+ provide the path to your GTF read data file. NOTE the\n+\t\t\t\t file has to be correctly sorted! If you used\n+\t\t\t\t pyReadCounters to generate the file you should be\n+\t\t\t\t fine. If you modified it, use the sort command\n+\t\t\t\t described in the manual to sort your file first by\n+\t\t\t\t chromosome, then by strand and then by start position.\n+ -o clusters.gtf, --output_file=clusters.gtf\n+ provide a name for an output file. By default it\n+\t\t\t\t writes to the standard output\n+ --gtf=Yourfavoritegtf.gtf\n+ type the path to the gtf annotation file that you want\n+\t\t\t\t to use\n+\n+Common pyCRAC options::\n+\n+ -r 100, --range=100\n+ allows you to set the length of the UTR regions. If\n+\t\t\t\t you set \'-r 50\' or \'--range=50\', then the program will\n+\t\t\t\t set a fixed length (50 bp) regardless of whether the\n+\t\t\t\t GTF annotation file has genes with annotated UTRs.\n+ -a protein_coding, --annotation=protein_coding\n+ select which annotation (i.e. protein_coding, ncRNA,\n+\t\t\t\t sRNA, rRNA,snoRNA,snRNA, depending on the source of\n+\t\t\t\t your GTF file) you would like to focus your analysis\n+\t\t\t\t on. Default = all annotations\n+\n+Options for cluster analysis::\n+\n+ --cic=2, --cdnasinclusters=2\n+ sets the minimal number of overlapping cDNAs in each\n+\t\t\t\t cluster. Default = 2\n+ --co=5, --clusteroverlap=5\n+ sets the number of nucleotides cDNA sequences have to\n+\t\t\t\t overlap to form a cluster. Default = 1 nucleotide\n+ --ch=5, --clusterheight=5\n+ sets the minimal height of the cluster. 
Default = 2\n+\t\t\t\t nucleotides\n+ --cl=100, --clusterlength=100\n+ to set the maximum cluster sequence length\n+ --mutsfreq=10, --mutationfrequency=10\n+ sets the minimal mutations frequency for a cluster\n+\t\t\t\t position in the GTF output file. Default = 0%.\n+\t\t\t\t Example: if the mutsfrequency is set at 10 and a\n+\t\t\t\t cluster position has a mutated in less than 10% of the\n+\t\t\t\t reads, then the mutation will not be reported.\n+\t</help>\n+</tool>\n\\ No newline at end of file\n'

diff -r 000000000000 -r 19b20927172d pyCRAC/pyExtractLinesFromGTF.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyExtractLinesFromGTF.xml Tue Jun 18 09:11:00 2013 -0400

@@ -0,0 +1,77 @@
+ <tool id ="pyExtractLinesFromGTF" name="pyExtractLinesFromGTF">
+ <requirements>
+         <requirement type="package">pyCRAC</requirement>
+     </requirements>
+ <command interpreter="python">
+ /usr/local/bin/pyExtractLinesFromGTF.py --gtf $addGTF.gtf --genes_file $g --attribute $attribute $v -o $out
+ </command>
+ <version_command>/usr/local/bin/pyExtractLinesFromGTF.py --version</version_command>
+ <inputs>
+ <conditional name="addGTF">
+ <param name="gtfFile" type="select"  label="Choose GTF File from">
+ <option value="default" selected="true">Defaults</option>
+ <option value="other">History</option>
+ </param>
+ <when value="default">
+ <param name="gtf" type="select"  label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
+ <options from_data_table="pycrac_gtf"/>
+ </param>
+ </when>
+ <when value="other">
+ <param format="GTF" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
+ </when>
+ </conditional>
+ <param format="txt" name="g" type="data" label="File containing gene list --genes_file" help="Tabular file with 1 column of gene or annotation names"/>
+ <param name="attribute" type="select"  label="Select the attribute to extract names from --attribute">
+                                <option value="gene_name" selected="true">gene_name</option>
+                                <option value="gene_id">gene_id</option>
+                                <option value="transcript_name">transcript_name</option>
+                                <option value="transcript_id">transcript_id</option>
+ </param>
+                <!-- Empty value keeps matching lines; "-v" inverts the selection (like grep -v). -->
+                <param name="v" type="select"  label="Extract lines from GTF that -v">
+                                <option value="" selected="true">Match the gene file</option>
+                                <option value="-v">Do not match the gene file</option>
+                </param>
+ <param name="label" type="text" format="txt" size="30" value="pyExtractLinesFromGTF" label="Enter output file label -o" />
+ </inputs>
+
+ <outputs>
+ <data format="gtf" name="out" label="${label.value}.gtf"/>
+ </outputs>
+ <help>
+.. class:: infomark
+
+**pyExtractLinesFromGTF**
+
+pyExtractLinesFromGTF is part of the pyCRAC_ package. Extracts lines from a GTF file that contain gene names of interest.
+
+
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+
+------
+
+**Parameter list**
+
+Options::
+
+  --gtf=Yourfavoritegtf.gtf
+                        type the path to the gtf file that you want to use. By
+                        default it expects data from the standard input.
+  -g FILE, --genes_file=FILE
+                        name of your gene list or annotations list file (1
+                        column)
+  -o OUTFILE, --outfile=OUTFILE
+                        type the name and path of the file you want to write
+                        the output to. Default is standard output
+  -a ATTRIBUTE, --attribute=ATTRIBUTE
+                        from which attribute do you want to extract names?
+                        Choices: gene_name, gene_id, transcript_name,
+                        transcript_id
+  -v
+                        similar to grep -v option. Remove the genes from the
+                        GTF that are in the gene list
+
+
+
+ </help>
+</tool>

diff -r 000000000000 -r 19b20927172d pyCRAC/pyFasta2tab.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyFasta2tab.xml Tue Jun 18 09:11:00 2013 -0400

@@ -0,0 +1,51 @@
+<tool id="pyFasta2Tab" name="pyFasta2Tab">
+   <description>converter</description>
+   <requirements>
+         <requirement type="package">pyCRAC</requirement>
+       </requirements>
+   <command interpreter="python">/usr/local/bin/pyFasta2tab.py -f $input -o $output
+   </command>
+   <version_command>/usr/local/bin/pyFasta2tab.py --version</version_command>
+   <inputs>
+       <param name="input" type="data" format="fasta" label="Fasta file -f"/>
+       <!-- "label" must live inside <inputs>: the output label below reads ${label.value}. -->
+       <param name="label" type="text" format="txt" size="30" value="pyFasta2Tab" label="Enter output file label -o" />
+   </inputs>
+   <outputs>
+     <data name="output" format="tabular" label="${label.value}.tab"/>
+   </outputs>
+   <help>
+
+.. class:: infomark
+
+**pyFasta2Tab**
+
+pyFasta2Tab is part of the pyCRAC_ package. Converts fasta to tabular format. Is used to convert your reference sequences in fasta format to the tabular format that pyCRAC uses for almost all tools.
+
+Example::
+
+    >sequence1
+    ATAGGATACATAACCATATTATGAGACC
+
+Is converted into::
+
+    sequence1   ATAGGATACATAACCATATTATGAGACC
+
+The pyCRAC package lo
+
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+
+-------
+
+**Parameter list**
+
+Options::
+
+  -f fasta_file, --input_file=fasta_file
+                        provide the name and path of your fasta input file.
+                        Default is standard input.
+
+
+
+   </help>
+</tool>
+

diff -r 000000000000 -r 19b20927172d pyCRAC/pyFastqDuplicateRemover.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyFastqDuplicateRemover.pl Tue Jun 18 09:11:00 2013 -0400

@@ -0,0 +1,34 @@
+#!/usr/bin/perl -w
+# Galaxy wrapper around pyFastqDuplicateRemover.py.
+#
+# The Python tool is run with the --id value as its -o argument. It is then
+# expected to have written <id>_1.fasta and <id>_2.fasta when a reverse file
+# (-r) was supplied, or a single file named <id> otherwise. Those files are
+# moved to the paths given with -o (and --out2 for the reverse reads).
+use strict;
+use Getopt::Long;
+use File::Copy qw(move);
+
+my $PY_TOOL = "/usr/local/bin/pyFastqDuplicateRemover.py";
+
+# Run a command in list form (no shell involved, so file names containing
+# spaces or shell metacharacters are passed through untouched).
+# Returns the command's exit code; a death by signal is reported as 1.
+sub run_tool {
+    my @argv = @_;
+    system(@argv);
+    die "Could not execute '$argv[0]': $!\n" if $? == -1;
+    return 1 if $? & 127;
+    return $? >> 8;
+}
+
+my %opt;
+
+GetOptions(\%opt, "f=s", "r=s", "o=s", "out2=s", "version", "id=s");
+
+# --version only prints the tool version; there are no output files to move.
+if (exists $opt{version}) {
+    exit run_tool("python", $PY_TOOL, "--version");
+}
+
+for my $required (qw(f o id)) {
+    die "pyFastqDuplicateRemover.pl: missing required option -$required\n"
+        unless defined $opt{$required};
+}
+
+my $paired = defined $opt{r};
+die "pyFastqDuplicateRemover.pl: --out2 is required when -r is given\n"
+    if $paired && !defined $opt{out2};
+
+my @cmnd = ("python", $PY_TOOL, "-f", $opt{f}, "-o", $opt{id});
+push @cmnd, "-r", $opt{r} if $paired;
+
+# Propagate a failure of the Python tool instead of trying to move files
+# that were never produced.
+my $status = run_tool(@cmnd);
+exit $status if $status != 0;
+
+if ($paired) {
+    move("$opt{id}_1.fasta", $opt{o})
+        or die "Cannot move $opt{id}_1.fasta to $opt{o}: $!\n";
+    move("$opt{id}_2.fasta", $opt{out2})
+        or die "Cannot move $opt{id}_2.fasta to $opt{out2}: $!\n";
+}
+else {
+    move($opt{id}, $opt{o})
+        or die "Cannot move $opt{id} to $opt{o}: $!\n";
+}
+

diff -r 000000000000 -r 19b20927172d pyCRAC/pyFastqDuplicateRemover.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyFastqDuplicateRemover.xml Tue Jun 18 09:11:00 2013 -0400

[

@@ -0,0 +1,117 @@
+ <tool id ="pyFastqDuplicateRemover" name="pyFastqDuplicateRemover">
+    <requirements>
+        <requirement type="package">pyCRAC</requirement>
+    </requirements>
+ <command interpreter="perl">
+ pyFastqDuplicateRemover.pl
+ -f $ftype.f
+ #if $ftype.reverse.rev == "yes":
+        -r=$ftype.reverse.r
+ --out2 $out2
+    #end if#
+ -o $out
+ --id $out.id
+ </command>
+ <version_command>/usr/local/bin/pyFastqDuplicateRemover.py --version</version_command>
+ <inputs>
+ <conditional name="ftype">
+ <param name="type" type="select"  label="File type">
+ <option value="fastq" selected="true">FASTQ</option>
+ <option value="fasta">FASTA</option>
+ </param>
+ <when value="fastq">
+ <param format="fastq" name="f" type="data" label="FastQ File -f" help="FastQ format" />
+ <conditional name="reverse">
+                <param name="rev" type="select"  label="Add a reverse or paired FastQ file">
+                    <option value="no" selected="true">NO</option>
+                    <option value="yes">YES</option>
+                </param>
+                <when value="yes">
+     <param format="fastq" name="r" type="data" label="Reverse FastQ File -r" help="FastQ format" />
+ </when>
+ <when value="no">
+ </when>
+ </conditional>
+ </when>
+ <when value="fasta">
+ <param format="fasta" name="f" type="data" label="FastA File -f" help="FastA format" />
+ <conditional name="reverse">
+                <param name="rev" type="select"  label="Add a reverse or paired FastA file">
+                    <option value="no" selected="true">NO</option>
+                    <option value="yes">YES</option>
+                </param>
+                <when value="yes">
+     <param format="fasta" name="r" type="data" label="Reverse FastA File -r" help="FastA format" />
+ </when>
+ <when value="no">
+ </when>
+ </conditional>
+ </when>
+ </conditional>
+ <param name="label" type="text" format="txt" size="30" value="pyFastqDuplicateRemover" label="Enter output file label -o" />
+ </inputs>
+ <outputs>
+ <data format="fasta" name="out" label="${label.value}.fasta"/>
+ <data format="fasta" name="out2" label="${label.value}_reverse.fasta">
+ <filter>ftype['reverse']['rev'] == "yes"</filter>
+ </data>
+ </outputs>
+ <help>
+
+.. class:: infomark
+
+**pyFastqDuplicateRemover**
+
+pyFastqDuplicateRemover is part of the pyCRAC_ package. Removes identical sequences from fastq and fasta files and returns a fasta file with collapsed data.
+
+Can also process paired-end data.
+
+**Examples**
+
+Unprocessed fastq data with six random nucleotides at 5' end of the read::
+
+    @FCC102EACXX:3:1101:3231:2110#TGACCAAT/1
+    GCGCCTGCCAATTCCATCGTAATGATTAATAGGGACGGTCGGGGGCATC
+    +
+    bb_ceeeegggggiiiiiifghiihiihiiiiiiiiiifggfhiecccc
+
+After pyBarcodeFilter::
+
+    @FCC102EACXX:3:1101:3231:2110#TGACCAAT/1##GCGCCT
+    TCCATCGTAATGATTAATAGGGACGGTCGGGGGCATC
+    +
+    giiiiiifghiihiihiiiiiiiiiifggfhiecccc
+
+    This entry is printed to the NNNNNNGCCAAT barcode file.
+
+After pyFastqDuplicateRemover::
+
+    >1_GCGCCT_5/1
+    TCCATCGTAATGATTAATAGGGACGGTCGGGGGCATC
+
+    The '1' indicates that this is the first unique cDNA in the data
+    GCGCCT is the random barcode sequence
+    the '5' indicates that 5 reads were found with identical read and random barcode sequences
+    the '/1' indicates that the sequence originates from the forward sequencing reaction
+
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+
+------
+
+**Parameter list**
+
+Options::
+
+  -f FILE, --input_file=FILE
+                                        name of the FASTQ or FASTA input file
+
+  -r FILE, --reverse_input_file=FILE
+                                        name of the paired (or reverse) FASTQ or FASTA input file
+
+  -o FILE, --output_file=FILE
+                                        Provide the path and name of the fastq or fasta output file. Default is standard output.
+ For paired-end data just provide a file name without file extension (!)
+ </help>
+</tool>
+
+

diff -r 000000000000 -r 19b20927172d pyCRAC/pyFastqJoiner.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyFastqJoiner.xml Tue Jun 18 09:11:00 2013 -0400

[

@@ -0,0 +1,139 @@
+ <tool id ="pyFastqJoiner" name="pyFastqJoiner">
+    <requirements>
+        <requirement type="package">pyCRAC</requirement>
+    </requirements>
+ <command interpreter="python">
+ /usr/local/bin/pyFastqJoiner.py
+ -f $ftype.f1 $ftype.f2
+ -o $out
+ --file_type=$ftype.type
+ #if $joinc.ch == "-c":
+        -c $joinc.c
+    #end if#
+ </command>
+ <version_command>/usr/local/bin/pyFastqJoiner.py --version</version_command>
+ <inputs>
+ <conditional name="ftype">
+            <param name="type" type="select" label="File type">
+                <option value="fastq" selected="true">FASTQ</option>
+                <option value="fasta">FASTA</option>
+            </param>
+            <when value="fastq">
+                <param format="fastq" name="f1" type="data" label="First FastQ File -f" help="FastQ format" />
+                <param format="fastq" name="f2" type="data" label="Second FastQ File -f" help="FastQ format" />
+            </when>
+            <when value="fasta">
+                <param format="fasta" name="f1" type="data" label="First FastA File -f" help="FastA format" />
+                <param format="fasta" name="f2" type="data" label="Second FastA File -f" help="FastA format" />
+            </when>
+ </conditional>
+ <conditional name="joinc">
+            <param name="ch" type="select"  label="Insert a character at join">
+                <option value="" selected="true">NO</option>
+                <option value="-c">YES</option>
+            </param>
+            <when value="-c">
+                <param type="text" name="c" label="Add this character -c" value=":" >
+                    <validator type="empty_field" message="enter a character or turn this option off" />
+                </param>
+            </when>
+            <when value="">
+            </when>
+ </conditional>
+     <param name="label" type="text" format="txt" size="30" value="pyFastqJoiner" label="Enter output file label -o" />
+ </inputs>
+ <outputs>
+ <!-- change_format must be a child of the data element it applies to. -->
+ <data format="input" name="out" label="${label.value}.${ftype.type}">
+            <change_format>
+                <when input="ftype.type" value="fasta" format="fasta" />
+            </change_format>
+ </data>
+ </outputs>
+ <help>
+
+.. class:: infomark
+
+**pyFastqJoiner**
+
+pyFastqJoiner is part of the pyCRAC_ package. Merges paired sequences from two fastq or fasta formatted files.
+
+Example::
+
+    Forward reaction:
+
+    @FCC102EACXX:3:1101:1343:2181#ATCACGAT/1##CAATAG
+    CAAATTAGAGTGTTCAAAGCAGGCGTATTGCTCGAAT
+    +
+    `efhYb][bdQQ`eeaeaYbeY^ceU__IXa[^ZYae
+    @FCC102EACXX:3:1101:1424:2248#ATCACGAT/1##CCAGGA
+    CTAACCATAAACTATGCCTACTAGGGATCCAGAGGTG
+    +
+    ^_adddhJbaehbedd`dIb_^cXaRI^BBBBBBBBB
+    @FCC102EACXX:3:1101:1623:2036#ATCACGAN/1##CTCAGC
+    CAAAGTTAGGGGATCGAAGATGATCAGATACCGTCGT
+    +
+    bghfc^YbgbeadggfdffeaS^ac_X^cegaGZ_ef
+    @FCC102EACXX:3:1101:1574:2214#ATCACGAT/1##CGTTTT
+    CTAATGACCCACTCGGCACCTTACGAAATCAAAGTCT
+    +
+    cdfgYY`cefhhZef\eaggXaceeghfQaeghWNW\
+
+    Reverse reaction:
+
+    @FCC102EACXX:3:1101:1343:2181#ATCACGAT/2
+    AGCCTTTAAGTTTCAGCCTTGCGACCATACTCCCCCCAGAACCCAAAGA
+    +
+    YJaSJ`Z`K`YbSb[[daeJRR[YeWd_I^I^ecgc]OV\bdeaegbXb
+    @FCC102EACXX:3:1101:1424:2248#ATCACGAT/2
+    AAGTCCTTTAAGTTACAGCCTTGCGACCATACTACACCCAGAACCCAAA
+    +
+    YJJ\`JQY\`KJ`gY[[QRYY[[`H[_ceI^e[PYO^IWOHW^eaefhh
+    @FCC102EACXX:3:1101:1623:2036#ATCACGAN/2
+    GGCCAATCCTTATTGTGTCTGGACCTGGTGAGTTTCCCCGTGTTGAGTC
+    +
+    PP\`ccQ`eY[bQQ[d`ghehaghfgdg[`gb^bd[ePbH^c_c\a_eg
+
+    Here the ":" character is used to split the two sequences. This character tells pyFastqSplitter where to split the sequences.
+    This character is ignored by pyFastqDuplicateRemover
+
+    Result:
+
+    @FCC102EACXX:3:1101:1343:2181#ATCACGAT/1##CAATAG@FCC102EACXX:3:1101:1343:2181#ATCACGAT/2
+    CAAATTAGAGTGTTCAAAGCAGGCGTATTGCTCGAAT:AGCCTTTAAGTTTCAGCCTTGCGACCATACTCCCCCCAGAACCCAAAGA
+    +
+    `efhYb][bdQQ`eeaeaYbeY^ceU__IXa[^ZYaeYJaSJ`Z`K`YbSb[[daeJRR[YeWd_I^I^ecgc]OV\bdeaegbXb
+    @FCC102EACXX:3:1101:1424:2248#ATCACGAT/1##CCAGGA@FCC102EACXX:3:1101:1424:2248#ATCACGAT/2
+    CTAACCATAAACTATGCCTACTAGGGATCCAGAGGTG:AAGTCCTTTAAGTTACAGCCTTGCGACCATACTACACCCAGAACCCAAA
+    +
+    ^_adddhJbaehbedd`dIb_^cXaRI^BBBBBBBBBYJJ\`JQY\`KJ`gY[[QRYY[[`H[_ceI^e[PYO^IWOHW^eaefhh
+    @FCC102EACXX:3:1101:1623:2036#ATCACGAN/1##CTCAGC@FCC102EACXX:3:1101:1623:2036#ATCACGAN/2
+    CAAAGTTAGGGGATCGAAGATGATCAGATACCGTCGT:GGCCAATCCTTATTGTGTCTGGACCTGGTGAGTTTCCCCGTGTTGAGTC
+    +
+    bghfc^YbgbeadggfdffeaS^ac_X^cegaGZ_efPP\`ccQ`eY[bQQ[d`ghehaghfgdg[`gb^bd[ePbH^c_c\a_eg
+
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+
+------
+
+**Parameter list**
+
+Options::
+
+    -f fastq_file1 fastq_file2
+                        Provide the names of two raw data files separated by a single space.
+                        Make sure the first file is the data file of the forward (/1) sequencing reaction.
+
+    --file_type=FASTQ
+                        Can join fasta and fastq files. Fastq is default
+
+    -o mergedfastq.fastq, --outfile=mergedfastq.fastq
+                        provide the name of the output file. By default it
+                        will be printed to the standard output
+
+    -c :
+                        This option inserts the given character (here ':')
+                        between the DNA sequences so that it is much easier
+                        to split the data again later on
+
+
+ </help>
+</tool>

diff -r 000000000000 -r 19b20927172d pyCRAC/pyFastqSplitter.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyFastqSplitter.pl Tue Jun 18 09:11:00 2013 -0400

@@ -0,0 +1,46 @@
+#!/usr/bin/perl -w
+# Galaxy wrapper for pyFastqSplitter.py.
+#
+# Runs the splitter with "-o <id>" and then moves the two files it is expected
+# to produce, <id>_1.<file_type> and <id>_2.<file_type>, to the paths given by
+# --o1 and --o2. With --version only the tool's version is printed.
+use strict;
+use Getopt::Long;
+use File::Copy qw(move);
+
+my %opt;
+
+GetOptions(\%opt, "f=s", "c=s", "o1=s", "o2=s","file_type=s", "version","id=s")
+ or die "pyFastqSplitter.pl: could not parse the command line\n";
+
+my @tool = ("python", "/usr/local/bin/pyFastqSplitter.py");
+
+# Version query: nothing is split, so there is nothing to move afterwards.
+if (exists $opt{version}){
+ exit(system(@tool, "--version") == 0 ? 0 : 1);
+}
+
+# Every option listed here ends up in the command line or in a file name below.
+foreach my $name (qw(f id file_type o1 o2)){
+ die "pyFastqSplitter.pl: missing required option --$name\n" unless defined $opt{$name};
+}
+
+# List-form system() runs the tool without a shell, so a split character such
+# as "|" or ";" and paths containing spaces are passed through unchanged.
+my @cmnd = (@tool, "-f", $opt{f}, "-o", $opt{id}, "--file_type=$opt{file_type}");
+push @cmnd, "-c", $opt{c} if defined $opt{c};
+
+system(@cmnd) == 0
+ or die "pyFastqSplitter.pl: pyFastqSplitter.py failed (wait status $?)\n";
+
+# Move both halves; report every failure instead of stopping at the first.
+my $failed = 0;
+foreach my $pair ([1, $opt{o1}], [2, $opt{o2}]){
+ my ($read, $target) = @$pair;
+ my $source = "$opt{id}_${read}.$opt{file_type}";
+ next if move($source, $target);
+ warn "pyFastqSplitter.pl: cannot move $source to $target: $!\n";
+ $failed = 1;
+}
+exit $failed;
+

diff -r 000000000000 -r 19b20927172d pyCRAC/pyFastqSplitter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyFastqSplitter.xml Tue Jun 18 09:11:00 2013 -0400

[

@@ -0,0 +1,140 @@
+ <tool id ="pyFastqSplitter" name="pyFastqSplitter" force_history_refresh="True">
+        <requirements>
+                <requirement type="package">pyCRAC</requirement>
+        </requirements>
+ <command interpreter="perl">
+ ## "f" is declared inside the "ftype" conditional; the temporary file prefix uses the dataset id instead of the free-text label.
+ pyFastqSplitter.pl -f $ftype.f
+ --o1 $out1
+ --id $out1.id
+ --o2 $out2
+ --file_type $ftype.type
+ #if $joinc.ch == "-c":
+        -c '$joinc.c'
+    #end if#
+ </command>
+ <version_command>/usr/local/bin/pyFastqSplitter.py --version</version_command>
+ <inputs>
+ <conditional name="ftype">
+            <param name="type" type="select" label="File type">
+                <option value="fastq" selected="true">FASTQ</option>
+                <option value="fasta">FASTA</option>
+            </param>
+            <when value="fastq">
+                <param format="fastq" name="f" type="data" label="FastQ File -f" help="FastQ format" />
+            </when>
+            <when value="fasta">
+                <param format="fasta" name="f" type="data" label="FastA File -f" help="FastA format" />
+            </when>
+ </conditional>
+ <conditional name="joinc">
+            <param name="ch" type="select"  label="Insert a character at join">
+                <option value="" selected="true">NO</option>
+                <option value="-c">YES</option>
+            </param>
+            <when value="-c">
+                <param type="text" name="c" label="Split the reads on the -c character" value=":" >
+                    <validator type="empty_field" message="enter a character or turn this option off" />
+                </param>
+            </when>
+            <when value="">
+            </when>
+ </conditional>
+     <param name="label" type="text" format="txt" size="30" value="pyFastqSplitter" label="Enter output file label -o" />
+ </inputs>
+ <outputs>
+ <!-- change_format only takes effect as a child of the data element it modifies, so each output carries its own copy -->
+ <data format="input" name="out1" label="${label.value}_1.${ftype.type}">
+     <change_format><when input="ftype.type" value="fasta" format="fasta" /></change_format></data>
+ <data format="input" name="out2" label="${label.value}_2.${ftype.type}">
+     <change_format><when input="ftype.type" value="fasta" format="fasta" /></change_format></data>
+ </outputs>
+ <help>
+
+.. class:: infomark
+
+**pyFastqSplitter**
+
+pyFastqSplitter is part of the pyCRAC_ package. Splits a merged fastq file (pyFastqJoiner output) in to two files.
+
+Example::
+
+    Here the ":" character was used to separate the two sequences. By using the -c flag you can tell pyFastqSplitter where to split the sequences.
+    This character is ignored by pyFastqDuplicateRemover
+
+
+    @FCC102EACXX:3:1101:1343:2181#ATCACGAT/1##CAATAG@FCC102EACXX:3:1101:1343:2181#ATCACGAT/2
+    CAAATTAGAGTGTTCAAAGCAGGCGTATTGCTCGAAT:AGCCTTTAAGTTTCAGCCTTGCGACCATACTCCCCCCAGAACCCAAAGA
+    +
+    `efhYb][bdQQ`eeaeaYbeY^ceU__IXa[^ZYaeYJaSJ`Z`K`YbSb[[daeJRR[YeWd_I^I^ecgc]OV\bdeaegbXb
+    @FCC102EACXX:3:1101:1424:2248#ATCACGAT/1##CCAGGA@FCC102EACXX:3:1101:1424:2248#ATCACGAT/2
+    CTAACCATAAACTATGCCTACTAGGGATCCAGAGGTG:AAGTCCTTTAAGTTACAGCCTTGCGACCATACTACACCCAGAACCCAAA
+    +
+    ^_adddhJbaehbedd`dIb_^cXaRI^BBBBBBBBBYJJ\`JQY\`KJ`gY[[QRYY[[`H[_ceI^e[PYO^IWOHW^eaefhh
+    @FCC102EACXX:3:1101:1623:2036#ATCACGAN/1##CTCAGC@FCC102EACXX:3:1101:1623:2036#ATCACGAN/2
+    CAAAGTTAGGGGATCGAAGATGATCAGATACCGTCGT:GGCCAATCCTTATTGTGTCTGGACCTGGTGAGTTTCCCCGTGTTGAGTC
+    +
+    bghfc^YbgbeadggfdffeaS^ac_X^cegaGZ_efPP\`ccQ`eY[bQQ[d`ghehaghfgdg[`gb^bd[ePbH^c_c\a_eg
+
+    Result:
+
+    Forward reaction:
+
+    @FCC102EACXX:3:1101:1343:2181#ATCACGAT/1##CAATAG
+    CAAATTAGAGTGTTCAAAGCAGGCGTATTGCTCGAAT
+    +
+    `efhYb][bdQQ`eeaeaYbeY^ceU__IXa[^ZYae
+    @FCC102EACXX:3:1101:1424:2248#ATCACGAT/1##CCAGGA
+    CTAACCATAAACTATGCCTACTAGGGATCCAGAGGTG
+    +
+    ^_adddhJbaehbedd`dIb_^cXaRI^BBBBBBBBB
+    @FCC102EACXX:3:1101:1623:2036#ATCACGAN/1##CTCAGC
+    CAAAGTTAGGGGATCGAAGATGATCAGATACCGTCGT
+    +
+    bghfc^YbgbeadggfdffeaS^ac_X^cegaGZ_ef
+    @FCC102EACXX:3:1101:1574:2214#ATCACGAT/1##CGTTTT
+    CTAATGACCCACTCGGCACCTTACGAAATCAAAGTCT
+    +
+    cdfgYY`cefhhZef\eaggXaceeghfQaeghWNW\
+
+    Reverse reaction:
+
+    @FCC102EACXX:3:1101:1343:2181#ATCACGAT/2
+    AGCCTTTAAGTTTCAGCCTTGCGACCATACTCCCCCCAGAACCCAAAGA
+    +
+    YJaSJ`Z`K`YbSb[[daeJRR[YeWd_I^I^ecgc]OV\bdeaegbXb
+    @FCC102EACXX:3:1101:1424:2248#ATCACGAT/2
+    AAGTCCTTTAAGTTACAGCCTTGCGACCATACTACACCCAGAACCCAAA
+    +
+    YJJ\`JQY\`KJ`gY[[QRYY[[`H[_ceI^e[PYO^IWOHW^eaefhh
+    @FCC102EACXX:3:1101:1623:2036#ATCACGAN/2
+    GGCCAATCCTTATTGTGTCTGGACCTGGTGAGTTTCCCCGTGTTGAGTC
+    +
+    PP\`ccQ`eY[bQQ[d`ghehaghfgdg[`gb^bd[ePbH^c_c\a_eg
+
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+
+------
+
+**Parameter list**
+
+Options::
+
+    -f fastq_file, --filename=fastq_file
+                        Provide the name of the joined fastq or fasta file
+                        that you want to split. Default = standard input
+    --file_type=FASTQ
+                        Can split joined fasta and fastq files. Fastq is default
+                        If there isn't a specific character splitting the two reads
+                        the tool assumes that the two reads were of equal length
+    -o splitfastq, --outfile=splitfastq
+                        Provide the name of the output files (WITHOUT file
+                        extension). By default the data will be printed to the
+                        standard output
+    -c :, --character=:
+                        If the joined sequences were separated by a specific
+                        character then the program can divide the sequences by
+                        looking for that character
+
+ </help>
+</tool>

diff -r 000000000000 -r 19b20927172d pyCRAC/pyGTF2bed.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyGTF2bed.xml Tue Jun 18 09:11:00 2013 -0400

@@ -0,0 +1,107 @@
+<tool id="pyGTF2bed" name="pyGTF2bed">
+   <description>converter</description>
+   <requirements>
+         <requirement type="package">pyCRAC</requirement>
+       </requirements>
+   <command interpreter="python">/usr/local/bin/pyGTF2bed.py --gtf $input -o $output
+   ## The track name and description are free text; they are double-quoted so values containing spaces stay single arguments.
+   #if $addtrack.track == "--track":
+    --track --name "$addtrack.name" --description "$addtrack.description"
+    ## One colour for the whole track (-c) or a 'plus,minus' pair of colours (-s).
+    #if $addtrack.colorscheme.colorsel == "default":
+     -c $addtrack.colorscheme.color
+    #else:
+     -s '$addtrack.colorscheme.plus,$addtrack.colorscheme.minus'
+    #end if#
+   #end if#
+   </command>
+   <version_command>/usr/local/bin/pyGTF2bed.py --version</version_command>
+   <inputs>
+       <param name="input" type="data" format="gtf" label="GTF file --gtf"/>
+       <conditional name="addtrack">
+ <param name="track" type="select" label="Add UCSC track line to output --track">
+   <option value="" selected="true">NO</option>
+   <option value="--track">YES</option>
+ </param>
+ <when value=""/>
+ <when value="--track">
+   <param name="name" format="txt" type="text" value="User_supplied_track" size="80" label="Track name -n"/>
+                  <param name="description" format="txt" type="text" value="User_supplied_track" size="80" label="Track description -d"/>
+   <conditional name="colorscheme">
+   <param name="colorsel" type="select" label="Colouring scheme">
+     <option value="default" selected="true">One Colour</option>
+     <option value="strand">By Strand</option>
+   </param>
+   <when value="default">
+     <param name="color" type="select" label="Choose track colour -c">
+       <option value="black" selected="true">Black</option>
+       <option value="red">Red</option>
+                      <option value="blue">Blue</option>
+                      <option value="green">Green</option>
+                      <option value="purple">Purple</option>
+     </param>
+   </when>
+                  <when value="strand">
+                    <param name="plus" type="select" label="Choose forward strand track colour -s">
+                      <option value="black" selected="true">Black</option>
+                      <option value="red">Red</option>
+                      <option value="blue">Blue</option>
+                      <option value="green">Green</option>
+                      <option value="purple">Purple</option>
+                    </param>
+     <param name="minus" type="select" label="Choose minus strand track colour -s">
+                      <option value="black" selected="true">Black</option>
+                      <option value="red">Red</option>
+                      <option value="blue">Blue</option>
+                      <option value="green">Green</option>
+                      <option value="purple">Purple</option>
+     </param>
+                  </when>
+   </conditional>
+ </when>
+       </conditional>
+       <param name="label" type="text" format="txt" size="30" value="pyGTF2bed" label="Enter output file label -o" />
+   </inputs>
+   <outputs>
+     <data name="output" format="bed6" label="${label.value}.bed"/>
+   </outputs>
+   <help>
+
+.. class:: infomark
+
+**pyGTF2bed**
+
+pyGTF2bed is part of the pyCRAC_ package. Converts GTF files to the bed 6 format. Gene names present in the GTF file will be included in the bed file.
+
+
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+
+------
+
+**Parameter list**
+
+These options can be used to add and modify a track line for the UCSC genome browser::
+
+    --track
+                        Use this flag to add a UCSC genome browser track line
+                        to the beginning of your file
+    -n NAME, --name=NAME
+                        For the UCSC track line: provide a track name. Default
+                        = 'User_supplied_track'
+    -d DESCRIPTION, --description=DESCRIPTION
+                        For the UCSC track line: provide a track description.
+                        Default = 'User_supplied_track'
+    -c COLOR, --color=COLOR
+                        select the track color. Default = black
+    -s STRANDS, --colorstrands=STRANDS
+                        select the colors for each strand. Default =
+                        'red,blue'
+
+File input options::
+
+    --gtf=Yourfavoritegtf.gtf
+                        type the path to the gtf file that you want to
+                        convert. Default is standard input
+   </help>
+</tool>
+

diff -r 000000000000 -r 19b20927172d pyCRAC/pyGTF2bedGraph.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyGTF2bedGraph.pl Tue Jun 18 09:11:00 2013 -0400

@@ -0,0 +1,62 @@
+#!/usr/bin/perl -w
+# Galaxy wrapper for pyGTF2bedGraph.py.
+#
+# Runs the converter with "-o gb_<id>" and then moves the two files it is
+# expected to leave behind, gb_<id>_plus_strand.bedgraph and
+# gb_<id>_minus_strand.bedgraph, to the paths given by --po and --mo.
+# With --version only the tool's version is printed.
+use strict;
+use Getopt::Long;
+use File::Copy qw(move);
+
+my %opt;
+
+GetOptions(\%opt, "gtf=s","po=s","version","mo=s","count=i","chromfile=s","t=s","iCLIP","track","name=s","description=s","color=s","s=s","id=s")
+ or die "pyGTF2bedGraph.pl: could not parse the command line\n";
+
+my @tool = ("python", "/usr/local/bin/pyGTF2bedGraph.py");
+
+# Version query: no bedgraph files are written, so nothing is moved afterwards.
+if (exists $opt{version}){
+ exit(system(@tool, "--version") == 0 ? 0 : 1);
+}
+
+# Every option listed here ends up in the command line or in a file name below.
+foreach my $name (qw(gtf chromfile t count id po mo)){
+ die "pyGTF2bedGraph.pl: missing required option --$name\n" unless defined $opt{$name};
+}
+
+# Whitespace in the id is replaced so the output prefix is a single word.
+my $prefix = "gb_$opt{id}";
+$prefix =~ s/\s/_/g;
+
+# The command is kept as a list and run without a shell, so track names and
+# descriptions containing spaces or quotes reach the tool as single arguments.
+my @cmnd = (@tool, "--gtf", $opt{gtf}, "--chromfile", $opt{chromfile}, "-t", $opt{t}, "--count", $opt{count}, "-o", $prefix);
+
+push @cmnd, "--iCLIP" if exists $opt{iCLIP};
+
+if (exists $opt{track}){
+ # A missing name or description is passed on as an empty string.
+ push @cmnd, "--track",
+     "--name", (defined $opt{name} ? $opt{name} : ""),
+     "--description", (defined $opt{description} ? $opt{description} : "");
+
+ push @cmnd, "--color", $opt{color} if exists $opt{color};
+ push @cmnd, "-s", $opt{s} if exists $opt{s};
+}
+
+system(@cmnd) == 0
+ or die "pyGTF2bedGraph.pl: pyGTF2bedGraph.py failed (wait status $?)\n";
+
+# Move both strand files; report every failure instead of stopping at the first.
+my $failed = 0;
+foreach my $pair (["plus", $opt{po}], ["minus", $opt{mo}]){
+ my ($strand, $target) = @$pair;
+ my $source = "${prefix}_${strand}_strand.bedgraph";
+ next if move($source, $target);
+ warn "pyGTF2bedGraph.pl: cannot move $source to $target: $!\n";
+ $failed = 1;
+}
+exit $failed;
+

diff -r 000000000000 -r 19b20927172d pyCRAC/pyGTF2bedGraph.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyGTF2bedGraph.xml Tue Jun 18 09:11:00 2013 -0400

@@ -0,0 +1,162 @@
+<tool id="pyGTF2bedGraph" name="pyGTF2bedGraph">
+   <description>converter</description>
+   <requirements>
+         <requirement type="package">pyCRAC</requirement>
+       </requirements>
+   <command interpreter="perl">pyGTF2bedGraph.pl --gtf $input --po $po --mo $mo
+   --chromfile $addchr.chr
+   -t $type
+   --count $count
+   $iclip
+   ## The track name and description are free text; they are double-quoted so values containing spaces stay single arguments.
+   #if $addtrack.track == "--track":
+    --track --name "$addtrack.name" --description "$addtrack.description"
+    ## One colour for the whole track (--color) or a 'plus,minus' pair of colours (-s).
+    #if $addtrack.colorscheme.colorsel == "default":
+     --color $addtrack.colorscheme.color
+    #else:
+     -s '$addtrack.colorscheme.plus,$addtrack.colorscheme.minus'
+    #end if#
+   #end if#
+   --id $po.id
+   </command>
+   <version_command>/usr/local/bin/pyGTF2bedGraph.py --version</version_command>
+   <inputs>
+       <param name="input" type="data" format="gtf" label="GTF file --gtf"/>
+              <conditional name="addchr">
+ <param name="chrfile" type="select"  label="Choose Chromosome length file from">
+   <option value="default" selected="true">Defaults</option>
+   <option value="other">History</option>
+ </param>
+ <when value="default">
+   <param name="chr" type="select"  label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes. Use pyCrac utility pyCalculateChromosomeLengths to create.">
+     <options from_data_table="pycrac_chr"/>
+   </param>
+ </when>
+ <when value="other">
+   <param format="tabular" name="chr" type="data" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes"/>
+ </when>
+       </conditional>
+       <param name="type" type="select"  label="Choose type of data -t">
+                  <option value="reads" selected="true">Reads</option>
+                  <option value="substitutions">Substitutions</option>
+                  <option value="deletions">Deletions</option>
+       </param>
+       <param format="integer" name="count" type="integer" label="Count per feature --count " value="1" size="5" help="Takes the numbers in the 'score' column of the GTF file as the total number of reads for each position" >
+ <validator type="in_range" min="1" message="Please enter a value >= 1"/>
+       </param>
+       <param name="iclip" type="select" label="iCLIP mode --iCLIP">
+ <option value="" selected="true">OFF</option>
+ <option value="--iCLIP">ON</option>
+       </param>
+       <conditional name="addtrack">
+ <param name="track" type="select" label="Add UCSC track line to output">
+   <option value="" selected="true">NO</option>
+   <option value="--track">YES</option>
+ </param>
+ <when value=""/>
+ <when value="--track">
+   <param name="name" format="txt" type="text" value="User_supplied_track" size="80" label="Track name"/>
+                  <param name="description" format="txt" type="text" value="User_supplied_track" size="80" label="Track description"/>
+   <conditional name="colorscheme">
+   <param name="colorsel" type="select" label="Colouring scheme">
+     <option value="default" selected="true">One Colour</option>
+     <option value="strand">By Strand</option>
+   </param>
+   <when value="default">
+     <param name="color" type="select" label="Choose track colour">
+       <option value="black" selected="true">Black</option>
+       <option value="red">Red</option>
+                      <option value="blue">Blue</option>
+                      <option value="green">Green</option>
+                      <option value="purple">Purple</option>
+     </param>
+   </when>
+                  <when value="strand">
+                    <param name="plus" type="select" label="Choose forward strand track colour">
+                      <option value="black" selected="true">Black</option>
+                      <option value="red">Red</option>
+                      <option value="blue">Blue</option>
+                      <option value="green">Green</option>
+                      <option value="purple">Purple</option>
+                    </param>
+     <param name="minus" type="select" label="Choose minus strand track colour">
+                      <option value="black" selected="true">Black</option>
+                      <option value="red">Red</option>
+                      <option value="blue">Blue</option>
+                      <option value="green">Green</option>
+                      <option value="purple">Purple</option>
+     </param>
+                  </when>
+   </conditional>
+ </when>
+       </conditional>
+       <param name="label" type="text" format="txt" size="30" value="pyGTF2bedGraph" label="Enter output file label -o" />
+   </inputs>
+   <outputs>
+     <data name="po" format="bedgraph" label="${label.value}_plus_strand.bg"/>
+            <data name="mo" format="bedgraph" label="${label.value}_minus_strand.bg"/>
+   </outputs>
+   <help>
+
+.. class:: infomark
+
+**pyGTF2bedGraph**
+
+pyGTF2bedGraph is part of the pyCRAC_ package. Generates bedgraph files for each chromosome. An homage to bedtools genomecoverage. Takes a pyReadCounters GTF file as input file. Can also output bedGraph files for substitutions and deletions.
+
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+
+------
+
+**Parameter list**
+
+File input options::
+
+    --gtf=readdata.gtf
+                        type the path to the gtf data file. By default it
+                        expects data from the standard input
+    -o converted
+                        provide a name for an output file. A file extension or
+                        strand information is not necessary.
+    -c yeast.txt, --chromfile=yeast.txt
+                        Location of the chromosome info file. This file should
+                        have two columns: first column is the names of the
+                        chromosomes, second column is length of the
+                        chromosomes. Default is yeast
+    -t TYPE, --type=TYPE
+                        this tool can generate bedGraph files for reads,
+                        substitutions or deletions. Please use
+                        'reads','substitutions' or 'deletions' to indicate the
+                        type of data. Default='reads'
+    --count
+                        Takes the numbers in the 'score' column of the GTF
+                        file as the total number of reads for each position.
+                        Default is 1 for each interval.
+    --iCLIP
+                        This turns on the iCLIP mode and the sgr reads or cDNA
+                        files will report cross-linking site frequencies in
+                        iCLIP data
+    -v, --verbose
+                        to print status messages to a log file
+
+These options can be used to add a track line for the UCSC genome browser::
+
+    --track
+                        Use this flag to add a UCSC genome browser track line
+                        to the beginning of your file
+    -n NAME, --name=NAME
+                        For the UCSC track line: provide a track name. Default
+                        = 'User_supplied_track'
+    -d DESCRIPTION, --description=DESCRIPTION
+                        For the UCSC track line: provide a track description.
+                        Default = 'User_supplied_track'
+    --color=COLOR
+                        select the track color. Default = black
+    -s STRANDS, --colorstrands=STRANDS
+                        select the colors for each strand. Default =
+                        'red,blue'
+
+   </help>
+</tool>
+

diff -r 000000000000 -r 19b20927172d pyCRAC/pyGetGTFSources.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyGetGTFSources.xml Tue Jun 18 09:11:00 2013 -0400

@@ -0,0 +1,63 @@
+ <tool id ="pyGetGTFSources" name="pyGetGTFSources">
+ <requirements>
+         <requirement type="package">pyCRAC</requirement>
+     </requirements>
+ <command interpreter="python">
+ /usr/local/bin/pyGetGTFSources.py --gtf $addGTF.gtf $count -o $out
+ </command> <!-- $count is the "count" select below: "--count" for Yes, empty for No -->
+ <version_command>/usr/local/bin/pyGetGTFSources.py --version</version_command>
+ <inputs>
+ <conditional name="addGTF">
+ <param name="gtfFile" type="select"  label="Choose GTF File from">
+ <option value="default" selected="true">Defaults</option>
+ <option value="other">History</option>
+ </param>
+ <when value="default">
+ <param name="gtf" type="select"  label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
+ <options from_data_table="pycrac_gtf"/>
+ </param>
+ </when>
+ <when value="other">
+ <param format="GTF" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
+ </when>
+ </conditional>
+ <param name="count" type="select"  label="Count occurrences of each annotation --count">
+                                <option value="" selected="true">No</option>
+                                <option value="--count">Yes</option>
+ </param>
+                <param name="label" type="text" format="txt" size="30" value="GTF sources list" label="Enter output file label -o" />
+ </inputs>
+
+ <outputs>
+ <data format="tabular" name="out" label="${label.value}.txt"/>
+ </outputs>
+ <help>
+.. class:: infomark
+
+**pyGetGTFSources**
+
+pyGetGTFSources is part of the pyCRAC_ package. Extracts source names from the second column in a GTF file.
+
+
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+
+------
+
+**Parameter list**
+
+Options::
+
+  --gtf=Yourfavoritegtf.gtf
+                        type the path to the gtf file that you want to use. By
+                        default it expects data from the standard input
+  -o OUTFILE, --outfile=OUTFILE
+                        type the name and path of the file you want to write
+                        the output to. Default is standard output
+  --count               with this flag the program will count the
+                        occurrences of each source/annotation in the gtf file
+
+
+
+
+ </help>
+</tool>

diff -r 000000000000 -r 19b20927172d pyCRAC/pyGetGeneNamesFromGTF.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyGetGeneNamesFromGTF.xml Tue Jun 18 09:11:00 2013 -0400

@@ -0,0 +1,71 @@
+ <tool id ="pyGetGeneNamesFromGTF" name="pyGetGeneNamesFromGTF">
+ <requirements>
+         <requirement type="package">pyCRAC</requirement>
+     </requirements>
+ <command interpreter="python">
+ /usr/local/bin/pyGetGeneNamesFromGTF.py --gtf $addGTF.gtf --attribute $attribute $count -o $out
+ </command>
+ <version_command>/usr/local/bin/pyGetGeneNamesFromGTF.py --version</version_command>
+ <inputs>
+ <conditional name="addGTF">
+ <param name="gtfFile" type="select"  label="Choose GTF File from">
+ <option value="default" selected="true">Defaults</option>
+ <option value="other">History</option>
+ </param>
+ <when value="default">
+ <param name="gtf" type="select"  label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
+ <options from_data_table="pycrac_gtf"/>
+ </param>
+ </when>
+ <when value="other">
+ <param format="GTF" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
+ </when>
+ </conditional>
+ <param name="attribute" type="select"  label="Select the attribute to extract names from --attribute">
+                                <option value="gene_name" selected="true">gene_name</option>
+                                <option value="gene_id">gene_id</option>
+                                <option value="transcript_name">transcript_name</option>
+                                <option value="transcript_id">transcript_id</option>
+ </param>
+                <param name="count" type="select"  label="Count occurrences of each annotation --count">
+                                <option value="" selected="true">No</option>
+                                <option value="--count">Yes</option>
+                </param>
+ <param name="label" type="text" format="txt" size="30" value="GTF gene list" label="Enter output file label -o" />
+ </inputs>
+
+ <outputs>
+ <data format="tabular" name="out" label="${label.value}.txt"/>
+ </outputs>
+ <help>
+.. class:: infomark
+
+**pyGetGeneNamesFromGTF**
+
+pyGetGeneNamesFromGTF is part of the pyCRAC_ package. Extracts and counts all gene names from a GTF file.
+
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+
+------
+
+**Parameter list**
+
+Options::
+
+  --gtf=Yourfavoritegtf.gtf
+                        type the path to the gtf file that you want to use. By
+                        default it expects data from the standard input.
+  -o OUTFILE, --outfile=OUTFILE
+                        type the name and path of the file you want to write
+                        the output to. Default is standard output
+  -a ATTRIBUTE, --attribute=ATTRIBUTE
+                        from which attribute do you want to extract names?
+                        Choices: gene_name, gene_id, transcript_name,
+                        transcript_id
+  --count
+                        with this flag the program will count the
+                        occurrences of each source/annotation in the gtf file
+
+
+ </help>
+</tool>

diff -r 000000000000 -r 19b20927172d pyCRAC/pyMotif.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyMotif.pl Tue Jun 18 09:11:00 2013 -0400

@@ -0,0 +1,64 @@
+#!/usr/bin/perl -w
+# Galaxy wrapper for pyMotif.py.
+#
+# Runs the tool with "-o m_<id>" and then moves the four files it is expected
+# to leave behind (m_<id>_<annotation>_<suffix>) to the paths given by
+# --count, --features, --zscores and --random.
+# With --version only the tool's version is printed.
+use strict;
+use Getopt::Long;
+use File::Copy qw(move);
+
+my %opt;
+
+# "o=s" is accepted on the command line, but its value is not used by this script.
+GetOptions(\%opt, "f=s","version","gtf=s","range=i","overlap=i","annotation=s","tab=s","k_min=i","k_max=i","numberofkmers=i","count=s","features=s","zscores=s","random=s","options","o=s","id=s")
+ or die "pyMotif.pl: could not parse the command line\n";
+
+my @tool = ("python", "/usr/local/bin/pyMotif.py");
+
+# Version query: no result files are written, so nothing is moved afterwards.
+if (exists $opt{version}){
+ exit(system(@tool, "--version") == 0 ? 0 : 1);
+}
+
+# Every option listed here ends up in the command line or in a file name below.
+foreach my $name (qw(f gtf tab annotation id count features zscores random)){
+ die "pyMotif.pl: missing required option --$name\n" unless defined $opt{$name};
+}
+
+my $prefix = "m_$opt{id}";
+
+# The command is kept as a list and run without a shell, so no value is re-parsed.
+my @cmnd = (@tool, "-f", $opt{f}, "--gtf", $opt{gtf}, "--tab", $opt{tab}, "--annotation", $opt{annotation}, "-o", $prefix);
+
+# --options switches on the five tuning values; all of them must then be given.
+if (exists $opt{options}){
+ foreach my $name (qw(range overlap k_min k_max numberofkmers)){
+  die "pyMotif.pl: --options requires --$name\n" unless defined $opt{$name};
+  push @cmnd, "--$name=$opt{$name}";
+ }
+}
+
+system(@cmnd) == 0
+ or die "pyMotif.pl: pyMotif.py failed (wait status $?)\n";
+
+# File name suffix of each expected result, paired with its destination path.
+my @results = (
+ ["data_k-mers_count.txt",      $opt{count}],
+ ["top_k-mers_in_features.gtf", $opt{features}],
+ ["k-mer_Z_scores.txt",         $opt{zscores}],
+ ["random_k-mers_count.txt",    $opt{random}],
+);
+
+# Move all four files; report every failure instead of stopping at the first.
+my $failed = 0;
+foreach my $pair (@results){
+ my ($suffix, $target) = @$pair;
+ my $source = "${prefix}_$opt{annotation}_$suffix";
+ next if move($source, $target);
+ warn "pyMotif.pl: cannot move $source to $target: $!\n";
+ $failed = 1;
+}
+exit $failed;
+

diff -r 000000000000 -r 19b20927172d pyCRAC/pyMotif.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyMotif.xml Tue Jun 18 09:11:00 2013 -0400

b'@@ -0,0 +1,222 @@\n+ <tool id ="pyMotif" name="pyMotif">\n+\t<requirements>\n+ \t<requirement type="package">pyCRAC</requirement>\n+ \t</requirements>\n+\t<command interpreter="perl"> \n+\tpyMotif.pl\n+\t-f $input\n+\t--gtf=$addGTF.gtf\n+\n+ \t#if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto":\n+\t --annotation $addGTF.annotate.scan.annotation\n+\t#else:\n+\t --annotation $addGTF.annotate.annotation\n+\t#end if#\n+\n+\t--tab=$addTab.tab\n+\n+\t#if $addOpt.options == "edit":\n+\t --options\n+\t --k_min $addOpt.kmin\n+\t\t--k_max $addOpt.kmax\n+\t\t--numberofkmers=$addOpt.numberofkmers\n+\t\t--overlap $addOpt.overlap\n+\t\t--range $addOpt.range\n+\t#end if#\n+\t-o "$input.name"\n+\t--id $count.id\n+\t--count $count\n+\t--random $random\n+\t--features $features\n+\t--zscores $zscores\n+\t</command>\n+\t<version_command>/usr/local/bin/pyMotif.py --version</version_command>\n+\t<inputs>\n+\t\t<param format="gtf" name="input" type="data" label="Input File --input_file" help="File of type .gtf" />\n+ <conditional name="addTab">\n+ <param name="tabFile" type="select" label="Choose Genomic Reference Sequence from">\n+ <option value="default" selected="true">Defaults</option>\n+ <option value="other">History</option>\n+ </param>\n+ <when value="default">\n+ <param name="tab" type="select" label="Genomic Reference Sequence --tab" help="Tab file containing genomic reference sequence">\n+ <options from_data_table="pycrac_tab"/>\n+ </param>\n+ </when>\n+ <when value="other">\n+ <param format="tabular" name="tab" type="data" label="Genomic Reference Sequence --tab" help="Tab file containing genomic reference sequence"/>\n+ </when>\n+ </conditional>\t\t\t\t\n+\t\t<conditional name="addGTF">\n+\t\t\t<param name="gtfFile" type="select" label="Choose GTF File from">\n+\t\t\t\t<option value="default" selected="true">Defaults</option>\n+\t\t\t\t<option value="other">History</option>\n+\t\t\t</param>\t\n+\t\t\t<when value="default">\n+\t\t\t\t<param 
name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">\n+\t\t\t\t\t<options from_data_table="pycrac_gtf"/>\n+\t\t\t\t</param>\n+\n+\t\t\t\t<conditional name="annotate">\n+\t\t\t\t<param name="annotations" type="select" label="Select annotation">\n+\t\t\t\t\t<option value="all" selected="true">All</option>\n+\t\t\t\t\t<option value="manual">Enter in text box</option>\n+\t\t\t\t\t<option value="auto">Scan pyGetGTFSources file</option>\n+\t\t\t\t</param>\t\n+\t\t\t\t<when value="all">\n+\t\t\t\t\t<param name="annotation" type="hidden" format="txt" size="10" value="all"/>\n+\t\t\t\t</when>\n+\t\t\t\t<when value="manual">\n+\t\t\t\t\t<param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">\n+\t\t\t\t\t\t<validator type="empty_field" message="Please enter a value"/>\t\t\t\n+\t\t\t\t\t</param>\n+\t\t\t\t</when>\n+\t\t\t\t<when value="auto">\n+\t\t\t\t\t<param format="tabular" name="gtf_annotation" type="data" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. 
Refer to pyGetGTFSources"/>\t\n+\t\t\t\t\t\t<conditional name="scan">\n+\t\t\t\t\t\t<param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">\n+\t\t\t\t\t\t\t<option value="wait" selected="true">Waiting</option>\n+\t\t\t\t\t\t\t<option value="scanning">Go</option>\n+\t\t\t\t\t\t</param>\t\n+\t\t\t\t\t\t<when value="wait">\n+\t\t\t\t\t\t</when>\n+\t\t\t\t\t\t<when value="scanning">\n+\t\t\t\t\t\t<param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">\n+\t\t\t\t\t\t\t <options from_dataset="gtf_annota'..b'aximum number of k-mers in output">\n+\t\t <validator type="in_range" min="0" message="Please enter a value >= 0"/>\n+\t\t </param>\n+\t\t <param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5\' and 3\' UTRs 0>50000">\n+\t\t <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>\n+\t\t </param>\n+\t\t <param format="integer" name="overlap" type="integer" label="Overlap --overlap" value="1" size="5" help="Sets the number of nucleotides a read has to overlap with a gene before it is considered a hit. ">\n+\t\t <validator type="in_range" min="1" message="Please enter a positive integer"/>\n+\t\t </param>\n+\t\t </when>\n+\t\t <when value="default">\n+\t\t </when>\n+\t\t</conditional>\n+ <param name="label" type="text" format="txt" size="30" value="pyMotif" label="Enter output file label -o" />\n+\t</inputs>\n+\n+\t<outputs>\n+\t\t<data format="tabular" name="zscores" label="${label.value}_k-mer_Z_scores.txt"/>\n+\t\t<data format="tabular" name="count" label="${label.value}_data_k-mers_count.txt"/>\n+\t\t<data format="gtf" name="features" label="${label.value}_top_k-mers_in_features.gtf"/>\n+ <data format="tabular" name="random" label="${label.value}_random_k-mers_count.txt"/>\n+\t</outputs>\n+\t<help>\n+\n+.. 
class:: infomark\n+\n+**pyMotif**\n+\n+pyMotif is part of the pyCRAC_ package. Looks for enriched sequence motifs in high-throughput sequencing data. Produces a GTF type output file \n+with coordinates and Z-scores for enriched motifs. The GTF file can be visualised in genome browsers.\n+ \n+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html\n+ \n+------\n+\n+**Parameter list**\n+\n+File input options::\n+\n+ -f intervals.gtf, --input_file=intervals.gtf\n+ Provide the path to an interval gtf file. By default\n+ it expects data from the standard input.\n+ -o OUTPUT_FILE, --output_file=OUTPUT_FILE\n+ Use this flag to override the standard file names. Do\n+ NOT add an extension.\n+ --gtf=annotation_file.gtf\n+ type the path to the gtf annotation file that you want\n+ to use\n+ --tab=tab_file.tab \n+ type the path to the tab file that contains the\n+ genomic reference sequence\n+\n+pyMotif specific options::\n+\n+ --k_min=4 \n+ this option allows you to set the shortest k-mer\n+ length. Default = 4.\n+ --k_max=6 \n+ this option allows you to set the longest k-mer\n+ length. Default = 8.\n+ -n 100, --numberofkmers=100\n+ choose the maximum number of enriched k-mer sequences\n+ you want to have reported in output files. Default =\n+ 1000\n+\n+pyCRAC common options::\n+\n+ -a protein_coding, --annotation=protein_coding\n+ select which annotation (i.e. protein_coding, ncRNA,\n+ sRNA, rRNA,snoRNA,snRNA, depending on the source of\n+ your GTF file) you would like to focus your search on.\n+ Default = all annotations\n+ -r 100, --range=100\n+ allows you to add regions flanking the genomic\n+ feature. 
If you set \'-r 50\' or \'--range=50\', then the\n+ program will add 50 nucleotides to each feature on\n+ each side regardless of whether the GTF file has genes\n+ with annotated UTRs.\n+ --overlap=1 \n+ sets the number of nucleotides a motif has to overlap\n+ with a genomic feature before it is considered a hit.\n+ Default = 1 nucleotide\n+\n+\n+\n+\n+\t</help>\n+</tool>\t\n'

diff -r 000000000000 -r 19b20927172d pyCRAC/pyPileup.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyPileup.xml Tue Jun 18 09:11:00 2013 -0400

[

b'@@ -0,0 +1,384 @@\n+<?xml version="1.0" encoding="utf-8"?>\n+ <tool id ="pyPileup" name="pyPileup">\n+\t<requirements>\n+ \t<requirement type="package">pyCRAC</requirement>\n+ \t</requirements>\n+\t<command interpreter="python">\n+\t/usr/local/bin/pyPileup.py\n+\t-f $ftype.input\n+ --file_type $ftype.file_type \n+ #if $geneOpt.alignGene == "gene": \n+ -g $geneOpt.genes \n+ #end if# \n+ #if $geneOpt.alignGene == "chr": \n+ --chr $geneOpt.chr \n+ #end if# \n+ #if ($ftype.file_type == "novo" or $ftype.file_type == "sam") and $ftype.disc.discard == "discard": \n+ --discarded $discarded\n+\t#end if# \n+ --gtf=$addGTF.gtf \n+ --tab=$addTab.tab \n+ #if ($ftype.file_type == "novo" or $ftype.file_type == "sam") and $ftype.addAlignOpt.alignoptions == "edit":\n+ --align_quality=$ftype.addAlignOpt.align_quality \n+ --align_score=$ftype.addAlignOpt.align_score \n+ --distance=$ftype.addAlignOpt.d \n+ --length=$ftype.addAlignOpt.length \n+ #if int($ftype.addAlignOpt.max) > 0:\n+ --max=$ftype.addAlignOpt.max \n+ #end if# \n+ $ftype.addAlignOpt.unique \n+ $ftype.addAlignOpt.blocks \n+ $ftype.addAlignOpt.mutations \n+ #if $ftype.disc.discard == "--discarded": \n+ --discarded $discarded \n+ #end if# \n+ #end if# '..b"es\n+\n+Common options::\n+\n+ -v, --verbose \n+ prints all the status messages to a file rather than\n+ the standard output\n+ --ignorestrand \n+ this flag tells the program to ignore strand\n+ information and all overlapping reads will considered\n+ sense reads. Useful for analysing ChIP or RIP data\n+ --zip=FILE \n+ use this option to compress all the output files in a\n+ single zip file\n+ --overlap=1 \n+ sets the number of nucleotides a read has to overlap\n+ with a gene before it is considered a hit. Default =\n+ 1 nucleotide\n+ -s genomic, --sequence=genomic\n+ with this option you can select whether you want the\n+ reads aligned to the genomic or the coding sequence.\n+ Default = genomic\n+ -r 100, --range=100\n+ allows you to set the length of the UTR regions. 
If\n+ you set '-r 50' or '--range=50', then the program will\n+ set a fixed length (50 bp) regardless of whether the\n+ GTF file has genes with annotated UTRs.\n+\n+Options for novo, SAM and BAM files::\n+\n+ --align_quality=100, --mapping_quality=100\n+ with these options you can set the alignment quality\n+ (Novoalign) or mapping quality (SAM) threshold. Reads\n+ with qualities lower than the threshold will be\n+ ignored. Default = 0\n+ --align_score=100 \n+ with this option you can set the alignment score\n+ threshold. Reads with alignment scores lower than the\n+ threshold will be ignored. Default = 0\n+ -l 100, --length=100\n+ to set read length threshold. Default = 1000\n+ -m 100000, --max=100000\n+ maximum number of mapped reads that will be analyzed.\n+ Default = All\n+ --unique \n+ with this option reads with multiple alignment\n+ locations will be removed. Default = Off\n+ --blocks \n+ with this option reads with the same start and end\n+ coordinates on a chromosome will only be counted once.\n+ Default = Off\n+ --discarded=FILE \n+ prints the lines from the alignments file that were\n+ discarded by the parsers. This file contains reads\n+ that were unmapped (NM), of poor quality (i.e. QC) or\n+ paired reads that were mapped to different chromosomal\n+ locations or were too far apart on the same\n+ chromosome. Useful for debugging purposes\n+ -d 1000, --distance=1000\n+ this option allows you to set the maximum number of\n+ base-pairs allowed between two non-overlapping paired\n+ reads. Default = 1000\n+ --mutations=delsonly\n+ Use this option to only track mutations that are of\n+ interest. For CRAC data this is usually deletions\n+ (--mutations=delsonly). For PAR-CLIP data this is\n+ usually T-C mutations (--mutations=TC). Other options\n+ are: do not report any mutations: --mutations=nomuts.\n+ Only report specific base mutations, for example only\n+ in T's, C's and G's :--mutations=[TCG]. The brackets\n+ are essential. 
Other nucleotide combinations are also\n+ possible\n+\n+\t</help>\n+</tool>\t\n"

diff -r 000000000000 -r 19b20927172d pyCRAC/pyReadAligner.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyReadAligner.xml Tue Jun 18 09:11:00 2013 -0400

[

b'@@ -0,0 +1,368 @@\n+ <tool id ="pyReadAligner" name="pyReadAligner">\n+\t<requirements>\n+\t\t\t<requirement type="package">pyCRAC</requirement>\n+\t\t</requirements>\n+\t<command interpreter="python"> \n+\t/usr/local/bin/pyReadAligner.py\n+\t-f $ftype.input\n+\t--file_type $ftype.file_type\n+\t#if $geneOpt.alignGene == "gene":\n+\t\t-g $geneOpt.genes\n+\t#end if#\n+\t#if $geneOpt.alignGene == "chr":\n+\t\t--chr $geneOpt.chr\n+\t#end if#\n+\t\t#if ($ftype.file_type == "novo" or $ftype.file_type == "sam") and $ftype.disc.discard == "discard":\t \n+\t\t\t\t --discarded $discarded\n+\t\t#end if#\t \n+\t--gtf=$addGTF.gtf\n+\t--tab=$addTab.tab\n+\t#if ($ftype.file_type == "novo" or $ftype.file_type == "sam") and $ftype.addAlignOpt.alignoptions == "edit":\n+\t\t --align_quality=$ftype.addAlignOpt.align_quality\t\t\t\n+\t\t --align_score=$ftype.addAlignOpt.align_score\t\t\t\t\t\t\t\n+\t\t --distance=$ftype.addAlignOpt.d\t\t\t\t\t\t\t\t\t\n+\t\t --length=$ftype.addAlignOpt.length\t\n+\t\t #if int($ftype.addAlignOpt.max) > 0:\t\t\t\t\t\t\t\t\t\n+\t\t --max=$ftype.addAlignOpt.max\t\t\t\t\t\t\t \n+\t\t #end if#\n+\t\t $ftype.addAlignOpt.unique\t\t\t\t\t\t\t\t\t\t \n+\t\t $ftype.addAlignOpt.blocks\t\t \n+\t\t $ftype.addAlignOpt.mutations\t \n+\t#end if#\n+\t#if $addOpt.options == "edit":\n+\t\t--range=$addOpt.range\n+\t\t--overlap=$addOpt.overlap\n+\t\t$addOpt.ignore\n+\t\t-s $addOpt.sequence\n+\t\t#if int($addOpt.limit) > 0:\n+\t\t\t--limit=$addOpt.limit\n+\t\t#end if#\n+\t#end if#\n+\t-o $output\t\n+\t</command>\n+\t<version_command>/usr/local/bin/pyReadAligner.py --version</version_command>\n+\t<inputs>\n+\n+\n+\t\t\t<conditional name="geneOpt">\n+\t\t\t\t\t\t<param name="alignGene" type="select" label="Do you want to align reads to genes or chromosome co-ordinates?">\n+\t\t\t\t\t\t\t\t<option value="gene" selected="true">Genes</option>\n+\t\t\t\t\t\t\t\t<option value="chr">Chromosome Co-ordinates</option>\n+\t\t\t\t\t\t</param>\n+\t\t\t\t\t\t<when value="chr">\n+\t\t\t 
<param format="interval" name="chr" type="data" label="Choose a Chromosome Coordinate File" help="Tab delimited text file contai\\\n+ning an identifier, chromosome name, start position, end position and strand (\'-\' or \'+\')"/>\n+\t\t\t\t\t\t</when>\n+\t\t\t\t\t\t<when value="gene">\n+\t\t\t <param format="txt" name="genes" type="data" label="Choose a Gene List -g" help="Single column gene ID file"/>\n+\t\t\t\t\t\t</when>\n+\t\t</conditional>\n+\t\t\t\t<conditional name="addGTF">\n+\t\t\t\t\t\t<param name="gtfFile" type="select"\t label="Choose GTF File from">\n+\t\t\t\t\t\t\t\t<option value="default" selected="true">Defaults</option>\n+\t\t\t\t\t\t\t\t<option value="other">History</option>\n+\t\t\t\t\t\t</param>\n+\t\t\t\t\t\t<when value="default">\n+\t\t\t\t\t\t\t\t<param name="gtf" type="select"\t label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">\n+\t\t\t\t\t\t\t\t\t\t<options from_data_table="pycrac_gtf"/>\n+\t\t\t\t\t\t\t\t</param>\n+\t\t\t\t\t\t</when>\n+\t\t\t\t\t\t<when value="other">\n+\t\t\t\t\t\t\t\t<param format="GTF" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>\n+\t\t\t\t\t\t</when>\n+\t\t\t\t</conditional>\n+\t\t\t <conditional name="addTab">\n+\t\t\t\t\t\t<param name="tabFile" type="select"\t label="Choose Genomic Reference Sequence from">\n+\t\t\t\t\t\t\t\t<option value="default" selected="true">Defaults</option>\n+\t\t\t\t\t\t\t\t<option value="other">History</option>\n+\t\t\t\t\t\t</param>\n+\t\t\t\t\t\t<when value="default">\n+\t\t\t\t\t\t\t\t<param name="tab" type="select"\t label="Genomic Reference Sequence --tab" help="Tab file containing genomic reference sequence">\n+\t\t\t\t\t\t\t\t\t\t<options from_data_table="pycrac_tab"/>\n+\t\t\t\t\t\t\t\t</param>\n+\t\t\t\t\t\t</when>\n+\t\t\t\t\t\t<when value="other">\n+\t\t\t\t\t\t\t\t<param format="tabular" name="tab" type="data" label="Genomic Reference Sequence --tab" help="Tab file containing genomic reference 
sequence"/>\n+\t\t\t\t\t\t</when>\n+\t\t </conditional>\n+\n+\n+\t\t<conditional name="ftype">\n+\t\t\t<param name="file_type" type="select" label="Input File Type --file_type">\n+\t\t\t\t\t<option value="sam">Sam/BAM</option>\n+\t\t\t\t\t<option value="novo">Novo</option>\n+\t\t\t\t\t<option value="gtf">GTF</option>\n+\t\t\t</param>\n+\t\t\t<when value="sam">\n+\t\t\t\t<param format="sam,bam" name="input" type="data" label="Input File -f" help="Alignment file of type .sam or .'..b" if you simply would like to align reads against a\n+\t\t\t\t\t\tgenomic sequence you should generate a tab delimited\n+\t\t\t\t\t\tfile containing an identifyer, chromosome name, start\n+\t\t\t\t\t\tposition, end position and strand\n+\t--gtf=annotation_file.gtf\n+\t\t\t\t\t\ttype the path to the gtf annotation file that you want\n+\t\t\t\t\t\tto use\n+\t--tab=tab_file.tab\t\n+ type the path to the tab file that contains the\n+\t\t\t\t\t\tgenomic reference sequence\n+\t--file_type=FILE_TYPE\n+\t\t\t\t\t\tuse this option to specify the file type (i.e. 'novo',\n+\t\t\t\t\t\t'sam', 'gtf'). This will tell the program which\n+\t\t\t\t\t\tparsers to use for processing the files. Default =\n+\t\t\t\t\t\t'novo'\n+\n+pyReadAligner specific options::\n+\n+\t--limit=500\t\t\t\n+ with this option you can select how many reads mapped\n+\t\t\t\t\t\tto a particular gene/ORF/region you want to count.\n+\t\t\t\t\t\tDefault = All\n+\n+Common options::\n+\n+\t--ignorestrand\t\t\n+ this flag tells the program to ignore strand\n+\t\t\t\t\t\tinformation and all overlapping reads will considered\n+\t\t\t\t\t\tsense reads. Useful for analysing ChIP or RIP data\n+\t--overlap=1\t\t\t\n+ sets the number of nucleotides a read has to overlap\n+\t\t\t\t\t\twith a gene before it is considered a hit. 
Default =\n+\t\t\t\t\t\t1 nucleotide\n+\t-s genomic, --sequence=genomic\n+\t\t\t\t\t\twith this option you can select whether you want the\n+\t\t\t\t\t\treads aligned to the genomic or the coding sequence.\n+\t\t\t\t\t\tDefault = genomic\n+\t-r 100, --range=100\n+\t\t\t\t\t\tallows you to set the length of the UTR regions. If\n+\t\t\t\t\t\tyou set '-r 50' or '--range=50', then the program will\n+\t\t\t\t\t\tset a fixed length (50 bp) regardless of whether the\n+\t\t\t\t\t\tGTF file has genes with annotated UTRs.\n+\n+Options for novo, SAM and BAM files::\n+\n+\t--align_quality=100, --mapping_quality=100\n+\t\t\t\t\t\twith these options you can set the alignment quality\n+\t\t\t\t\t\t(Novoalign) or mapping quality (SAM) threshold. Reads\n+\t\t\t\t\t\twith qualities lower than the threshold will be\n+\t\t\t\t\t\tignored. Default = 0\n+\t--align_score=100\t\n+ with this option you can set the alignment score\n+\t\t\t\t\t\tthreshold. Reads with alignment scores lower than the\n+\t\t\t\t\t\tthreshold will be ignored. Default = 0\n+\t-l 100, --length=100\n+\t\t\t\t\t\tto set read length threshold. Default = 1000\n+\t-m 100000, --max=100000\n+\t\t\t\t\t\tmaximum number of mapped reads that will be analyzed.\n+\t\t\t\t\t\tDefault = All\n+\t--unique\t\t\t \n+ with this option reads with multiple alignment\n+\t\t\t\t\t\tlocations will be removed. Default = Off\n+\t--blocks\t\t\t\n+ with this option reads with the same start and end\n+\t\t\t\t\t\tcoordinates on a chromosome will only be counted once.\n+\t\t\t\t\t\tDefault = Off\n+\t--discarded=FILE\t \n+ prints the lines from the alignments file that were\n+\t\t\t\t\t\tdiscarded by the parsers. This file contains reads\n+\t\t\t\t\t\tthat were unmapped (NM), of poor quality (i.e. QC) or\n+\t\t\t\t\t\tpaired reads that were mapped to different chromosomal\n+\t\t\t\t\t\tlocations or were too far apart on the same\n+\t\t\t\t\t\tchromosome. 
Useful for debugging purposes\n+\t-d 1000, --distance=1000\n+\t\t\t\t\t\tthis option allows you to set the maximum number of\n+\t\t\t\t\t\tbase-pairs allowed between two non-overlapping paired\n+\t\t\t\t\t\treads. Default = 1000\n+\t--mutations=delsonly\n+\t\t\t\t\t\tUse this option to only track mutations that are of\n+\t\t\t\t\t\tinterest. For CRAC data this is usually deletions\n+\t\t\t\t\t\t(--mutations=delsonly). For PAR-CLIP data this is\n+\t\t\t\t\t\tusually T-C mutations (--mutations=TC). Other options\n+\t\t\t\t\t\tare: do not report any mutations: --mutations=nomuts.\n+\t\t\t\t\t\tOnly report specific base mutations, for example only\n+\t\t\t\t\t\tin T's, C's and G's :--mutations=[TCG]. The brackets\n+\t\t\t\t\t\tare essential. Other nucleotide combinations are also\n+\t\t\t\t\t\tpossible\n+\n+\n+\t</help>\n+</tool> \n"

diff -r 000000000000 -r 19b20927172d pyCRAC/pyReadCounters.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyReadCounters.pl Tue Jun 18 09:11:00 2013 -0400

@@ -0,0 +1,90 @@
+#!/usr/bin/perl -w
+# Galaxy wrapper for pyReadCounters.py.
+#
+# Runs pyReadCounters.py with the temporary output prefix "rc_<id>" and then
+# moves the files it writes onto the dataset paths that Galaxy supplies via
+# --stats, --hittable, --intronUTRoverlap and --countoutput.
+use strict;
+use File::Copy qw(move);
+use Getopt::Long;
+
+my $script = "/usr/local/bin/pyReadCounters.py";
+my %opt;
+
+GetOptions(\%opt, "f=s","file_type=s","version","gtf=s","align_quality=i","align_score=i","range=i","length=i","max=i","distance=i","ignorestrand","overlap=i","unique","blocks","mutations=s","countoutput=s","stats=s","hittable=s","intronUTRoverlap=s","discarded=s","options","alignOpt","id=s");
+
+# --version only reports the tool version: no output files exist to collect,
+# so stop here instead of falling through to the move step.
+if (exists $opt{version}) {
+    exit(system("python", $script, "--version") == 0 ? 0 : 1);
+}
+
+# Fail early with a clear message rather than building a broken command line.
+for my $required (qw(f file_type gtf id stats hittable intronUTRoverlap)) {
+    die "pyReadCounters.pl: missing required option --$required\n"
+        unless defined $opt{$required};
+}
+
+my $prefix = "rc_$opt{id}";
+
+# The command is kept as a list so that system() bypasses the shell; values
+# such as --mutations=[TCG] or paths with spaces reach Python unmodified.
+my @cmd = ("python", $script, "-f", $opt{f}, "--file_type", $opt{file_type},
+           "--gtf", $opt{gtf}, "-o", $prefix);
+
+if (exists $opt{options}) {
+    push @cmd, "--range=$opt{range}", "--overlap=$opt{overlap}";
+    push @cmd, "--ignorestrand" if exists $opt{ignorestrand};
+}
+
+# --blocks is only forwarded together with the alignment options, so the
+# output names below must follow the same condition.
+my $use_blocks = exists $opt{alignOpt} && exists $opt{blocks};
+
+if (exists $opt{alignOpt}) {
+    push @cmd, "--align_quality=$opt{align_quality}", "--align_score=$opt{align_score}",
+               "--length=$opt{length}", "--distance=$opt{distance}";
+    push @cmd, "--max=$opt{max}" if exists $opt{max};
+    push @cmd, "--unique" if exists $opt{unique};
+    push @cmd, "--blocks" if $use_blocks;
+    push @cmd, "--mutations=$opt{mutations}" if exists $opt{mutations};
+}
+
+# The tool XML passes --discarded independently of --alignOpt, so honour it
+# even when the alignment options are left at their defaults.
+push @cmd, "--discarded=$opt{discarded}" if exists $opt{discarded};
+
+my $failed = 0;
+if (system(@cmd) != 0) {
+    warn "pyReadCounters.pl: pyReadCounters.py failed (wait status " . $? . ")\n";
+    $failed = 1;
+}
+print STDOUT join(" ", @cmd);
+
+# Output files are expected to end in "_cDNAs" when --blocks was used and in
+# "_reads" otherwise.
+my $suffix = $use_blocks ? "cDNAs" : "reads";
+my @outputs = (
+    ["${prefix}_hittable_$suffix.txt",               $opt{hittable}],
+    ["${prefix}_file_statistics_$suffix.txt",        $opt{stats}],
+    ["${prefix}_intron_and_UTR_overlap_$suffix.gtf", $opt{intronUTRoverlap}],
+);
+
+# The count output is not collected for GTF input.
+push @outputs, ["${prefix}_count_output_$suffix.gtf", $opt{countoutput}]
+    if $opt{file_type} ne "gtf";
+
+# Try every move so one missing file does not hide the others, but report
+# any failure through the exit status.
+for my $pair (@outputs) {
+    my ($from, $to) = @$pair;
+    if (!defined $to) {
+        warn "pyReadCounters.pl: no destination given for $from\n";
+        $failed = 1;
+    }
+    elsif (!move($from, $to)) {
+        warn "pyReadCounters.pl: cannot move $from to $to: $!\n";
+        $failed = 1;
+    }
+}
+
+exit $failed;

diff -r 000000000000 -r 19b20927172d pyCRAC/pyReadCounters.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyReadCounters.xml Tue Jun 18 09:11:00 2013 -0400

[

b'@@ -0,0 +1,359 @@\n+<tool id ="pyReadCounters" name="pyReadCounters" force_history_refresh="True">\n+\t<requirements>\n+\t\t\t<requirement type="package">pyCRAC</requirement>\n+\t\t</requirements>\n+\t<command interpreter="perl"> \n+\tpyReadCounters.pl\n+\t-f $ftype.input\n+\t--file_type $ftype.file_type\n+\t--gtf $addGTF.gtf\n+\t#if ($ftype.file_type == "novo" or $ftype.file_type == "sam") and $ftype.disc.discard == "discard":\n+\t\t--discarded $discarded \n+\t#end if#\n+\t#if ($ftype.file_type == "novo" or $ftype.file_type == "sam") and $ftype.addAlignOpt.alignoptions == "edit":\n+\t\t--alignOpt\n+\t\t--align_quality $ftype.addAlignOpt.align_quality\n+\t\t--align_score $ftype.addAlignOpt.align_score \n+\t\t#if int($ftype.addAlignOpt.max) > 0:\t\t\t\t\t\t\t\t\t\t\t\t \n+\t\t\t--max $ftype.addAlignOpt.max\t\t\t\t\t\t\t \n+\t\t#end if#\t \n+\t\t--distance $ftype.addAlignOpt.d\n+\t\t--length $ftype.addAlignOpt.length\n+\t\t$ftype.addAlignOpt.unique\t\n+\t\t$ftype.addAlignOpt.blocks\n+\t\t$ftype.addAlignOpt.mutations\n+\t#end if#\n+\t#if $addOpt.options == "edit":\n+\t\t--options\n+\t\t--range $addOpt.range\n+\t\t$addOpt.ignore\n+\t\t--overlap $addOpt.overlap\n+\t#end if#\n+\n+\t--stats $stats\n+\t--hittable $hittable\n+\t--intronUTRoverlap $intronUTRoverlap\n+\n+\t#if $ftype.file_type == "novo" or $ftype.file_type == "sam":\n+\t\t--countoutput $countoutput\n+\t#end if#\n+\n+\t--id $stats.id\n+\t</command>\n+\t<version_command>/usr/local/bin/pyReadCounters.py --version</version_command>\n+\t<inputs>\n+ <conditional name="addGTF">\n+ <param name="gtfFile" type="select"\t label="Choose GTF File from">\n+ <option value="default" selected="true">Defaults</option>\n+ <option value="other">History</option>\n+ </param>\n+ <when value="default">\n+ <param name="gtf" type="select"\t label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">\n+ <options from_data_table="pycrac_gtf"/>\n+ </param>\n+ </when>\n+ <when value="other">\n+ <param format="GTF" name="gtf" 
type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>\n+ </when>\n+ </conditional>\n+\t\t<conditional name="ftype">\n+\t\t\t<param name="file_type" type="select" label="Input File Type --file_type" help="Use .novo or .sam input files">\n+\t\t\t\t<option value="novo" selected="true">Novo</option>\n+\t\t\t\t<option value="sam">Sam/Bam</option>\n+\t\t\t\t<option value="gtf">GTF</option>\n+\t\t\t</param>\n+\t\t\t<when value="novo">\n+\t\t\t\t<param format="tabular" name="input" type="data" label="Input File --input_file" help="Alignment file of type .novo" />\n+\t\t\t\t<conditional name="disc">\n+\t\t\t\t <param name="discard" type="select" label="Print discarded reads to a separate file">\n+\t\t\t\t\t<option value="" selected="true">OFF</option>\n+\t\t\t\t\t<option value="discard">ON</option>\n+\t\t\t\t </param>\n+\t\t\t\t <when value="discard">\n+\t\t\t\t </when>\n+\t\t\t\t <when value="">\n+\t\t\t\t </when>\n+\t\t\t\t</conditional>\n+\t\t <conditional name="addAlignOpt">\n+ <param name="alignoptions" type="select" label="Alignment Options">\n+ <option value="default" selected="true">Default</option>\n+ <option value="edit">Edit</option>\n+ </param>\n+ <when value="edit">\n+ <param name="mutations" type="select" label="Option for selecting type of mutations to report --mutations" help="cross-linking sites are often highlighted by deletions and/or substitutions in the reads. You can use this option to select specific mutations that you want to have reported in the GTF output file.">\n+ <option value="" selected="true">Off</option>\n+ <option value="--mutations delsonly">deletions</option>\n+ <option value="--mutations subsonly">substitutions</option>\n+ <option value="--mutations TC">T->C substitutions</option>\n+ <option value="--mut'..b'"; gene_name "INT_0_6716,ACC1"; # 661726S;\n+ chrXIV\treads\texon\t661839\t661878\t4\t+\t. 
gene_id "INT_0_6716,YNR016C"; gene_name "INT_0_6716,ACC1"; # 661875S;\n+ \n+This output file also reports whether a read contains a mutation. \n+ \n+For example::\n+ \n+ # 661596S\n+ \n+Indicates that the read had a nucleotide substitution ("S") at genomic coordinate 661596. The chromosome name can be found in the first column. \n+\n+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html\n+\t\t\n+------\n+\n+**Parameter list**\n+\n+File input options::\n+\n+\t-f FILE, --input_file=FILE\n+\t\t\t\t\t\tprovide the path to your novo, SAM/BAM or gtf data\n+\t\t\t\t\t\tfile. Default is standard input. Make sure to specify\n+\t\t\t\t\t\tthe file type of the file you want to have analyzed\n+\t\t\t\t\t\tusing the --file_type option!\n+\t-o OUTPUT_FILE, --output_file=OUTPUT_FILE\n+\t\t\t\t\t\tUse this flag to override the standard file names. Do\n+\t\t\t\t\t\tNOT add an extension.\n+\t--file_type=FILE_TYPE\n+\t\t\t\t\t\tuse this option to specify the file type (i.e.\n+\t\t\t\t\t\t\'novo\',\'sam\' or \'gtf\'). This will tell the program\n+\t\t\t\t\t\twhich parsers to use for processing the files. Default\n+\t\t\t\t\t\t= \'novo\'\n+\t--gtf=annotation_file.gtf\n+\t\t\t\t\t\ttype the path to the gtf annotation file that you want\n+\t\t\t\t\t\tto use\n+\n+Common pyCRAC options::\n+\n+\t\t--ignorestrand\t\t\t\t\t\t\n+\t\t\t\t\t\t\t\t\t\t\tTo ignore strand information and all reads overlapping\n+\t\t\t\t\t\twith genomic features will be considered sense reads.\n+\t\t\t\t\t\tUseful for analysing ChIP or RIP data\n+\t--overlap=1\t\t\t\t\t\n+\t\t\t\t\t\t\t\t\t\t\t\tsets the number of nucleotides a read has to overlap\n+\t\t\t\t\t\twith a gene before it is considered a hit. Default =\n+\t\t\t\t\t\t1 nucleotide\n+\t-r 100, --range=100\n+\t\t\t\t\t\tallows you to add regions flanking the genomic\n+\t\t\t\t\t\tfeature. 
If you set \'-r 50\' or \'--range=50\', then the\n+\t\t\t\t\t\tprogram will add 50 nucleotides to each feature on\n+\t\t\t\t\t\teach side regardless of whether the GTF file has genes\n+\t\t\t\t\t\twith annotated UTRs\n+\n+Options for SAM/BAM and Novo files::\n+\n+\t--mutations=delsonly\n+\t\t\t\t\t\tUse this option to only track mutations that are of\n+\t\t\t\t\t\tinterest. For CRAC data this is usually deletions\n+\t\t\t\t\t\t(--mutations=delsonly). For PAR-CLIP data this is\n+\t\t\t\t\t\tusually T-C mutations (--mutations=TC). Other options\n+\t\t\t\t\t\tare\\: do not report any mutations: --mutations=nomuts.\n+\t\t\t\t\t\tOnly report specific base mutations, for example only\n+\t\t\t\t\t\tin T\'s, C\'s and G\'s :--mutations=[TCG]. The brackets\n+\t\t\t\t\t\tare essential. Other nucleotide combinations are also\n+\t\t\t\t\t\tpossible\n+\t--align_quality=100, --mapping_quality=100\n+\t\t\t\t\t\twith these options you can set the alignment quality\n+\t\t\t\t\t\t(Novoalign) or mapping quality (SAM) threshold. Reads\n+\t\t\t\t\t\twith qualities lower than the threshold will be\n+\t\t\t\t\t\tignored. Default = 0\n+\t--align_score=100\t\t\t\t\t\n+\t\t\t\t\t\t\t\t\t\t\t\twith this option you can set the alignment score\n+\t\t\t\t\t\tthreshold. Reads with alignment scores lower than the\n+\t\t\t\t\t\tthreshold will be ignored. Default = 0\n+\t--unique\t\t\t\t\t\t\t\n+\t\t\t\t\t\t\t\t\t\t\t\twith this option reads with multiple alignment\n+\t\t\t\t\t\tlocations will be removed. Default = Off\n+\t--blocks\t\t\t\t\t\n+\t\t\t\t\t\t\t\t\t\t\t\twith this option reads with the same start and end\n+\t\t\t\t\t\tcoordinates on a chromosome will be counted as one\n+\t\t\t\t\t\tcDNA. 
Default = Off\n+\t-m 100000, --max=100000\n+\t\t\t\t\t\tmaximum number of mapped reads that will be analyzed.\n+\t\t\t\t\t\tDefault = All\n+\t-d 1000, --distance=1000\n+\t\t\t\t\t\tthis option allows you to set the maximum number of\n+\t\t\t\t\t\tbase-pairs allowed between two non-overlapping paired\n+\t\t\t\t\t\treads. Default = 1000\n+\t--discarded=FILE\t\t\t\t\t\n+\t\t\t\t\t\t\t\t\t\t\t\tprints the lines from the alignments file that were\n+\t\t\t\t\t\tdiscarded by the parsers. This file contains reads\n+\t\t\t\t\t\tthat were unmapped (NM), of poor quality (i.e. QC) or\n+\t\t\t\t\t\tpaired reads that were mapped to different chromosomal\n+\t\t\t\t\t\tlocations or were too far apart on the same\n+\t\t\t\t\t\tchromosome. Useful for debugging purposes\n+\t-l 100, --length=1000\t\t\t\t\t\n+\t\t\t\t\t\t\t\t\t\t\t\tto set read length threshold. Default = 1000\n+\n+\t</help>\n+</tool> \n'

diff -r 000000000000 -r 19b20927172d pyCRAC/pySelectMotifsFromGTF.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pySelectMotifsFromGTF.xml Tue Jun 18 09:11:00 2013 -0400

@@ -0,0 +1,75 @@
+ <tool id ="pySelectMotifsFromGTF" name="pySelectMotifsFromGTF">
+ <requirements>
+         <requirement type="package">pyCRAC</requirement>
+     </requirements>
+ <command interpreter="python">
+ /usr/local/bin/pySelectMotifsFromGTF.py
+ --gtf $input
+ -m $motif
+ -o $out
+ -l $length
+ -z $zscore
+ </command>
+ <version_command>/usr/local/bin/pySelectMotifsFromGTF.py --version</version_command>
+ <inputs>
+ <param format="gtf" name="input" type="data" label="Input File --gtf" help="pyMotif gtf output files" />
+ <param format="txt" name="motif" type="text" size="200" value="KBCTTG" label="motif string" help="Enter motif (all uppercase) you want to extract from the pyMotif gtf output file">
+ <validator type="empty_field" />
+ </param>
+ <param format="integer" type="integer" value="6" size="5" name="length" label="Length --length" help="Set a Kmer Length. Note that the length has to be at least as long as your k-mer sequence, otherwise the program will not run correctly" />
+ <param format="float" type="float" value="0" size="5" name="zscore" label="Z Score --Z_score" help="Set a minimum Kmer Z_score" />
+ <param name="label" type="text" format="txt" size="30" value="pySelectMotifsFromGTF" label="Enter output file label -o" />
+ </inputs>
+
+ <outputs>
+ <data format="gtf" name="out" label="${label.value}_${motif.value}.gtf"/>
+ </outputs>
+ <help>
+
+
+.. class:: infomark
+
+**pySelectMotifsFromGTF**
+
+pySelectMotifsFromGTF is part of the pyCRAC_ package. Extracts your favourite k-mer sequence from pyMotif GTF output files.
+Note that you can include degenerate nucleotides in your motif string::
+
+    N = A, G, C or T
+    R = A or G = puRine
+    Y = C or T = pYrimidine
+    M = A or C = aMino
+    S = G or C
+    W = A or T
+    K = G or T = Keto
+    V = A, C or G = Not T (letter after)
+    D = A, G or T = Not C
+    H = A, C or T = Not G
+    B = C, G or T = Not A
+
+So if you enter KBCTTG as search string and length=6, then the program will extract a large number of six-mers from your data.
+If you set length = 8, it will look for this pattern in a stretch of 8 nucleotides.
+
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+
+------
+
+**Parameter list**
+
+Options::
+
+    --gtf=Yourfavoritegtf.gtf
+                        type the path to the gtf file that you want to use. By
+                        default it expects data from the standard input
+    -o FILE, --output=FILE
+                        Optional. Specify the name of the output file. Default
+                        is standard output. Make sure it has the .gtf
+                        extension!
+    -m KBCTTG, --motif=KBCTTG
+                        Specify the motif you want to extract from the GTF file.
+    -z 15.0, --Z_score=15.0
+                        Set a minimum k-mer Z-score. Default=0
+    -l 4, --length=4
+                        Set a k-mer length. Default is no length filtering
+
+ </help>
+</tool>

diff -r 000000000000 -r 19b20927172d pyCRAC/pycrac.chr.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pycrac.chr.loc.sample Tue Jun 18 09:11:00 2013 -0400

@@ -0,0 +1,1 @@
+s.cerevisiae EF2 /usr/local/pyCRAC/db/Saccharomyces_cerevisiae.EF2.59.1.0_chr_lengths.txt

diff -r 000000000000 -r 19b20927172d pyCRAC/pycrac.fasta.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pycrac.fasta.loc.sample Tue Jun 18 09:11:00 2013 -0400

@@ -0,0 +1,1 @@
+s.cerevisiae EF2 /usr/local/pyCRAC/db/Saccharomyces_cerevisiae.EF2.59.1.0.fa

diff -r 000000000000 -r 19b20927172d pyCRAC/pycrac.gtf.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pycrac.gtf.loc.sample Tue Jun 18 09:11:00 2013 -0400

@@ -0,0 +1,1 @@
+s.cerevisiae EF2 /usr/local/pyCRAC/db/Saccharomyces_cerevisiae.EF2.59.1.2.gtf

diff -r 000000000000 -r 19b20927172d pyCRAC/pycrac.tab.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pycrac.tab.loc.sample Tue Jun 18 09:11:00 2013 -0400

@@ -0,0 +1,1 @@
+s.cerevisiae EF2 /usr/local/pyCRAC/db/Saccharomyces_cerevisiae.EF2.59.1.0.fa.tab

diff -r 000000000000 -r 19b20927172d pyCRAC/tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/tool_data_table_conf.xml.sample Tue Jun 18 09:11:00 2013 -0400

@@ -0,0 +1,23 @@
+
+<tables>
+    
+    <table name="pycrac_fasta">
+        <columns>name, value</columns>
+        <file path="tool-data/pyCRAC/pycrac.fasta.loc" />
+    </table>
+    
+    <table name="pycrac_gtf">
+        <columns>name, value</columns>
+        <file path="tool-data/pyCRAC/pycrac.gtf.loc" />
+    </table>
+    
+    <table name="pycrac_tab">
+        <columns>name, value</columns>
+        <file path="tool-data/pyCRAC/pycrac.tab.loc" />
+    </table>
+    
+    <table name="pycrac_chr">
+        <columns>name, value</columns>
+        <file path="tool-data/pyCRAC/pycrac.chr.loc" />
+    </table>
+</tables>