# HG changeset patch
# User devteam
# Date 1400517262 14400
# Node ID f6e5bb5aa2f574f21d55e6f8a59e0cdfaa754e17
Imported from capsule None
diff -r 000000000000 -r f6e5bb5aa2f5 rmapq_wrapper.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rmapq_wrapper.py Mon May 19 12:34:22 2014 -0400
@@ -0,0 +1,100 @@
+#!/usr/bin/env python
+
+import os, sys, tempfile
+
+assert sys.version_info[:2] >= (2, 4)  # compare against a tuple; (2.4) was a float
+
+def stop_err( msg ):
+    """Write msg and a newline to stderr, then exit with status 1."""
+    sys.stderr.write( "%s\n" % msg )
+    sys.exit( 1 )
+
+
+def __main__():
+    """Validate the command-line arguments and run rmapq on every target file.
+
+    sys.argv must carry, in order: target directory, reads FASTA file, quality
+    score file, the -q, -M, -w, -h and -m values, and the output path.  rmapq
+    runs once per file in the target directory and each result is appended to
+    the output path.  Invalid input or a failed step ends in stop_err().
+    """
+    import shutil, subprocess  # local import: needed by this function only
+
+    if len( sys.argv ) < 10:
+        stop_err( 'Expected 9 arguments: target_dir reads scores q M w h m output.' )
+    target_path, infile, scorefile = sys.argv[1:4]
+    high_score, high_len, read_len, align_len, mismatch = sys.argv[4:9]  # -q -M -w -h -m
+    output_file = sys.argv[9]
+
+    # Only parseability is checked here; rmapq receives the original strings.
+    checks = ( ( float, high_score, 'Invalid value for minimal quality score.' ),
+               ( int, high_len, 'Invalid value for minimal high quality bases.' ),
+               ( int, read_len, 'Invalid value for read length. Must be between 20 and 64.' ),
+               ( int, align_len, 'Invalid value for minimal length of a hit.' ),
+               ( int, mismatch, 'Invalid value for mismatch numbers in an alignment.' ) )
+    for convert, value, message in checks:
+        try:
+            convert( value )
+        except ValueError:
+            stop_err( message )
+
+    requested_len = int( read_len )
+    if requested_len == 0:
+        # 0 asks for the length to be guessed from the first read of the FASTA file.
+        first_read = ''
+        handle = open( infile )
+        try:
+            for line in handle:
+                line = line.rstrip( '\r\n' )
+                if not line.startswith( '>' ):
+                    first_read += line
+                elif first_read:
+                    break  # next header reached: the first record is complete
+        finally:
+            handle.close()
+        read_len = str( len( first_read ) )  # measured after the loop: works for a single-read file too
+    elif requested_len < 20 or requested_len > 64:
+        stop_err( 'Invalid value for read length. Must be between 20 and 64.' )
+
+    all_files = []
+    if os.path.isdir( target_path ):
+        all_files = [ os.path.normpath( os.path.join( target_path, name ) )
+                      for name in os.listdir( target_path ) ]
+    if not all_files:
+        stop_err( "No sequences for %s are available for search, please report this error." % ( target_path ) )
+
+    for detail_file_path in all_files:
+        # mkstemp keeps a real file until it is removed below (no name-reuse race).
+        temp_fd, output_tempfile = tempfile.mkstemp()
+        os.close( temp_fd )
+        try:
+            # An argument list (no shell) keeps paths with spaces or metacharacters intact.
+            command = [ 'rmapq', '-q', high_score, '-M', high_len, '-h', align_len,
+                        '-w', read_len, '-m', mismatch, '-Q', scorefile,
+                        '-c', detail_file_path, infile, '-o', output_tempfile ]
+            try:
+                status = subprocess.call( command, stderr=subprocess.STDOUT )  # like "2>&1"
+            except OSError:
+                stop_err( 'Unable to run rmapq: %s' % sys.exc_info()[1] )
+            if status != 0:
+                stop_err( 'rmapq failed on %s (exit status %s).' % ( detail_file_path, status ) )
+            try:
+                result = open( output_tempfile, 'rb' )
+                try:
+                    combined = open( output_file, 'ab' )
+                    try:
+                        shutil.copyfileobj( result, combined )
+                    finally:
+                        combined.close()
+                finally:
+                    result.close()
+            except IOError:
+                stop_err( 'Unable to append rmapq results to %s: %s' % ( output_file, sys.exc_info()[1] ) )
+        finally:
+            try:
+                os.remove( output_tempfile )
+            except OSError:
+                pass  # best effort: a leftover temporary file is not worth failing for
+
+if __name__ == '__main__':
+    __main__()  # script entry point: runs only when executed, not on import
diff -r 000000000000 -r f6e5bb5aa2f5 rmapq_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rmapq_wrapper.xml Mon May 19 12:34:22 2014 -0400
@@ -0,0 +1,94 @@
+
+ for Solexa Short Reads Alignment with Quality Scores
+
+ rmap
+
+
+ #if $trim.choice=="No":
+ rmapq_wrapper.py $database $input_seq $input_score $high_score $high_len 0 $align_len $mismatch $output1
+ #else:
+ rmapq_wrapper.py $database $input_seq $input_score $high_score $high_len $trim.read_len $align_len $mismatch $output1
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: warningmark
+
+ RMAPQ was developed for **Solexa** reads.
+
+.. class:: infomark
+
+**TIP**. The tool will guess the length of the reads. However, if you choose to trim the reads, the *Maximal Length of the Reads* must be between 20 and 64; reads longer than the specified value will be trimmed at the 3' end.
+
+-----
+
+**What it does**
+
+This tool runs **rmapq** (for more information, please see the reference below), searching against a genome build with sequence qualities.
+
+-----
+
+**Parameters**
+
+- *Minimal High-quality Bases* (**-M**): the minimal length of the high quality score bases
+- *Minimum Score for High-quality Base* (**-q**) : the minimal quality score
+- *Minimal Length of a Hit* (**-h**) : the minimal length of an exact match or seed
+- *Number of Mismatches Allowed* (**-m**) : the maximal number of mismatches allowed in an alignment
+- *Read Length* (**-w**) : maximal length of the reads; reads longer than the threshold will be truncated at the 3' end.
+
+-----
+
+**Reference**
+
+ **RMAP** was developed by Dr. Andrew D Smith and Dr. Zhenyu Xuan at the Cold Spring Harbor Laboratory. Please see http://rulai.cshl.edu/rmap/
+
+
+
diff -r 000000000000 -r f6e5bb5aa2f5 test-data/rmapq_wrapper_test1.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rmapq_wrapper_test1.bed Mon May 19 12:34:22 2014 -0400
@@ -0,0 +1,8 @@
+phix 360 396 seq1 1 -
+phix 4188 4224 seq2 1 +
+phix 4908 4944 seq4 0 -
+phix 2811 2847 seq5 2 +
+phix 3847 3883 seq6 0 -
+phix 91 127 seq7 0 +
+phix 2302 2338 seq8 2 +
+phix 2448 2484 seq9 0 +
diff -r 000000000000 -r f6e5bb5aa2f5 test-data/rmapq_wrapper_test1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rmapq_wrapper_test1.fasta Mon May 19 12:34:22 2014 -0400
@@ -0,0 +1,20 @@
+>seq1
+GACTCATGATTTCTTACCTATTAGTGGTTGAACATC
+>seq2
+GTGATATGTATGTTGACGGCCATAAGGCTGCTTCTT
+>seq3
+GTTGTCGATAGAACTTCATGTGCCTGTAAAACAAGT
+>seq4
+ACCAACCAGAACGTGAAAAAGCGTCCTGCGTGTAGC
+>seq5
+GTTTATGTTGGTTTCATGGTTTTGTCTAACTTTATC
+>seq6
+GCTTTACCGTCTTTCCAGAAATTGTTCCAAGTATCG
+>seq7
+GCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGC
+>seq8
+GTTATAACGCCGAAGCGGTAAAAATTTTTATTTTTT
+>seq9
+GTTCTCACTTCTGTTACTCCAGCTTCTTCGGCACCT
+>seq10
+GTGGCCTGTTGATTCTAAAGGTTAGTTTCTTCACGC
diff -r 000000000000 -r f6e5bb5aa2f5 test-data/rmapq_wrapper_test1.qual
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rmapq_wrapper_test1.qual Mon May 19 12:34:22 2014 -0400
@@ -0,0 +1,10 @@
+ -40 -40 40 -40 40 -40 -40 -40 -40 40 -40 -40 -40 -40 -40 40 -40 40 -40 -40 40 -40 -40 -40 -40 -40 -40 40 -40 -40 40 -40 40 -40 -40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 40 -40 40 -40 -40 -40 -40 -40 40 -40 -40 -40 40 40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 -40 -40 40 40 -40 -40 -40 -40 -40 -40 40 -40 -40 -40 40 40 -40 -40 -40 -40 -40 40 -40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 40 -40 40 -40 -40 -40 40 -40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 -40 -40 -15 15 -40 40 -40 -40
+ -40 -40 40 -40 -40 -40 -40 40 -40 -40 40 -40 40 -40 -40 -40 -40 -40 -40 40 40 -40 -40 -40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 -40 40 40 -40 -40 -40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 40 -40 40 -40 -40 -40 -5 5 -40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 21 -40 -21 -40 40 -40 -40 40 -40 -40 -40 -40 -40 -40 40 40 -40 -40 -40 12 -40 -40 -12 -36 -40 36 -40 -40 -40 40 -40 -4 4 -40 -40 -40 -40 -40 40 -40 -40 14 -14 -40 40 -40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 40 -40 -40 -40 -40 -40 40 -40 -40 -25 25
+ -40 -40 40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 -40 40 -40 40 -40 -40 -40 -40 40 -40 40 -40 -40 -40 -40 -40 -40 40 40 -40 -40 -40 -40 -40 40 -40 40 -40 -40 -40 40 -40 -40 -40 -40 40 -40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 34 -40 -34 40 -40 -40 -40 -40 -40 -40 40 -40 -25 25 -40 -40 -40 -40 40 -37 -40 37 -40 -40 7 -40 -7 -40 40 -40 -40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 -40 40 40 -40 -40 -40 38 -40 -40 -38 40 -40 -40 -40 40 -40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 11 -16 -13 -22 -40 -40 40 -40 -40 -40 -40 40
+ 40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 -33 33 -40 -40 -40 40 -40 -40 40 -40 -40 -40 -40 -40 40 -40 40 -40 -40 -40 40 -40 -40 -40 -40 40 -40 -40 -40 -40 40 -40 -40 -40 -25 25 -40 -40 40 -40 40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 -40 -40 27 -27 -5 5 -40 -40 -40 -40 40 -40 -40 -40 -40 40 -40 40 -40 -40 -40 40 -40 -40 -40 -40 -40 40 -40 -40 40 -40 -40 40 -40 -40 -40 -37 37 -40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 -25 25 40 -40 -40 -40 -40 -40 34 -34 -40 40 -40 -40
+ -40 -40 40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 40 40 -40 -40 -40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 40 -40 40 -40 -40 40 -40 -40 -40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -2 2 -40 -40 35 -35 -40 -40 -40 40 -40 40 -40 -40 -40 -40 -40 40 40 -40 -40 -40 40 -40 -40 -40 -40 36 -40 -36 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 40 5 -5 -40 -28 -40 -16 -40 16 -40 40 -40 -40
+ -40 -40 40 -40 -40 40 -40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 40 40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 -40 40 -40 -40 -40 -40 40 -40 40 -40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 40 -40 40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 -40 -40 40 -40 40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 -40 -40 28 -28 -40 -40 -40 40 40 -40 -40 -40 -40 -40 -40 40 -40 40 -40 -40 -40 -40 40 -40
+ -40 -40 40 -40 -40 40 -40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 40 40 -40 -40 -40 -40 40 -40 -40 -40 -40 40 -40 40 -40 -40 -40 40 -40 -40 -40 -40 -40 -40 40 -40 -40 -40 40 40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 -40 -40 -40 40 -40 40 -40 -40 -40 -40 40 -40 40 -40 -40 -40 40 -40 -40 -40 -40 -40 40 -40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 40 -40 40 -40 -40 -40 -40 40 -40 -40 -40 -40 -40 40 -40 -40 27 -27 -40 40 -40 -40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 14 -14 -40 40 -40 -40
+ -40 -40 40 -40 -40 -40 -40 40 -40 -40 -40 40 40 -40 -40 -40 -40 -40 -40 40 40 -40 -40 -40 40 -40 -40 -40 -40 40 -40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 -40 40 -40 40 -40 -40 -40 40 -40 -40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 -40 40 40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 40 -36 -40 -40 36 40 -40 -40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 40
+ -40 -40 40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 40 -40 -40 -40 -40 -40 40 -40 40 -40 -40 40 -40 -40 -40 -40 40 -40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 40 -40 -40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 -40 40 -40 -40 -40 40 40 -40 -40 -40 -40 40 -40 -40 -40 -40 -40 40 -40 40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 40 -40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 40 -40 -40 -40 -40 40 -40 -40 -40 22 -22 -40 40 -40 -40 40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 -40 -40 40
+ -40 -40 40 -40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 -40 -40 40 -40 -40 40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 40 -40 40 -40 -40 -40 -40 -40 -40 40 -40 -40 -6 6 -40 40 -40 -40 -40 -40 -40 40 3 -40 -40 -3 40 -40 -40 -40 40 -40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 -0 -40 -40 0 -40 -40 -40 40 40 -40 -40 -40 -40 -40 40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 -40 -40 40 -20 20 -40 -40 -40 -40 -40 40 -40 -40 -40 40 -40 40 -40 -40 8 -40 -40 -8 -40 40 -40 -40 -40 -40 40 -40 -4 4 -40 -40
diff -r 000000000000 -r f6e5bb5aa2f5 tool-data/faseq.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/faseq.loc.sample Mon May 19 12:34:22 2014 -0400
@@ -0,0 +1,26 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use genome fasta sequence files. The faseq.loc file has this format
+#(white space characters are TAB characters):
+#
+#build_name<TAB>directory_containing_fasta_files
+#
+#In that directory, each file is in fasta format and contains only one sequence. So,
+#for example, if you had hg18 fasta sequences stored in /depot/data2/galaxy/faseq/hg18,
+#then your faseq.loc entry would look like this:
+#
+#hg18 /depot/data2/galaxy/faseq/hg18
+#
+#and your /depot/data2/galaxy/faseq/hg18 directory would contain all of
+#your fasta sequence files (e.g.):
+#
+#-rw-r--r-- 1 wychung galaxy 138082251 2008-04-16 11:57 chr10.fa
+#-rw-r--r-- 1 wychung galaxy 115564 2008-04-16 11:57 chr10_random.fa
+#-rw-r--r-- 1 wychung galaxy 137141451 2008-04-16 11:58 chr11.fa
+#...etc...
+#Your faseq.loc file should include an entry per line for each set of fasta
+#sequence files you have stored. For example:
+#
+#hg18 /depot/data2/galaxy/faseq/hg18
+#mm9 /depot/data2/galaxy/faseq/mm9
+#Arabidopsis /depot/data2/galaxy/faseq/Arabidopsis
+#...etc...
diff -r 000000000000 -r f6e5bb5aa2f5 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Mon May 19 12:34:22 2014 -0400
@@ -0,0 +1,6 @@
+
+
+
+
+
+