Repository 'msp_blastparser_and_hits'
hg clone https://toolshed.g2.bx.psu.edu/repos/drosofff/msp_blastparser_and_hits

Changeset 15:1991c830504a (2016-11-09)
Previous changeset 14:6dfa79a6908a (2016-04-05) Next changeset 16:0e51eef139ab (2017-10-11)
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
modified:
BlastParser_and_hits.py
BlastParser_and_hits.xml
b
diff -r 6dfa79a6908a -r 1991c830504a BlastParser_and_hits.py
--- a/BlastParser_and_hits.py Tue Apr 05 05:19:08 2016 -0400
+++ b/BlastParser_and_hits.py Wed Nov 09 11:32:32 2016 -0500
[
b'@@ -2,17 +2,17 @@\n #  blastn tblastn blastx parser revised 14-1-2016.\n # drosofff@gmail.com\n \n-import sys\n import argparse\n from collections import defaultdict\n \n+\n def Parser():\n     the_parser = argparse.ArgumentParser()\n     the_parser.add_argument(\'--blast\', action="store", type=str, help="Path to the blast output (tabular format, 12 column)")\n     the_parser.add_argument(\'--sequences\', action="store", type=str, help="Path to the fasta file with blasted sequences")\n     the_parser.add_argument(\'--fastaOutput\', action="store", type=str, help="fasta output file of blast hits")\n     the_parser.add_argument(\'--tabularOutput\', action="store", type=str, help="tabular output file of blast analysis")\n-    the_parser.add_argument(\'--flanking\', action="store", type=int, help="number of flanking nucleotides added to the hit sequences") \n+    the_parser.add_argument(\'--flanking\', action="store", type=int, help="number of flanking nucleotides added to the hit sequences")\n     the_parser.add_argument(\'--mode\', action="store", choices=["verbose", "short"], type=str, help="reporting (verbose) or not reporting (short) oases contigs")\n     the_parser.add_argument(\'--filter_relativeCov\', action="store", type=float, default=0, help="filter out relative coverages below the specified ratio (float number)")\n     the_parser.add_argument(\'--filter_maxScore\', action="store", type=float, default=0, help="filter out best BitScores below the specified float number")\n@@ -23,29 +23,32 @@\n     the_parser.add_argument(\'--un_sequences\', action="store", type=str, help="sequences that have not been blast aligned")\n     the_parser.add_argument(\'--dataset_name\', action="store", type=str, default="", help="the name of the dataset that has been parsed, to be reported in the output")\n     args = the_parser.parse_args()\n-    if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ):\n+    if not all((args.sequences, args.blast, args.fastaOutput, args.tabularOutput)):\n         the_parser.error(\'argument(s) missing, call the -h option of the script\')\n     if not args.flanking:\n         args.flanking = 0\n     return args\n \n+\n def median(lst):\n     lst = sorted(lst)\n     if len(lst) < 1:\n             return None\n-    if len(lst) %2 == 1:\n+    if len(lst) % 2 == 1:\n             return lst[((len(lst)+1)/2)-1]\n-    if len(lst) %2 == 0:\n+    if len(lst) % 2 == 0:\n             return float(sum(lst[(len(lst)/2)-1:(len(lst)/2)+1]))/2.0\n \n+\n def mean(lst):\n     if len(lst) < 1:\n         return 0\n     return sum(lst) / float(len(lst))\n \n-def getfasta (fastafile):\n+\n+def getfasta(fastafile):\n     fastadic = {}\n-    for line in open (fastafile):\n+    for line in open(fastafile):\n         if line[0] == ">":\n             header = line[1:-1]\n             fastadic[header] = ""\n@@ -55,13 +58,15 @@\n         fastadic[header] = "".join(fastadic[header].split("\\n"))\n     return fastadic\n \n+\n def insert_newlines(string, every=60):\n     lines = []\n     for i in xrange(0, len(string), every):\n         lines.append(string[i:i+every])\n     return \'\\n\'.join(lines)\n-    \n-def getblast (blastfile):\n+\n+\n+def getblast(blastfile):\n     \'\'\'blastinfo [0]\tPercentage of identical matches\n        blastinfo [1]\tAlignment length\n        blastinfo [2]\tNumber of mismatches\n@@ -73,25 +78,26 @@\n        blastinfo [8]\tExpectation value (E-value)\n        blastinfo [9]\tBit score\n        blastinfo [10]\tSubject length (NEED TO BE SPECIFIED WHEN RUNNING BLAST) \'\'\'\n-    blastdic = defaultdict (dict) \n-    for line in open (blastfile):\n+    blastdic = defaultdict(dict)\n+    for line in open(blastfile):\n         fields = line[:-1].split("\\t")\n         transcript = fields[0]\n         subject = fields[1]\n-        blastinfo = [float(fields[2]) ] # blastinfo[0]\n-        blastinfo = blastinfo + [int(i) for i in fields[3:10] ] # blastinfo[1:8] insets 1 to 7\n-        blastinfo.append(fields[10]) # blastinfo[8] E-value remains as a string type\n-        blastinfo.append(float(fields[11])) # blastinfo[9] '..b'int >> F, "\\t".join(line)\n             for header in results[subject]["HitDic"]:\n-                print >> Fasta, ">%s\\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]) )\n-            print >> Fasta, "" # final carriage return for the sequence\n+                print >> Fasta, ">%s\\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]))\n+            print >> Fasta, ""  # final carriage return for the sequence\n     F.close()\n     Fasta.close()\n     return blasted_transcripts\n-        \n-def dispatch_sequences (fastadict, blasted_transcripts, matched_sequences, unmatched_sequences):\n+\n+\n+def dispatch_sequences(fastadict, blasted_transcripts, matched_sequences, unmatched_sequences):\n     \'\'\'to output the sequences that matched and did not matched in the blast\'\'\'\n-    F_matched = open (matched_sequences, "w")\n-    F_unmatched = open (unmatched_sequences, "w")\n+    F_matched = open(matched_sequences, "w")\n+    F_unmatched = open(unmatched_sequences, "w")\n     for transcript in fastadict:\n-        if transcript in blasted_transcripts: # le list of blasted_transcripts is generated by the outputParsing function\n-            print >> F_matched, ">%s\\n%s" % (transcript, insert_newlines(fastadict[transcript]) )\n+        if transcript in blasted_transcripts:  # list of blasted_transcripts is generated by the outputParsing function\n+            print >> F_matched, ">%s\\n%s" % (transcript, insert_newlines(fastadict[transcript]))\n         else:\n-            print >> F_unmatched, ">%s\\n%s" % (transcript, insert_newlines(fastadict[transcript]) )\n+            print >> F_unmatched, ">%s\\n%s" % (transcript, insert_newlines(fastadict[transcript]))\n     F_matched.close()\n     F_unmatched.close()\n     return\n \n-def __main__ ():\n+\n+def __main__():\n     args = Parser()\n-    fastadict = getfasta (args.sequences)\n-    Xblastdict = getblast (args.blast)\n+    fastadict = getfasta(args.sequences)\n+    Xblastdict = getblast(args.blast)\n     results = defaultdict(dict)\n     for subject in Xblastdict:\n-        results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"]  = subjectCoverage(fastadict, Xblastdict, subject, args.flanking)\n-    blasted_transcripts = outputParsing (args.dataset_name, args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict,\n-                                        filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore,\n-                                        filter_meanScore=args.filter_meanScore, filter_term_in=args.filter_term_in,\n-                                        filter_term_out=args.filter_term_out, mode=args.mode)\n-    dispatch_sequences (fastadict, blasted_transcripts, args.al_sequences, args.un_sequences)\n+        results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking)\n+    blasted_transcripts = outputParsing(args.dataset_name, args.tabularOutput,\n+                                        args.fastaOutput, results, Xblastdict, fastadict,\n+                                        filter_relativeCov=args.filter_relativeCov,\n+                                        filter_maxScore=args.filter_maxScore,\n+                                        filter_meanScore=args.filter_meanScore,\n+                                        filter_term_in=args.filter_term_in,\n+                                        filter_term_out=args.filter_term_out,\n+                                        mode=args.mode)\n+    dispatch_sequences(fastadict, blasted_transcripts, args.al_sequences, args.un_sequences)\n \n-if __name__=="__main__": __main__()\n\\ No newline at end of file\n+if __name__ == "__main__":\n+    __main__()\n'
b
diff -r 6dfa79a6908a -r 1991c830504a BlastParser_and_hits.xml
--- a/BlastParser_and_hits.xml Tue Apr 05 05:19:08 2016 -0400
+++ b/BlastParser_and_hits.xml Wed Nov 09 11:32:32 2016 -0500
[
@@ -1,12 +1,12 @@
 <tool id="BlastParser_and_hits" name="Parse blast output and compile hits" version="2.4.3">
 <description>for virus discovery</description>
 <requirements></requirements>
-<command interpreter="python">
-BlastParser_and_hits.py
- --sequences $sequences
- --blast $blast
- --tabularOutput $tabularOutput
- --fastaOutput $fastaOutput
+<command><![CDATA[
+    python '$__tool_directory__'/BlastParser_and_hits.py
+ --sequences '$sequences'
+ --blast '$blast'
+ --tabularOutput '$tabularOutput'
+ --fastaOutput '$fastaOutput'
  --flanking $flanking
  --mode $mode
  ## Additional parameters.
@@ -17,17 +17,17 @@
         --filter_term_in "$additional_filters.filter_term_in"
         --filter_term_out "$additional_filters.filter_term_out"
     #end if
-    --al_sequences $al_sequences
-    --un_sequences $un_sequences
+    --al_sequences '$al_sequences'
+    --un_sequences '$un_sequences'
     --dataset_name "$blast.element_identifier"
 
-</command>
+    ]]></command>
 <inputs>
  <param name="sequences" type="data" format="fasta"  label="fasta sequences that have been blasted" />
  <param name="blast" type="data" format="tabular" label="The blast output you wish to parse" />
- <param name="flanking" type="text" size="5" value= "5" label="Number of flanking nucleotides to add to hits for CAP3 assembly"/>
+ <param name="flanking" type="integer" value= "5" label="Number of flanking nucleotides to add to hits for CAP3 assembly"/>
  <param name="mode" type="select" label="Extensive or compact  reporting mode" help="display (extensive)  or not (compact) the oases contigs">
-     <option value="verbose" default="true">extensive</option>
+     <option value="verbose" selected="true">extensive</option>
      <option value="short">compact</option>
  </param>
     <conditional name="additional_filters">