Galaxy |

Changeset 15:1991c830504a (2016-11-09)

Previous changeset: 14:6dfa79a6908a (2016-04-05) | Next changeset: 16:0e51eef139ab (2017-10-11)

Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07

modified:
BlastParser_and_hits.py
BlastParser_and_hits.xml

diff -r 6dfa79a6908a -r 1991c830504a BlastParser_and_hits.py
--- a/BlastParser_and_hits.py Tue Apr 05 05:19:08 2016 -0400
+++ b/BlastParser_and_hits.py Wed Nov 09 11:32:32 2016 -0500

[

b'@@ -2,17 +2,17 @@\n # blastn tblastn blastx parser revised 14-1-2016.\n # drosofff@gmail.com\n \n-import sys\n import argparse\n from collections import defaultdict\n \n+\n def Parser():\n the_parser = argparse.ArgumentParser()\n the_parser.add_argument(\'--blast\', action="store", type=str, help="Path to the blast output (tabular format, 12 column)")\n the_parser.add_argument(\'--sequences\', action="store", type=str, help="Path to the fasta file with blasted sequences")\n the_parser.add_argument(\'--fastaOutput\', action="store", type=str, help="fasta output file of blast hits")\n the_parser.add_argument(\'--tabularOutput\', action="store", type=str, help="tabular output file of blast analysis")\n- the_parser.add_argument(\'--flanking\', action="store", type=int, help="number of flanking nucleotides added to the hit sequences") \n+ the_parser.add_argument(\'--flanking\', action="store", type=int, help="number of flanking nucleotides added to the hit sequences")\n the_parser.add_argument(\'--mode\', action="store", choices=["verbose", "short"], type=str, help="reporting (verbose) or not reporting (short) oases contigs")\n the_parser.add_argument(\'--filter_relativeCov\', action="store", type=float, default=0, help="filter out relative coverages below the specified ratio (float number)")\n the_parser.add_argument(\'--filter_maxScore\', action="store", type=float, default=0, help="filter out best BitScores below the specified float number")\n@@ -23,29 +23,32 @@\n the_parser.add_argument(\'--un_sequences\', action="store", type=str, help="sequences that have not been blast aligned")\n the_parser.add_argument(\'--dataset_name\', action="store", type=str, default="", help="the name of the dataset that has been parsed, to be reported in the output")\n args = the_parser.parse_args()\n- if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ):\n+ if not all((args.sequences, args.blast, args.fastaOutput, args.tabularOutput)):\n 
the_parser.error(\'argument(s) missing, call the -h option of the script\')\n if not args.flanking:\n args.flanking = 0\n return args\n \n+\n def median(lst):\n lst = sorted(lst)\n if len(lst) < 1:\n return None\n- if len(lst) %2 == 1:\n+ if len(lst) % 2 == 1:\n return lst[((len(lst)+1)/2)-1]\n- if len(lst) %2 == 0:\n+ if len(lst) % 2 == 0:\n return float(sum(lst[(len(lst)/2)-1:(len(lst)/2)+1]))/2.0\n \n+\n def mean(lst):\n if len(lst) < 1:\n return 0\n return sum(lst) / float(len(lst))\n \n-def getfasta (fastafile):\n+\n+def getfasta(fastafile):\n fastadic = {}\n- for line in open (fastafile):\n+ for line in open(fastafile):\n if line[0] == ">":\n header = line[1:-1]\n fastadic[header] = ""\n@@ -55,13 +58,15 @@\n fastadic[header] = "".join(fastadic[header].split("\\n"))\n return fastadic\n \n+\n def insert_newlines(string, every=60):\n lines = []\n for i in xrange(0, len(string), every):\n lines.append(string[i:i+every])\n return \'\\n\'.join(lines)\n- \n-def getblast (blastfile):\n+\n+\n+def getblast(blastfile):\n \'\'\'blastinfo [0]\tPercentage of identical matches\n blastinfo [1]\tAlignment length\n blastinfo [2]\tNumber of mismatches\n@@ -73,25 +78,26 @@\n blastinfo [8]\tExpectation value (E-value)\n blastinfo [9]\tBit score\n blastinfo [10]\tSubject length (NEED TO BE SPECIFIED WHEN RUNNING BLAST) \'\'\'\n- blastdic = defaultdict (dict) \n- for line in open (blastfile):\n+ blastdic = defaultdict(dict)\n+ for line in open(blastfile):\n fields = line[:-1].split("\\t")\n transcript = fields[0]\n subject = fields[1]\n- blastinfo = [float(fields[2]) ] # blastinfo[0]\n- blastinfo = blastinfo + [int(i) for i in fields[3:10] ] # blastinfo[1:8] insets 1 to 7\n- blastinfo.append(fields[10]) # blastinfo[8] E-value remains as a string type\n- blastinfo.append(float(fields[11])) # blastinfo[9] '..b'int >> F, "\\t".join(line)\n for header in results[subject]["HitDic"]:\n- print >> Fasta, ">%s\\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]) )\n- print 
>> Fasta, "" # final carriage return for the sequence\n+ print >> Fasta, ">%s\\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]))\n+ print >> Fasta, "" # final carriage return for the sequence\n F.close()\n Fasta.close()\n return blasted_transcripts\n- \n-def dispatch_sequences (fastadict, blasted_transcripts, matched_sequences, unmatched_sequences):\n+\n+\n+def dispatch_sequences(fastadict, blasted_transcripts, matched_sequences, unmatched_sequences):\n \'\'\'to output the sequences that matched and did not matched in the blast\'\'\'\n- F_matched = open (matched_sequences, "w")\n- F_unmatched = open (unmatched_sequences, "w")\n+ F_matched = open(matched_sequences, "w")\n+ F_unmatched = open(unmatched_sequences, "w")\n for transcript in fastadict:\n- if transcript in blasted_transcripts: # le list of blasted_transcripts is generated by the outputParsing function\n- print >> F_matched, ">%s\\n%s" % (transcript, insert_newlines(fastadict[transcript]) )\n+ if transcript in blasted_transcripts: # list of blasted_transcripts is generated by the outputParsing function\n+ print >> F_matched, ">%s\\n%s" % (transcript, insert_newlines(fastadict[transcript]))\n else:\n- print >> F_unmatched, ">%s\\n%s" % (transcript, insert_newlines(fastadict[transcript]) )\n+ print >> F_unmatched, ">%s\\n%s" % (transcript, insert_newlines(fastadict[transcript]))\n F_matched.close()\n F_unmatched.close()\n return\n \n-def __main__ ():\n+\n+def __main__():\n args = Parser()\n- fastadict = getfasta (args.sequences)\n- Xblastdict = getblast (args.blast)\n+ fastadict = getfasta(args.sequences)\n+ Xblastdict = getblast(args.blast)\n results = defaultdict(dict)\n for subject in Xblastdict:\n- results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking)\n- 
blasted_transcripts = outputParsing (args.dataset_name, args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict,\n- filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore,\n- filter_meanScore=args.filter_meanScore, filter_term_in=args.filter_term_in,\n- filter_term_out=args.filter_term_out, mode=args.mode)\n- dispatch_sequences (fastadict, blasted_transcripts, args.al_sequences, args.un_sequences)\n+ results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking)\n+ blasted_transcripts = outputParsing(args.dataset_name, args.tabularOutput,\n+ args.fastaOutput, results, Xblastdict, fastadict,\n+ filter_relativeCov=args.filter_relativeCov,\n+ filter_maxScore=args.filter_maxScore,\n+ filter_meanScore=args.filter_meanScore,\n+ filter_term_in=args.filter_term_in,\n+ filter_term_out=args.filter_term_out,\n+ mode=args.mode)\n+ dispatch_sequences(fastadict, blasted_transcripts, args.al_sequences, args.un_sequences)\n \n-if __name__=="__main__": __main__()\n\\ No newline at end of file\n+if __name__ == "__main__":\n+ __main__()\n'

diff -r 6dfa79a6908a -r 1991c830504a BlastParser_and_hits.xml
--- a/BlastParser_and_hits.xml Tue Apr 05 05:19:08 2016 -0400
+++ b/BlastParser_and_hits.xml Wed Nov 09 11:32:32 2016 -0500

[

@@ -1,12 +1,12 @@
<tool id="BlastParser_and_hits" name="Parse blast output and compile hits" version="2.4.3">
<description>for virus discovery</description>
<requirements></requirements>
-<command interpreter="python">
-BlastParser_and_hits.py
- --sequences $sequences
- --blast $blast
- --tabularOutput $tabularOutput
- --fastaOutput $fastaOutput
+<command><![CDATA[
+    python '$__tool_directory__'/BlastParser_and_hits.py
+ --sequences '$sequences'
+ --blast '$blast'
+ --tabularOutput '$tabularOutput'
+ --fastaOutput '$fastaOutput'
--flanking $flanking
--mode $mode
## Additional parameters.
@@ -17,17 +17,17 @@
         --filter_term_in "$additional_filters.filter_term_in"
         --filter_term_out "$additional_filters.filter_term_out"
     #end if
-    --al_sequences $al_sequences
-    --un_sequences $un_sequences
+    --al_sequences '$al_sequences'
+    --un_sequences '$un_sequences'
     --dataset_name "$blast.element_identifier"

-</command>
+    ]]></command>
<inputs>
<param name="sequences" type="data" format="fasta"  label="fasta sequences that have been blasted" />
<param name="blast" type="data" format="tabular" label="The blast output you wish to parse" />
- <param name="flanking" type="text" size="5" value= "5" label="Number of flanking nucleotides to add to hits for CAP3 assembly"/>
+ <param name="flanking" type="integer" value= "5" label="Number of flanking nucleotides to add to hits for CAP3 assembly"/>
<param name="mode" type="select" label="Extensive or compact  reporting mode" help="display (extensive)  or not (compact) the oases contigs">
-     <option value="verbose" default="true">extensive</option>
+     <option value="verbose" selected="true">extensive</option>
    <option value="short">compact</option>
</param>
     <conditional name="additional_filters">