Previous changeset 14:6dfa79a6908a (2016-04-05) Next changeset 16:0e51eef139ab (2017-10-11) |
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07 |
modified:
BlastParser_and_hits.py BlastParser_and_hits.xml |
b |
diff -r 6dfa79a6908a -r 1991c830504a BlastParser_and_hits.py --- a/BlastParser_and_hits.py Tue Apr 05 05:19:08 2016 -0400 +++ b/BlastParser_and_hits.py Wed Nov 09 11:32:32 2016 -0500 |
[ |
b'@@ -2,17 +2,17 @@\n # blastn tblastn blastx parser revised 14-1-2016.\n # drosofff@gmail.com\n \n-import sys\n import argparse\n from collections import defaultdict\n \n+\n def Parser():\n the_parser = argparse.ArgumentParser()\n the_parser.add_argument(\'--blast\', action="store", type=str, help="Path to the blast output (tabular format, 12 column)")\n the_parser.add_argument(\'--sequences\', action="store", type=str, help="Path to the fasta file with blasted sequences")\n the_parser.add_argument(\'--fastaOutput\', action="store", type=str, help="fasta output file of blast hits")\n the_parser.add_argument(\'--tabularOutput\', action="store", type=str, help="tabular output file of blast analysis")\n- the_parser.add_argument(\'--flanking\', action="store", type=int, help="number of flanking nucleotides added to the hit sequences") \n+ the_parser.add_argument(\'--flanking\', action="store", type=int, help="number of flanking nucleotides added to the hit sequences")\n the_parser.add_argument(\'--mode\', action="store", choices=["verbose", "short"], type=str, help="reporting (verbose) or not reporting (short) oases contigs")\n the_parser.add_argument(\'--filter_relativeCov\', action="store", type=float, default=0, help="filter out relative coverages below the specified ratio (float number)")\n the_parser.add_argument(\'--filter_maxScore\', action="store", type=float, default=0, help="filter out best BitScores below the specified float number")\n@@ -23,29 +23,32 @@\n the_parser.add_argument(\'--un_sequences\', action="store", type=str, help="sequences that have not been blast aligned")\n the_parser.add_argument(\'--dataset_name\', action="store", type=str, default="", help="the name of the dataset that has been parsed, to be reported in the output")\n args = the_parser.parse_args()\n- if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ):\n+ if not all((args.sequences, args.blast, args.fastaOutput, args.tabularOutput)):\n the_parser.error(\'argument(s) missing, call the -h option of the script\')\n if not args.flanking:\n args.flanking = 0\n return args\n \n+\n def median(lst):\n lst = sorted(lst)\n if len(lst) < 1:\n return None\n- if len(lst) %2 == 1:\n+ if len(lst) % 2 == 1:\n return lst[((len(lst)+1)/2)-1]\n- if len(lst) %2 == 0:\n+ if len(lst) % 2 == 0:\n return float(sum(lst[(len(lst)/2)-1:(len(lst)/2)+1]))/2.0\n \n+\n def mean(lst):\n if len(lst) < 1:\n return 0\n return sum(lst) / float(len(lst))\n \n-def getfasta (fastafile):\n+\n+def getfasta(fastafile):\n fastadic = {}\n- for line in open (fastafile):\n+ for line in open(fastafile):\n if line[0] == ">":\n header = line[1:-1]\n fastadic[header] = ""\n@@ -55,13 +58,15 @@\n fastadic[header] = "".join(fastadic[header].split("\\n"))\n return fastadic\n \n+\n def insert_newlines(string, every=60):\n lines = []\n for i in xrange(0, len(string), every):\n lines.append(string[i:i+every])\n return \'\\n\'.join(lines)\n- \n-def getblast (blastfile):\n+\n+\n+def getblast(blastfile):\n \'\'\'blastinfo [0]\tPercentage of identical matches\n blastinfo [1]\tAlignment length\n blastinfo [2]\tNumber of mismatches\n@@ -73,25 +78,26 @@\n blastinfo [8]\tExpectation value (E-value)\n blastinfo [9]\tBit score\n blastinfo [10]\tSubject length (NEED TO BE SPECIFIED WHEN RUNNING BLAST) \'\'\'\n- blastdic = defaultdict (dict) \n- for line in open (blastfile):\n+ blastdic = defaultdict(dict)\n+ for line in open(blastfile):\n fields = line[:-1].split("\\t")\n transcript = fields[0]\n subject = fields[1]\n- blastinfo = [float(fields[2]) ] # blastinfo[0]\n- blastinfo = blastinfo + [int(i) for i in fields[3:10] ] # blastinfo[1:8] insets 1 to 7\n- blastinfo.append(fields[10]) # blastinfo[8] E-value remains as a string type\n- blastinfo.append(float(fields[11])) # blastinfo[9] '..b'int >> F, "\\t".join(line)\n for header in results[subject]["HitDic"]:\n- print >> Fasta, ">%s\\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]) )\n- print >> Fasta, "" # final carriage return for the sequence\n+ print >> Fasta, ">%s\\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]))\n+ print >> Fasta, "" # final carriage return for the sequence\n F.close()\n Fasta.close()\n return blasted_transcripts\n- \n-def dispatch_sequences (fastadict, blasted_transcripts, matched_sequences, unmatched_sequences):\n+\n+\n+def dispatch_sequences(fastadict, blasted_transcripts, matched_sequences, unmatched_sequences):\n \'\'\'to output the sequences that matched and did not matched in the blast\'\'\'\n- F_matched = open (matched_sequences, "w")\n- F_unmatched = open (unmatched_sequences, "w")\n+ F_matched = open(matched_sequences, "w")\n+ F_unmatched = open(unmatched_sequences, "w")\n for transcript in fastadict:\n- if transcript in blasted_transcripts: # le list of blasted_transcripts is generated by the outputParsing function\n- print >> F_matched, ">%s\\n%s" % (transcript, insert_newlines(fastadict[transcript]) )\n+ if transcript in blasted_transcripts: # list of blasted_transcripts is generated by the outputParsing function\n+ print >> F_matched, ">%s\\n%s" % (transcript, insert_newlines(fastadict[transcript]))\n else:\n- print >> F_unmatched, ">%s\\n%s" % (transcript, insert_newlines(fastadict[transcript]) )\n+ print >> F_unmatched, ">%s\\n%s" % (transcript, insert_newlines(fastadict[transcript]))\n F_matched.close()\n F_unmatched.close()\n return\n \n-def __main__ ():\n+\n+def __main__():\n args = Parser()\n- fastadict = getfasta (args.sequences)\n- Xblastdict = getblast (args.blast)\n+ fastadict = getfasta(args.sequences)\n+ Xblastdict = getblast(args.blast)\n results = defaultdict(dict)\n for subject in Xblastdict:\n- results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking)\n- blasted_transcripts = outputParsing (args.dataset_name, args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict,\n- filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore,\n- filter_meanScore=args.filter_meanScore, filter_term_in=args.filter_term_in,\n- filter_term_out=args.filter_term_out, mode=args.mode)\n- dispatch_sequences (fastadict, blasted_transcripts, args.al_sequences, args.un_sequences)\n+ results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking)\n+ blasted_transcripts = outputParsing(args.dataset_name, args.tabularOutput,\n+ args.fastaOutput, results, Xblastdict, fastadict,\n+ filter_relativeCov=args.filter_relativeCov,\n+ filter_maxScore=args.filter_maxScore,\n+ filter_meanScore=args.filter_meanScore,\n+ filter_term_in=args.filter_term_in,\n+ filter_term_out=args.filter_term_out,\n+ mode=args.mode)\n+ dispatch_sequences(fastadict, blasted_transcripts, args.al_sequences, args.un_sequences)\n \n-if __name__=="__main__": __main__()\n\\ No newline at end of file\n+if __name__ == "__main__":\n+ __main__()\n' |
b |
diff -r 6dfa79a6908a -r 1991c830504a BlastParser_and_hits.xml --- a/BlastParser_and_hits.xml Tue Apr 05 05:19:08 2016 -0400 +++ b/BlastParser_and_hits.xml Wed Nov 09 11:32:32 2016 -0500 |
[ |
@@ -1,12 +1,12 @@ <tool id="BlastParser_and_hits" name="Parse blast output and compile hits" version="2.4.3"> <description>for virus discovery</description> <requirements></requirements> -<command interpreter="python"> -BlastParser_and_hits.py - --sequences $sequences - --blast $blast - --tabularOutput $tabularOutput - --fastaOutput $fastaOutput +<command><![CDATA[ + python '$__tool_directory__'/BlastParser_and_hits.py + --sequences '$sequences' + --blast '$blast' + --tabularOutput '$tabularOutput' + --fastaOutput '$fastaOutput' --flanking $flanking --mode $mode ## Additional parameters. @@ -17,17 +17,17 @@ --filter_term_in "$additional_filters.filter_term_in" --filter_term_out "$additional_filters.filter_term_out" #end if - --al_sequences $al_sequences - --un_sequences $un_sequences + --al_sequences '$al_sequences' + --un_sequences '$un_sequences' --dataset_name "$blast.element_identifier" -</command> + ]]></command> <inputs> <param name="sequences" type="data" format="fasta" label="fasta sequences that have been blasted" /> <param name="blast" type="data" format="tabular" label="The blast output you wish to parse" /> - <param name="flanking" type="text" size="5" value= "5" label="Number of flanking nucleotides to add to hits for CAP3 assembly"/> + <param name="flanking" type="integer" value= "5" label="Number of flanking nucleotides to add to hits for CAP3 assembly"/> <param name="mode" type="select" label="Extensive or compact reporting mode" help="display (extensive) or not (compact) the oases contigs"> - <option value="verbose" default="true">extensive</option> + <option value="verbose" selected="true">extensive</option> <option value="short">compact</option> </param> <conditional name="additional_filters"> |