Galaxy |

Changeset 7:d408ad12401a (2017-12-08)

Previous changeset 6:c43392559c47 (2017-12-08) Next changeset 8:f969be906cb5 (2022-05-17)

Commit message:
Uploading GitHub version of btyper

added:
btyper-2.0.3/btyper

diff -r c43392559c47 -r d408ad12401a btyper-2.0.3/btyper
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/btyper-2.0.3/btyper Fri Dec 08 11:39:12 2017 -0500

[

b'@@ -0,0 +1,905 @@\n+#!/usr/bin/env python\n+\n+# BTyper version 2.0.3\n+# Created by Laura Carroll\n+# lmc297@cornell.edu\n+\n+# import required packages\n+import argparse, sys, os, re, glob, collections\n+\t\n+\n+# parse arguments\n+parser = argparse.ArgumentParser(usage=\'btyper -i </path/to/input/file.extension> -o </path/to/desired/output_directory/> -t [input data format (seq, pe, se, sra, or sra-get)] [-other options]\')\n+parser.add_argument(\'-i\',\'--input\', nargs=\'+\', help=\'Enter the path to the Bacillus cereus group sequence data you would like to input, or enter an SRA accession number\',required=True)\n+parser.add_argument(\'-o\', \'--output\', nargs=\'+\', help=\'Specify a path to your desired output directory\',required=True)\n+parser.add_argument(\'-t\', \'--type\', nargs=\'+\', help=\'Specify your type of data: seq for genomes or contigs in fasta format, pe for paired-end Illumina reads, se for single-end Illumina reads, sra for a sra file, or sra-get with an SRA accession number\',required=True)\n+parser.add_argument(\'--draft_genome\', action=\'store_true\', default=False, help=\'Optional argument for use with contigs in fasta format; concatenates draft genome contigs into pseudochromosome\')\n+parser.add_argument(\'-v\', \'--virulence\', nargs=\'?\', default=True, help=\'Optional argument, True or False; perform virulence gene typing; default=True\')\n+parser.add_argument(\'-m\', \'--mlst\', nargs=\'?\', default=True, help=\'Optional argument, True or False; perform MLST using Bacillus cereus MLST database; default=True\')\n+parser.add_argument(\'-r\', \'--rpoB\', nargs=\'?\', default=True, help=\'Optional argument, True or False; perform rpoB typing; default=True\')\n+parser.add_argument(\'-p\', \'--panC\', nargs=\'?\', default=True, help=\'Optional argument, True or False; perform panC typing; default=True\')\n+parser.add_argument(\'-s\', nargs=\'?\', default=False, help=\'Optional argument, True or False; BLAST 16s DNA sequence; not recommended for inferring species or pathogenicity; default=False\')\n+parser.add_argument(\'--spades_m\', nargs=\'?\', default=250, help=\'Optional argument for use with Illumina reads; integer; set SPAdes memory limit -m/--memory option in Gb; default is 250 Gb, the default for SPAdes\')\n+parser.add_argument(\'--spades_t\', nargs=\'?\', default=16, help=\'Optional argument for use with Illumina reads; integer; set number of threads to use for SPAdes -t/--threads option; default is 16, the default for SPAdes\')\n+parser.add_argument(\'--spades_k\', nargs=\'?\', default=77, help=\'Optional argument for use with Illumina reads; comma-separated list of integers; set k-mer sizes to use with SPAdes -k option; default is 77\')\n+parser.add_argument(\'-v_db\', \'--virulence_database\', nargs=\'?\', default="aa", help=\'Optional argument for use with -v/--virulence option; specify virulence database to be used: nuc for nucleotide database or aa for amino acid database; default=aa\')\n+parser.add_argument(\'-nuc_p\', \'--nucleotide_p\', nargs=\'?\',default=75, help=\'Optional argument for use with -v/--virulence option and nucleotide database -v_db nuc option; integer between 0 and 100; minimum percent nucleotide identity for virulence gene detection; default=75\')\n+parser.add_argument(\'-nuc_q\',\'--nucleotide_q\', nargs=\'?\',default=90, help=\'Optional argument for use with -v/--virulence option and nucleotide database -v_db nuc option; integer between 0 and 100; minimum percent coverage for virulence gene detection; default=90\')\n+parser.add_argument(\'-aa_p\',\'--amino_acid_p\',nargs=\'?\',default=50, help=\'Optional argument for use with -v/--virulence option and amino acid database -v_db aa option; integer between 0 and 100; minimum percent amino acid identity for virulence gene detection; default=50\')\n+parser.add_argument(\'-aa_q\',\'--amino_acid_q\', nargs=\'?\',default=70, help=\'Optional argument for use with -v/--virulence option and amino acid database -v_db aa option; integer between 0 and 100; minimum percent coverage for virulence gene detection; default=70\')\n+parser.add_argument(\'-e\', \'--evalue\', nargs=\'?\', def'..b't_xml(newseq=dictionaries,argdict=mydb,query_path=query_path,task=task,shorttask=shorttask,evalue_thresh=evalue_thresh,pident_thresh=pident_thresh,qcov_thresh=qcov_thresh)\n+\t\t\tfor f in glob.glob(oarg+"btyper_final_results/*_final_results.txt"):\n+\t\t\t\tbetween_sections(finalfile_string=f)\n+\t\texcept UnboundLocalError:\n+\t\t\tprint "No sequences found for "+shorttask\n+\t\n+\t# if performing mlst:\t\n+\tif marg=="True":\n+\t\tmlst_genes=["glp.fas","gmk.fas","ilv.fas","pta.fas","pur.fas","pyc.fas","tpi.fas"]\n+\t\t# get best-matching AT for each MLST gene\n+\t\tfor mlst in mlst_genes:\n+\t\t\tquery_path=btyper_path+"seq_mlst_db/"+mlst\n+\t\t\tmydb=dbparse(query_path)\n+\t\t\ttask="Predicted MLST Profile:"\n+\t\t\tshorttask="mlst"\n+\t\t\tevalue_thresh=float(earg)\n+\t\t\tpident_thresh=0\n+\t\t\tqcov_thresh=0\n+\t\t\t# run AT for each gene, if sequence deteced\n+\t\t\ttry:\n+\t\t\t\tmake_blast_xml(newseq=dictionaries,argdict=mydb,query_path=query_path,task=task,shorttask=shorttask,evalue_thresh=evalue_thresh,pident_thresh=pident_thresh,qcov_thresh=qcov_thresh)\n+\t\t\texcept UnboundLocalError:\n+\t\t\t\tprint "No sequences found for "+mlst\n+\t\t# loop through isolatefiles\n+\t\tfor root, dirs, files in os.walk(oarg+"btyper_final_results/isolatefiles/"):\n+\t\t\tfor d in dirs:\n+\t\t\t\tdirroot=d.split("_results")[0]\n+\t\t\t\t# open mlst results file, and get ST from ATs\n+\t\t\t\ttry:\t\n+\t\t\t\t\tnewf=open(oarg+"btyper_final_results/isolatefiles/"+d+"/"+dirroot.strip()+"_mlst_results.txt","r")\n+\t\t\t\t\tfinalfile_string=oarg+"btyper_final_results/"+dirroot.strip()+"_final_results.txt"\n+\t\t\t\t\tff=open(finalfile_string,"r")\n+\t\t\t\t\tflines=ff.readlines()\t\n+\t\t\t\t\tif not any("Predicted MLST Profile" in fl.strip() for fl in flines):\n+\t\t\t\t\t\tget_st(mlst_infile=newf,st_file=btyper_path+"seq_mlst_db/b_cereus_mlst_db.txt",finalfile_string=finalfile_string,mlst_genes=mlst_genes)\n+\t\t\t\texcept IOError:\n+\t\t\t\t\tprint "No sequences found for "+shorttask\n+\n+\t# if performing rpoB typing:\n+\tif rarg=="True":\n+\t\t# define database\n+\t\tquery_path=btyper_path+"seq_rpoB_db/rpobdatabase08122015.fa"\n+\t\tmydb=dbparse(query_path)\n+\t\ttask="Predicted rpoB Allelic Type:"\n+\t\tshorttask="rpoB"\n+\t\tevalue_thresh=float(earg)\n+\t\tpident_thresh=0\n+\t\tqcov_thresh=0\n+\t\t# perform rpoB typing, if gene is present\n+\t\ttry:\n+\t\t\tmake_blast_xml(newseq=dictionaries,argdict=mydb,query_path=query_path,task=task,shorttask=shorttask,evalue_thresh=evalue_thresh,pident_thresh=pident_thresh,qcov_thresh=qcov_thresh)\n+\t\t\tfor f in glob.glob(oarg+"btyper_final_results/*_final_results.txt"):\n+\t\t\t\tbetween_sections(finalfile_string=f)\n+\t\texcept UnboundLocalError:\n+\t\t\tprint "No sequences found for "+shorttask\n+\t\n+\t# if performing 16s typing:\n+\tif sarg=="True":\n+\t\t# define database\n+\t\tquery_path=btyper_path+"seq_16s_db/b_cereus_group_16s_db.fasta"\n+\t\tmydb=dbparse(query_path)\n+\t\ttask="Predicted 16s Type"\n+\t\tshorttask="16s"\n+\t\tevalue_thresh=float(earg)\n+\t\tpident_thresh=0\n+\t\tqcov_thresh=0\n+\t\t# perform 16s typing, if gene is present\n+\t\ttry:\n+\t\t\tmake_blast_xml(newseq=dictionaries,argdict=mydb,query_path=query_path,task=task,shorttask=shorttask,evalue_thresh=evalue_thresh,pident_thresh=pident_thresh,qcov_thresh=qcov_thresh)\n+\t\t\tfor f in glob.glob(oarg+"btyper_final_results/*_final_results.txt"):\n+\t\t\t\tbetween_sections(finalfile_string=f)\n+\t\texcept UnboundLocalError:\n+\t\t\tprint "No sequences found for "+shorttask\n+\n+print "Typing complete...how neat is that?"\n+print ""\n+print "Thank you for using BTyper! For more fun, take your output files to BMiner, BTyper\'s companion application for data aggregation and visualization."\n+print ""\n+print "To cite BTyper and/or BMiner, please use the following:"\n+print "Carroll, Laura M., Jasna Kovac, Rachel A. Miller, Martin Wiedmann. 2017. Rapid, high-throughput identification of anthrax-causing and emetic Bacillus cereus group genome assemblies using BTyper, a computational tool for virulence-based classification of Bacillus cereus group isolates using nucleotide sequencing data. Applied and Environmental Microbiology 2017 Jun 16. pii: AEM.01096-17. doi: 10.1128/AEM.01096-17."\n+\n+\n'