Previous changeset 5:c3afcb547fee (2017-12-08) Next changeset 7:d408ad12401a (2017-12-08) |
Commit message:
Deleted selected files |
removed:
btyper-2.0.3/btyper |
b |
diff -r c3afcb547fee -r c43392559c47 btyper-2.0.3/btyper --- a/btyper-2.0.3/btyper Fri Dec 08 10:37:52 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,905 +0,0 @@\n-#!/usr/bin/env python\n-\n-# BTyper version 2.0.3\n-# Created by Laura Carroll\n-# lmc297@cornell.edu\n-\n-# import required packages\n-import argparse, sys, os, re, glob, collections\n-\t\n-\n-# parse arguments\n-parser = argparse.ArgumentParser(usage=\'btyper -i </path/to/input/file.extension> -o </path/to/desired/output_directory/> -t [input data format (seq, pe, se, sra, or sra-get)] [-other options]\')\n-parser.add_argument(\'-i\',\'--input\', nargs=\'+\', help=\'Enter the path to the Bacillus cereus group sequence data you would like to input, or enter an SRA accession number\',required=True)\n-parser.add_argument(\'-o\', \'--output\', nargs=\'+\', help=\'Specify a path to your desired output directory\',required=True)\n-parser.add_argument(\'-t\', \'--type\', nargs=\'+\', help=\'Specify your type of data: seq for genomes or contigs in fasta format, pe for paired-end Illumina reads, se for single-end Illumina reads, sra for a sra file, or sra-get with an SRA accession number\',required=True)\n-parser.add_argument(\'--draft_genome\', action=\'store_true\', default=False, help=\'Optional argument for use with contigs in fasta format; concatenates draft genome contigs into pseudochromosome\')\n-parser.add_argument(\'-v\', \'--virulence\', nargs=\'?\', default=True, help=\'Optional argument, True or False; perform virulence gene typing; default=True\')\n-parser.add_argument(\'-m\', \'--mlst\', nargs=\'?\', default=True, help=\'Optional argument, True or False; perform MLST using Bacillus cereus MLST database; default=True\')\n-parser.add_argument(\'-r\', \'--rpoB\', nargs=\'?\', default=True, help=\'Optional argument, True or False; perform rpoB typing; default=True\')\n-parser.add_argument(\'-p\', \'--panC\', nargs=\'?\', default=True, help=\'Optional argument, True or False; perform panC typing; default=True\')\n-parser.add_argument(\'-s\', nargs=\'?\', default=False, help=\'Optional argument, True or False; BLAST 16s DNA sequence; not recommended for inferring species or pathogenicity; default=False\')\n-parser.add_argument(\'--spades_m\', nargs=\'?\', default=250, help=\'Optional argument for use with Illumina reads; integer; set SPAdes memory limit -m/--memory option in Gb; default is 250 Gb, the default for SPAdes\')\n-parser.add_argument(\'--spades_t\', nargs=\'?\', default=16, help=\'Optional argument for use with Illumina reads; integer; set number of threads to use for SPAdes -t/--threads option; default is 16, the default for SPAdes\')\n-parser.add_argument(\'--spades_k\', nargs=\'?\', default=77, help=\'Optional argument for use with Illumina reads; comma-separated list of integers; set k-mer sizes to use with SPAdes -k option; default is 77\')\n-parser.add_argument(\'-v_db\', \'--virulence_database\', nargs=\'?\', default="aa", help=\'Optional argument for use with -v/--virulence option; specify virulence database to be used: nuc for nucleotide database or aa for amino acid database; default=aa\')\n-parser.add_argument(\'-nuc_p\', \'--nucleotide_p\', nargs=\'?\',default=75, help=\'Optional argument for use with -v/--virulence option and nucleotide database -v_db nuc option; integer between 0 and 100; minimum percent nucleotide identity for virulence gene detection; default=75\')\n-parser.add_argument(\'-nuc_q\',\'--nucleotide_q\', nargs=\'?\',default=90, help=\'Optional argument for use with -v/--virulence option and nucleotide database -v_db nuc option; integer between 0 and 100; minimum percent coverage for virulence gene detection; default=90\')\n-parser.add_argument(\'-aa_p\',\'--amino_acid_p\',nargs=\'?\',default=50, help=\'Optional argument for use with -v/--virulence option and amino acid database -v_db aa option; integer between 0 and 100; minimum percent amino acid identity for virulence gene detection; default=50\')\n-parser.add_argument(\'-aa_q\',\'--amino_acid_q\', nargs=\'?\',default=70, help=\'Optional argument for use with -v/--virulence option and amino acid database -v_db aa option; integer between 0 and 100; minimum percent coverage for virulence gene detection; default=70\')\n-parser.add_argument(\'-e\', \'--evalue\', nargs=\'?\', def'..b't_xml(newseq=dictionaries,argdict=mydb,query_path=query_path,task=task,shorttask=shorttask,evalue_thresh=evalue_thresh,pident_thresh=pident_thresh,qcov_thresh=qcov_thresh)\n-\t\t\tfor f in glob.glob(oarg+"btyper_final_results/*_final_results.txt"):\n-\t\t\t\tbetween_sections(finalfile_string=f)\n-\t\texcept UnboundLocalError:\n-\t\t\tprint "No sequences found for "+shorttask\n-\t\n-\t# if performing mlst:\t\n-\tif marg=="True":\n-\t\tmlst_genes=["glp.fas","gmk.fas","ilv.fas","pta.fas","pur.fas","pyc.fas","tpi.fas"]\n-\t\t# get best-matching AT for each MLST gene\n-\t\tfor mlst in mlst_genes:\n-\t\t\tquery_path=btyper_path+"seq_mlst_db/"+mlst\n-\t\t\tmydb=dbparse(query_path)\n-\t\t\ttask="Predicted MLST Profile:"\n-\t\t\tshorttask="mlst"\n-\t\t\tevalue_thresh=float(earg)\n-\t\t\tpident_thresh=0\n-\t\t\tqcov_thresh=0\n-\t\t\t# run AT for each gene, if sequence deteced\n-\t\t\ttry:\n-\t\t\t\tmake_blast_xml(newseq=dictionaries,argdict=mydb,query_path=query_path,task=task,shorttask=shorttask,evalue_thresh=evalue_thresh,pident_thresh=pident_thresh,qcov_thresh=qcov_thresh)\n-\t\t\texcept UnboundLocalError:\n-\t\t\t\tprint "No sequences found for "+mlst\n-\t\t# loop through isolatefiles\n-\t\tfor root, dirs, files in os.walk(oarg+"btyper_final_results/isolatefiles/"):\n-\t\t\tfor d in dirs:\n-\t\t\t\tdirroot=d.split("_results")[0]\n-\t\t\t\t# open mlst results file, and get ST from ATs\n-\t\t\t\ttry:\t\n-\t\t\t\t\tnewf=open(oarg+"btyper_final_results/isolatefiles/"+d+"/"+dirroot.strip()+"_mlst_results.txt","r")\n-\t\t\t\t\tfinalfile_string=oarg+"btyper_final_results/"+dirroot.strip()+"_final_results.txt"\n-\t\t\t\t\tff=open(finalfile_string,"r")\n-\t\t\t\t\tflines=ff.readlines()\t\n-\t\t\t\t\tif not any("Predicted MLST Profile" in fl.strip() for fl in flines):\n-\t\t\t\t\t\tget_st(mlst_infile=newf,st_file=btyper_path+"seq_mlst_db/b_cereus_mlst_db.txt",finalfile_string=finalfile_string,mlst_genes=mlst_genes)\n-\t\t\t\texcept IOError:\n-\t\t\t\t\tprint "No sequences found for "+shorttask\n-\n-\t# if performing rpoB typing:\n-\tif rarg=="True":\n-\t\t# define database\n-\t\tquery_path=btyper_path+"seq_rpoB_db/rpobdatabase08122015.fa"\n-\t\tmydb=dbparse(query_path)\n-\t\ttask="Predicted rpoB Allelic Type:"\n-\t\tshorttask="rpoB"\n-\t\tevalue_thresh=float(earg)\n-\t\tpident_thresh=0\n-\t\tqcov_thresh=0\n-\t\t# perform rpoB typing, if gene is present\n-\t\ttry:\n-\t\t\tmake_blast_xml(newseq=dictionaries,argdict=mydb,query_path=query_path,task=task,shorttask=shorttask,evalue_thresh=evalue_thresh,pident_thresh=pident_thresh,qcov_thresh=qcov_thresh)\n-\t\t\tfor f in glob.glob(oarg+"btyper_final_results/*_final_results.txt"):\n-\t\t\t\tbetween_sections(finalfile_string=f)\n-\t\texcept UnboundLocalError:\n-\t\t\tprint "No sequences found for "+shorttask\n-\t\n-\t# if performing 16s typing:\n-\tif sarg=="True":\n-\t\t# define database\n-\t\tquery_path=btyper_path+"seq_16s_db/b_cereus_group_16s_db.fasta"\n-\t\tmydb=dbparse(query_path)\n-\t\ttask="Predicted 16s Type"\n-\t\tshorttask="16s"\n-\t\tevalue_thresh=float(earg)\n-\t\tpident_thresh=0\n-\t\tqcov_thresh=0\n-\t\t# perform 16s typing, if gene is present\n-\t\ttry:\n-\t\t\tmake_blast_xml(newseq=dictionaries,argdict=mydb,query_path=query_path,task=task,shorttask=shorttask,evalue_thresh=evalue_thresh,pident_thresh=pident_thresh,qcov_thresh=qcov_thresh)\n-\t\t\tfor f in glob.glob(oarg+"btyper_final_results/*_final_results.txt"):\n-\t\t\t\tbetween_sections(finalfile_string=f)\n-\t\texcept UnboundLocalError:\n-\t\t\tprint "No sequences found for "+shorttask\n-\n-print "Typing complete...how neat is that?"\n-print ""\n-print "Thank you for using BTyper! For more fun, take your output files to BMiner, BTyper\'s companion application for data aggregation and visualization."\n-print ""\n-print "To cite BTyper and/or BMiner, please use the following:"\n-print "Carroll, Laura M., Jasna Kovac, Rachel A. Miller, Martin Wiedmann. 2017. Rapid, high-throughput identification of anthrax-causing and emetic Bacillus cereus group genome assemblies using BTyper, a computational tool for virulence-based classification of Bacillus cereus group isolates using nucleotide sequencing data. Applied and Environmental Microbiology 2017 Jun 16. pii: AEM.01096-17. doi: 10.1128/AEM.01096-17."\n-\n-\n' |