Previous changeset 2:1bb80c25b379 (2015-09-24) Next changeset 4:34ea8f113018 (2015-09-24) |
Commit message:
Uploaded |
added:
velvet.pl |
b |
diff -r 1bb80c25b379 -r c979f8682b21 velvet.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/velvet.pl Thu Sep 24 10:42:55 2015 -0400 |
[ |
b"@@ -0,0 +1,432 @@\n+#!/usr/bin/perl\n+use strict;\n+use warnings;\n+use Logger::Logger;\n+use Getopt::Long;\n+use Tools::Fasta;\n+use Pod::Usage;\n+\n+my $directory;\n+my $hashLength;\n+my $fileString;\n+my $performMetagenomicAssembly = 1;\n+my $man;\n+my $help;\n+\n+my $velvethOptions = {};\n+my $velvetgOptions = {};\n+my $velvetgmOptions = {};\n+my $metaVelvetgOptions = {};\n+my $lastOptFile ='';\n+\n+GetOptions(\n+\n+ 'd|directory=s' => \\$directory,\n+ 'hash_length=s' => \\$hashLength,\n+ 'm|meta!' => \\$performMetagenomicAssembly,\n+ 'man' => \\$man,\n+ 'h|help' => \\$help,\n+ 'short:s{,}' => sub {registerVelvetFileOptionHash(\\$fileString, \\$lastOptFile, @_)},\n+ 'short2:s{,}' => sub {registerVelvetFileOptionHash(\\$fileString, \\$lastOptFile, @_)},\n+ 'short3:s{,}' => sub {registerVelvetFileOptionHash(\\$fileString, \\$lastOptFile, @_)},\n+ 'shortPaired:s{,}' => sub {registerVelvetFileOptionHash(\\$fileString, \\$lastOptFile, @_)},\n+ 'shortPaired2:s{,}' => sub {registerVelvetFileOptionHash(\\$fileString, \\$lastOptFile, @_)},\n+ 'shortPaired3:s{,}' => sub {registerVelvetFileOptionHash(\\$fileString, \\$lastOptFile, @_)},\n+ 'long:s{,}' => sub {registerVelvetFileOptionHash(\\$fileString, \\$lastOptFile, @_)},\n+ 'longPaired:s{,}' => sub {registerVelvetFileOptionHash(\\$fileString, \\$lastOptFile, @_)},\n+ 'reference:s{,}' => sub {registerVelvetFileOptionHash(\\$fileString, \\$lastOptFile, @_)},\n+ 'fasta:s{,}' => sub {registerVelvetFileOptionHash(\\$fileString, \\$lastOptFile, @_)},\n+ 'fastq:s{,}' => sub {registerVelvetFileOptionHash(\\$fileString, \\$lastOptFile, @_)},\n+ 'raw:s{,}' => sub {registerVelvetFileOptionHash(\\$fileString, \\$lastOptFile, @_)},\n+ 'fasta_gz:s{,}' => sub {$_[0] =~ s/_/./; registerVelvetFileOptionHash(\\$fileString, \\$lastOptFile, @_)},\n+ 'fastq_gz:s{,}' => sub {$_[0] =~ s/_/./; registerVelvetFileOptionHash(\\$fileString, \\$lastOptFile, @_)},\n+ 'raw_gz:s{,}' => sub {$_[0] =~ s/_/./; registerVelvetFileOptionHash(\\$fileString, \\$lastOptFile, @_)},\n+ 'sam:s{,}' => sub {registerVelvetFileOptionHash(\\$fileString, \\$lastOptFile, @_)},\n+ 'bam:s{,}' => sub {registerVelvetFileOptionHash(\\$fileString, \\$lastOptFile, @_)},\n+ 'fmtAuto:s{,}' => sub {registerVelvetFileOptionHash(\\$fileString, \\$lastOptFile, @_)},\n+ 'interleaved:s{,}' => sub {registerVelvetFileOptionHash(\\$fileString, \\$lastOptFile, @_)},\n+ 'separate:s{,}' => sub {registerVelvetFileOptionHash(\\$fileString, \\$lastOptFile, @_)},\n+ 'strand_specific' => sub{registerOnOffOption($velvethOptions, @_)},\n+ 'reuse_Sequences:s' => sub{registerOnOffOption($velvethOptions, @_)},\n+ 'reuse_binary:s' => sub{registerOnOffOption($velvethOptions, @_)},\n+ 'noHash:s' => sub{registerOnOffOption($velvethOptions, @_)},\n+ 'create_binary:s' => sub{registerOnOffOption($velvethOptions, @_)},\n+\n+ 'cov_cutoff=f' => sub{registerScalarOptionHash($velvetgmOptions, @_)},\n+ 'ins_length=i' => sub{registerScalarOptionHash($velvetgmOptions, @_)},\n+ 'read_trkg=s' => sub{registerYesNoOption($velvetgOptions, @_)},\n+ 'min_contig_lgth=i' => sub{registerScalarOptionHash($velvetgmOptions, @_)},\n+ 'amos_file=s' => sub{registerScalarOptionHash($velvetgmOptions, @_)},\n+ 'exp_cov=s' => sub{registerScalarOptionHash($velvetgmOptions, @_)},\n+ 'long_cov_cutoff=f' => sub{registerScalarOptionHash($velvetgmOptions, @_)},\n+ 'ins_length_long=i' => sub{registerScalarOptionHash($velvetgmOptions, @_)},\n+ 'ins_length2=i' => sub{registerScalarOptionHash($velvetgmOptions, @_)},\n+ 'ins_length_sd=i' => sub{registerScalarOptionHash($velvetgmOptions, @_)},\n+ 'ins_length_long_sd=i' => sub{registerScalarOptionHash($velvetgmOptions, @_)},\n+ 'ins_length2_sd=i' => sub{registerScalarOptionHash($velvetgmOptions, @_)},\n+ 'scaffolding=s' => sub{registerScalarOptionHash($velvetgmOptions, @_)},\n+ 'max_branch_length=i' => sub{registerScalarOptionHash($velvetgmOptions, @_)},\n+ 'max_divergence=f' => sub{registerScalarOptionHash($velvetgmOptions, @_)},\n+ "..b'nch_length <integer>\t: maximum length in base pair of bubble (default: 100)\n+\n+\t-max_divergence <floating-point>: maximum divergence rate between two branches in a bubble (default: 0.2)\n+\n+\t-max_gap_count <integer>\t: maximum number of gaps allowed in the alignment of the two branches of a bubble (default: 3)\n+\n+\t-min_pair_count <integer>\t: minimum number of paired end connections to justify the scaffolding of two long contigs (default: 5)\n+\n+\t-max_coverage <floating point>\t: removal of high coverage nodes AFTER tour bus (default: no removal)\n+\n+\t-coverage_mask <int> : minimum coverage required for confident regions of contigs (default: 1)\n+\n+\t-long_mult_cutoff <int>\t\t: minimum number of long reads required to merge contigs (default: 2)\n+\n+\t-unused_reads <yes|no>\t\t: export unused reads in UnusedReads.fa file (default: no)\n+\n+\t-alignments <yes|no>\t\t: export a summary of contig alignment to the reference sequences (default: no)\n+\n+\t-exportFiltered <yes|no>\t: export the long nodes which were eliminated by the coverage filters (default: no)\n+\n+\t-clean <yes|no>\t\t\t: remove all the intermediary files which are useless for recalculation (default : no)\n+\n+\t-very_clean <yes|no>\t\t: remove all the intermediary files (no recalculation possible) (default: no)\n+\n+\t-paired_exp_fraction <double>\t: remove all the paired end connections which less than the specified fraction of the expected count (default: 0.1)\n+\n+\t-shortMatePaired* <yes|no>\t: for mate-pair libraries, indicate that the library might be contaminated with paired-end reads (default no)\n+\n+\t-conserveLong <yes|no>\t\t: preserve sequences with long reads in them (default no)\n+\n+Output:\n+\n+\tdirectory/contigs.fa\t\t: fasta file of contigs longer than twice hash length\n+\n+\tdirectory/stats.txt\t\t: stats file (tab-spaced) useful for determining appropriate coverage cutoff\n+\n+\tdirectory/LastGraph\t\t: special formatted file with all the information on the final graph\n+\n+\tdirectory/velvet_asm.afg\t: (if requested) AMOS compatible assembly file\n+\n+=head1 META-VELVETG OPTIONS\n+\n+ Graph-splitting options (metagenome-specific):\n+\n+\t-discard_chimera <yes|no> \t: discard chimera sub-graph (default: no)\n+\n+\t-max_chimera_rate <double> \t: maximum allowable chimera rate (default: 0.0)\n+\n+\t-repeat_cov_sd \t: standard deviation of repeat node coverages (default: 0.1)\n+\n+\t-min_split_length <int> \t: minimum node length required for repeat resolution (default: 0)\n+\n+\t-valid_connections <int> \t: minimum allowable number of consistent paired-end connections (default: 1)\n+\n+\t-noise_connections <int> \t: maximum allowable number of inconsistent paired-end connections (default: 0)\n+\n+\t-use_connections <yes|no> \t: use paired-end connections for graph splitting (default: yes)\n+\n+\t-report_split_detail <yes|no>\t: report sequences around repeat nodes (default: no)\n+\n+\t-report_subgraph <yes|no> \t: report node sequences for each subgraph (default: no)\n+\n+ Peak detection options (metagenome-specific):\n+\n+\t-exp_covs <string|auto> \t: expected coverages for each species in microbiome (default: auto)\n+\n+\tex : -exp_covs 214_122_70_43_25_13.5\n+\n+\tcoverage values should be sorted in a descending order\n+\n+\t-min_peak_cov <double> \t: minimum peak coverage (default: 0)\n+\n+\t-max_peak_cov <double> \t: maximum peak coverage (default: 500)\n+\n+\t-histo_bin_width <double> \t: bin width of peak coverage histogram (default: 1)\n+\n+\t-histo_sn_ratio <double> \t: signal-noise ratio to remove peak noises (default: 10)\n+\n+ Output:\n+\n+\tdirectory/meta-velvetg.contigs.fa \t: fasta file of contigs longer than twice hash length\n+\n+\tdirectory/meta-velvetg.LastGraph \t: special formatted file with all the information on the final graph\n+\n+\tdirectory/meta-velvetg.Graph2-stats.txt \t: stats file (tab-delimited) useful for optimizing coverage peak values\n+\n+\tdirectory/meta-velvetg.split-stats.txt \t: stats file (tab-delimited) useful for optimizing graph-splitting parameters\n+\n+=cut\n' |