# HG changeset patch # User big-tiandm # Date 1406790434 14400 # Node ID 7321a6f82492b7b842b68177f0fbfd595e0c1cb5 # Parent 7cb8a65f8012cbacd90cb3a784e350e975fd83a2 Uploaded diff -r 7cb8a65f8012 -r 7321a6f82492 matching.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/matching.pl Thu Jul 31 03:07:14 2014 -0400 @@ -0,0 +1,90 @@ +#!/usr/bin/perl -w +#Filename: +#Author: Tian Dongmei +#Email: tiandm@big.ac.cn +#Date: 2013/7/19 +#Modified: +#Description: +my $version=1.00; + +use strict; +use Getopt::Long; + +my %opts; +GetOptions(\%opts,"i=s","g=s","index:s","v:i","p:i","r:s","o=s","time:s","h"); +if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments +&usage; +} + +my $filein=$opts{'i'}; +my $fileout=$opts{'o'}; +unless ($fileout=~/\/$/) {$fileout.="/";} +my $genome=$opts{'g'}; +my $mis=defined $opts{'v'}? $opts{'v'} : 0; +my $hits=defined $opts{'r'}? $opts{'r'} : 25; +my $index=defined $opts{'index'} ? $opts{'index'} : ""; +my $threads=defined $opts{'p'} ? $opts{'p'} : 1; + + +my $time=&Time(); +if (defined $opts{'time'}) { + $time=$opts{'time'}; +} + +my $mapdir=$fileout."/genome_match_".$time; +if(not -d $mapdir){ + mkdir $mapdir; +} +chdir $mapdir; +###check genome index +if (-s $index.".1.ebwt") { +}else{ + `bowtie-build $genome $genome`; + $index="$genome"; +} + +### genome mapping +`bowtie -v $mis -f -p $threads -m $hits -a --best --strata $index $filein --al genome_mapped.fa --un genome_not_mapped.fa --max genome_mapped_Mlimit.fa > genome_mapped.bwt`; + +#`convert_bowtie_to_blast.pl genome_mapped.bwt genome_mapped.fa $genome > genome_mapped.bst`; + +sub Time{ + my $time=time(); + my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6]; + $month++; + $year+=1900; + if (length($sec) == 1) {$sec = "0"."$sec";} + if (length($min) == 1) {$min = "0"."$min";} + if (length($hour) == 1) {$hour = "0"."$hour";} + if (length($day) == 1) {$day = "0"."$day";} + if (length($month) == 1) {$month = "0"."$month";} + #print "$year-$month-$day $hour:$min:$sec\n"; + return("$year-$month-$day-$hour-$min-$sec"); +} + +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 -i -o +options: +-i input file# input reads fasta/fastq file +-g input file# genome file +-index file-prefix #(must be indexed by bowtie-build) The parameter + string must be the prefix of the bowtie index. For instance, if + the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then + the prefix is 'h_sapiens_37_asm'.##can be null +-v report end-to-end hits w/ <=v mismatches; ignore qualities,default 0; + +-p/--threads number of alignment threads to launch (default: 1) + +-r int a read is allowed to map up to this number of positions in the genome + default is 25 + +-o output directory + +-h help +USAGE +exit(1); +} +