Mercurial > repos > romaingred > gred_rnaseq
changeset 4:4d637ab9113e draft
Deleted selected files
author | romaingred |
---|---|
date | Fri, 17 Nov 2017 03:33:56 -0500 |
parents | 98d99e51579a |
children | 411b36116b7a |
files | alignTophat.pl alignTophat.xml bed_pipe.loc.sample gred_rnaseq-2369cfe4f737/alignTophat.pl gred_rnaseq-2369cfe4f737/alignTophat.xml gred_rnaseq-2369cfe4f737/bed_pipe.loc.sample gred_rnaseq-2369cfe4f737/tool_data_table_conf.xml.sample gred_rnaseq-2369cfe4f737/tool_dependencies.xml tool_dependencies.xml |
diffstat | 9 files changed, 487 insertions(+), 518 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alignTophat.pl Fri Nov 17 03:33:56 2017 -0500 @@ -0,0 +1,334 @@ +#!/usr/bin/perl +use strict; +use warnings; +use Getopt::Long; + +my $max_procs = 8; + +my ($ma, $fastq, $fastqN, $mis,$findSens, $contaminants, $directional, $build_contaminant, $ref, $build_ref, $annotation, $dirBam, $dirBam_cont,$dirBed, $dirBedgraph, $outText, $outSimple, $random_w); + +GetOptions ( +"direction:s" => \$directional, +"random_w:s" => \$random_w, +"find:s" => \$findSens, +"fastq=s" => \$fastq, +"fastq_n=s" => \$fastqN, +"ma=i" => \$ma, +"mis=i" => \$mis, +"contaminants=s" => \$contaminants, +"anno=s" => \$annotation, +"build_contaminant" => \$build_contaminant, +"ref=s" => \$ref, +"build_ref" => \$build_ref, +"dirbamCont=s" => \$dirBam_cont, +"dirbam=s" => \$dirBam, +"dirbedgraph=s" => \$dirBedgraph, +"text=s" => \$outText, +); + +$directional = lc($directional); +$findSens = lc($findSens); +$random_w = lc($random_w); + +if ($build_ref) +{ + `(bowtie2-build $ref $ref) 2> /dev/null `; +} + +if ($build_contaminant) +{ + `(bowtie2-build $contaminants $contaminants) 2> /dev/null `; +} + +#"outsimple=s" => \$outSimple +#"annotation=s" => \$annotation, + +#$annotation = "/data/visiteur/annotation/ara/TAIR10_all_sorted.bed"; +#$annotation = "/data/home/pogorelcnikr/SG/humanGenes.bed"; + +# mapping against contaminants databases in: contaminants, fastq, # of mismatches, output_dir out: tophat dir +tophat2($fastq, $contaminants, $mis, 1, 'dir_contaminant'); +`bamToFastq -i dir_contaminant/unmapped.bam -fq clean.fastq`; + +# mapping against contaminants databases in: $ref, fastq, # of mismatches, output_dir out: tophat dir +tophat2('clean.fastq', $ref, $mis, $max_procs, 'dir_reference'); + +my $repTot = 0; +if ($ma == 0) +{ + open (my $rT, 'dir_reference/align_summary.txt'); + my @repT = <$rT>; + $repTot = $1 if $repT[2] =~ /\s+Mapped\s+:\s+(\d+).*/; + close $rT; +} +else +{ + $repTot = $ma; +} + +# Creation of bedgraphs +my $dir_bedgraph = $fastqN.'_bedgraph/'; +findUnique('dir_reference/accepted_hits.bam', $dir_bedgraph, $repTot); + +# count in: unique_mapped.bam out: raw_count_simple antisens_count +if ($random_w eq 'true') +{ + reference('rand_sorted.bam', $repTot); + `mv rand_sorted.bam dir_reference/random_sorted.bam`; +} +else +{ + reference('ac_sorted.bam', $repTot); + `mv ac_sorted.bam dir_reference/unique_sorted.bam`; +} + +zip('dir_reference', $dirBam); +zip('dir_contaminant', $dirBam_cont); +zip( $dir_bedgraph , $dirBedgraph); + +################################# +# inputs: +# fastq $_[0] +# bowtie2_index $_[1] +# mismatches # $_[2] +# output directory $_[3] +# max_procs $_[4] +################################# +sub tophat2 +{ + my ($fastq, $index, $mismtaches, $proc, $out_repertory) = @_; + $out_repertory = $out_repertory.'/' unless $out_repertory =~ /\/$/; + # print STDERR "/data/visiteur/tophat-2.0.12.Linux_x86_64/tophat2 -p $proc -N $mismtaches --b2-very-sensitive -o $out_repertory $index $fastq\n"; + `tophat2 -p $proc -N $mismtaches --b2-very-sensitive -o $out_repertory $index $fastq 2> /dev/stdin`; +} +############################################################################################ + +################################# +# inputs: +# bam $_[0] +# output directory $_[1] +################################# +sub findUnique +{ + my ($bam, $out_bedgraph, $sc) = @_; + + mkdir $out_bedgraph; + + my $bedg_uni = $out_bedgraph.'/'.$fastqN.'_unique.bedgraph' ; + my $bedg_all_multi = $out_bedgraph.'/'.$fastqN.'_all_multi.bedgraph' ; + my $bedg_random = $out_bedgraph.'/'.$fastqN.'_random_multi.bedgraph' ; + + my $bedg_uni_p = $out_bedgraph.'/'.$fastqN.'_unique_plusStrand.bedgraph' ; + my $bedg_all_multi_p = $out_bedgraph.'/'.$fastqN.'_all_multi_plusStrand.bedgraph' ; + my $bedg_random_p = $out_bedgraph.'/'.$fastqN.'_random_multi_plusStrand.bedgraph' ; + + my $bedg_uni_m = $out_bedgraph.'/'.$fastqN.'_unique_minusStrand.bedgraph' ; + my $bedg_all_multi_m = $out_bedgraph.'/'.$fastqN.'_all_multi_minusStrand.bedgraph' ; + my $bedg_random_m = $out_bedgraph.'/'.$fastqN.'_random_multi_minusStrand.bedgraph' ; + + my $sca = 1000000/$sc; + + #unique file + `samtools view -H $bam > ac.sam 2> /dev/null && samtools view -@ $max_procs $bam 2> /dev/null |grep NH:i:1\$ >> ac.sam && samtools view -@ $max_procs -Shb ac.sam > ac.bam 2> /dev/null`; + `samtools sort -@ $max_procs ac.bam -o ac_sorted.bam 2> /dev/null`; + `genomeCoverageBed -scale $sca -split -bg -ibam ac_sorted.bam > $bedg_uni 2> /dev/null`; + if ($directional eq 'true') + { + `genomeCoverageBed -strand + -scale $sca -split -bg -ibam ac_sorted.bam > $bedg_uni_p 2> /dev/null`; + `genomeCoverageBed -strand - -scale $sca -split -bg -ibam ac_sorted.bam > $bedg_uni_m 2> /dev/null`; + } + #all multi + `samtools sort -@ $max_procs $bam -o accepted_hits_sorted.bam 2> /dev/null`; + `genomeCoverageBed -scale $sca -split -bg -ibam accepted_hits_sorted.bam > $bedg_all_multi 2> /dev/null`; + if ($directional eq 'true') + { + `genomeCoverageBed -strand + -scale $sca -split -bg -ibam accepted_hits_sorted.bam > $bedg_all_multi_p 2> /dev/null`; + `genomeCoverageBed -strand - -scale $sca -split -bg -ibam accepted_hits_sorted.bam > $bedg_all_multi_m 2> /dev/null`; + } + #random multi + `samtools view -@ $max_procs -hb -F256 $bam > rand.bam 2> /dev/null`; + `samtools sort -@ $max_procs rand.bam -o rand_sorted.bam 2> /dev/null`; + `genomeCoverageBed -scale $sca -split -bg -ibam rand_sorted.bam > $bedg_random 2> /dev/null`; + if ($directional eq 'true') + { + `genomeCoverageBed -strand + -scale $sca -split -bg -ibam rand_sorted.bam > $bedg_random_p 2> /dev/null`; + `genomeCoverageBed -strand - -scale $sca -split -bg -ibam rand_sorted.bam > $bedg_random_m 2> /dev/null`; + } +} +############################################################################################ + +################################# +# inputs: +# bam $_[0] +# output directory $_[1] +################################# +sub reference +{ + my $bam = shift; + my $mapnum = shift; + + my @tabRef = (); + + open (my $anno, $annotation) || die "cannot open annotation file!\n"; + while(<$anno>) + { + chomp $_; + my @split = split /\t/, $_; + my $size = $split[2] - $split[1]; + push @tabRef, [$_,0,0,$size]; + } + close $anno; + + `bedtools bamtobed -i $bam > ac.bed`; + + # my $mapnum = 0; + # open (my $bed, 'ac.bed') || die "cannot open ac.bed\n"; + # while(<$bed>){ $mapnum++; } + # close $bed; + + if ($directional eq 'true') + { + #Same strand + `bedtools intersect -s -wao -b ac.bed -a $annotation> intersectStranded.bed`; + #Different strand + `bedtools intersect -S -wao -b ac.bed -a $annotation> intersectOppositeStrand.bed`; + + open (my $strand, 'intersectStranded.bed') || die "cannot open intersectStranded.bed\n"; + my $i = 0; + my $firstLine = <$strand>; chomp $firstLine; + my @split = split /\t/, $firstLine; + my $prevId = $split[3]; + if ($split[16] != 0 && $split[12] - $split[11] == $split[16]) + { + $tabRef[$i]->[1] = 1; + } + while(<$strand>) + { + chomp $_; + @split = split /\t/, $_; + if ($split[3] ne $prevId) + { + $i++; + } + if ($split[16] != 0 && $split[12] - $split[11] == $split[16]) + { + $tabRef[$i]->[1] += 1; + } + $prevId = $split[3]; + } + close $strand; + + open ($strand, 'intersectOppositeStrand.bed') || die "cannot open intersectOppositeStrand.bed\n"; + $i = 0; + $firstLine = <$strand>; chomp $firstLine; + @split = split /\t/, $firstLine; + $prevId = $split[3]; + if ($split[16] != 0 && $split[12] - $split[11] == $split[16]) + { + $tabRef[$i]->[2] = 1; + } + while(<$strand>) + { + chomp $_; + @split = split /\t/, $_; + if ($split[3] ne $prevId) + { + $i++; + } + if ($split[16] != 0 && $split[12] - $split[11] == $split[16]) + { + $tabRef[$i]->[2] += 1; + } + $prevId = $split[3]; + } + close $strand; + } + + else + { + `bedtools intersect -wao -b ac.bed -a $annotation> intersect.bed`; + + open (my $strand, 'intersect.bed') || die "cannot open intersect.bed\n"; + my $i = 0; + my $firstLine = <$strand>; chomp $firstLine; + my @split = split /\t/, $firstLine; + my $prevId = $split[3]; + if ($split[16] != 0 && $split[12] - $split[11] == $split[16]) + { + $tabRef[$i]->[1] = 1; + } + while(<$strand>) + { + chomp $_; + @split = split /\t/, $_; + if ($split[3] ne $prevId) + { + $i++; + } + if ($split[16] != 0 && $split[12] - $split[11] == $split[16]) + { + $tabRef[$i]->[1] += 1; + } + $prevId = $split[3]; + } + close $strand; + } + + my $totplus = 0; my $totminus = 0; + foreach my $outPut (@tabRef) + { + $totplus += $outPut->[1]; + $totminus += $outPut->[2]; + } + + open (my $out, ">".$outText) || die "cannot open raw_count.txt\n"; + print $out "Chr\tstart\tend\tID\tscore\tstrand\ttype\tD1\tD2\tD3\treads\trpm\treads on opposite strand\trpm\n"; + my ($Sr, $Rr) = (0,0); + foreach my $outPut (@tabRef) + { + if ($totplus > $totminus || $findSens eq 'false') + { + $Sr = rpm($outPut->[1],$mapnum); + $Rr = rpm($outPut->[2],$mapnum); + print $out $outPut->[0]."\t".$outPut->[1]."\t".$Sr."\t".$outPut->[2]."\t".$Rr."\n"; + } + else + { + $Rr = rpm($outPut->[1],$mapnum); + $Sr = rpm($outPut->[2],$mapnum); + print $out $outPut->[0]."\t".$outPut->[2]."\t".$Sr."\t".$outPut->[1]."\t".$Rr."\n"; + } + } + close $out; +} +############################################################################################ + +################################# +# inputs: +# bam $_[0] +# output directory $_[1] +################################# +sub rpm +{ + my $raw_count = shift; + my $mapped_number = shift; + my $RPM = 0; + $RPM = ($raw_count * 1000000) / ($mapped_number) if $mapped_number != 0; + return $RPM; +} +############################################################################################ + +################################# +# inputs: +# bam $_[0] +# output directory $_[1] +################################# +sub zip +{ + my ($in_dir, $out_tar) = @_; + `tar -cvf tmp.tar $in_dir/`; + `mv tmp.tar $out_tar`; + rmdir $in_dir; +} +############################################################################################ +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alignTophat.xml Fri Nov 17 03:33:56 2017 -0500 @@ -0,0 +1,151 @@ +<tool id="alignTophat" name="align RNAseq" version="0.0.1"> + <description>Align directionnal RNAseq to obtain bedgraphs and raw count file</description> + <command interpreter="perl"> + + ./alignTophat.pl + + --fastq ${first_input} + --fastq_n ${first_input.name} + + #if $Genome.refGenomeSource == "history": + --ref "${Genome.ownFile}" + --build_ref + #else: + --ref "${Genome.indices.fields.path}" + #end if + + #if $contaminants.refGenomeSource == "history": + --contaminants "${contaminants.ownFile}" + --build_contaminant + #else: + --contaminants "${contaminants.indices.fields.path}" + #end if + #if $anno.bedpipe == "history": + --annot "${anno.ownFile}" + #else: + --anno "${anno.indices.fields.path}" + #end if + + --mis $mis + --ma $ma + + --dirbam $dirbam + --dirbamCont $dirbamcont + --dirbedgraph $dirbedgraph + + --text $text + + --random_w $rand + --find $idDir.find + --direction $idDir.direc + </command> + <inputs> + + <param format="fastqsanger" name="first_input" type="data" label="fastq produced by fasteris" help=""/> + + + <conditional name="Genome"> + <param name="refGenomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?"> + <option value="indexed">Use a built-in index</option> + <option value="history">Use one from the history</option> + </param> + <when value="indexed"> + <param name="indices" type="select" label="Select a reference genome"> + <options from_data_table="bowtie2_indexes"> + <filter type="sort_by" column="2" /> + <validator type="no_options" message="No indexes are available" /> + </options> + </param> + </when> + <when value="history"> + <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" /> + </when> + </conditional> + + + <conditional name="Genome"> + <param name="refGenomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?"> + <option value="indexed">Use a built-in index</option> + <option value="history">Use one from the history</option> + </param> + <when value="indexed"> + <param name="indices" type="select" label="Select a reference genome"> + <options from_data_table="bowtie2_indexes"> + <filter type="sort_by" column="2" /> + <validator type="no_options" message="No indexes are available" /> + </options> + </param> + </when> + <when value="history"> + <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" /> + </when> + </conditional> + + <conditional name="contaminants"> + <param name="refGenomeSource" type="select" label="Will you select contaminants database from your history or use a built-in index?"> + <option value="indexed">Use a built-in index</option> + <option value="history">Use one from the history</option> + </param> + <when value="indexed"> + <param name="indices" type="select" label="Select contaminants reference"> + <options from_data_table="bowtie2_indexes"> + <filter type="sort_by" column="2" /> + <validator type="no_options" message="No indexes are available" /> + </options> + </param> + </when> + <when value="history"> + <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" /> + </when> + </conditional> + + <conditional name="anno"> + <param name="bedpipe" type="select" label="Will you select an annotation file from your history or use a built-in index?" help="tab file format, (chr start end id score strand type description1 description2 description3"> + <option value="indexed">Use a built-in index</option> + <option value="history">Use one from the history</option> + </param> + <when value="indexed"> + <param name="indices" type="select" label="Select annotation file"> + <options from_data_table="bed_pipe"> + <filter type="sort_by" column="2" /> + <validator type="no_options" message="No indexes are available" /> + </options> + </param> + </when> + <when value="history"> + <param name="ownFile" type="data" format="bed" metadata_name="dbkey" label="Select a reference from history" /> + </when> + </conditional> + + + <param name="mis" type="integer" value="0" label="mismatches"/> + <param name="ma" type="integer" value="0" label="normalization value" help="e.g number of mappers (if 0 auto)"/> + <param name="rand" type="boolean" checked="false" label="work on multi-mappers randomly assigned"/> + + + <conditional name="idDir"> + <param name="direc" type="boolean" checked="true" label="directional library"/> + <when value="false"> + <param name="find" type="hidden" value="false"/> + </when> + <when value="true"> + <param name="find" type="boolean" checked="true" label="auto sens finder"/> + </when> + </conditional> + </inputs> + + <outputs> + <data format="tar" name="dirbam" label="alignement_zip_${first_input.name}"/> + <data format="tar" name="dirbamcont" label="contaminant_zip_${first_input.name}"/> + <data format="tar" name="dirbedgraph" label="bedgraph_zip_${first_input.name}"/> + <data format="tabular" name="text" label="raw_count_tabular_${first_input.name}_rand_${rand}"/> + </outputs> + + <requirements> + <requirement type="package" version="2.2.6">bowtie2</requirement> + <requirement type="package" version="2.1.0">tophat</requirement> + <requirement type="package" version="2.17.0">bedtools</requirement> + <requirement type="package" version="0.1.19">samtools</requirement> + </requirements> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bed_pipe.loc.sample Fri Nov 17 03:33:56 2017 -0500 @@ -0,0 +1,2 @@ +#<unique_id> <database_caption> <base_name_path> +dmel dmel_annotation /path/to/annotation/dmel/dmel_gene.bed
--- a/gred_rnaseq-2369cfe4f737/alignTophat.pl Thu Nov 16 10:33:42 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,334 +0,0 @@ -#!/usr/bin/perl -use strict; -use warnings; -use Getopt::Long; - -my $max_procs = 8; - -my ($ma, $fastq, $fastqN, $mis,$findSens, $contaminants, $directional, $build_contaminant, $ref, $build_ref, $annotation, $dirBam, $dirBam_cont,$dirBed, $dirBedgraph, $outText, $outSimple, $random_w); - -GetOptions ( -"direction:s" => \$directional, -"random_w:s" => \$random_w, -"find:s" => \$findSens, -"fastq=s" => \$fastq, -"fastq_n=s" => \$fastqN, -"ma=i" => \$ma, -"mis=i" => \$mis, -"contaminants=s" => \$contaminants, -"anno=s" => \$annotation, -"build_contaminant" => \$build_contaminant, -"ref=s" => \$ref, -"build_ref" => \$build_ref, -"dirbamCont=s" => \$dirBam_cont, -"dirbam=s" => \$dirBam, -"dirbedgraph=s" => \$dirBedgraph, -"text=s" => \$outText, -); - -$directional = lc($directional); -$findSens = lc($findSens); -$random_w = lc($random_w); - -if ($build_ref) -{ - `(bowtie2-build $ref $ref) 2> /dev/null `; -} - -if ($build_contaminant) -{ - `(bowtie2-build $contaminants $contaminants) 2> /dev/null `; -} - -#"outsimple=s" => \$outSimple -#"annotation=s" => \$annotation, - -#$annotation = "/data/visiteur/annotation/ara/TAIR10_all_sorted.bed"; -#$annotation = "/data/home/pogorelcnikr/SG/humanGenes.bed"; - -# mapping against contaminants databases in: contaminants, fastq, # of mismatches, output_dir out: tophat dir -tophat2($fastq, $contaminants, $mis, 1, 'dir_contaminant'); -`bamToFastq -i dir_contaminant/unmapped.bam -fq clean.fastq`; - -# mapping against contaminants databases in: $ref, fastq, # of mismatches, output_dir out: tophat dir -tophat2('clean.fastq', $ref, $mis, $max_procs, 'dir_reference'); - -my $repTot = 0; -if ($ma == 0) -{ - open (my $rT, 'dir_reference/align_summary.txt'); - my @repT = <$rT>; - $repTot = $1 if $repT[2] =~ /\s+Mapped\s+:\s+(\d+).*/; - close $rT; -} -else -{ - $repTot = $ma; -} - -# Creation of bedgraphs -my $dir_bedgraph = $fastqN.'_bedgraph/'; -findUnique('dir_reference/accepted_hits.bam', $dir_bedgraph, $repTot); - -# count in: unique_mapped.bam out: raw_count_simple antisens_count -if ($random_w eq 'true') -{ - reference('rand_sorted.bam', $repTot); - `mv rand_sorted.bam dir_reference/random_sorted.bam`; -} -else -{ - reference('ac_sorted.bam', $repTot); - `mv ac_sorted.bam dir_reference/unique_sorted.bam`; -} - -zip('dir_reference', $dirBam); -zip('dir_contaminant', $dirBam_cont); -zip( $dir_bedgraph , $dirBedgraph); - -################################# -# inputs: -# fastq $_[0] -# bowtie2_index $_[1] -# mismatches # $_[2] -# output directory $_[3] -# max_procs $_[4] -################################# -sub tophat2 -{ - my ($fastq, $index, $mismtaches, $proc, $out_repertory) = @_; - $out_repertory = $out_repertory.'/' unless $out_repertory =~ /\/$/; - # print STDERR "/data/visiteur/tophat-2.0.12.Linux_x86_64/tophat2 -p $proc -N $mismtaches --b2-very-sensitive -o $out_repertory $index $fastq\n"; - `tophat2 -p $proc -N $mismtaches --b2-very-sensitive -o $out_repertory $index $fastq 2> /dev/stdin`; -} -############################################################################################ - -################################# -# inputs: -# bam $_[0] -# output directory $_[1] -################################# -sub findUnique -{ - my ($bam, $out_bedgraph, $sc) = @_; - - mkdir $out_bedgraph; - - my $bedg_uni = $out_bedgraph.'/'.$fastqN.'_unique.bedgraph' ; - my $bedg_all_multi = $out_bedgraph.'/'.$fastqN.'_all_multi.bedgraph' ; - my $bedg_random = $out_bedgraph.'/'.$fastqN.'_random_multi.bedgraph' ; - - my $bedg_uni_p = $out_bedgraph.'/'.$fastqN.'_unique_plusStrand.bedgraph' ; - my $bedg_all_multi_p = $out_bedgraph.'/'.$fastqN.'_all_multi_plusStrand.bedgraph' ; - my $bedg_random_p = $out_bedgraph.'/'.$fastqN.'_random_multi_plusStrand.bedgraph' ; - - my $bedg_uni_m = $out_bedgraph.'/'.$fastqN.'_unique_minusStrand.bedgraph' ; - my $bedg_all_multi_m = $out_bedgraph.'/'.$fastqN.'_all_multi_minusStrand.bedgraph' ; - my $bedg_random_m = $out_bedgraph.'/'.$fastqN.'_random_multi_minusStrand.bedgraph' ; - - my $sca = 1000000/$sc; - - #unique file - `samtools view -H $bam > ac.sam 2> /dev/null && samtools view -@ $max_procs $bam 2> /dev/null |grep NH:i:1\$ >> ac.sam && samtools view -@ $max_procs -Shb ac.sam > ac.bam 2> /dev/null`; - `samtools sort -@ $max_procs ac.bam -o ac_sorted.bam 2> /dev/null`; - `genomeCoverageBed -scale $sca -split -bg -ibam ac_sorted.bam > $bedg_uni 2> /dev/null`; - if ($directional eq 'true') - { - `genomeCoverageBed -strand + -scale $sca -split -bg -ibam ac_sorted.bam > $bedg_uni_p 2> /dev/null`; - `genomeCoverageBed -strand - -scale $sca -split -bg -ibam ac_sorted.bam > $bedg_uni_m 2> /dev/null`; - } - #all multi - `samtools sort -@ $max_procs $bam -o accepted_hits_sorted.bam 2> /dev/null`; - `genomeCoverageBed -scale $sca -split -bg -ibam accepted_hits_sorted.bam > $bedg_all_multi 2> /dev/null`; - if ($directional eq 'true') - { - `genomeCoverageBed -strand + -scale $sca -split -bg -ibam accepted_hits_sorted.bam > $bedg_all_multi_p 2> /dev/null`; - `genomeCoverageBed -strand - -scale $sca -split -bg -ibam accepted_hits_sorted.bam > $bedg_all_multi_m 2> /dev/null`; - } - #random multi - `samtools view -@ $max_procs -hb -F256 $bam > rand.bam 2> /dev/null`; - `samtools sort -@ $max_procs rand.bam -o rand_sorted.bam 2> /dev/null`; - `genomeCoverageBed -scale $sca -split -bg -ibam rand_sorted.bam > $bedg_random 2> /dev/null`; - if ($directional eq 'true') - { - `genomeCoverageBed -strand + -scale $sca -split -bg -ibam rand_sorted.bam > $bedg_random_p 2> /dev/null`; - `genomeCoverageBed -strand - -scale $sca -split -bg -ibam rand_sorted.bam > $bedg_random_m 2> /dev/null`; - } -} -############################################################################################ - -################################# -# inputs: -# bam $_[0] -# output directory $_[1] -################################# -sub reference -{ - my $bam = shift; - my $mapnum = shift; - - my @tabRef = (); - - open (my $anno, $annotation) || die "cannot open annotation file!\n"; - while(<$anno>) - { - chomp $_; - my @split = split /\t/, $_; - my $size = $split[2] - $split[1]; - push @tabRef, [$_,0,0,$size]; - } - close $anno; - - `bedtools bamtobed -i $bam > ac.bed`; - - # my $mapnum = 0; - # open (my $bed, 'ac.bed') || die "cannot open ac.bed\n"; - # while(<$bed>){ $mapnum++; } - # close $bed; - - if ($directional eq 'true') - { - #Same strand - `bedtools intersect -s -wao -b ac.bed -a $annotation> intersectStranded.bed`; - #Different strand - `bedtools intersect -S -wao -b ac.bed -a $annotation> intersectOppositeStrand.bed`; - - open (my $strand, 'intersectStranded.bed') || die "cannot open intersectStranded.bed\n"; - my $i = 0; - my $firstLine = <$strand>; chomp $firstLine; - my @split = split /\t/, $firstLine; - my $prevId = $split[3]; - if ($split[16] != 0 && $split[12] - $split[11] == $split[16]) - { - $tabRef[$i]->[1] = 1; - } - while(<$strand>) - { - chomp $_; - @split = split /\t/, $_; - if ($split[3] ne $prevId) - { - $i++; - } - if ($split[16] != 0 && $split[12] - $split[11] == $split[16]) - { - $tabRef[$i]->[1] += 1; - } - $prevId = $split[3]; - } - close $strand; - - open ($strand, 'intersectOppositeStrand.bed') || die "cannot open intersectOppositeStrand.bed\n"; - $i = 0; - $firstLine = <$strand>; chomp $firstLine; - @split = split /\t/, $firstLine; - $prevId = $split[3]; - if ($split[16] != 0 && $split[12] - $split[11] == $split[16]) - { - $tabRef[$i]->[2] = 1; - } - while(<$strand>) - { - chomp $_; - @split = split /\t/, $_; - if ($split[3] ne $prevId) - { - $i++; - } - if ($split[16] != 0 && $split[12] - $split[11] == $split[16]) - { - $tabRef[$i]->[2] += 1; - } - $prevId = $split[3]; - } - close $strand; - } - - else - { - `bedtools intersect -wao -b ac.bed -a $annotation> intersect.bed`; - - open (my $strand, 'intersect.bed') || die "cannot open intersect.bed\n"; - my $i = 0; - my $firstLine = <$strand>; chomp $firstLine; - my @split = split /\t/, $firstLine; - my $prevId = $split[3]; - if ($split[16] != 0 && $split[12] - $split[11] == $split[16]) - { - $tabRef[$i]->[1] = 1; - } - while(<$strand>) - { - chomp $_; - @split = split /\t/, $_; - if ($split[3] ne $prevId) - { - $i++; - } - if ($split[16] != 0 && $split[12] - $split[11] == $split[16]) - { - $tabRef[$i]->[1] += 1; - } - $prevId = $split[3]; - } - close $strand; - } - - my $totplus = 0; my $totminus = 0; - foreach my $outPut (@tabRef) - { - $totplus += $outPut->[1]; - $totminus += $outPut->[2]; - } - - open (my $out, ">".$outText) || die "cannot open raw_count.txt\n"; - print $out "Chr\tstart\tend\tID\tscore\tstrand\ttype\tD1\tD2\tD3\treads\trpm\treads on opposite strand\trpm\n"; - my ($Sr, $Rr) = (0,0); - foreach my $outPut (@tabRef) - { - if ($totplus > $totminus || $findSens eq 'false') - { - $Sr = rpm($outPut->[1],$mapnum); - $Rr = rpm($outPut->[2],$mapnum); - print $out $outPut->[0]."\t".$outPut->[1]."\t".$Sr."\t".$outPut->[2]."\t".$Rr."\n"; - } - else - { - $Rr = rpm($outPut->[1],$mapnum); - $Sr = rpm($outPut->[2],$mapnum); - print $out $outPut->[0]."\t".$outPut->[2]."\t".$Sr."\t".$outPut->[1]."\t".$Rr."\n"; - } - } - close $out; -} -############################################################################################ - -################################# -# inputs: -# bam $_[0] -# output directory $_[1] -################################# -sub rpm -{ - my $raw_count = shift; - my $mapped_number = shift; - my $RPM = 0; - $RPM = ($raw_count * 1000000) / ($mapped_number) if $mapped_number != 0; - return $RPM; -} -############################################################################################ - -################################# -# inputs: -# bam $_[0] -# output directory $_[1] -################################# -sub zip -{ - my ($in_dir, $out_tar) = @_; - `tar -cvf tmp.tar $in_dir/`; - `mv tmp.tar $out_tar`; - rmdir $in_dir; -} -############################################################################################ -
--- a/gred_rnaseq-2369cfe4f737/alignTophat.xml Thu Nov 16 10:33:42 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,151 +0,0 @@ -<tool id="alignTophat" name="align RNAseq" version="0.0.1"> - <description>Align directionnal RNAseq to obtain bedgraphs and raw count file</description> - <command interpreter="perl"> - - ./alignTophat.pl - - --fastq ${first_input} - --fastq_n ${first_input.name} - - #if $Genome.refGenomeSource == "history": - --ref "${Genome.ownFile}" - --build_ref - #else: - --ref "${Genome.indices.fields.path}" - #end if - - #if $contaminants.refGenomeSource == "history": - --contaminants "${contaminants.ownFile}" - --build_contaminant - #else: - --contaminants "${contaminants.indices.fields.path}" - #end if - #if $anno.bedpipe == "history": - --annot "${anno.ownFile}" - #else: - --anno "${anno.indices.fields.path}" - #end if - - --mis $mis - --ma $ma - - --dirbam $dirbam - --dirbamCont $dirbamcont - --dirbedgraph $dirbedgraph - - --text $text - - --random_w $rand - --find $idDir.find - --direction $idDir.direc - </command> - <inputs> - - <param format="fastqsanger" name="first_input" type="data" label="fastq produced by fasteris" help=""/> - - - <conditional name="Genome"> - <param name="refGenomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?"> - <option value="indexed">Use a built-in index</option> - <option value="history">Use one from the history</option> - </param> - <when value="indexed"> - <param name="indices" type="select" label="Select a reference genome"> - <options from_data_table="bowtie2_indexes"> - <filter type="sort_by" column="2" /> - <validator type="no_options" message="No indexes are available" /> - </options> - </param> - </when> - <when value="history"> - <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" /> - </when> - </conditional> - - - <conditional name="Genome"> - <param name="refGenomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?"> - <option value="indexed">Use a built-in index</option> - <option value="history">Use one from the history</option> - </param> - <when value="indexed"> - <param name="indices" type="select" label="Select a reference genome"> - <options from_data_table="bowtie2_indexes"> - <filter type="sort_by" column="2" /> - <validator type="no_options" message="No indexes are available" /> - </options> - </param> - </when> - <when value="history"> - <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" /> - </when> - </conditional> - - <conditional name="contaminants"> - <param name="refGenomeSource" type="select" label="Will you select contaminants database from your history or use a built-in index?"> - <option value="indexed">Use a built-in index</option> - <option value="history">Use one from the history</option> - </param> - <when value="indexed"> - <param name="indices" type="select" label="Select contaminants reference"> - <options from_data_table="bowtie2_indexes"> - <filter type="sort_by" column="2" /> - <validator type="no_options" message="No indexes are available" /> - </options> - </param> - </when> - <when value="history"> - <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" /> - </when> - </conditional> - - <conditional name="anno"> - <param name="bedpipe" type="select" label="Will you select an annotation file from your history or use a built-in index?" help="tab file format, (chr start end id score strand type description1 description2 description3"> - <option value="indexed">Use a built-in index</option> - <option value="history">Use one from the history</option> - </param> - <when value="indexed"> - <param name="indices" type="select" label="Select annotation file"> - <options from_data_table="bed_pipe"> - <filter type="sort_by" column="2" /> - <validator type="no_options" message="No indexes are available" /> - </options> - </param> - </when> - <when value="history"> - <param name="ownFile" type="data" format="bed" metadata_name="dbkey" label="Select a reference from history" /> - </when> - </conditional> - - - <param name="mis" type="integer" value="0" label="mismatches"/> - <param name="ma" type="integer" value="0" label="normalization value" help="e.g number of mappers (if 0 auto)"/> - <param name="rand" type="boolean" checked="false" label="work on multi-mappers randomly assigned"/> - - - <conditional name="idDir"> - <param name="direc" type="boolean" checked="true" label="directional library"/> - <when value="false"> - <param name="find" type="hidden" value="false"/> - </when> - <when value="true"> - <param name="find" type="boolean" checked="true" label="auto sens finder"/> - </when> - </conditional> - </inputs> - - <outputs> - <data format="tar" name="dirbam" label="alignement_zip_${first_input.name}"/> - <data format="tar" name="dirbamcont" label="contaminant_zip_${first_input.name}"/> - <data format="tar" name="dirbedgraph" label="bedgraph_zip_${first_input.name}"/> - <data format="tabular" name="text" label="raw_count_tabular_${first_input.name}_rand_${rand}"/> - </outputs> - - <requirements> - <requirement type="package" version="2.2.6">bowtie2</requirement> - <requirement type="package" version="2.1.0">tophat</requirement> - <requirement type="package" version="2.17.0">bedtools</requirement> - <requirement type="package" version="0.1.19">samtools</requirement> - </requirements> - -</tool>
--- a/gred_rnaseq-2369cfe4f737/bed_pipe.loc.sample Thu Nov 16 10:33:42 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -#<unique_id> <database_caption> <base_name_path> -dmel dmel_annotation /path/to/annotation/dmel/dmel_gene.bed
--- a/gred_rnaseq-2369cfe4f737/tool_data_table_conf.xml.sample Thu Nov 16 10:33:42 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -<tables> - <table name="bed_pipe" comment_char="#"> - <columns>value, name, path</columns> - <file path="${__HERE__}/test-data/bed_pipe.loc" /> - </table> -</tables> \ No newline at end of file
--- a/gred_rnaseq-2369cfe4f737/tool_dependencies.xml Thu Nov 16 10:33:42 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,11 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="bowtie2" version="2.2.6"> - </package> - <package name="tophat" version="2.1.0"> - </package> - <package name="bedtools" version="2.24.0"> - </package> - <package name="samtools" version="1.3.1"> - </package> -</tool_dependency>
--- a/tool_dependencies.xml Thu Nov 16 10:33:42 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,14 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="bowtie2" version="2.2.6"> - <repository changeset_revision="0d9cd7487cc9" name="package_bowtie_2_2_6" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> - <package name="tophat" version="2.1.0"> - <repository changeset_revision="ed92d4c8e7f6" name="package_tophat_2_1_0" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> - <package name="bedtools" version="2.24.0"> - <repository changeset_revision="3416a1d4a582" name="package_bedtools_2_24" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> - <package name="samtools" version="1.3.1"> - </package> -</tool_dependency> \ No newline at end of file