Previous changeset 1:e8dbc8b9a59a (2017-12-12) Next changeset 3:aeebcdb9b8b2 (2017-12-12) |
Commit message:
Uploaded |
added:
sm_STAR2_V2.pl |
b |
diff -r e8dbc8b9a59a -r 80e19490ec6a sm_STAR2_V2.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sm_STAR2_V2.pl Tue Dec 12 10:08:56 2017 -0500 |
b |
b'@@ -0,0 +1,236 @@\n+#!/usr/bin/perl -w\n+\n+# usage : perl sm_STAR.pl <read1.fastq.gz> <read2.fastq.gz>\n+# 10/02/2014 - Wrapper du traitement des donn\xc3\xa9es RNAseq\n+# Sarah Maman\n+# Copyright (C) 2014 INRA\n+# This program is free software: you can redistribute it and/or modify\n+# it under the terms of the GNU General Public License as published by\n+# the Free Software Foundation, either version 3 of the License, or\n+# (at your option) any later version.\n+#\n+# This program is distributed in the hope that it will be useful,\n+# but WITHOUT ANY WARRANTY; without even the implied warranty of\n+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+# GNU General Public License for more details.\n+# \n+# You should have received a copy of the GNU General Public License\n+# along with this program. If not, see <http://www.gnu.org/licenses/>.\n+#\n+use strict;\n+use File::Basename;\n+use Getopt::Long;\n+use lib "$ENV{\'MY_GALAXY_DIR\'}";\n+use GalaxyPath;\n+\n+my $cfg = GalaxyPath->new( -file => $ENV{"GALAXY_CONFIG_FILE"});\n+my $PATH = $cfg->my_path( \'workPath\', \'MYWORKSPACE\' );\n+my $STAR = $cfg->my_path( \'toolsPath\', \'STAR_PATH\' );\n+\n+\n+\n+my $Nthreads;\n+my $genome_path;\n+my $reads_selector;\n+my $input_read;\n+my $Read1fastqgz;\n+my $Read2fastqgz;\n+my $alignIntronMin;\n+my $alignIntronMax; \n+my $outFilterMismatchNmax;\n+my $orientation;\n+my $refownfastaref;\n+my $refselector;\n+my $refowngtf;\n+my $compress;\n+my $cufflinks;\n+my $outputfile;\n+my $outputfileT;\n+my $outputlogSJ;\n+my $outputlogfinal;\n+\n+\n+Getopt::Long::Configure( \'no_ignorecase\', \'bundling\' );\n+GetOptions (\n+ \'runThreadN=i\' => \\$Nthreads,\n+ \'genomeDir=s\' => \\$genome_path,\n+ \'refselector=s\' => \\$refselector,\n+ \'refownfastaref=s\' => \\$refownfastaref,\n+ \'refowngtf=s\' => \\$refowngtf,\n+ \'compress=s\' => \\$compress,\n+ \'cufflinks=s\' => \\$cufflinks,\n+ \'readsselector=s\'=> \\$reads_selector,\n+ \'readFilesIn1=s\' => \\$Read1fastqgz,\n+ \'readFilesIn2=s\' => \\$Read2fastqgz,\n+ \'readsinputread=s\' => \\$input_read,\n+ \'alignIntronMin=i\' => \\$alignIntronMin,\n+ \'alignIntronMax=i\' => \\$alignIntronMax, \n+ \'outFilterMismatchNmax=i\' => \\$outFilterMismatchNmax,\n+ \'orientation=s\' => \\$orientation,\n+ \'outputfile=s\' => \\$outputfile,\n+ \'outputfileT=s\' => \\$outputfileT,\n+ \'outputlogfinal=s\' => \\$outputlogfinal,\n+ \'outputlogSJ=s\' => \\$outputlogSJ\n+) or die "Usage: Error in command line arguments\\n";\n+\n+my $cmd1 = \'\'; my $cmd2 =\'\';\n+my $cmd3 = \'\'; my $cmd4 =\'\';\n+\n+#STAR --runThreadN 4 --runMode genomeGenerate --genomeDir /work/smaman/TP_RNAseq/INDEX/ --genomeFastaFiles ITAG2.3_genomic_Ch6.fasta --sjdbGTFfile ITAG_pre2.3_gene_models_Ch6.gtf --sjdbOverhang 100\n+\n+#smaman@node001 /work/smaman/TP_RNAseq $ ls -ltrah INDEX\n+#-rw-r--r-- 1 smaman BIOINFO 331 17 juil. 11:55 genomeParameters.txt\n+#-rw-r--r-- 1 smaman BIOINFO 387K 17 juil. 11:55 exonGeTrInfo.tab\n+#-rw-r--r-- 1 smaman BIOINFO 53K 17 juil. 11:55 geneInfo.tab\n+#-rw-r--r-- 1 smaman BIOINFO 151K 17 juil. 11:55 transcriptInfo.tab\n+#-rw-r--r-- 1 smaman BIOINFO 171K 17 juil. 11:55 exonInfo.tab\n+#-rw-r--r-- 1 smaman BIOINFO 325K 17 juil. 11:55 sjdbList.fromGTF.out.tab\n+#-rw-r--r-- 1 smaman BIOINFO 272K 17 juil. 11:55 sjdbInfo.txt\n+#-rw-r--r-- 1 smaman BIOINFO 325K 17 juil. 11:55 sjdbList.out.tab\n+#-rw-r--r-- 1 smaman BIOINFO 11 17 juil. 11:55 chrName.txt\n+#-rw-r--r-- 1 smaman BIOINFO 9 17 juil. 11:55 chrLength.txt\n+#-rw-r--r-- 1 smaman BIOINFO 11 17 juil. 11:55 chrStart.txt\n+#-rw-r--r-- 1 smaman BIOINFO 20 17 juil. 11:55 chrNameLength.txt\n+#-rw-r--r-- 1 smaman BIOINFO 47M 17 juil. 11:55 Genome\n+#-rw-r--r-- 1 smaman BIOINFO 360M 17 juil. 11:55 SA\n+#-rw-r--r-- 1 smaman BIOINFO 1,5G 17 juil. 11:55 SAindex\n+\n+\n+#STAR --readFilesIn WTr1.fastq WTr2.fastq --genomeDir /work/smaman/TP_RNAseq/INDEX/ --sjdbGTFfile ITAG_pre2.3_gene_models_Ch6.gtf --outSAMtype BAM SortedByCoordinate --alignIntronMin 20 --alignIntronMax 1000000 --outFilterMismatchNmax 10 --outSAMtype BAM SortedByCoordinate --runThreadN 4 --outFileNam'..b';\n+ system $cmd2;\n+ #Info pour les biologistes\n+ print STDOUT "STAR command run on cluster with oriented reads : \\n\\n $cmd2 \\n\\n \n+ Instead, you need to run Cufflinks with the library option --library-type options. For example, cufflinks <\xe2\x80\xa6> -library-type fr-firststrand should be used for the \xe2\x80\x9cstandard\xe2\x80\x9d dUTP protocol. This option has to be used only for Cufflinks runs and not for STAR runs.\\n\\n";\n+ }\n+}else{\n+ \n+ \n+ my $in1;\n+ my $in2;\n+ if ($compress eq "compress"){\n+ #Si besoin, recup\xc3\xa9ration du fichier de configuration avec modification de l extension\n+ `ln -s $Read1fastqgz $dirresults/Read1.fastq.gz; ln -s $Read2fastqgz $dirresults/Read2.fastq.gz;`;\n+ $in1="$dirresults/Read1.fastq.gz";\n+ $in2="$dirresults/Read2.fastq.gz";\n+ $cat="--readFilesCommand zcat";\n+ }else\n+ {`ln -s $Read1fastqgz $dirresults/Read1.fastq; ln -s $Read2fastqgz $dirresults/Read2.fastq;`;\n+ $in1="$dirresults/Read1.fastq";\n+ $in2="$dirresults/Read2.fastq";\n+ $cat="";}\n+ \n+ \n+ if ($orientation eq "No"){\n+ $cmd3 = "(cd $dirresults; $STAR --runThreadN $Nthreads --genomeDir $genome_path --readFilesIn $in1 $in2 --outSAMtype BAM SortedByCoordinate --alignIntronMin $alignIntronMin --alignIntronMax $alignIntronMax --outFilterMismatchNmax $outFilterMismatchNmax $cat --outFileNamePrefix $nb $addcuff) >& ./out_Star.log 2>&1";\n+ system $cmd3;\n+ #Info pour les biologistes\n+ print STDOUT "STAR command run on cluster without oriented reads : \\n\\n $cmd3 \\n\\n ";\n+ }\n+ else\n+ {\n+ $cmd4 = "(cd $dirresults; $STAR --runThreadN $Nthreads --genomeDir $genome_path --readFilesIn $in1 $in2 --outSAMtype BAM SortedByCoordinate --alignIntronMin $alignIntronMin --alignIntronMax $alignIntronMax --outFilterMismatchNmax $outFilterMismatchNmax $cat --outFileNamePrefix $nb $addcuff) >& ./out_Star.log 2>&1";\n+ #Info pour les biologistes\n+ system $cmd4;\n+ print STDOUT "STAR command run on cluster with oriented reads : \\n\\n $cmd4 \\n\\n \n+ Instead, you need to run Cufflinks with the library option --library-type options. For example, cufflinks <\xe2\x80\xa6> -library-type fr-firststrand should be used for the \xe2\x80\x9cstandard\xe2\x80\x9d dUTP protocol. This option has to be used only for Cufflinks runs and not for STAR runs.\\n\\n";\n+ }\n+\n+\n+}\n+\n+#Si besoin :\n+#TEST 1 : command ligne on vm-galaxy\n+#TEST 2 perl Galaxy file : perl script.pl path/to/tests/files/used/for/galaxy/perl/script out1\n+\n+#Recuperation des fichiers par Galaxy\n+#-rw-r--r-- 1 smaman BIOINFO 35K 17 juil. 12:03 galaxyNameAligned.toTranscriptome.out.bam +++++ \n+#-rw-r--r-- 1 smaman BIOINFO 637 17 juil. 12:03 galaxyNameAligned.sortedByCoord.out.bam +++++++++\n+#-rw-r--r-- 1 smaman BIOINFO 0 17 juil. 12:03 galaxyNameSJ.out.tab ++++++++++++++++\n+#-rw-r--r-- 1 smaman BIOINFO 1,7K 17 juil. 12:03 galaxyNameLog.final.out +++++++++++++++\n+my $bam = glob("$dirresults/*$nb*Aligned.sortedByCoord.out.bam");\n+if (! -e $bam){print STDERR "Aligned.sortedByCoord.out.bam file not found. \\n";}else{`cp -a $bam $outputfile`;}\n+my $bamT = glob("$dirresults/*$nb*Aligned.toTranscriptome.out.bam");\n+if (! -e $bamT){print STDERR "Aligned.toTranscriptome.out.bam file not found. \\n";}else{`cp -a $bamT $outputfileT`;}\n+my $logSJ = glob("$dirresults/$nb*SJ.out.tab");\n+if (! -e $logSJ){print STDERR "SJ.out.tab log file not found. \\n";}else{`cp -a $logSJ $outputlogSJ`;}\n+my $logfinal = glob("$dirresults/$nb*Log.final.out");\n+if (! -e $logfinal){print STDERR "Log.final.out log file not found. \\n";}else{`cp -a $logfinal $outputlogfinal`;}\n+\n' |