diff sm_STAR2_V2.pl @ 6:b7ed4ee33fb6 draft default tip

Deleted selected files
author sarahinraauzeville
date Mon, 09 Dec 2019 10:55:33 -0500
parents c4fc8ff6e280
children
line wrap: on
line diff
--- a/sm_STAR2_V2.pl	Tue Dec 12 10:16:23 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,236 +0,0 @@
-#!/usr/bin/perl -w
-
-# usage : perl sm_STAR.pl <read1.fastq.gz> <read2.fastq.gz>
-# 10/02/2014 - Wrapper du traitement des données RNAseq
-# Sarah Maman
-# Copyright (C) 2014 INRA
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-# 
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-#
-use strict;
-use File::Basename;
-use Getopt::Long;
-use lib "$ENV{'MY_GALAXY_DIR'}";
-use GalaxyPath;
-
-my $cfg = GalaxyPath->new( -file => $ENV{"GALAXY_CONFIG_FILE"});
-my $PATH = $cfg->my_path( 'workPath', 'MYWORKSPACE' );
-my $STAR = $cfg->my_path( 'toolsPath', 'STAR_PATH' );
-
-
-
-my $Nthreads;
-my $genome_path;
-my $reads_selector;
-my $input_read;
-my $Read1fastqgz;
-my $Read2fastqgz;
-my $alignIntronMin;
-my $alignIntronMax;   
-my $outFilterMismatchNmax;
-my $orientation;
-my $refownfastaref;
-my $refselector;
-my $refowngtf;
-my $compress;
-my $cufflinks;
-my $outputfile;
-my $outputfileT;
-my $outputlogSJ;
-my $outputlogfinal;
-
-
-Getopt::Long::Configure( 'no_ignorecase', 'bundling' );
-GetOptions (
-    'runThreadN=i' => \$Nthreads,
-    'genomeDir=s' => \$genome_path,
-    'refselector=s' => \$refselector,
-    'refownfastaref=s' => \$refownfastaref,
-    'refowngtf=s' => \$refowngtf,
-    'compress=s' => \$compress,
-    'cufflinks=s' => \$cufflinks,
-    'readsselector=s'=> \$reads_selector,
-    'readFilesIn1=s' => \$Read1fastqgz,
-    'readFilesIn2=s' => \$Read2fastqgz,
-    'readsinputread=s' => \$input_read,
-    'alignIntronMin=i' => \$alignIntronMin,
-    'alignIntronMax=i' => \$alignIntronMax,   
-    'outFilterMismatchNmax=i' => \$outFilterMismatchNmax,
-    'orientation=s' => \$orientation,
-    'outputfile=s' => \$outputfile,
-    'outputfileT=s' => \$outputfileT,
-    'outputlogfinal=s' => \$outputlogfinal,
-    'outputlogSJ=s' => \$outputlogSJ
-) or die "Usage: Error in command line arguments\n";
-
-my $cmd1 = ''; my $cmd2 ='';
-my $cmd3 = ''; my $cmd4 ='';
-
-#STAR --runThreadN 4 --runMode genomeGenerate --genomeDir /work/smaman/TP_RNAseq/INDEX/ --genomeFastaFiles ITAG2.3_genomic_Ch6.fasta --sjdbGTFfile ITAG_pre2.3_gene_models_Ch6.gtf --sjdbOverhang 100
-
-#smaman@node001 /work/smaman/TP_RNAseq $ ls -ltrah INDEX
-#-rw-r--r-- 1 smaman BIOINFO  331 17 juil. 11:55 genomeParameters.txt
-#-rw-r--r-- 1 smaman BIOINFO 387K 17 juil. 11:55 exonGeTrInfo.tab
-#-rw-r--r-- 1 smaman BIOINFO  53K 17 juil. 11:55 geneInfo.tab
-#-rw-r--r-- 1 smaman BIOINFO 151K 17 juil. 11:55 transcriptInfo.tab
-#-rw-r--r-- 1 smaman BIOINFO 171K 17 juil. 11:55 exonInfo.tab
-#-rw-r--r-- 1 smaman BIOINFO 325K 17 juil. 11:55 sjdbList.fromGTF.out.tab
-#-rw-r--r-- 1 smaman BIOINFO 272K 17 juil. 11:55 sjdbInfo.txt
-#-rw-r--r-- 1 smaman BIOINFO 325K 17 juil. 11:55 sjdbList.out.tab
-#-rw-r--r-- 1 smaman BIOINFO   11 17 juil. 11:55 chrName.txt
-#-rw-r--r-- 1 smaman BIOINFO    9 17 juil. 11:55 chrLength.txt
-#-rw-r--r-- 1 smaman BIOINFO   11 17 juil. 11:55 chrStart.txt
-#-rw-r--r-- 1 smaman BIOINFO   20 17 juil. 11:55 chrNameLength.txt
-#-rw-r--r-- 1 smaman BIOINFO  47M 17 juil. 11:55 Genome
-#-rw-r--r-- 1 smaman BIOINFO 360M 17 juil. 11:55 SA
-#-rw-r--r-- 1 smaman BIOINFO 1,5G 17 juil. 11:55 SAindex
-
-
-#STAR --readFilesIn WTr1.fastq WTr2.fastq  --genomeDir /work/smaman/TP_RNAseq/INDEX/ --sjdbGTFfile ITAG_pre2.3_gene_models_Ch6.gtf   --outSAMtype BAM SortedByCoordinate --alignIntronMin 20 --alignIntronMax 1000000 --outFilterMismatchNmax 10 --outSAMtype BAM SortedByCoordinate --runThreadN 4 --outFileNamePrefix  galaxyName --outSAMstrandField intronMotif --outFilterIntronMotifs RemoveNoncanonical --outFilterType BySJout  --quantMode TranscriptomeSAM 
-
-#-rw-r--r--  1 smaman BIOINFO  45M 26 mars   2015 ITAG2.3_genomic_Ch6.fasta
-#-rw-r--r--  1 smaman BIOINFO 1,6M 26 mars   2015 ITAG_pre2.3_gene_models_Ch6.gtf
-#-rw-r--r--  1 smaman BIOINFO   29 26 mars   2015 ITAG2.3_genomic_Ch6.fasta.fai
-#-rw-r--r--  1 smaman BIOINFO  614 17 juil. 10:20 WTr1.fastq
-#-rw-r--r--  1 smaman BIOINFO  589 17 juil. 10:20 WTr2.fastq
-#-rw-r--r--  1 smaman BIOINFO  14K 17 juil. 11:55 Log.out
-#-rw-r--r--  1 smaman BIOINFO  35K 17 juil. 12:03 galaxyNameAligned.toTranscriptome.out.bam
-#-rw-r--r--  1 smaman BIOINFO  637 17 juil. 12:03 galaxyNameAligned.sortedByCoord.out.bam +++++++++
-#-rw-r--r--  1 smaman BIOINFO    0 17 juil. 12:03 galaxyNameSJ.out.tab            ++++++++++++++++
-#-rw-r--r--  1 smaman BIOINFO  246 17 juil. 12:03 galaxyNameLog.progress.out
-#-rw-r--r--  1 smaman BIOINFO 1,7K 17 juil. 12:03 galaxyNameLog.final.out   +++++++++++++++
-#-rw-r--r--  1 smaman BIOINFO  16K 17 juil. 12:03 galaxyNameLog.out
-
-
-
-#workspace
-my $debug = 0; #Mode debug
-if ($debug == 0)
-	{
-	print STDOUT "Debug mode OK \n";
-	}
-else	
-	{
-	$PATH = dirname($outputfile); 
-	print STDOUT "No debug \n";
-	}
-
-
-#Récuperer le numero (unique) de l'output afin, si besoin, de créer un répertoire de travail unique dans /work/galaxy-dev/workspace
-my ($nb) = ($outputfile=~/dataset_(\d+)\.\S+$/);
-
-#Repertoire de sortie cree par le script, verif des droits d'ecriture sur ce repertoire de sortie
-`cd $PATH/; mkdir $nb/; chmod -R 777 $nb/; cd $nb/;`;
-my $dirresults= "$PATH/".$nb;
-
-print STDOUT "Job working directory : $dirresults \n";
-
-
-if ($refselector eq "ownfasta"){
-        my $cmdSTARindex="(cd $dirresults/; mkdir INDEX/; chmod 777 INDEX/; $STAR --runThreadN $Nthreads --runMode genomeGenerate --genomeDir $dirresults/INDEX --genomeFastaFiles $refownfastaref --sjdbGTFfile $refowngtf --sjdbOverhang 100) >& ./out_Starindex.log 2>&1";
-        system  $cmdSTARindex;
-        #Info pour les biologistes
-        print STDOUT "STAR Genome Generate : \n\n $cmdSTARindex \n\n ";
-        $genome_path = "$dirresults/INDEX/";
-}
-
-my $addcuff;
-if ($cufflinks eq "cuff"){
-    $addcuff="--outSAMstrandField intronMotif --outFilterIntronMotifs RemoveNoncanonical --outFilterType BySJout --quantMode TranscriptomeSAM ";
-}else{
-    $addcuff="";
-}
-
-
-my $cat;
-if ($reads_selector eq "single"){
-                    
-                my $in;    
-                if ($compress eq "compress"){
-                #Si besoin, recupération du fichier de configuration avec modification de l extension
-                `ln -s $input_read $dirresults/input_read.fastq.gz;`;
-                $in = "$dirresults/input_read.fastq.gz";
-                $cat="--readFilesCommand zcat";
-                }else
-                {`ln -s $input_read $dirresults/input_read.fastq;`;
-                $in = "$dirresults/input_read.fastq";
-                $cat="";}
-
-                if ($orientation eq "No"){
-                    $cmd1 = "(cd $dirresults; $STAR  --runThreadN $Nthreads --genomeDir $genome_path --readFilesIn $in  --outSAMtype BAM SortedByCoordinate --alignIntronMin $alignIntronMin --alignIntronMax $alignIntronMax --outFilterMismatchNmax $outFilterMismatchNmax $cat  --outFileNamePrefix $nb $addcuff) >& ./out_Star.log 2>&1";
-                    system $cmd1;
-                    #Info pour les biologistes
-                    print STDOUT "STAR command run on cluster without oriented reads : \n\n $cmd1 \n\n ";
-                }
-                else
-                {
-                    $cmd2 = "(cd $dirresults; $STAR --runThreadN $Nthreads --genomeDir $genome_path --readFilesIn $in  --outSAMtype BAM SortedByCoordinate --alignIntronMin $alignIntronMin --alignIntronMax $alignIntronMax --outFilterMismatchNmax $outFilterMismatchNmax $cat --outFileNamePrefix $nb $addcuff) >& ./out_Star.log 2>&1";
-                     system  $cmd2;
-                    #Info pour les biologistes
-                    print STDOUT "STAR command run on cluster with oriented reads : \n\n $cmd2 \n\n 
-                    Instead, you need to run Cufflinks with the library option --library-type options. For example, cufflinks <…> -library-type fr-firststrand should be used for the “standard” dUTP protocol. This option has to be used only for Cufflinks runs and not for STAR runs.\n\n";
-                }
-}else{
-    
-    
-    my $in1;
-    my $in2;
-    if ($compress eq "compress"){
-                #Si besoin, recupération du fichier de configuration avec modification de l extension
-                `ln -s $Read1fastqgz $dirresults/Read1.fastq.gz; ln -s $Read2fastqgz $dirresults/Read2.fastq.gz;`;
-                $in1="$dirresults/Read1.fastq.gz";
-                $in2="$dirresults/Read2.fastq.gz";
-                $cat="--readFilesCommand zcat";
-                }else
-                {`ln -s $Read1fastqgz $dirresults/Read1.fastq; ln -s $Read2fastqgz $dirresults/Read2.fastq;`;
-                $in1="$dirresults/Read1.fastq";
-                $in2="$dirresults/Read2.fastq";
-                $cat="";}
-                
-                
-   if ($orientation eq "No"){
-                    $cmd3 = "(cd $dirresults; $STAR --runThreadN $Nthreads --genomeDir $genome_path --readFilesIn $in1 $in2  --outSAMtype BAM SortedByCoordinate --alignIntronMin $alignIntronMin --alignIntronMax $alignIntronMax --outFilterMismatchNmax $outFilterMismatchNmax  $cat --outFileNamePrefix $nb $addcuff) >& ./out_Star.log 2>&1";
-                    system $cmd3;
-                    #Info pour les biologistes
-                    print STDOUT "STAR command run on cluster without oriented reads : \n\n $cmd3 \n\n ";
-                }
-                else
-                {
-                    $cmd4 = "(cd $dirresults; $STAR --runThreadN $Nthreads --genomeDir $genome_path --readFilesIn $in1 $in2  --outSAMtype BAM SortedByCoordinate --alignIntronMin $alignIntronMin --alignIntronMax $alignIntronMax --outFilterMismatchNmax $outFilterMismatchNmax $cat --outFileNamePrefix $nb $addcuff) >& ./out_Star.log 2>&1";
-                    #Info pour les biologistes
-                     system $cmd4;
-                    print STDOUT "STAR command run on cluster with oriented reads : \n\n $cmd4 \n\n 
-                    Instead, you need to run Cufflinks with the library option --library-type options. For example, cufflinks <…> -library-type fr-firststrand should be used for the “standard” dUTP protocol. This option has to be used only for Cufflinks runs and not for STAR runs.\n\n";
-                }
-
-
-}
-
-#Si besoin :
-#TEST 1 : command ligne on vm-galaxy
-#TEST 2 perl Galaxy file : perl script.pl path/to/tests/files/used/for/galaxy/perl/script out1
-
-#Recuperation des fichiers par Galaxy
-#-rw-r--r--  1 smaman BIOINFO  35K 17 juil. 12:03 galaxyNameAligned.toTranscriptome.out.bam  +++++ 
-#-rw-r--r--  1 smaman BIOINFO  637 17 juil. 12:03 galaxyNameAligned.sortedByCoord.out.bam +++++++++
-#-rw-r--r--  1 smaman BIOINFO    0 17 juil. 12:03 galaxyNameSJ.out.tab            ++++++++++++++++
-#-rw-r--r--  1 smaman BIOINFO 1,7K 17 juil. 12:03 galaxyNameLog.final.out   +++++++++++++++
-my $bam = glob("$dirresults/*$nb*Aligned.sortedByCoord.out.bam");
-if (! -e $bam){print STDERR "Aligned.sortedByCoord.out.bam file not found. \n";}else{`cp -a $bam $outputfile`;}
-my $bamT = glob("$dirresults/*$nb*Aligned.toTranscriptome.out.bam");
-if (! -e $bamT){print STDERR "Aligned.toTranscriptome.out.bam file not found. \n";}else{`cp -a $bamT $outputfileT`;}
-my $logSJ = glob("$dirresults/$nb*SJ.out.tab");
-if (! -e $logSJ){print STDERR "SJ.out.tab log file not found. \n";}else{`cp -a $logSJ $outputlogSJ`;}
-my $logfinal = glob("$dirresults/$nb*Log.final.out");
-if (! -e $logfinal){print STDERR "Log.final.out log file not found. \n";}else{`cp -a $logfinal $outputlogfinal`;}
-