view galaxy/wrappers/ExprQCplots.xml @ 2:b9ef036de818 draft default tip

"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit 89b7d0e8812f53222691cffe29bd48be6519829d"
author vandelj
date Fri, 25 Sep 2020 08:52:46 +0000
parents 0435f94d27a7
children
line wrap: on
line source

<tool name="GIANT-QC Plots" id="giant_plot_functions" version="0.1.4">
  <description>Descriptive plots of .CEL collections or normalized expression data</description>
  <!-- Pinned packages that Galaxy resolves before running the tool -->
  <requirements>
    <requirement type="package" version="1.40.1">bioconductor-oligo</requirement>
    <requirement type="package" version="2.2_13">r-ff</requirement>
    <requirement type="package" version="2.2.1">r-ggplot2</requirement>
    <requirement type="package" version="4.5.6">r-plotly</requirement>
    <requirement type="package" version="1.8_17">r-mgcv</requirement>
  </requirements>
  <code file="../../src/General_functions.py"/>
  <!--<code file="./src/General_functions.py"/> change for Planemo test-->
  <!--
    Job failure detection. Because this element is present, only the rules listed here decide
    whether a job failed, so every non-zero exit code has to be covered explicitly.
  -->
  <stdio>
    <!-- R failures are recognised from the messages R prints on stdout/stderr -->
    <regex match="Execution halted"
           source="both"
           level="fatal"
           description="Execution halted, please contact tool developer or administrators." />
    <regex match="Error in"
           source="both"
           level="fatal"
           description="An error occured during R execution, please contact tool developer." />
    <!-- exit codes raised on purpose by the command and by the two helper scripts -->
    <exit_code range="15" level="fatal" description="Error during formating scripts, see log file for more information." />
    <exit_code range="10" level="fatal" description="Missing file during html report, see log file for more information." />
    <exit_code range="1:9" level="fatal" description="Error in R execution, see log file for more information." />
    <!-- any other non-zero code (e.g. 127 command not found, 137 killed) must also fail the job -->
    <exit_code range="11:14" level="fatal" description="Unexpected error, see log file for more information." />
    <exit_code range="16:" level="fatal" description="Unexpected error, see log file for more information." />
  </stdio>
  <!--
    Command template (Cheetah rendered to a shell command line):
      1. when a factor file and at least one factor are selected, run scriptPrepareTable;
      2. run ExprPlotsScript.R on the inputs (several .CEL files or exactly one tabular file);
      3. on success, run scriptTransfer to assemble the HTML report.
    Every generated path is single-quoted so that it reaches the shell as a single word.
  -->
  <command><![CDATA[
    ##set $dataType=$inputData.extension
    #set $dataType=$inputData[0].ext

    ## optional step: extract the selected factor columns into factorTable.csv
    #if $conditionInformation and $pcaSection.factorsToInclude!="None":
      bash '$scriptPrepareTable';
      ret_code=\$?;
      if [ \$ret_code != 0 ]; then
        exit \$ret_code;
      fi;
    #end if

    ## several .CEL files or exactly one tabular file are required
    #if ($dataType == "cel" and len($inputData)>1) or ($dataType == "tabular" and len($inputData)==1):
      Rscript '$__tool_directory__/../../src/ExprPlotsScript.R' -i '$inputData' -l '$log' -f '$advSection.imageFormat' -j '$dataType'
      ##change for Planemo test
      ##Rscript '$__tool_directory__/src/ExprPlotsScript.R' -i '$inputData' -l '$log' -f '$advSection.imageFormat' -j '$dataType'
      ## one -c option per .CEL dataset, carrying its history name
      #if $dataType == "cel":
        #for $inputDataset in $inputData
          -c '${inputDataset.name}'
        #end for
      #end if
      #if $conditionInformation and $pcaSection.factorsToInclude!="None":
        -t 'factorTable.csv'
      #end if
      #if $plotSection.histogramToPlot:
        -h 'Histograms'
      #end if
      #if $plotSection.maplotToPlot:
        -a 'MAplots'
      #end if
      #if $plotSection.boxplotToPlot:
        -b 'Boxplots'
      #end if
      #if $plotSection.microarrayToPlot and $dataType == "cel":
        -m 'Microarray'
      #end if
      #if $pcaSection.acpToPlot:
        -p 'PCA'
        -s 'screePlot'
      #end if
      ;
      ret_code=\$?;
      if [ \$ret_code != 0 ]; then
        exit \$ret_code;
      else
        bash '$scriptTransfer';
        ret_code=\$?;
        if [ \$ret_code != 0 ]; then
          exit \$ret_code;
        fi;
      fi;
    #else:
      printf "[ERROR]Execution halted, not enough (.CEL files) or too many (tabular file) input files" >> '$log';
      exit 15;
    #end if
    printf "[INFO]End of tool script" >> '$log';
  ]]></command>



  <configfiles>
    <!--
      Builds ./factorTable.csv from the factor file: the first column plus every column whose
      header is one of the selected factors. Exits with code 15 when the table cannot be built.
    -->
    <configfile name="scriptPrepareTable">
    <![CDATA[
    ## "fact" holds the comma-separated names of the selected factors. The header row (NR==1)
    ## gives the positions of the matching columns; every row, header included, is then
    ## printed as first column + selected columns, tab-separated.
    awk -v fact="$pcaSection.factorsToInclude" 'BEGIN{FS="\t";OFS="";ORS="";split(fact, factors, ",");for(i in factors)factorsName[factors[i]]=1} NR==1{for(i=2;i<=NF;i++){if(\$i in factorsName)positFactor[i]=1}} {print \$1;for(factorID in positFactor)print "\t"\$factorID; print "\n"}'  '$conditionInformation' > ./factorTable.csv;
    awk_code=\$?;

    ## The redirection above creates factorTable.csv even when awk fails, so testing the
    ## file alone is not enough: the exit status of awk is checked as well.
    if [ \$awk_code != 0 ] || [ ! -e ./factorTable.csv ]; then
      printf "[ERROR]factorTable.csv is missing or could not be generated" >> '$log';
      exit 15
    fi
    printf "[INFO]End of scriptPrepareTable\n" >> '$log'
    ]]>
    </configfile>

    <configfile name="scriptTransfer">
<![CDATA[
## extension of the first input, used below to decide whether microarray images are listed
#set $dataType=$inputData[0].ext

##create header of HTML file (this first write truncates any previous content)
printf  "<!DOCTYPE html>\n<html>\n"  > '$html_file'

##add to HTML scripts to show and hide MAplots and MicroArray pictures
##(clicking on the MAplotTitle / MicroarrayTitle headings toggles the matching paragraphs)

printf "<head>
<script src=\"https://ajax.googleapis.com/ajax/libs/jquery/3.3.1/jquery.min.js\"></script>
<script>
\\$(document).ready(function(){
    \\$(\"\#MAplotTitle\").click(function(){
  if(\\$(\"p.MAplot\").is(\':visible\'))\\$(\"p.MAplot\").hide(1000);
        else \\$(\"p.MAplot\").show(1000);
    });
    \\$(\"\#MicroarrayTitle\").click(function(){
  if(\\$(\"p.Microarray\").is(\':visible\'))\\$(\"p.Microarray\").hide(1000);
        else \\$(\"p.Microarray\").show(1000);
    });
});
</script>
</head>\n<body>\n" >> '$html_file'

## folder receiving every page and script linked from the report
mkdir -p '$html_file.extra_files_path'



#if $plotSection.histogramToPlot:

printf "<h3>Histograms</h3>\n" >> '$html_file'

## counter numbers the published pages and shows afterwards whether any page was found
counter=1
## a glob is used instead of parsing the output of ls, so paths are never word-split
for histogram in ./plotLyDir/Histograms*html
do
## an unmatched glob is left as a literal pattern: skip it so that counter stays at 1
[ -e "\$histogram" ] || continue

## page name without directory and extension
histogramShort=\${histogram%\.*}
histogramShort=\${histogramShort\#\#*/}

##modify HTML to point to plotLy folder
sed -i "s/\${histogramShort}_files/PlotLy_scripts/g" "\$histogram"

##copy HTML page next to the report under a numbered name
cp "\$histogram" '${html_file.extra_files_path}'/Histograms\${counter}.html

##add HTML link
printf "<a href=\"Histograms\${counter}.html\">Histograms\${counter}</a>\n"  >> '$html_file'

## the shared PlotLy_scripts folder is installed once, from the first page, and only if
## it is not there yet (moving a second copy onto it would nest an unused duplicate)
if [ \$counter = 1 ] && [ ! -d '${html_file.extra_files_path}'/PlotLy_scripts ]; then

#if $advSection.imagePlotlyFormat=="svg":
##before copying modify plotLy script to replace png snapshot with svg (not proud of solution but seems to work)
##done in a subshell so that a failing cd cannot change the working directory of the rest of the script
(
cd ./plotLyDir/"\${histogramShort}"_files/plotlyjs-*/ &&
awk '{gsub("e=\"png\"","e=\"svg\"",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js &&
awk '{gsub("Download plot as a png","Download plot as a svg",\$0);print \$0}' ./plotly-latest.minTemp.js > ./plotly-latest.min.js &&
rm ./plotly-latest.minTemp.js
) || printf "[WARNING]PlotLy snapshot format could not be switched to svg\n" >> '$log'
#end if

##copy only scripts folder for the first histogram
cp -r ./plotLyDir/"\${histogramShort}"_files '${html_file.extra_files_path}'/PlotLy_scripts
fi

((counter++))
done

## counter still at 1 means that no histogram page was found
if [ \$counter = 1 ]; then
  printf "[ERROR]Histograms are missing" >> '$log';
  exit 10
fi

#end if



#if $plotSection.boxplotToPlot:

printf "<h3>Boxplots</h3>\n" >> '$html_file'

## counter numbers the published pages and shows afterwards whether any page was found
counter=1
## a glob is used instead of parsing the output of ls, so paths are never word-split
for boxplot in ./plotLyDir/Boxplots*html
do
## an unmatched glob is left as a literal pattern: skip it so that counter stays at 1
[ -e "\$boxplot" ] || continue

## page name without directory and extension
boxplotShort=\${boxplot%\.*}
boxplotShort=\${boxplotShort\#\#*/}

##modify HTML to point to plotLy folder
sed -i "s/\${boxplotShort}_files/PlotLy_scripts/g" "\$boxplot"

##copy HTML page next to the report under a numbered name
cp "\$boxplot" '${html_file.extra_files_path}'/Boxplots\${counter}.html

##add HTML link
printf "<a href=\"Boxplots\${counter}.html\">Boxplots\${counter}</a>\n"  >> '$html_file'

## the shared PlotLy_scripts folder is installed once, from the first page, and only if
## it is not there yet (moving a second copy onto it would nest an unused duplicate)
if [ \$counter = 1 ] && [ ! -d '${html_file.extra_files_path}'/PlotLy_scripts ]; then
#if $advSection.imagePlotlyFormat=="svg":
##before copying modify plotLy script to replace png snapshot with svg (not proud of solution but seems to work)
##done in a subshell so that a failing cd cannot change the working directory of the rest of the script
(
cd ./plotLyDir/"\${boxplotShort}"_files/plotlyjs-*/ &&
awk '{gsub("e=\"png\"","e=\"svg\"",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js &&
awk '{gsub("Download plot as a png","Download plot as a svg",\$0);print \$0}' ./plotly-latest.minTemp.js > ./plotly-latest.min.js &&
rm ./plotly-latest.minTemp.js
) || printf "[WARNING]PlotLy snapshot format could not be switched to svg\n" >> '$log'
#end if

##copy only scripts folder for the first boxplot
cp -r ./plotLyDir/"\${boxplotShort}"_files '${html_file.extra_files_path}'/PlotLy_scripts
fi

((counter++))
done

## counter still at 1 means that no boxplot page was found
if [ \$counter = 1 ]; then
  printf "[ERROR]Boxplots are missing" >> '$log';
  exit 10
fi

#end if



#if $plotSection.maplotToPlot:

printf "<h3 id=\"MAplotTitle\">MA plots (show/hide)</h3>\n" >> '$html_file'

## counter shows afterwards whether any page was found
counter=1
## a glob is used instead of parsing the output of ls, so paths are never word-split
for MAplot in ./plotLyDir/MAplots_*html
do
## an unmatched glob is left as a literal pattern: skip it so that counter stays at 1
[ -e "\$MAplot" ] || continue

## page name without directory and extension
MAplotShort=\${MAplot%\.*}
MAplotShort=\${MAplotShort\#\#*/}

## what follows the "MAplots_" prefix identifies the condition
conditionName=\${MAplot%\.*}
conditionName=\${conditionName\#\#*MAplots_}

## look up the display name (2nd column) of the condition in correspondanceFileNames.csv;
## FILENAME/ARGV are used because ARGIND only exists in GNU awk
echo "\$conditionName" > ./temporaryConditionName
conditionFormatedName=\$(awk 'BEGIN{FS="\t"} FILENAME==ARGV[1]{dico[\$1]=\$2} FILENAME==ARGV[2]{print dico[\$1]}' ./correspondanceFileNames.csv ./temporaryConditionName)

##modify HTML to point to plotLy folder
sed -i "s/\${MAplotShort}_files/PlotLy_scripts/g" "\$MAplot"

##copy HTML page next to the report
cp "\$MAplot" '${html_file.extra_files_path}'/MAplot_"\$conditionName".html

##add HTML link
printf "<p class=\"MAplot\">\n<a href=\"MAplot_\$conditionName.html\">MAplot \$conditionFormatedName</a>\n</p>\n"  >> '$html_file'

## the shared PlotLy_scripts folder is installed once, from the first page, and only if
## it is not there yet (moving a second copy onto it would nest an unused duplicate)
if [ \$counter = 1 ] && [ ! -d '${html_file.extra_files_path}'/PlotLy_scripts ]; then
##copy only scripts folder for the first MAplot

#if $advSection.imagePlotlyFormat=="svg":
##before copying modify plotLy script to replace png snapshot with svg (not proud of solution but seems to work)
##done in a subshell so that a failing cd cannot change the working directory of the rest of the script
(
cd ./plotLyDir/"\${MAplotShort}"_files/plotlyjs-*/ &&
awk '{gsub("e=\"png\"","e=\"svg\"",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js &&
awk '{gsub("Download plot as a png","Download plot as a svg",\$0);print \$0}' ./plotly-latest.minTemp.js > ./plotly-latest.min.js &&
rm ./plotly-latest.minTemp.js
) || printf "[WARNING]PlotLy snapshot format could not be switched to svg\n" >> '$log'
#end if

cp -r ./plotLyDir/"\${MAplotShort}"_files '${html_file.extra_files_path}'/PlotLy_scripts
fi

((counter++))
done

## counter still at 1 means that no MA plot page was found
if [ \$counter = 1 ]; then
  printf "[ERROR]MAplots are missing" >> '$log';
  exit 10
fi

#end if




#if $plotSection.microarrayToPlot and $dataType == "cel":

printf "<h3 id=\"MicroarrayTitle\">Microarray</h3>\n" >> '$html_file'
## a glob is used instead of parsing the output of ls, so paths are never word-split
for microarray in ./plotDir/Microarray_*
do
## an unmatched glob is left as a literal pattern: nothing to publish in that case
[ -e "\$microarray" ] || continue

## what follows the "Microarray_" prefix (extension removed) identifies the condition
conditionName=\${microarray%\.*}
conditionName=\${conditionName\#\#*Microarray_}

## look up the display name (2nd column) of the condition in correspondanceFileNames.csv;
## FILENAME/ARGV are used because ARGIND only exists in GNU awk
echo "\$conditionName" > ./temporaryConditionName
conditionFormatedName=\$(awk 'BEGIN{FS="\t"} FILENAME==ARGV[1]{dico[\$1]=\$2} FILENAME==ARGV[2]{print dico[\$1]}' ./correspondanceFileNames.csv ./temporaryConditionName)

## publish the image next to the report and link it
cp "\$microarray" '${html_file.extra_files_path}'/Microarray_"\$conditionName".${advSection.imageFormat}
printf "<p class=\"Microarray\"><a href=\"Microarray_\$conditionName.${advSection.imageFormat}\">Microarray \$conditionFormatedName</a>\n</p>"  >> '$html_file'
done

#end if




#if $pcaSection.acpToPlot:

printf "<h3>PCA</h3>\n" >> '$html_file'

## counter shows afterwards whether any page was found
counter=1
## a glob is used instead of parsing the output of ls, so paths are never word-split
for pca in ./plotLyDir/PCA*html
do
## an unmatched glob is left as a literal pattern: skip it so that counter stays at 1
[ -e "\$pca" ] || continue

## page name without directory and extension
pcaShort=\${pca%\.*}
pcaShort=\${pcaShort\#\#*/}

## what follows the "PCA_" prefix identifies the factor, or two factors joined by "_AND_"
conditionName=\${pca%\.*}
conditionName=\${conditionName\#\#*PCA_}

## translate the name(s) through correspondanceFileNames.csv; a single name missing from the
## table is printed unchanged, two names are printed as "first * second".
## FILENAME/ARGV replace ARGIND (GNU awk only) and the value returned by split replaces
## length(array), which not every awk supports
echo "\$conditionName" > ./temporaryConditionName
conditionFormatedName=\$(awk 'BEGIN{FS="\t"} FILENAME==ARGV[1]{dico[\$1]=\$2} FILENAME==ARGV[2]{nbParts=split(\$0,tab,"_AND_");if(nbParts==1){if(\$0 in dico){print dico[\$0]}else{print \$0}}else{print dico[tab[1]]" * "dico[tab[2]]}}' ./correspondanceFileNames.csv ./temporaryConditionName)

##modify HTML to point to plotLy folder
sed -i "s/\${pcaShort}_files/PlotLy_scripts/g" "\$pca"

##copy HTML page next to the report
cp "\$pca" '${html_file.extra_files_path}'/PCA_"\$conditionName".html

##add HTML link
printf "<p><a href=\"PCA_\$conditionName.html\">PCA \$conditionFormatedName</a>\n</p>"  >> '$html_file'


## the shared PlotLy_scripts folder is installed once, from the first page, and only if
## it is not there yet (moving a second copy onto it would nest an unused duplicate)
if [ \$counter = 1 ] && [ ! -d '${html_file.extra_files_path}'/PlotLy_scripts ]; then

#if $advSection.imagePlotlyFormat=="svg":
##before copying modify plotLy script to replace png snapshot with svg (not proud of solution but seems to work)
##done in a subshell so that a failing cd cannot change the working directory of the rest of the script
(
cd ./plotLyDir/"\${pcaShort}"_files/plotlyjs-*/ &&
awk '{gsub("e=\"png\"","e=\"svg\"",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js &&
awk '{gsub("Download plot as a png","Download plot as a svg",\$0);print \$0}' ./plotly-latest.minTemp.js > ./plotly-latest.min.js &&
rm ./plotly-latest.minTemp.js
) || printf "[WARNING]PlotLy snapshot format could not be switched to svg\n" >> '$log'
#end if

##copy only scripts folder for the first pca
cp -r ./plotLyDir/"\${pcaShort}"_files '${html_file.extra_files_path}'/PlotLy_scripts
fi

((counter++))
done

## counter still at 1 means that no PCA page was found
if [ \$counter = 1 ]; then
  printf "[ERROR]PCA plots are missing" >> '$log';
  exit 10
fi

##now for screePlot
if [ -e ./plotLyDir/screePlot.html ]; then
  ##copy HTML page next to the report
  cp ./plotLyDir/screePlot.html '${html_file.extra_files_path}'/screePlot.html
  ##add HTML link
  printf "<p><a href=\"screePlot.html\">Scree plot</a>\n</p>"  >> '$html_file'
else
  printf "[ERROR]screePlot.html is missing" >> '$log';
  exit 10
fi

##copy scripts folder (screePlot.html is not rewritten above, so it keeps its own screePlot_files folder)

#if $advSection.imagePlotlyFormat=="svg":
##but before copying modify plotLy script to replace png snapshot with svg (not proud of solution but seems to work)
##done in a subshell so that a failing cd cannot change the working directory of the rest of the script
(
cd ./plotLyDir/screePlot_files/plotlyjs-*/ &&
awk '{gsub("e=\"png\"","e=\"svg\"",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js &&
awk '{gsub("Download plot as a png","Download plot as a svg",\$0);print \$0}' ./plotly-latest.minTemp.js > ./plotly-latest.min.js &&
rm ./plotly-latest.minTemp.js
) || printf "[WARNING]PlotLy snapshot format could not be switched to svg\n" >> '$log'
#end if

cp -r ./plotLyDir/screePlot_files '${html_file.extra_files_path}'

#end if


##create footer of HTML file: close the body and html elements opened by the report header
printf  "</body>\n</html>" >> '$html_file'
]]>
    </configfile>
  </configfiles>



  <inputs>
    <!-- title is the prefix of both output labels -->
    <param type="text" name="title" value="PlotFigure_toPersonalize" label="Title for output"/>
    <param type="data" name="inputData" format="cel,tabular" label="Select one .CEL collection or one tabular file" optional="false" multiple="true"/>

    <!-- one switch per kind of plot -->
    <section name="plotSection" title="Plots selection" expanded="True">
      <param type="boolean" name="histogramToPlot" checked="true" label="Plot histograms" help="Plot intensity distribution for each condition (pm probes for .cel)"/>
      <param type="boolean" name="maplotToPlot" checked="true" label="Plot MA plots" help="Plot MA plot for each condition, median value is used as reference"/>
      <param type="boolean" name="boxplotToPlot" checked="true" label="Plot boxplots" help="Plot intensity through boxplot for each condition (pm probes for .cel)"/>
      <param type="boolean" name="microarrayToPlot" checked="true" label="Display microarray images (only for .CEL files)"/>
    </section>

    <!-- PCA switch plus the optional factor file; the factor choices are the column names of that file -->
    <section name="pcaSection" title="PCA analysis" expanded="True">
      <param type="boolean" name="acpToPlot" checked="true" label="Plot 3D PCA" help="3D plot of conditions in the space defined by the 3 principal components"/>
      <param type="data" name="conditionInformation" format="tabular" label="Factor information tabular file (optional)" optional="true" multiple="false"/>
      <param name="factorsToInclude" type="select" optional="true" multiple="true" label="Select factor informations to display (optional)"
             refresh_on_change="true" dynamic_options="get_column_names(pcaSection['conditionInformation'].file_name,0)">
        <validator type="empty_field" message="You should specify one factor"/>
      </param>
    </section>

    <!-- image formats: microarray pictures and PlotLy snapshots -->
    <section name="advSection" title="Advanced parameters" expanded="False">
      <param type="select" name="imageFormat" display="radio" label="Output microarray image format">
        <option value="png">PNG format</option>
        <option value="pdf">PDF format</option>
      </param>
      <param type="select" name="imagePlotlyFormat" display="radio" label="Html snapshot format">
        <option value="png">PNG format</option>
        <option value="svg">SVG format</option>
      </param>
    </section>
  </inputs>

  
  
  <outputs>
    <!-- HTML report; the linked pages are stored in its extra files folder -->
    <data format="html" name="html_file" label="${title}_HTML.html"/>
    <!-- disabled collection outputs, kept for reference
    <collection name="outputHistogramsList" label="${title}_HistogramsList" type="list">
      <discover_datasets pattern="(?P&lt;designation&gt;Histograms[0-9]+)\.(?P&lt;ext&gt;[^\._]+)?" directory="plotDir" visible="false"/>
      <filter>plotSection['histogramToPlot']</filter>
    </collection>

    <collection name="outputMAplotsList" label="${title}_MAplotsList" type="list">
      <discover_datasets pattern="(?P&lt;designation&gt;MAplots[0-9]+)\.(?P&lt;ext&gt;[^\._]+)?" directory="plotDir" visible="false"/>
      <filter>plotSection['maplotToPlot']</filter>
    </collection>

    <collection name="outputBoxplotsList" label="${title}_BoxplotsList" type="list">
      <discover_datasets pattern="(?P&lt;designation&gt;Boxplots[0-9]+)\.(?P&lt;ext&gt;[^\._]+)?" directory="plotDir" visible="false"/>
      <filter>plotSection['boxplotToPlot']</filter>
    </collection>

    <collection name="outputMicroarrayList" label="${title}_MicroarrayList" type="list">
      <discover_datasets pattern="(?P&lt;designation&gt;Microarray\_.*)\.(?P&lt;ext&gt;[^\._]+)?" directory="plotDir" visible="false"/>
      <filter>plotSection['microarrayToPlot'] and inputData[0].ext == "cel"</filter>
    </collection>

    <collection name="outputPCAList" label="${title}_PCA" type="list">
      <discover_datasets pattern="(?P&lt;designation&gt;PCA[0-9]+)\.(?P&lt;ext&gt;[^\._]+)?" directory="plotDir" visible="false"/>
      <discover_datasets pattern="(?P&lt;designation&gt;screePlot)\.(?P&lt;ext&gt;[^\._]+)?" directory="plotDir" visible="false"/>
      <filter>pcaSection['acpToPlot']</filter>
    </collection>
    -->
    <!-- text log filled by the command and by the helper scripts -->
    <data name="log" label="${title}_Log" format="txt"/>
  </outputs>



 <tests>
   <!-- tabular input, every plot enabled except microarray images, two factors shown in the PCA -->
   <test maxseconds="3600">
     <param name="inputData" value="./NormalizedData.tabular"/>
     <section name="plotSection">
       <param name="histogramToPlot" value="true"/>
       <param name="maplotToPlot" value="true"/>
       <param name="boxplotToPlot" value="true"/>
       <param name="microarrayToPlot" value="false"/>
     </section>
     <section name="pcaSection">
       <param name="acpToPlot" value="true"/>
       <param name="conditionInformation" value="./FactorFileGenerator/output/conditionsFile.csv"/>
       <param name="factorsToInclude" value="Strain,Treatment"/>
     </section>
     <section name="advSection">
       <param name="imageFormat" value="png"/>
       <param name="imagePlotlyFormat" value="png"/>
     </section>
     <output name="html_file" file="./ExprQCplots/output/outputHTML.zip" decompress="true"/>
     <!-- the log is only compared by size, within a 10% tolerance -->
     <output name="log" file="./ExprQCplots/output/outputLog.txt" compare="sim_size" delta_frac="0.10"/>
   </test>
 </tests>



  <help>
<![CDATA[
**What it does ?**

This tool generates descriptive plots for Affymetrix raw data (.CEL files) or any tabular file containing expression data (from arrays or RNA-seq).

-----

**Parameters**

\- **Title** to personalize output file names (please avoid special characters and spaces).

\- **Input Data** 

- **.CEL files** of your study (you can select multiple .CEL files or a single collection). For visualization purposes, expression data extracted from .CEL files will be automatically log2 transformed by the tool.

OR

- **Expression tabular file** with samples as columns and genes as rows (header row contains sample names and first column gene identifiers). For better visualization, expression data should preferably be log2 transformed beforehand; no such transformation will be performed by the tool.

    ::

        Conditions  157_(HuGene-2_0-st).CEL 156_(HuGene-2_0-st).CEL  155_(HuGene-2_0-st).CEL    154_(HuGene-2_0-st).CEL                        
        DDX11L2     4.500872                4.429759                 4.780281                   4.996189             
        MIR1302-2   3.415065                3.520472                 3.471503                   3.567988           
        OR4F5       3.737956                3.011586                 3.424494                   3.497545
        VWA1        5.189621                5.129595                 4.806793                   5.227014

\- **Plots selection**
: to select required plots

- **Plot histograms**
- **Plot MA plots**
- **Plot boxplots**
- **Display microarray images** (only if "Input Data" are .CEL files)

\- **PCA analysis**

- **Plot 3D PCA** plot the 3 first principal components
- **Factor information tabular file** with factors as columns and samples as rows (header row contains factor names and first column sample names). Can help to discriminate samples in PCA plot.

    ::

        Conditions                Sex   Treatment Reaction
        138_(HuGene-2_0-st).CEL   1     TreatA    Pos
        148_(HuGene-2_0-st).CEL   0     NoTreat   Pos
        139_(HuGene-2_0-st).CEL   0     TreatB    Neg
        149_(HuGene-2_0-st).CEL   0     NoTreat   Neg

- **Select factor informations** you want to display in PCA plot (1 for samples colors and 1 for samples shapes)

\- **Advanced parameters**

- **Output microarray image format** available only for .CEL input files
- **Html snapshot format** : format of plot images taken from interactive view

-----

\- **Outputs**

- **HTML file** to access interactive version of plots through PlotLy html pages
- **LOG file** for job logs. In case of job failure, please attach this file to the bug report
]]>  </help>

 <citations>
  <citation type="bibtex">@misc{vandel_jimmy_2018_1477870, author = {Vandel, J. and Gheeraert, C. and Eeckhoute, J. and Staels, B. and Lefebvre, P. and Dubois-Chevalier, J.}, title = {GIANT: Galaxy-based Interactive tools for ANalysis of Transcriptomic data}, month = nov, year = 2018, doi = {10.5281/zenodo.1477870}, url = {https://doi.org/10.5281/zenodo.1477870}
  }</citation>

  <citation type="bibtex">@online{plotly, author = {Plotly Technologies Inc.}, title = {Collaborative data science}, publisher = {Plotly Technologies Inc.}, address = {Montreal, QC}, year = {2015}, url = {https://plot.ly}
  }</citation>
 </citations>

</tool>