<!-- Mercurial > repos > vandelj > giant_hierarchical_clustering -->

<tool name="GIANT-Heatmap and Hierarchical clustering" id="giant_hierarchical_clustering" version="0.5.3">
  <description>Run hierarchical clustering and plot heatmap from expression data and/or differential expression analysis</description>
  <!-- Packages needed at run time: R and Bioconductor libraries plus pandoc, each pinned to a version.
       NOTE(review): the shell code in <command> and in the configfiles also uses the gawk-only
       ARGIND variable, "sort -V" and "sed -i", which are not declared here; presumably they are
       expected from the host system. Confirm before running in a minimal container. -->
  <requirements>
    <requirement type="package" version="4.8.0">r-plotly</requirement>
    <requirement type="package" version="1.12.0">r-dendextend</requirement>
    <requirement type="package" version="0.1_20">r-ggdendro</requirement>
    <requirement type="package" version="3.2.1">r-ggplot2</requirement>
    <requirement type="package" version="0.16.0">r-heatmaply</requirement>
    <requirement type="package" version="0.4.8">r-circlize</requirement>
    <requirement type="package" version="1.18.1">bioconductor-complexheatmap</requirement>
    <requirement type="package" version="2.2.2">pandoc</requirement>
  </requirements>
  <!-- Python helper module, resolved relative to this tool file. Presumably it defines the
       get_column_names* functions called from the dynamic_options attributes in <inputs>
       (verify in src/General_functions.py). -->
  <code file="../../src/General_functions.py"/>
  <!-- For Planemo tests use this path instead: <code file="./src/General_functions.py"/> -->
  <!-- Job failure detection. R error markers found on stdout or stderr are fatal, and so is every
       non-zero exit code: 1 to 9 come from the R script, 10 is raised by the report script when an
       expected file is missing, anything above is unexpected. -->
  <stdio>
    <regex match="Execution halted"
           source="both"
           level="fatal"
           description="Execution halted, please contact tool developer or administrators." />
    <regex match="Error in"
           source="both"
           level="fatal"
           description="An error occurred during R execution, please contact tool developer." />
    <exit_code range="10" level="fatal" description="Missing file during html report, see log file for more information." />
    <exit_code range="1:9" level="fatal" description="Error in R execution, see log file for more information." />
    <!-- Without this rule codes above 10 (e.g. 127 command not found, 137 killed) would be reported as success. -->
    <exit_code range="11:" level="fatal" description="Unexpected exit code, see log file for more information." />
  </stdio>
  <command>	<![CDATA[

      ## ---- Stage the table to cluster as ./orderedExpressionData ("expression" and "genericData" modes only) ----
      ## Dataset and log paths are single-quoted, like the Rscript arguments, so unusual characters in a path cannot split arguments.
      #if ($dataToCluster.dataToCluster_selector=="expression" or $dataToCluster.dataToCluster_selector=="genericData") and $dataToCluster.expressionData:

        ##start by selecting specific input data columns depending on user request
        ##(awk keeps column 1 plus every column whose header, read on the first line, equals one of the selected names, in selection order)
        #if $dataToCluster.dataToCluster_selector=="genericData" and $dataToCluster.columnToKeep:
          awk -v columns="$dataToCluster.columnToKeep" 'BEGIN{FS="\t";OFS="";ORS="";split(columns,columnsTab,",")} FNR==1{for(iColumn=1;iColumn<=length(columnsTab);iColumn++)for(iField=2;iField<=NF;iField++){if(\$iField==columnsTab[iColumn])colsToSelect[iColumn]=iField}} {line=\$1;for(iColumn=1;iColumn<=length(columnsTab);iColumn++)line=line"\t"\$colsToSelect[iColumn];print line"\n";}' '$dataToCluster.expressionData' > ./selectedExpressionData;
        #else
          cp '$dataToCluster.expressionData' ./selectedExpressionData;
        #end if

        ##reorder columns of input data based on factors file
        #if $dataToCluster.reorder_sample.reordering_selector=="factorFile" and $dataToCluster.reorder_sample.factorFileData and $dataToCluster.reorder_sample.factorToUse:
          ##for every non-header row of the factors file, keep column 1 followed by the chosen factor columns
          awk -v factors="$dataToCluster.reorder_sample.factorToUse" 'BEGIN{FS="\t";OFS="";ORS="";split(factors,factorsTab,",")} FNR==1{for(iFactor=1;iFactor<=length(factorsTab);iFactor++)for(iField=2;iField<=NF;iField++){if(\$iField==factorsTab[iFactor])colsToSelect[iFactor]=iField}} FNR>1{line=\$1;for(iFactor=1;iFactor<=length(factorsTab);iFactor++)line=line"\t"\$colsToSelect[iFactor];print line"\n";}' '$dataToCluster.reorder_sample.factorFileData' > ./orderingFactor;

          ##version-sort those rows on the key starting at field 2 (the factor values)
          sort -V -k2 ./orderingFactor > ./orderingSample;

          ##rebuild the data table with its columns following the sorted column-1 names; names absent from the data header are skipped (ARGIND is a gawk extension)
          awk 'BEGIN{FS="\t";OFS="";ORS="";factorNumber=0} ARGIND==1{sampleOrdered[FNR]=\$1;factorNumber=FNR} ARGIND==2 && FNR==1{for(iElemt=1;iElemt<=factorNumber;iElemt++)for(iPosit=2;iPosit<=NF;iPosit++)if(\$iPosit==sampleOrdered[iElemt])positOrdered[iElemt]=iPosit} ARGIND==2{line=\$1;for(iElemt=1;iElemt<=factorNumber;iElemt++)if(iElemt in positOrdered)line=line"\t"\$positOrdered[iElemt];print line"\n"}' ./orderingSample ./selectedExpressionData > ./orderedExpressionData;

          ##check if some input data columns were lost during the process
          awk 'ARGIND==1 && FNR==1{colNumbA=NF} ARGIND==2 && FNR==1{colNumbB=NF} END{if(colNumbA!=colNumbB) print "[WARNING] "colNumbA-colNumbB" input data columns was removed during reordering due to missing information in factor file!\n"}' ./selectedExpressionData ./orderedExpressionData >> '$log';

          #if $advSection.conditionClusterNumber!="1":
            printf "[WARNING]Sample clustering option is selected, sample reordering will not be preserved!\n" >> '$log';
          #end if
        #else:
          cp ./selectedExpressionData ./orderedExpressionData;
        #end if
      #end if


      ##generate common file name for differential analysis results depending on input data nature
      ##(the file comes from the filtering section in expression/genericData modes, from the main input in foldChange mode)
      ##dataset paths are single-quoted, like the Rscript arguments, so unusual characters in a path cannot split the cp arguments
      #if ($dataToCluster.dataToCluster_selector=="expression" or $dataToCluster.dataToCluster_selector=="genericData") and $dataToCluster.filtering_step.filtering_step_selector!="no" and $dataToCluster.filtering_step.select_filtering.filtering_stepBis_selector=="diffExpParam" and $dataToCluster.filtering_step.select_filtering.differentialAnalysis:
        cp '${dataToCluster.filtering_step.select_filtering.differentialAnalysis}' ./filteredDifferentialAnalysis;
      #end if
      #if $dataToCluster.dataToCluster_selector=="foldChange" and $dataToCluster.differentialAnalysis:
        cp '$dataToCluster.differentialAnalysis' ./filteredDifferentialAnalysis;
      #end if


      ##generate factor information to use for barplot
      ##(keeps column 1 and the column whose header equals the selected factor; result is written to ./barPlotFactor)
      ##the dataset path is single-quoted, like the Rscript arguments, so unusual characters in it cannot split the awk arguments
      #if $advSection.conditionBarColor.conditionBarColor_selector=="yes"  and $advSection.conditionBarColor.factorFileDataBarPlot and $advSection.conditionBarColor.factorToUse:
       awk -v factor="$advSection.conditionBarColor.factorToUse" 'BEGIN{FS="\t";OFS="";ORS=""} NR==1{for(i=2;i<=NF;i++)if(\$i==factor)colToKeep=i} {print \$1"\t"\$colToKeep"\n"}' '$advSection.conditionBarColor.factorFileDataBarPlot' > ./barPlotFactor;
      #end if

	    ## ---- Run the clustering R script; the options below are appended to this single command depending on the selected inputs ----
	    Rscript '$__tool_directory__/../../src/heatMapClustering.R' --log '$log' --outputFile '$outputData' --format '$advSection.imageFormat' --clusterNumber '$advSection.clusterNumber'
      ##change for Planemo test
      ##Rscript '$__tool_directory__/src/heatMapClustering.R' --log '$log' --outputFile '$outputData' --format '$advSection.imageFormat' --clusterNumber '$advSection.clusterNumber'
      --maxRows '$advSection.maxSampleToPlot' --sampleClusterNumber '$advSection.conditionClusterNumber' --dataTransformation '$advSection.dataTransformation' --distanceMeasure '$advSection.distanceMeasure' --aggloMethod '$advSection.aggloMethod'
      #if $advSection.select_color.specifyColors=="true":
      --personalColors '$advSection.select_color.featureMin_color,$advSection.select_color.featureMedium_color,$advSection.select_color.featureMax_color'
      #end if
      #if $advSection.conditionBarColor.conditionBarColor_selector=="yes" and $advSection.conditionBarColor.factorFileDataBarPlot and $advSection.conditionBarColor.factorToUse:
      --factorInfo './barPlotFactor'
      --sideBarColorPalette '$advSection.conditionBarColor.sideBarPalette'
      #end if
      #if $dataToCluster.dataToCluster_selector=="genericData":
      --genericData
      #end if
      #if $dataToCluster.dataToCluster_selector=="expression" or $dataToCluster.dataToCluster_selector=="genericData":
      --expressionFile './orderedExpressionData'
        #if $dataToCluster.filtering_step.filtering_step_selector!="no":
          --filterInputOutput '$dataToCluster.filtering_step.filtering_step_selector'
          #if $dataToCluster.filtering_step.select_filtering.filtering_stepBis_selector=="diffExpParam":
            --diffAnalyseFile './filteredDifferentialAnalysis'
            #if $dataToCluster.dataToCluster_selector=="expression":
              --comparisonName '$dataToCluster.filtering_step.select_filtering.comparisonsToInclude'
              --FCthreshold '$dataToCluster.filtering_step.select_filtering.FCthreshold'
              --pvalThreshold '$dataToCluster.filtering_step.select_filtering.pvalThreshold'
            #else:
              ##generic data: the low/high thresholds are passed through the FCthreshold/pvalThreshold options
              ##the optional thresholds are compared as strings because a plain truth test is false for 0, which would silently drop a filter whose threshold is 0
              #if $dataToCluster.filtering_step.select_filtering.comparisonsToIncludeLow and str($dataToCluster.filtering_step.select_filtering.valThresholdLow) != "":
                --comparisonNameLow '$dataToCluster.filtering_step.select_filtering.comparisonsToIncludeLow'
                --FCthreshold '$dataToCluster.filtering_step.select_filtering.valThresholdLow'
              #end if
              #if $dataToCluster.filtering_step.select_filtering.comparisonsToIncludeHigh and str($dataToCluster.filtering_step.select_filtering.valThresholdHigh) != "":
                --comparisonNameHigh '$dataToCluster.filtering_step.select_filtering.comparisonsToIncludeHigh'
                --pvalThreshold '$dataToCluster.filtering_step.select_filtering.valThresholdHigh'
              #end if
            #end if
          #else:
            --geneListFiltering '$dataToCluster.filtering_step.select_filtering.geneListFile'
          #end if
        #end if
      #else
       ##foldChange mode: the differential analysis file itself is clustered
       --diffAnalyseFile './filteredDifferentialAnalysis'
       --comparisonName '$dataToCluster.comparisonsToInclude'
        #if $dataToCluster.filtering_step.filtering_step_selector!="no":
          --filterInputOutput '$dataToCluster.filtering_step.filtering_step_selector'
          #if $dataToCluster.filtering_step.select_filtering.filtering_stepBis_selector=="diffExpParam":
            --FCthreshold '$dataToCluster.filtering_step.select_filtering.FCthreshold'
            --pvalThreshold '$dataToCluster.filtering_step.select_filtering.pvalThreshold'
          #else:
            --geneListFiltering '$dataToCluster.filtering_step.select_filtering.geneListFile'
          #end if
        #end if
      #end if
	   ;
	   ## stop right away when Rscript failed, propagating its exit status
	   rscript_status=\$?;
	   if [ \$rscript_status != 0 ]; then
	   	exit \$rscript_status;
	   fi;
	   ## build the HTML report; a failure there is propagated the same way
	   bash $scriptTransfer;
	   report_status=\$?;
	   if [ \$report_status != 0 ]; then
	   	exit \$report_status;
	   fi;
	   printf "[INFO]End of tool script" >> $log;
	]]>
  </command>


  <configfiles>
    <!-- scriptTableToHtml: shell script (rendered through Cheetah) that converts the tabular result
         $outputData into ${html_file.extra_files_path}/outputClustering.html, an HTML table driven by
         the jQuery DataTables library loaded from public CDNs.
         * When clustering differential-analysis results ("foldChange"), or "expression" data filtered on
           differential-expression parameters, the table gets a two-row header: columns from the 4th on
           are grouped by 5 under the name read on line 1, and line 2 supplies the sub-headers.
           Otherwise a single header row is built from line 1 (columns from the 3rd on).
         * Every row ends with an extra empty cell; the DataTables columnDefs option hides that last column.
         The script is loaded with "source" by the scriptTransfer configfile. -->
    <configfile name="scriptTableToHtml">
<![CDATA[
printf  "<!DOCTYPE html>
<html>
<head>
<meta http-equiv=\"Content-type\" content=\"text/html; charset=utf-8\">
<link rel=\"stylesheet\" type=\"text/css\" href=\"https://cdn.datatables.net/1.10.16/css/jquery.dataTables.min.css\">
<script type=\"text/javascript\" language=\"javascript\" src=\"https://code.jquery.com/jquery-1.12.4.js\">
</script>
<script type=\"text/javascript\" language=\"javascript\" src=\"https://cdn.datatables.net/1.10.16/js/jquery.dataTables.min.js\">
</script>
<script type=\"text/javascript\" class=\"init\">
\\$(document).ready(function() {
  \\$(\'\#example\').DataTable( {
        \"columnDefs\": [ {
            \"visible\": false,
            \"targets\": -1
        } ]
    } );
} );
</script>
</head>
<body style=\"background-color:white;\">
<table id=\"example\" class=\"display\" cellspacing=\"0\">
" > ${html_file.extra_files_path}/outputClustering.html

printf "<colgroup>\n" >> ${html_file.extra_files_path}/outputClustering.html
#if $dataToCluster.dataToCluster_selector=="foldChange" or ($dataToCluster.dataToCluster_selector=="expression" and $dataToCluster.filtering_step.filtering_step_selector!="no" and $dataToCluster.filtering_step.select_filtering.filtering_stepBis_selector=="diffExpParam"):
printf "<col span=\"2\" style=\"background-color:rgb(224,235,235)\">\n" >> ${html_file.extra_files_path}/outputClustering.html
printf "<col span=\"1\" style=\"background-color:rgb(250,235,235)\">\n" >> ${html_file.extra_files_path}/outputClustering.html
awk 'BEGIN{odd=1;FS="\t"} NR==1{for(i=4;i<=NF;i=i+5){if(odd==1){odd=0;printf "<col span=\"5\" style=\"background-color:rgb(224,238,255)\">\n"}else{odd=1;printf "<col span=\"5\" style=\"background-color:rgb(255,221,224)\">\n"}}}' $outputData >> ${html_file.extra_files_path}/outputClustering.html
#else
printf "<col span=\"1\" style=\"background-color:rgb(224,235,235)\">\n" >> ${html_file.extra_files_path}/outputClustering.html
printf "<col span=\"1\" style=\"background-color:rgb(250,235,235)\">\n" >> ${html_file.extra_files_path}/outputClustering.html
awk 'BEGIN{odd=1;FS="\t"} NR==1{for(i=3;i<=NF;i++){if(odd==1){odd=0;printf "<col span=\"1\" style=\"background-color:rgb(224,238,255)\">\n"}else{odd=1;printf "<col span=\"1\" style=\"background-color:rgb(255,221,224)\">\n"}}}' $outputData >> ${html_file.extra_files_path}/outputClustering.html
#end if

printf "</colgroup>\n" >> ${html_file.extra_files_path}/outputClustering.html
printf "<thead>\n<tr>\n" >> ${html_file.extra_files_path}/outputClustering.html

#if $dataToCluster.dataToCluster_selector=="foldChange" or ($dataToCluster.dataToCluster_selector=="expression" and $dataToCluster.filtering_step.filtering_step_selector!="no" and $dataToCluster.filtering_step.select_filtering.filtering_stepBis_selector=="diffExpParam"):
printf "<th rowspan=\"2\">Gene</th>\n" >> ${html_file.extra_files_path}/outputClustering.html
printf "<th rowspan=\"2\">Info</th>\n" >> ${html_file.extra_files_path}/outputClustering.html
printf "<th rowspan=\"2\">Cluster</th>\n" >> ${html_file.extra_files_path}/outputClustering.html
awk 'BEGIN{FS="\t"} NR==1{for(i=4;i<=NF;i=i+5)printf "<th colspan=\"5\">"\$i"</th>\n"}' $outputData >> ${html_file.extra_files_path}/outputClustering.html
printf "<th></th>\n" >> ${html_file.extra_files_path}/outputClustering.html
printf "</tr>\n<tr>\n" >> ${html_file.extra_files_path}/outputClustering.html
awk 'BEGIN{FS="\t"} NR==2{for(i=4;i<=NF;i++)printf "<th>"\$i"</th>\n"}' $outputData >> ${html_file.extra_files_path}/outputClustering.html
#else
printf "<th rowspan=\"1\">Gene</th>\n" >> ${html_file.extra_files_path}/outputClustering.html
printf "<th rowspan=\"1\">Cluster</th>\n" >> ${html_file.extra_files_path}/outputClustering.html
awk 'BEGIN{FS="\t"} NR==1{for(i=3;i<=NF;i++)printf "<th colspan=\"1\">"\$i"</th>\n"}' $outputData >> ${html_file.extra_files_path}/outputClustering.html
#end if

printf "<th></th>\n" >> ${html_file.extra_files_path}/outputClustering.html
printf "</tr>\n</thead>\n" >> ${html_file.extra_files_path}/outputClustering.html
printf "<tfoot>\n<tr>\n" >> ${html_file.extra_files_path}/outputClustering.html

#if $dataToCluster.dataToCluster_selector=="foldChange" or ($dataToCluster.dataToCluster_selector=="expression" and $dataToCluster.filtering_step.filtering_step_selector!="no" and $dataToCluster.filtering_step.select_filtering.filtering_stepBis_selector=="diffExpParam"):
awk 'BEGIN{FS="\t"} NR==2{for(i=1;i<=NF;i++)printf "<th>"\$i"</th>\n"}' $outputData >> ${html_file.extra_files_path}/outputClustering.html
#else
awk 'BEGIN{FS="\t"} NR==1{for(i=1;i<=NF;i++)printf "<th>"\$i"</th>\n"}' $outputData >> ${html_file.extra_files_path}/outputClustering.html
#end if

printf "<th></th>\n" >> ${html_file.extra_files_path}/outputClustering.html
printf "</tr>\n</tfoot>\n">> ${html_file.extra_files_path}/outputClustering.html
printf "<tbody>\n" >> ${html_file.extra_files_path}/outputClustering.html

#if $dataToCluster.dataToCluster_selector=="foldChange" or ($dataToCluster.dataToCluster_selector=="expression" and $dataToCluster.filtering_step.filtering_step_selector!="no" and $dataToCluster.filtering_step.select_filtering.filtering_stepBis_selector=="diffExpParam"):
awk 'BEGIN{FS="\t"} NR>2{printf "<tr>\n";for(i=1;i<=NF;i++){printf "<th>"\$i"</th>\n"};printf "<th></th>\n";printf "</tr>\n"}' $outputData >> ${html_file.extra_files_path}/outputClustering.html
#else
awk 'BEGIN{FS="\t"} NR>1{printf "<tr>\n";for(i=1;i<=NF;i++){printf "<th>"\$i"</th>\n"};printf "<th></th>\n";printf "</tr>\n"}' $outputData >> ${html_file.extra_files_path}/outputClustering.html
#end if

printf "</tbody>\n" >> ${html_file.extra_files_path}/outputClustering.html

printf "</table>
</body>
</html>" >> ${html_file.extra_files_path}/outputClustering.html

]]>
    </configfile>

    <!-- scriptTransfer: shell script (rendered through Cheetah) run with "bash" at the end of <command>
         to assemble the HTML report.
         1. Creates $html_file.extra_files_path and sources scriptTableToHtml; exits with code 10 if
            outputClustering.html was not produced.
         2. Writes $html_file as a small index page linking to the clustering table.
         3. Heatmap (mandatory, exit 10 if ./plotLyDir/Heatmap.html is missing) and scree plot (optional,
            only a warning in the log): rewrites the "*_files" resource folder name inside the HTML,
            optionally patches plotly-latest.min.js so snapshots are saved as svg and/or with the
            requested scale, then copies the HTML and its renamed resource folder next to the report.
         4. Circular plot (optional): copies ./plotDir/circularPlot.<imageFormat> and links it.
         Exit code 10 is mapped to a fatal error by the <stdio> section. -->
    <configfile name="scriptTransfer">
<![CDATA[

mkdir -p $html_file.extra_files_path


##create HTML file for clustering output table
source $scriptTableToHtml

##check outputClustering.html is here
if ! [ -e ${html_file.extra_files_path}/outputClustering.html ]; then
  printf "[ERROR]outputClustering.html is missing.\n" >> $log;
  exit 10
fi

#write header of html file
printf  "<!DOCTYPE html>\n<html>\n<body>"  > $html_file


##first add reference of the clustering output table
printf "<h3>Clustering tabular</h3>\n" >> $html_file
printf "<a href=\"outputClustering.html\">Clustering results</a>\n"  >> $html_file


##manage heatmap file


if [ -e ./plotLyDir/Heatmap.html ]; then

printf "<h3>Heatmap plot</h3>\n" >> $html_file

##modify HTML to point to the first script folder
sed -i "s/Heatmap_files/PlotLy_Heatmap_scripts/g" ./plotLyDir/Heatmap.html

##copy HTML files in both folders
cp ./plotLyDir/Heatmap.html ${html_file.extra_files_path}/Heatmap.html

##add HTML link
printf "<a href=\"Heatmap.html\">Heatmap</a>\n"  >> $html_file

#if $advSection.imagePlotlyFormat=="svg":
##before copying scripts folder modify them to replace png snapshot with svg (not proud of solution but seems to work)
cd ./plotLyDir/Heatmap_files/plotly-main-*/
awk '{gsub("\"png\"","\"svg\"",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js
awk '{gsub("Download plot as a png","Download plot as a svg",\$0);print \$0}' ./plotly-latest.minTemp.js > ./plotly-latest.min.js
rm ./plotly-latest.minTemp.js
cd ../../../
#end if

#if $advSection.scaleSnapshot!="1.0":
##before copying scripts folder modify scale parameter (not proud of solution but seems to work)
cd ./plotLyDir/Heatmap_files/plotly-main-*/
awk '{gsub("h=t\\.scale\\|\\|1","h=$advSection.scaleSnapshot",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js
mv -f ./plotly-latest.minTemp.js ./plotly-latest.min.js
cd ../../../
#end if

##now copy scripts folder
cp -r ./plotLyDir/Heatmap_files $html_file.extra_files_path
mv ${html_file.extra_files_path}/Heatmap_files ${html_file.extra_files_path}/PlotLy_Heatmap_scripts

else
  printf "[ERROR]Heatmap.html is missing.\n" >> $log;
  exit 10
fi


##manage screePlot files


if [ -e ./plotLyDir/screePlot.html ]; then

printf "<h3>Scree plot</h3>\n" >> $html_file

##modify HTML to point to the first script folder
sed -i "s/screePlot_files/PlotLy_screePlot_scripts/g" ./plotLyDir/screePlot.html

##copy HTML files in both folders
cp ./plotLyDir/screePlot.html ${html_file.extra_files_path}/screePlot.html

##add HTML link
printf "<a href=\"screePlot.html\">Scree plot</a>\n"  >> $html_file

#if $advSection.imagePlotlyFormat=="svg":
##before copying scripts folder modify them to replace png snapshot with svg (not proud of solution but seems to work)
cd ./plotLyDir/screePlot_files/plotly-main-*/
awk '{gsub("\"png\"","\"svg\"",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js
awk '{gsub("Download plot as a png","Download plot as a svg",\$0);print \$0}' ./plotly-latest.minTemp.js > ./plotly-latest.min.js
rm ./plotly-latest.minTemp.js
cd ../../../
#end if

#if $advSection.scaleSnapshot!="1.0":
##before copying scripts folder modify scale parameter (not proud of solution but seems to work)
cd ./plotLyDir/screePlot_files/plotly-main-*/
awk '{gsub("h=t\\.scale\\|\\|1","h=$advSection.scaleSnapshot",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js
mv -f ./plotly-latest.minTemp.js ./plotly-latest.min.js
cd ../../../
#end if

##now copy scripts folder
cp -r ./plotLyDir/screePlot_files $html_file.extra_files_path
mv ${html_file.extra_files_path}/screePlot_files ${html_file.extra_files_path}/PlotLy_screePlot_scripts

else
  printf "[WARNING]screeplot.html is missing, probably due to limited number of genes.\n" >> $log;
fi


##manage circular files


if [ -e ./plotDir/circularPlot.${advSection.imageFormat} ]; then

cp ./plotDir/circularPlot.${advSection.imageFormat} ${html_file.extra_files_path}/circularPlot.${advSection.imageFormat}

printf "<h3>Circular plot</h3>\n" >> $html_file

##add HTML link
printf "<a href=\"circularPlot.${advSection.imageFormat}\">Circular plot</a>\n"  >> $html_file

else
  printf "[WARNING]circularPlot file is missing, probably due to limited number of genes.\n" >> $log;
fi


##create footer of HTML file
printf  "</body>\n</html>" >> $html_file

]]>
    </configfile>
  </configfiles>


<inputs>
  <!-- Free-text title for the outputs.
       NOTE(review): not referenced in <command> or in the configfiles; presumably used by the
       output definitions further down. Confirm. -->
  <param name="title"
         type="text"
         value="Heatmap_toPersonalize"
         label="Title for output"/>

  <conditional name="dataToCluster">
    <!-- Top-level switch of the form: each option value selects the <when> branch of the same name. -->
    <param name="dataToCluster_selector"
           type="select"
           label="Data to cluster">
      <option value="expression" selected="true">Expression data</option>
      <option value="foldChange">Differential expression analysis results</option>
      <option value="genericData">Generic data table</option>
    </param>
    <!-- Expression mode: cluster a normalized expression table, optionally reordering samples from a
         factors file and filtering probes/genes before ("input") or after ("output") clustering.
         The "input" and "output" filtering branches expose the same parameter names on purpose:
         the command template reads filtering_step.select_filtering.* whichever branch is active. -->
    <when value="expression">

      <param type="data" name="expressionData" format="tabular" label="Normalized expression tabular file" multiple="false"/>

      <conditional name="reorder_sample">
        <param name="reordering_selector" type="select" label="Reorder samples">
          <option value="no" selected="true">No reordering</option>
          <option value="factorFile">Reorder sample based on a factors file</option>
        </param>
        <when value="factorFile">
          <param type="data" name="factorFileData" format="tabular" label="Factors file" multiple="false" help="Be sure the conditions clusters number is set to 1 in advanced parameters."/>
          <param name="factorToUse" type="select" optional="false" multiple="true" label="Select factor(s) to use for reordering" refresh_on_change="true"  dynamic_options="get_column_names(reorder_sample['factorFileData'].file_name,0)">
            <validator type="empty_field" message="You should specify at least one factor"/>
          </param>
        </when>
        <when value="no"/>
      </conditional>

      <conditional name="filtering_step">
        <param name="filtering_step_selector" type="select" label="Probes/genes filtering">
          <option value="no" selected="true">No filtering</option>
          <option value="input">Filter input probes/genes before clustering</option>
          <option value="output">Filter probes/genes after clustering (for tabular output)</option>
        </param>

        <when value="input">
          <conditional name="select_filtering">
            <param name="filtering_stepBis_selector" type="select" label="Filter">
              <option value="diffExpParam" selected="true">Based on differential expression results (FC and p-val)</option>
              <option value="geneList">From list of genes</option>
            </param>
            <when value="diffExpParam">
              <param type="data" name="differentialAnalysis" format="tabular" label="Differential analysis tabular file (as given by LIMMA diff.exp. tool)" optional="false" multiple="false"/>

              <param name="comparisonsToInclude" type="select" optional="false" multiple="true" label="Select comparisons to use for filtering" refresh_on_change="true"  dynamic_options="get_column_names_filteredList(select_filtering['differentialAnalysis'].file_name,[0,1],5)">
                <!-- message names what is actually selected here (comparisons, not factors) -->
                <validator type="empty_field" message="You should specify at least one comparison"/>
              </param>

              <param name="FCthreshold" type="float" value="2" label="Fold change threshold for input (both 'threshold' and '1/threshold' values will be used)" help="Minimum value is 1 (ie. all probes/genes are kept)">
                <!-- the bound is inclusive (exclude_min="false"), so 1 itself is accepted -->
                <validator type="in_range" min="1" exclude_min="false" message="Threshold should be greater than or equal to 1"/>
              </param>
              <param name="pvalThreshold" type="float" value="0.05" label="FDR p-val threshold for input" help="When several comparisons are selected a conservative rule is applied (see details below)">
                <validator type="in_range" min="0" max="1" message="Threshold should be between 0 and 1"/>
              </param>
            </when>
            <when value="geneList">
              <param type="data" format="tabular" name="geneListFile" label="List of genes to keep" multiple="false" help="Gene names should be the same as written in expression file"/>
            </when>
          </conditional>
        </when>

        <when value="output">
          <conditional name="select_filtering">
            <param name="filtering_stepBis_selector" type="select" label="Filter">
              <option value="diffExpParam" selected="true">Based on differential expression results (FC and p-val)</option>
              <option value="geneList">From list of genes</option>
            </param>
            <when value="diffExpParam">
              <param type="data" name="differentialAnalysis" format="tabular" label="Differential analysis tabular file (as given by LIMMA diff.exp. tool)" optional="false" multiple="false"/>

              <param name="comparisonsToInclude" type="select" optional="false" multiple="true" label="Select comparisons to use for filtering" refresh_on_change="true"  dynamic_options="get_column_names_filteredList(select_filtering['differentialAnalysis'].file_name,[0,1],5)">
                <validator type="empty_field" message="You should specify at least one comparison"/>
              </param>

              <param name="FCthreshold" type="float" value="2" label="Fold change threshold for output (both 'threshold' and '1/threshold' values will be used)" help="Minimum value is 1 (ie. all probes/genes are kept)">
                <validator type="in_range" min="1" exclude_min="false" message="Threshold should be greater than or equal to 1"/>
              </param>
              <param name="pvalThreshold" type="float" value="0.05" label="FDR p-val threshold for output" help="When several comparisons are selected a conservative rule is applied (see details below)">
                <validator type="in_range" min="0" max="1" message="Threshold should be between 0 and 1"/>
              </param>
            </when>
            <when value="geneList">
              <param type="data" format="tabular" name="geneListFile" label="List of genes to keep" multiple="false" help="Gene names should be the same as written in expression file"/>
            </when>
          </conditional>
        </when>

        <when value="no"/>
      </conditional>

    </when>

    <!-- Fold-change mode: the differential analysis table itself is clustered, restricted to the
         selected comparisons. Optional filtering uses either FC / p-value thresholds taken from
         that same table or a list of genes. The "input" and "output" branches expose the same
         parameter names on purpose: the command template reads filtering_step.select_filtering.*
         whichever branch is active. -->
    <when value="foldChange">

      <param type="data" name="differentialAnalysis" format="tabular" label="Differential analysis tabular file (as given by LIMMA diff.exp. tool)" optional="false" multiple="false"/>

      <param name="comparisonsToInclude" type="select" optional="false" multiple="true" label="Select comparisons to cluster" refresh_on_change="true"  dynamic_options="get_column_names_filteredList(dataToCluster['differentialAnalysis'].file_name,[0,1],5)">
        <!-- message names what is actually selected here (comparisons, not factors) -->
        <validator type="empty_field" message="You should specify at least one comparison"/>
      </param>

      <conditional name="filtering_step">
        <param name="filtering_step_selector" type="select" label="Probes/genes filtering">
          <option value="no" selected="true">No filtering</option>
          <option value="input">Filter input probes/genes before clustering</option>
          <option value="output">Filter probes/genes only in tabular output file</option>
        </param>

        <when value="input">
          <conditional name="select_filtering">
            <param name="filtering_stepBis_selector" type="select" label="Filter">
              <option value="diffExpParam" selected="true">Based on differential expression results (FC and p-val)</option>
              <option value="geneList">From list of genes</option>
            </param>
            <when value="diffExpParam">
              <param name="FCthreshold" type="float" value="2" label="Fold change threshold for input (both 'threshold' and '1/threshold' values will be used)" help="Minimum value is 1 (ie. all probes/genes are kept)">
                <!-- the bound is inclusive (exclude_min="false"), so 1 itself is accepted -->
                <validator type="in_range" min="1" exclude_min="false" message="FC threshold should be greater than or equal to 1"/>
              </param>
              <param name="pvalThreshold" type="float" value="0.05" label="FDR p-val threshold for input" help="When several comparisons are selected a conservative rule is applied (see details below)">
                <validator type="in_range" min="0" max="1" message="Threshold should be between 0 and 1"/>
              </param>
            </when>
            <when value="geneList">
              <param type="data" format="tabular" name="geneListFile" label="List of genes to keep" multiple="false" help="Gene names should be the same as written in diff. exp. analysis file"/>
            </when>
          </conditional>
        </when>

        <when value="output">
          <conditional name="select_filtering">
            <param name="filtering_stepBis_selector" type="select" label="Filter">
              <option value="diffExpParam" selected="true">Based on diff. exp. parameters (FC and p-val)</option>
              <option value="geneList">From list of genes</option>
            </param>
            <when value="diffExpParam">
              <param name="FCthreshold" type="float" value="2" label="Fold change threshold for output (both 'threshold' and '1/threshold' values will be used)" help="Minimum value is 1 (ie. all probes/genes are kept)">
                <validator type="in_range" min="1" exclude_min="false" message="Threshold should be greater than or equal to 1"/>
              </param>
              <param name="pvalThreshold" type="float" value="0.05" label="FDR p-val threshold for output" help="When several comparisons are selected a conservative rule is applied (see details below)">
                <validator type="in_range" min="0" max="1" message="Threshold should be between 0 and 1"/>
              </param>
            </when>
            <when value="geneList">
              <param type="data" format="tabular" name="geneListFile" label="List of genes to keep" multiple="false" help="Gene names should be the same as written in diff. exp. analysis file"/>
            </when>
          </conditional>
        </when>

        <when value="no"/>
      </conditional>

    </when>

    <when value="genericData">

      <!-- Generic data mode: any tabular file; the user picks the columns to cluster and may
           reorder them from a factors file (same mechanism as in expression mode). -->
      <param type="data" name="expressionData" format="tabular" label="Generic tabular file" multiple="false"/>

      <param name="columnToKeep" type="select" optional="false" multiple="true" label="Select column to cluster" refresh_on_change="true"  dynamic_options="get_column_names_filteredList(dataToCluster['expressionData'].file_name,[0])">
        <validator type="empty_field" message="You should select at least one column"/>
      </param>

      <conditional name="reorder_sample">
        <param name="reordering_selector" type="select" label="Reorder columns">
          <option value="no" selected="true">No reordering</option>
          <option value="factorFile">Reorder columns based on a factors file</option>
        </param>
        <when value="factorFile">
          <param type="data" name="factorFileData" format="tabular" label="Factors file" multiple="false" help="Be sure the conditions clusters number is set to 1 in advanced parameters."/>
          <param name="factorToUse" type="select" optional="false" multiple="true" label="Select factor(s) to use for reordering" refresh_on_change="true"  dynamic_options="get_column_names(reorder_sample['factorFileData'].file_name,0)">
            <validator type="empty_field" message="You should specify at least one factor"/>
          </param>
        </when>
        <when value="no"/>
      </conditional>

      <conditional name="filtering_step">
        <param name="filtering_step_selector" type="select" label="Probes/genes filtering">
            <option value="no" selected="true">No filtering</option>
            <option value="input">Filter input probes/genes before clustering</option>
            <option value="output">Filter probes/genes after clustering (for tabular output)</option>
        </param>
        <when value="input">
          <conditional name="select_filtering">
            <param name="filtering_stepBis_selector" type="select" label="Filter">
              <option value="diffExpParam" selected="true">Based on tabular file content</option>
              <option value="geneList">From list of genes</option>
            </param>
            <when value="diffExpParam">
              <param type="data" name="differentialAnalysis" format="tabular" label="Tabular file containing filtering information" optional="false" multiple="false">
              </param>

              <param name="comparisonsToIncludeLow" type="select" optional="true" multiple="true" label="Select columns to consider for low filtering (keeping rows with higher value than a low threshold, ae. FC)" refresh_on_change="true"  dynamic_options="get_column_names_filteredList(select_filtering['differentialAnalysis'].file_name,[0])">
              </param>

              <param name="valThresholdLow" type="float" value="0.0" optional="true"  label="Low filtering threshold" help="When several comparisons are selected a conservative rule is applied (see details below)">
              </param>

             <param name="comparisonsToIncludeHigh" type="select" optional="true" multiple="true" label="Select columns to consider for high filtering (keeping rows with lower value than a high threshold, ae. p-value)" refresh_on_change="true"  dynamic_options="get_column_names_filteredList(select_filtering['differentialAnalysis'].file_name,[0])">
              </param>

              <param name="valThresholdHigh" type="float" value="0.0" optional="true"  label="High filtering threshold" help="When several columns are selected a conservative rule is applied (see details below)" >
              </param>
            </when>
            <when value="geneList">
              <param type="data" format="tabular" name="geneListFile" label="List of genes to keep" multiple="false" help="Gene names should be the same as written in input file"/>
            </when>
          </conditional>
        </when>

        <when value="output">
          <conditional name="select_filtering">
            <param name="filtering_stepBis_selector" type="select" label="Filter">
              <option value="diffExpParam" selected="true">Based on tabular file content</option>
              <option value="geneList">From list of genes</option>
            </param>
            <when value="diffExpParam">
              <param type="data" name="differentialAnalysis" format="tabular" label="Tabular file containing filtering information" optional="false" multiple="false">
              </param>

              <param name="comparisonsToIncludeLow" type="select" optional="true" multiple="true" label="Select columns to consider for low filtering (keeping rows with higher value than a low threshold, ae. FC)" refresh_on_change="true"  dynamic_options="get_column_names_filteredList(select_filtering['differentialAnalysis'].file_name,[0])">
              </param>

              <param name="valThresholdLow" type="float" value="0.0" optional="true" label="Low filtering threshold" help="When several comparisons are selected a conservative rule is applied (see details below)">
              </param>

             <param name="comparisonsToIncludeHigh" type="select" optional="true" multiple="true" label="Select columns to consider for high filtering (keeping rows with lower value than a high threshold, ae. p-value)" refresh_on_change="true"  dynamic_options="get_column_names_filteredList(select_filtering['differentialAnalysis'].file_name,[0])">
              </param>

              <param name="valThresholdHigh" type="float" value="0.0" optional="true"  label="High filtering threshold" help="When several columns are selected a conservative rule is applied (see details below)" >
              </param>
            </when>
            <when value="geneList">
              <param type="data" format="tabular" name="geneListFile" label="List of genes to keep" multiple="false" help="Gene names should be the same as written in input file"/>
            </when>
          </conditional>
        </when>
        <when value="no">
        </when>
      </conditional>

    </when>
  </conditional>

	<section name="advSection" title="Advanced parameters" expanded="false">

    <!-- Number of row (gene) clusters; at least 2. -->
    <param name="clusterNumber" type="integer" value="5" label="Requested number of genes clusters" help="Use scree plot to adjust the number of genes clusters">
      <validator type="in_range" min="2" message="Cluster number should be greater than 1"/>
    </param>

    <!-- Number of column (condition) clusters; 1 disables column clustering. -->
    <param name="conditionClusterNumber" type="integer" value="1" label="Requested number of conditions clusters (1 = no clustering)">
      <validator type="in_range" min="1" message="Cluster number should be greater than 0"/>
    </param>

    <param name="dataTransformation" type="select" label="Apply mathematical transformation to data before clustering">
      <option value="no" selected="true">No</option>
      <option value="log">Natural Logarithm</option>
      <option value="log2">Base 2 Logarithm</option>
    </param>

    <param name="distanceMeasure" type="select" label="Distance measure used for clustering">
      <option value="euclidean" selected="true">euclidean</option>
      <option value="manhattan">manhattan</option>
      <option value="binary">binary</option>
      <option value="pearson">pearson</option>
      <option value="spearman">spearman</option>
      <option value="kendall">kendall</option>
      <option value="absPearson">absolute pearson</option>
      <option value="absSpearman">absolute spearman</option>
      <option value="absKendall">absolute kendall</option>
    </param>

    <param name="aggloMethod" type="select" label="Agglomeration method used for clustering" help="See documentation of 'hclust' R method for more information">
      <option value="complete">complete</option>
      <option value="median">median</option>
      <option value="centroid">centroid</option>
      <option value="average">average</option>
      <option value="single">single</option>
      <option value="mcquitty">mcquitty</option>
      <option value="ward.D">ward1</option>
      <option value="ward.D2" selected="true">ward2</option>
    </param>

    <!-- Optional colored side bar built from one factor of a factors file. -->
    <conditional name="conditionBarColor">
      <param name="conditionBarColor_selector" type="select" label="Add side bar color for samples/comparisons">
        <option value="no" selected="true">No</option>
        <option value="yes">Yes please</option>
      </param>
      <when value="yes">
        <param type="data" name="factorFileDataBarPlot" format="tabular" label="Factors file" multiple="false" help="Available only for expression data clustering"/>
        <param name="factorToUse" type="select" optional="false" multiple="false" label="Select factor to use for coloring side bar" refresh_on_change="true" dynamic_options="get_column_names(conditionBarColor['factorFileDataBarPlot'].file_name,0)">
          <validator type="empty_field" message="You should specify one factor"/>
        </param>
        <param name="sideBarPalette" type="select" label="Side bar color palette">
          <option value="Spectral" selected="true">Spectral</option>
          <option value="Set1">Set1</option>
          <option value="Set2">Set2</option>
          <option value="Set3">Set3</option>
          <option value="RdYlBu">RdYlBu</option>
          <option value="RdYlGn">RdYlGn</option>
          <option value="PiYG">PiYG</option>
        </param>
      </when>
      <when value="no"/>
    </conditional>

    <!-- Upper bound on the number of rows (genes) drawn in the plots. -->
    <param name="maxSampleToPlot" type="integer" value="1000" label="Maximum gene number to plot">
      <validator type="in_range" min="2" message="The number should be greater than 1"/>
    </param>

    <!-- Optional user-defined three-color gradient for the heatmap. -->
    <conditional name="select_color">
      <param type="boolean" name="specifyColors" checked="false" label="Personalized heatmap colors"/>
      <when value="true">
        <param name="featureMin_color" type="color" label="Min value color" value="#ff00ff"/>
        <param name="featureMedium_color" type="color" label="Medium value color" value="#4455ff"/>
        <param name="featureMax_color" type="color" label="Max value color" value="#00ffff"/>
      </when>
      <when value="false"/>
    </conditional>

    <param type="select" name="imageFormat" display="radio" label="Output format">
      <option value="png">PNG format</option>
      <option value="pdf">PDF format</option>
    </param>

    <param type="select" name="imagePlotlyFormat" display="radio" label="Html snapshot format">
      <option value="png">PNG format</option>
      <option value="svg">SVG format</option>
    </param>

    <!-- The lower bound is inclusive (exclude_min="false"), so 1.0 itself is accepted; the message states this. -->
    <param name="scaleSnapshot" type="float" value="1.0" label="Scale html snapshots to increase resolution" help="Minimum value is 1.0 (default resolution)">
      <validator type="in_range" min="1.0" exclude_min="false" message="Scale should be greater than or equal to 1"/>
    </param>
  </section>

  </inputs>


  <outputs>
    <!-- Tabular file with the clustering results. -->
    <data name="outputData" format="tabular" label="${title}_ClusteringResults"/>

    <!-- HTML report. -->
    <data name="html_file" format="html" label="${title}_HTML.html"/>

    <!-- Disabled: collection gathering heatmap, scree plot and circular plot files from "plotDir".
    <collection name="outputHeatmap" label="${title}_Heatmap" type="list">
      <discover_datasets pattern="(?P&lt;designation&gt;Heatmap.*)\.(?P&lt;ext&gt;[^\._]+)?" directory="plotDir" visible="false"/>
      <discover_datasets pattern="(?P&lt;designation&gt;screePlot.*)\.(?P&lt;ext&gt;[^\._]+)?" directory="plotDir" visible="false"/>
      <discover_datasets pattern="(?P&lt;designation&gt;circularPlot.*)\.(?P&lt;ext&gt;[^\._]+)?" directory="plotDir" visible="false"/>
    </collection>
    -->

    <!-- Execution log. -->
    <data name="log" format="txt" label="${title}_Log"/>
  </outputs>


 <tests>
   <!-- Expression-data clustering with input filtering on a differential analysis file
        and a colored side bar taken from a factors file. -->
   <test maxseconds="3600">
     <conditional name="dataToCluster">
       <param name="dataToCluster_selector" value="expression"/>
       <param name="expressionData" value="./NormalizedData.tabular"/>
       <conditional name="reorder_sample">
         <param name="reordering_selector" value="no"/>
       </conditional>
       <conditional name="filtering_step">
         <param name="filtering_step_selector" value="input"/>
         <conditional name="select_filtering">
           <param name="filtering_stepBis_selector" value="diffExpParam"/>
           <param name="differentialAnalysis" value="./DiffExprLimma/output/outputStat.csv"/>
           <!-- NOTE(review): the three parameters below belong to the "expression" branch, which is
                declared earlier in the file; confirm that the names still match that declaration. -->
           <param name="comparisonsToInclude" value="TreatVsControl"/>
           <param name="FCthreshold" value="2.0"/>
           <param name="pvalThreshold" value="0.05"/>
         </conditional>
       </conditional>
     </conditional>

     <section name="advSection">
       <param name="clusterNumber" value="5"/>
       <param name="conditionClusterNumber" value="3"/>
       <param name="dataTransformation" value="no"/>
       <param name="distanceMeasure" value="euclidean"/>
       <param name="aggloMethod" value="ward.D2"/>
       <conditional name="conditionBarColor">
         <param name="conditionBarColor_selector" value="yes"/>
         <param name="factorFileDataBarPlot" value="./FactorFileGenerator/output/conditionsFile.csv"/>
         <param name="factorToUse" value="Treatment"/>
         <param name="sideBarPalette" value="Spectral"/>
       </conditional>
       <param name="maxSampleToPlot" value="1000"/>
       <conditional name="select_color">
         <param name="specifyColors" value="false"/>
       </conditional>
       <param name="imageFormat" value="png"/>
       <param name="imagePlotlyFormat" value="png"/>
       <param name="scaleSnapshot" value="1.0"/>
     </section>

     <!-- Expected outputs; the log is only compared by size, within a 10% tolerance. -->
     <output name="outputData" file="./ExprHeatmapClustering/output/outputResults.csv"/>
     <output name="html_file" file="./ExprHeatmapClustering/output/outputHTML.zip" decompress="true"/>
     <output name="log" file="./ExprHeatmapClustering/output/outputLog.txt" compare="sim_size" delta_frac="0.10"/>
   </test>
 </tests>


  <help>
  	<![CDATA[
**What it does**

Run hierarchical clustering on gene expression data or differential expression analysis (from arrays and RNA-seq studies) and display the corresponding heatmap.

-----

**Parameters**

\- **Title** to personalize output file names (please avoid special characters).

\- **Data to cluster**, genes can be clustered based on : expression data, results from differential analysis tool or any tabular file content.


- **Expression data** with samples as columns and genes as rows (header row contains sample names and first column gene identifiers).

    ::

        Conditions  157_(HuGene-2_0-st).CEL 156_(HuGene-2_0-st).CEL  155_(HuGene-2_0-st).CEL    154_(HuGene-2_0-st).CEL
        DDX11L2     4.500872                4.429759                 4.780281                   4.996189
        MIR1302-2   3.415065                3.520472                 3.471503                   3.567988
        OR4F5       3.737956                3.011586                 3.424494                   3.497545
        VWA1        5.189621                5.129595                 4.806793                   5.227014


- **Differential expression analysis results** with contrasts statistics (p-val, FDR p-val, FC, log2(FC) and t-statistic) as columns and genes as rows (first and second rows contain comparison definition and first and second columns contain gene identifiers and functional information). Please respect the GIANT-Differential Expression Analysis tool output format.

    ::

        LIMMA    comparison  WT*Treat  WT*Treat  WT*Treat  WT*Treat  WT*Treat
        Gene     Info        p-val     FDR.p-val FC        log2(FC)  t-stat
        ARSD     na          0.0057    0.41      0.8389   -0.2534   -5.175
        TTTY10   na          1.6e-07   0.0074    0.6403   -0.6432   -6.122
        MIR548AL na          0.072     0.2914    1.711     0.775     10.43

    \- **Comparisons to cluster** when clustering is performed on differential results, log2(FC) values of selected comparisons will be used.

- **Generic tabular data** with samples as columns and genes as rows (header row contains sample names and first column gene identifiers).

    ::

        Conditions  SampleA                 SampleB                  SampleC                    SampleD
        DDX11L2     4.500872                4.429759                 4.780281                   4.996189
        MIR1302-2   3.415065                3.520472                 3.471503                   3.567988
        OR4F5       3.737956                3.011586                 3.424494                   3.497545
        VWA1        5.189621                5.129595                 4.806793                   5.227014

    \- **Samples to cluster** when clustering is performed on generic data, the user has to select the columns to consider in clustering (first column, containing gene identifiers, will be automatically selected).


\- **Reorder samples** (only available for expression and generic data clustering).

- **Based on a factors file**, samples will be sorted in an alphabetical/numerical order for the selected factors. Names in the 1st column of the factors file have to match with the columns names of the data to cluster.

    ::

        Conditions                Sex   Treatment Reaction
        154_(HuGene-2_0-st).CEL   1     TreatA    Pos
        156_(HuGene-2_0-st).CEL   0     NoTreat   Pos
        157_(HuGene-2_0-st).CEL   0     TreatB    Neg
        155_(HuGene-2_0-st).CEL   0     NoTreat   Neg

\- **Genes filtering** can be applied before or after clustering step.

- **Filtering before clustering** allows restricting clustering to differentially expressed genes using differential analysis results (available for expression data and differential results clustering) or any generic file (available for generic data clustering). As an alternative, a specific gene list file can be directly used for filtering.

- **Filtering after clustering** will have no effect on clustering or generated heatmaps. This filter is only applied to generated tabular files to keep differentially expressed genes (using differential analysis file or any generic file) or specific user defined genes (using gene list file).

\- **Filter approaches** : three filtering strategies can be applied before/after clustering depending on the nature of clustered data. These strategies use : differential analysis results (available for expression data and differential results clustering), generic file content (available for generic data clustering) or a gene list file (available for any input data).

- **From differential analysis results** to filter genes based on fold change and FDR p-val for selected comparisons.

    \- **Differential expression results file** is requested only for expression data clustering. For differential results clustering, the same differential results file selected as "data to cluster" is used (see "Data to cluster" section for requested format).

    \- **Comparisons to use** are requested only for expression data clustering. For differential results clustering, the same comparisons selected in "data to cluster" section will be used. If several comparisons are selected, genes that satisfy both fold change and FDR p-val thresholds in at least one of these comparisons are kept.

    \- **Fold change threshold** to use for filtering, genes with fold change >= threshold or fold change <= 1/threshold will be kept (set this threshold to 1 if you do not want to filter on fold change).

    \- **FDR p-val threshold** to use for filtering, genes with FDR p-val <= threshold will be kept (set this threshold to 1 if you do not want to filter on FDR p-val).


- **From generic tabular file** to filter genes based on selected columns values.

    \- **Generic tabular file** contains genes in the first column and various information used for filtering in the following columns (same format as clustered generic tabular file).

    \- **Low filtering columns** used to discard rows with values below a given threshold (typically for Fold Change filtering). If several columns are selected, rows satisfying threshold condition in at least one of these columns are kept.

    \- **Low filtering threshold** below which the rows are discarded, the same threshold is applied for all selected columns.

    \- **High filtering columns** used to discard rows with values above a given threshold (typically for p-value filtering). If several columns are selected, rows satisfying threshold condition in at least one of these columns are kept.

    \- **High filtering threshold** above which the rows are discarded, the same threshold is applied for all selected columns.

- **From list of genes** to focus on pre-identified genes.

    \- **Gene list file** with genes identifiers as one column file without header.

    ::

        DDX11L2
        VWA1
        TTTY10
        ARSD

-----

**Advanced parameters**

\- **Genes cluster number** used by hierarchical clustering (minimum is 2). See generated screeplot to adjust this number before re-running a clustering.

\- **Samples/comparisons clusters number** used by hierarchical clustering applied on columns/conditions. Set to 1 (ie. no clustering) if you need to conserve input columns order for visualization purposes. Columns clusters information is not included (yet) in output tabular file.

\- **Mathematical transformation** can be applied to clustered data before clustering and visualization. Data used for the filtering step are not modified by this transformation.

\- **Distance measure** used to cluster rows and columns. For "euclidean", "manhattan" and "binary" distances the 'dist' R function is directly called, for "pearson", "spearman" and "kendall" distances the '(1-correlation)/2' term is used as a classical distance, for "absPearson", "absSpearman" and "absKendall" the 'abs(1-correlation)' term is used.

\- **Agglomeration method** used to cluster rows and columns.

\- **Add side bar** to visualize factor values for displayed columns/conditions, represented as a colored side bar in the heatmap.

- **Factor file** that contains factor information for coloring (same format as the factor file used for input data columns reordering).

- **Factor to use** to color side bar depending on its values for displayed columns/conditions.

- **Color palette used** for coloring factor values (see RColorBrewer R package documentation for more information on proposed palettes).

\- **Maximum gene number** : for readability and running time considerations only, number of displayed rows (genes) in heatmaps/circular plot can be limited. Clustering information in generated tabular file and scree plot are computed from a global clustering considering all genes (except those filtered out before clustering). Heatmap and circular plot are displayed for a random gene selection; to avoid such random selection we advise you to use the input filtering option before clustering to have a gene number below this limit.

\- **Personalized heatmap colors** to build color gradient choosing start, middle and end colors.

\- **Output format** for circular plots only.

\- **Html snapshot format** for interactive plotly plots.

\- **Scale html snapshots** to increase resolution of snapshots taken from interactive plotly plots.

-----

**Outputs**

\- **Tabular clustering file** containing cluster information for each gene satisfying filtering steps. If expression or generic data were clustered, a two-column file is generated with gene identifiers and cluster numbers, with possibly additional columns containing information used for filtering. If differential results were clustered, a similar file is returned with an additional column containing cluster numbers and differential statistics corresponding to comparisons used for filtering.

\- **HTML file** to access interactive version of heatmap and screeplot through PlotLy html pages, circular plot image and tabulated clustering results. As a reminder, when the number of genes to display in heatmap/circular plot exceeds the maximum gene number parameter, a random sampling is performed for plotting efficiency. Thus, clustering displayed on heatmap/circular plot may slightly differ from clustering information contained in tabular file as heatmap/circular plot clustering is done over a subset of genes whereas tabular file contains clustering results performed on all genes.

\- **LOG file** containing information about execution. Useful especially if tool execution fails. Please attach this log file in any bug report.

]]>
  </help>
 <citations>
  <!-- GIANT tool suite. -->
  <citation type="bibtex">@misc{vandel_jimmy_2018_1477870,
    author = {Vandel, J. and Gheeraert, C. and Eeckhoute, J. and Staels, B. and Lefebvre, P. and Dubois-Chevalier, J.},
    title = {GIANT: Galaxy-based Interactive tools for ANalysis of Transcriptomic data},
    month = nov,
    year = 2018,
    doi = {10.5281/zenodo.1477870},
    url = {https://doi.org/10.5281/zenodo.1477870}
  }</citation>

  <!-- heatmaply R package; a citation key is given because BibTeX entries need one. -->
  <citation type="bibtex">@article{galili2017heatmaply,
    author = {Galili, Tal and O'Callaghan, Alan and
      Sidi, Jonathan and Sievert, Carson},
    title = {heatmaply: an R package for creating interactive cluster
      heatmaps for online publishing},
    journal = {Bioinformatics},
    year = {2017},
    doi = {10.1093/bioinformatics/btx657},
    url = {http://dx.doi.org/10.1093/bioinformatics/btx657},
    eprint =
      {https://academic.oup.com/bioinformatics/article-pdf/doi/10.1093/bioinformatics/btx657/21358327/btx657.pdf}
  }</citation>

  <!-- circlize R package. -->
  <citation type="bibtex">@article{doi:10.1093/bioinformatics/btu393,
    author = {Gu, Zuguang and Gu, Lei and Eils, Roland and Schlesner, Matthias and Brors, Benedikt},
    title = {circlize implements and enhances circular visualization in R },
    journal = {Bioinformatics},
    volume = {30},
    number = {19},
    pages = {2811-2812},
    year = {2014},
    doi = {10.1093/bioinformatics/btu393},
    URL = {http://dx.doi.org/10.1093/bioinformatics/btu393},
    eprint = {/oup/backfile/content_public/journal/bioinformatics/30/19/10.1093_bioinformatics_btu393/2/btu393.pdf}
  }</citation>

  <!-- Plotly. -->
  <citation type="bibtex">@online{plotly,
    author = {Plotly Technologies Inc.},
    title = {Collaborative data science},
    publisher = {Plotly Technologies Inc.},
    address = {Montreal, QC},
    year = {2015},
    url = {https://plot.ly}
  }</citation>

 </citations>

</tool>
author	vandelj
date	Tue, 15 Sep 2020 15:54:23 +0000
parents	0b09345fa632
children	d75a74a93587