Mercurial > repos > iuc > calculate_contrast_threshold

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/calculate_contrast_threshold.py	Wed Sep 11 09:28:55 2019 -0400
@@ -0,0 +1,186 @@
+#!/usr/bin/python
+
+import getopt
+import math
+import sys
+
+import numpy as np
+
+"""
+Program to calculate the contrast thresholds for heatmap from tagPileUp CDT
+"""
+
+
+def rebin(a, new_shape):
+    M, N = a.shape
+    m, n = new_shape
+    if m >= M:
+        # repeat rows in data matrix
+        a = np.repeat(a, math.ceil(float(m) / M), axis=0)
+
+    M, N = a.shape
+    m, n = new_shape
+
+    row_delete_num = M % m
+    col_delete_num = N % n
+
+    np.random.seed(seed=0)
+
+    if row_delete_num > 0:
+        # select deleted rows with equal intervals
+        row_delete = np.linspace(0, M - 1, num=row_delete_num, dtype=int)
+        # sort the random selected deleted row ids
+        row_delete = np.sort(row_delete)
+        row_delete_plus1 = row_delete[1:-1] + \
+            1  # get deleted rows plus position
+        # get deleted rows plus position (top +1; end -1)
+        row_delete_plus1 = np.append(
+            np.append(row_delete[0] + 1, row_delete_plus1), row_delete[-1] - 1)
+        # put the info of deleted rows into the next rows by mean
+        a[row_delete_plus1, :] = (
+            a[row_delete, :] + a[row_delete_plus1, :]) / 2
+        a = np.delete(a, row_delete, axis=0)  # random remove rows
+
+    if col_delete_num > 0:
+        # select deleted cols with equal intervals
+        col_delete = np.linspace(0, N - 1, num=col_delete_num, dtype=int)
+        # sort the random selected deleted col ids
+        col_delete = np.sort(col_delete)
+        col_delete_plus1 = col_delete[1:-1] + \
+            1  # get deleted cols plus position
+        # get deleted cols plus position (top +1; end -1)
+        col_delete_plus1 = np.append(
+            np.append(col_delete[0] + 1, col_delete_plus1), col_delete[-1] - 1)
+        # put the info of deleted cols into the next cols by mean
+        a[:, col_delete_plus1] = (
+            a[:, col_delete] + a[:, col_delete_plus1]) / 2
+        a = np.delete(a, col_delete, axis=1)  # random remove columns
+
+    M, N = a.shape
+
+    # compare the heatmap matrix
+    a_compress = a.reshape((m, int(M / m), n, int(N / n))).mean(3).mean(1)
+    return np.array(a_compress)
+
+
+def load_Data(input_file, quantile, absolute, header, start_col, row_num, col_num, min_upper_lim):
+    data0 = []
+    with open(input_file, 'r') as data:
+        if header == 'T':
+            data.readline()
+
+        for rec in data:
+            tmp = [(x.strip()) for x in rec.split('\t')]
+            data0.append(tmp[start_col:])
+        data0 = np.array(data0, dtype=float)
+
+    if row_num == -999:
+        row_num = data0.shape[0]
+    if col_num == -999:
+        col_num = data0.shape[1]
+
+    # rebin data0
+    if row_num < data0.shape[0] and col_num < data0.shape[1]:
+        data0 = rebin(data0, (row_num, col_num))
+    elif row_num < data0.shape[0]:
+        data0 = rebin(data0, (row_num, data0.shape[1]))
+    elif col_num < data0.shape[1]:
+        data0 = rebin(data0, (data0.shape[0], col_num))
+
+    # Calculate contrast limits here
+    rows, cols = np.nonzero(data0)
+    upper_lim = np.percentile(data0[rows, cols], quantile)
+    lower_lim = 0
+    if absolute != -999:
+        upper_lim = absolute
+
+    # Setting an absolute threshold to a minimum,
+    # in cases the 95th percentile contrast is <= user defined min_upper_lim
+    if quantile > 0.0:
+        print("\nQUANTILE: {}".format(quantile))
+        print("Quantile calculated UPPER LIM: {}".format(upper_lim))
+        print("LOWER LIM: {}".format(lower_lim))
+        if upper_lim <= min_upper_lim:
+            print("setting heatmap upper_threshold to min_upper_lim\n")
+            upper_lim = min_upper_lim
+
+    outfile = open('calcThreshold.txt', 'w')
+    outfile.write("upper_threshold:{}\nlower_threshold:{}\nrow_num:{}\ncol_num:{}\nheader:{}\nstart_col:{}".format(
+        upper_lim, lower_lim, row_num, col_num, header, start_col))
+    print('heatmap_upper_threshold:' + str(upper_lim))
+    print('heatmap_lower_threshold:' + str(lower_lim))
+    outfile.flush()
+    outfile.close()
+
+
+############################################################################
+# python cdt_to_heatmap.py -i test.tabular.split_line -o test.tabular.split_line.png -q 0.9 -c black -d T -s 2 -r 500 -l 300 -b test.colorsplit
+############################################################################
+usage = """
+Usage:
+This script calculates the contrast thresholds from Tag pile up heatmap data. Outputs a text file that contains the parameters for the heatmap script.
+
+python calculateThreshold.py -i <input file> -q <quantile> -m <min upper thresold after quantile calculation> -t <absolute tag threshold> -d <header T/F> -s <start column> -r <row num after compress> -l <col num after compress>'
+
+Example:
+python calculateThreshold.py -i test.tabular.split_line -q 90 -m 5 -d T -s 2 -r 600 -l 300
+"""
+
+if __name__ == '__main__':
+
+    # check for command line arguments
+    if len(sys.argv) < 2 or not sys.argv[1].startswith("-"):
+        sys.exit(usage)
+    # get arguments
+    try:
+        optlist, alist = getopt.getopt(sys.argv[1:], 'hi:o:q:t:c:d:s:r:l:m:')
+    except getopt.GetoptError:
+        sys.exit(usage)
+
+    # default quantile contrast saturation = 0.9
+    quantile = 90.0
+    min_upper_lim = 5.0
+    # absolute contrast saturation overrides quantile
+    absolute = -999
+
+    # default figure width/height is defined by matrix size
+    # if user-defined size is smaller than matrix, activate rebin function
+    row_num = -999
+    col_num = -999
+
+    for opt in optlist:
+        if opt[0] == "-h":
+            sys.exit(usage)
+        elif opt[0] == "-i":
+            input_file = opt[1]
+        elif opt[0] == "-q":
+            quantile = float(opt[1])
+        elif opt[0] == '-t':
+            absolute = float(opt[1])
+        elif opt[0] == "-d":
+            header = opt[1]
+        elif opt[0] == "-s":
+            start_col = int(opt[1])
+        elif opt[0] == "-r":
+            row_num = int(opt[1])
+        elif opt[0] == "-l":
+            col_num = int(opt[1])
+        elif opt[0] == "-m":
+            min_upper_lim = float(opt[1])
+
+    print("Header present:", header)
+    print("Start column:", start_col)
+    print("Row number (pixels):", row_num)
+    print("Col number (pixels):", col_num)
+    print("Min Upper Limit while using Quantile:", min_upper_lim)
+    if absolute != -999:
+        print("Absolute tag contrast threshold:", absolute)
+    else:
+        print("Percentile tag contrast threshold:", quantile)
+
+    if absolute == -999 and quantile <= 0:
+        print("\nInvalid threshold!!!")
+        sys.exit(usage)
+
+    load_Data(input_file, quantile, absolute,
+              header, start_col, row_num, col_num, min_upper_lim)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/calculate_contrast_threshold.xml	Wed Sep 11 09:28:55 2019 -0400
@@ -0,0 +1,122 @@
+<tool id="calculate_contrast_threshold" name="Calculate Contrast threshold" version="1.0.0">
+    <description>from tag pileup CDT
+    </description>
+    <requirements>
+        <requirement type="package" version="1.15.4">numpy</requirement>
+        <requirement type="package" version="3.7.4">python</requirement>
+    </requirements>
+    <command detect_errors="exit_code">
+        <![CDATA[
+      python '$__tool_directory__/calculate_contrast_threshold.py' -i '$input_file'
+
+      ## Setting the quantile values properly.
+      #if str($quantile_type.quantile_type_selector) == "b_option":
+        -q '${quantile_type.quantile}'
+        -m '${quantile_type.min_contrast}'
+      #else if str($quantile_type.quantile_type_selector) == "t_option":
+        -t '${quantile_type.quantile2}'
+      #end if
+
+      -d '$header' -s '$start_col' -r '$row_num' -l '$col_num'
+    ]]>
+    </command>
+
+    <inputs>
+        <param name="input_file" argument="-i" type="data" format="txt" label="Pileup Data Matrix "/>
+        <param name="header" argument="-d" type="boolean" truevalue="T" falsevalue="F" checked="true" label="Does the input file have a Header ?" help="Standard CDT file have headers."/>
+        <param name="start_col" argument="-s" type="integer" value="2" label="Valid Data Start Column" help="Valid data values start from this column"/>
+        <param name="col_num" argument="-l" type="integer" value="300" label="Plot Width in pixels" help="Equal to the heatmap width you plan to create"/>
+        <param name="row_num" argument="-r" type="integer" value="600" label="Plot Height in pixels" help="Equal to the heatmap height you plan to create"/>
+
+
+        <conditional name="quantile_type">
+            <param name="quantile_type_selector" type="select" display="radio" label="Choose the Contrast paramter">
+                <option value="b_option" selected="true">Calculate thresholds from data (-b)</option>
+                <option value="t_option">Enforce absolute thresholds (-t)
+                </option>
+            </param>
+
+            <when value="b_option">
+                <param name="quantile" argument="-b" type="float" min="0" max="100" value="95" label="Quantile" help="Enter your quantile value above."/>
+                <param name="min_contrast" type="float" min="0" value="0" label="Minimum upper limit after Quantile calculation" help="This value will be used as the upper limit if the calculated quantile is below this value" argument="-m"/>
+            </when>
+
+            <when value="t_option">
+                <param name="quantile2" argument="-t" type="float" min="0" value="0.0" label="Absolute tag threshold" help="Enter your custom tag threshold value above. takes real values (>= 0)"/>
+            </when>
+        </conditional>
+
+    </inputs>
+
+    <outputs>
+        <data name="threshold_output" format="txt" from_work_dir="calcThreshold.txt"/>
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="input_file" value="sample.tabular"/>
+            <param name="header" value="T"/>
+            <param name="start_col" value="2"/>
+            <param name="col_num" value="300"/>
+            <param name="row_num" value="600"/>
+            <conditional name="quantile_type">
+                <param name="quantile_type_selector" value="b_option"/>
+                <param name="quantile" value="95"/>
+                <param name="min_contrast" value="5"/>
+            </conditional>
+            <output name="threshold_output" file="calcThreshold_b.txt" />
+        </test>
+        <test>
+            <param name="input_file" value="sample.tabular"/>
+            <param name="header" value="T"/>
+            <param name="start_col" value="2"/>
+            <param name="col_num" value="300"/>
+            <param name="row_num" value="600"/>
+            <conditional name="quantile_type">
+                <param name="quantile_type_selector" value="t_option"/>
+                <param name="quantile2" value="10.0"/>
+            </conditional>
+            <output name="threshold_output" file="calcThreshold_t.txt" />
+        </test>
+    </tests>
+
+    <help>
+        <![CDATA[
+
+**What it does**
+
+----
+
+Calculates a contrast threshold from the CDT file generated by ``tag_pileup_frequency``. The calculated values are then used to set a uniform contrast for all the heatmaps generated downstream.
+
+**Choosing Plot Width & Height**
+
+If your trying to create heatmaps with dimensions (Width x Height)px = (300 x 600)px. Use Plot width = 300, height = 600. This not only helps in generating unbiased heatmaps but also helps in reusing the calculated contrasts for heatmaps of same dimensions.
+
+**Understanding contrast parameters**
+
+`-b`
+
+    Calculates a percentile value (for example 95th percentile) from the input CDT data matrix to report upper-limit and lower-limit for setting heatmap contrasts.
+
+    Also can set a minimum upper-limit to fall-back, incase the calculated percentile is <= specified minimum.
+
+`-t`
+
+    Takes the absolute value entered and reports an upper_limit and lower_limit.
+
+    ]]>
+    </help>
+
+    <citations>
+        <citation type="bibtex">
+            @unpublished{None,
+            author = {Kuntala, Prashant Kumar and Lai, William KM },
+            title = {None},
+            year = {None},
+            eprint = {None},
+            url = {http://www.pughlab.psu.edu/}
+        }</citation>
+    </citations>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/calcThreshold_b.txt	Wed Sep 11 09:28:55 2019 -0400
@@ -0,0 +1,6 @@
+upper_threshold:5.0
+lower_threshold:0
+row_num:600
+col_num:300
+header:T
+start_col:2
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/calcThreshold_t.txt	Wed Sep 11 09:28:55 2019 -0400
@@ -0,0 +1,6 @@
+upper_threshold:10.0
+lower_threshold:0
+row_num:600
+col_num:300
+header:T
+start_col:2
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample.tabular	Wed Sep 11 09:28:55 2019 -0400
@@ -0,0 +1,20 @@
+YORF	NAME	0	1	2	3	4	5	6	7
+10001	10001	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0
+10002	10002	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
+10003	10003	0.0	0.0	0.0	1.0	0.0	1.0	0.0	0.0
+10004	10004	0.0	0.0	1.0	0.0	0.0	0.0	0.0	0.0
+10005	10005	0.0	0.0	0.0	0.0	0.0	0.0	0.0	1.0
+10006	10006	1.0	2.0	2.0	0.0	1.0	0.0	0.0	0.0
+10007	10007	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
+10008	10008	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0
+10009	10009	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
+10011	10011	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0
+10012	10012	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
+10013	10013	0.0	0.0	1.0	1.0	0.0	1.0	0.0	0.0
+10014	10014	0.0	0.0	1.0	1.0	0.0	0.0	0.0	0.0
+10015	10015	1.0	1.0	1.0	0.0	1.0	1.0	1.0	0.0
+10016	10016	0.0	0.0	1.0	0.0	0.0	0.0	0.0	0.0
+10017	10017	0.0	0.0	0.0	0.0	0.0	1.0	0.0	1.0
+10018	10018	0.0	0.0	0.0	0.0	1.0	1.0	0.0	0.0
+10019	10019	0.0	1.0	0.0	0.0	0.0	1.0	0.0	0.0
+10020	10020	0.0	2.0	1.0	0.0	1.0	0.0	0.0	0.0