Mercurial > repos > mvdbeek > damidseq_consecutive_peaks
annotate consecutive_peaks.py @ 1:f3ca59e53b73 draft default tip
planemo upload for repository https://github.com/bardin-lab/damid_galaxy_tools commit c753dd4f3e1863aae7ba45dcc7efdf6937b03542-dirty
| author | mvdbeek | 
|---|---|
| date | Mon, 29 Oct 2018 06:49:17 -0400 | 
| parents | 7f827a8e4ec5 | 
| children | 
| rev | line source | 
|---|---|
| 
0
 
7f827a8e4ec5
planemo upload for repository https://github.com/bardin-lab/damid_galaxy_tools commit c753dd4f3e1863aae7ba45dcc7efdf6937b03542-dirty
 
mvdbeek 
parents:  
diff
changeset
 | 
1 import click | 
| 
 
7f827a8e4ec5
planemo upload for repository https://github.com/bardin-lab/damid_galaxy_tools commit c753dd4f3e1863aae7ba45dcc7efdf6937b03542-dirty
 
mvdbeek 
parents:  
diff
changeset
 | 
2 import numpy as np | 
| 
 
7f827a8e4ec5
planemo upload for repository https://github.com/bardin-lab/damid_galaxy_tools commit c753dd4f3e1863aae7ba45dcc7efdf6937b03542-dirty
 
mvdbeek 
parents:  
diff
changeset
 | 
3 import pandas as pd | 
| 
 
7f827a8e4ec5
planemo upload for repository https://github.com/bardin-lab/damid_galaxy_tools commit c753dd4f3e1863aae7ba45dcc7efdf6937b03542-dirty
 
mvdbeek 
parents:  
diff
changeset
 | 
4 | 
| 
 
7f827a8e4ec5
planemo upload for repository https://github.com/bardin-lab/damid_galaxy_tools commit c753dd4f3e1863aae7ba45dcc7efdf6937b03542-dirty
 
mvdbeek 
parents:  
diff
changeset
 | 
5 SHIFTED_PADJ_COLUMN = 'shifted' | 
| 
 
7f827a8e4ec5
planemo upload for repository https://github.com/bardin-lab/damid_galaxy_tools commit c753dd4f3e1863aae7ba45dcc7efdf6937b03542-dirty
 
mvdbeek 
parents:  
diff
changeset
 | 
6 CONSECUTIVE_MAX = 'consecutive_max' | 
| 
 
7f827a8e4ec5
planemo upload for repository https://github.com/bardin-lab/damid_galaxy_tools commit c753dd4f3e1863aae7ba45dcc7efdf6937b03542-dirty
 
mvdbeek 
parents:  
diff
changeset
 | 
7 PEAKS_PER_GROUP = 'peaks_per_group' | 
| 
 
7f827a8e4ec5
planemo upload for repository https://github.com/bardin-lab/damid_galaxy_tools commit c753dd4f3e1863aae7ba45dcc7efdf6937b03542-dirty
 
mvdbeek 
parents:  
diff
changeset
 | 
8 | 
| 
 
7f827a8e4ec5
planemo upload for repository https://github.com/bardin-lab/damid_galaxy_tools commit c753dd4f3e1863aae7ba45dcc7efdf6937b03542-dirty
 
mvdbeek 
parents:  
diff
changeset
 | 
9 | 
| 
 
7f827a8e4ec5
planemo upload for repository https://github.com/bardin-lab/damid_galaxy_tools commit c753dd4f3e1863aae7ba45dcc7efdf6937b03542-dirty
 
mvdbeek 
parents:  
diff
changeset
 | 
@click.command()
@click.argument('input_file', type=click.Path(exists=True))
@click.argument('output_file', type=click.Path())
@click.argument('padj_column', default=8)
@click.argument('groupby_column', default=9)
@click.argument('add_number_of_peaks', default=True)
def determine_consecutive_peaks(input_file, output_file, padj_column, groupby_column, add_number_of_peaks):
    """Report, for each group, the pair of consecutive peaks with the lowest padj.

    \f
    The input is read as a header-less, tab-separated table, so columns are
    addressed by their 0-based integer position.

    Within each group every row is paired with the row directly before it
    (in file order). The score of a pair is the larger of the two padj
    values; the first row of a group has no predecessor and is scored by its
    own padj alone. For every group the row with the smallest score is
    kept, its padj is replaced by that score, and the kept rows are written
    out sorted by ascending score.

    Args:
        input_file: Path of the tab-separated input table (no header row).
        output_file: Path the tab-separated result is written to. The
            original row index is written as the first column.
        padj_column: Integer label of the column holding the padj values.
        groupby_column: Integer label of the column that defines the groups.
        add_number_of_peaks: When true, append a column holding the number
            of rows in each row's group.
    """
    # The form feed (\f) in the docstring makes click stop printing the help
    # text there, so the developer notes above stay out of `--help`.
    df = pd.read_csv(input_file, sep='\t', header=None)
    grouped = df.groupby(groupby_column, sort=False)
    if add_number_of_peaks:
        df[PEAKS_PER_GROUP] = grouped[groupby_column].transform(np.size)

    # shift() moves values one row down *within* each group, so every row sees
    # the padj of the preceding peak of the same group (NaN for the first row).
    df[SHIFTED_PADJ_COLUMN] = grouped[padj_column].shift()
    # Row-wise max skips NaN, so a pair is only as good as its worse member.
    df[CONSECUTIVE_MAX] = df[[padj_column, SHIFTED_PADJ_COLUMN]].max(axis=1)

    # Index label of the best-scoring row of every group.
    # NOTE(review): a group whose padj values are all NaN has no minimum; how
    # idxmin()/loc treat that depends on the pandas version - confirm.
    idx = df.groupby(groupby_column, sort=False)[CONSECUTIVE_MAX].idxmin()
    # Copy so the assignments below never act on a view of `df`.
    new_df = df.loc[idx].copy()

    # sort_values() returns a new frame; the result has to be kept. A stable
    # sort keeps tied groups in their original file order.
    new_df = new_df.sort_values(by=CONSECUTIVE_MAX, kind='mergesort')
    # Report the pair score in place of the single-peak padj.
    new_df[padj_column] = new_df[CONSECUTIVE_MAX]

    # Helper columns are implementation details and must not reach the output.
    new_df = new_df.drop(labels=[CONSECUTIVE_MAX, SHIFTED_PADJ_COLUMN], axis=1)
    new_df.to_csv(output_file, sep='\t', header=False, na_rep="NaN")
| 
 
7f827a8e4ec5
planemo upload for repository https://github.com/bardin-lab/damid_galaxy_tools commit c753dd4f3e1863aae7ba45dcc7efdf6937b03542-dirty
 
mvdbeek 
parents:  
diff
changeset
 | 
31 | 
| 
 
7f827a8e4ec5
planemo upload for repository https://github.com/bardin-lab/damid_galaxy_tools commit c753dd4f3e1863aae7ba45dcc7efdf6937b03542-dirty
 
mvdbeek 
parents:  
diff
changeset
 | 
32 | 
| 
 
7f827a8e4ec5
planemo upload for repository https://github.com/bardin-lab/damid_galaxy_tools commit c753dd4f3e1863aae7ba45dcc7efdf6937b03542-dirty
 
mvdbeek 
parents:  
diff
changeset
 | 
33 if __name__ == '__main__': | 
| 
 
7f827a8e4ec5
planemo upload for repository https://github.com/bardin-lab/damid_galaxy_tools commit c753dd4f3e1863aae7ba45dcc7efdf6937b03542-dirty
 
mvdbeek 
parents:  
diff
changeset
 | 
34 determine_consecutive_peaks() | 
