Mercurial > repos > mvdbeek > damidseq_consecutive_peaks
changeset 0:7f827a8e4ec5 draft
planemo upload for repository https://github.com/bardin-lab/damid_galaxy_tools commit c753dd4f3e1863aae7ba45dcc7efdf6937b03542-dirty
author | mvdbeek |
---|---|
date | Fri, 26 Oct 2018 11:58:06 -0400 |
parents | |
children | f3ca59e53b73 |
files | consecutive_peaks.py consecutive_peaks.xml test-data/deseq2_peaks.bed test-data/grouped.bed |
diffstat | 4 files changed, 110 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# consecutive_peaks.py
"""Select, per group, the peaks that close the best-scoring consecutive peak pair."""
import click
import numpy as np
import pandas as pd

# Names of the helper columns added to the table while scoring peak pairs.
SHIFTED_PADJ_COLUMN = 'shifted'
CONSECUTIVE_MAX = 'consecutive_max'
PEAKS_PER_GROUP = 'peaks_per_group'


@click.command()
@click.argument('input_file', type=click.Path(exists=True))
@click.argument('output_file', type=click.Path())
@click.argument('padj_column', default=8)
@click.argument('groupby_column', default=9)
@click.argument('add_number_of_peaks', default=True)
def determine_consecutive_peaks(input_file, output_file, padj_column, groupby_column, add_number_of_peaks):
    """Report the peaks with the lowest consecutive-pair padj of each group.

    INPUT_FILE is read as a header-less, tab-separated table. Every peak is
    scored with the higher padj of itself and the preceding peak of the same
    group; the peaks whose score equals the minimum score of their group are
    written to OUTPUT_FILE (tab-separated, no header, row index included,
    missing values as "NaN").

    PADJ_COLUMN and GROUPBY_COLUMN are 0-based column positions.
    If ADD_NUMBER_OF_PEAKS is true, a trailing column with the number of
    peaks in the row's group is appended.
    """
    df = pd.read_csv(input_file, sep='\t', header=None)
    grouped = df.groupby(groupby_column, sort=False)
    if add_number_of_peaks:
        df[PEAKS_PER_GROUP] = grouped[groupby_column].transform(np.size)

    # Pair every peak with the previous peak of the same group. This used to
    # shift a hard-coded column 8, which silently ignored `padj_column`.
    df[SHIFTED_PADJ_COLUMN] = grouped[padj_column].shift()

    # shift() leaves NaN for the first peak of a group and max(axis=1) skips
    # NaN, so a group's first peak is scored by its own padj alone.
    df[CONSECUTIVE_MAX] = df[[padj_column, SHIFTED_PADJ_COLUMN]].max(axis=1)

    # Lowest pair score of each group, broadcast back onto every row. The
    # string alias selects pandas' NaN-skipping groupwise minimum explicitly
    # instead of relying on the builtin `min` being special-cased.
    group_minimum = df.groupby(groupby_column, sort=False)[CONSECUTIVE_MAX].transform('min')
    selected = df[df[CONSECUTIVE_MAX] == group_minimum]

    # NOTE(review): earlier revisions called `sort_values(by=CONSECUTIVE_MAX)`
    # and `new_df[padj_column].replace(new_df[CONSECUTIVE_MAX])` here but
    # discarded both results, so rows stay in input order and keep their own
    # padj. Those no-op calls are dropped; actually sorting or overwriting
    # padj would change the output and needs a decision plus new test data.
    selected = selected.drop(labels=[CONSECUTIVE_MAX, SHIFTED_PADJ_COLUMN], axis=1)
    selected.to_csv(output_file, sep='\t', header=False, na_rep="NaN")


if __name__ == '__main__':
    determine_consecutive_peaks()
<!-- consecutive_peaks.xml: Galaxy tool wrapper around consecutive_peaks.py. -->
<tool id="consecutive_peaks" name="Consecutive peaks" version="0.1.0">
    <requirements>
        <requirement type="package" version="7.0">click</requirement>
        <requirement type="package" version="0.23.4">pandas</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
## data_column parameters are 1-based column numbers, while the script
## addresses columns by 0-based position, hence the "- 1".
#set $padj_index = int(str($padj_column)) - 1
#set $groupby_index = int(str($groupby)) - 1
python '$__tool_directory__/consecutive_peaks.py' '$input_file' '$output_file' '$padj_index' '$groupby_index'
    ]]></command>
    <inputs>
        <param name="input_file" type="data" format="tabular" label="Input file" help="file containing peaks and genes"/>
        <!-- Defaults match the layout of test-data/deseq2_peaks.bed: padj in column 9, gene annotation in column 10. -->
        <param name="padj_column" type="data_column" data_ref="input_file" value="9" label="Column containing padj value"/>
        <param name="groupby" type="data_column" data_ref="input_file" value="10" label="Group values by this column" help="Usually gene id."/>
    </inputs>
    <outputs>
        <data name="output_file" format_source="input_file"/>
    </outputs>
    <tests>
        <test>
            <param name="input_file" value="deseq2_peaks.bed"/>
            <param name="padj_column" value="9"/>
            <param name="groupby" value="10"/>
            <output name="output_file" value="grouped.bed"/>
        </test>
    </tests>
    <help><![CDATA[
Can be used to find, for each gene, the 2 consecutive peaks with the lowest p.adj.
Each peak is scored with the higher p.adj of the peak and the preceding peak of the same gene;
the peak(s) whose score is the lowest of their gene are reported.
The p.adj column of the reported peaks is left unchanged, and a column with the
number of peaks in the gene is appended.
    ]]></help>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/deseq2_peaks.bed Fri Oct 26 11:58:06 2018 -0400 @@ -0,0 +1,40 @@ +2L 5151 5155 40.343042254382 0.364512442363274 0.631307618064081 0.577392751066524 0.56367416996817 0.705361606068607 . +2L 6878 6882 12.092185769541 0.00257609419374086 0.826397671378337 0.00311725732412125 0.997512792537189 0.99873245096982 . +2L 6920 6924 7.11337584147453 1.04514109300767 0.946014451365484 1.10478343274683 0.269253440927885 0.432484606530128 . +2L 7691 7695 11.843536367488 0.151639310614245 0.802284946799774 0.189009293043721 0.850085533276262 0.911041669589917 gene_id "FBgn0031208"; gene_symbol "CG11023"; +2L 7714 7718 9.80005463135221 0.0255918574661465 0.866303804672169 0.0295414349193942 0.976432773062627 0.986972269575067 gene_id "FBgn0031208"; gene_symbol "CG11023"; +2L 12439 12443 395.521214697051 0.588699507050432 0.532443765923156 1.105655741935 0.268875553092083 0.432057816054657 gene_id "FBgn0002121"; gene_symbol "l(2)gl"; +2L 12691 12695 344.959086225371 0.583783806387309 0.531317629472573 1.09874729164703 0.271878308464162 0.435248226767611 gene_id "FBgn0002121"; gene_symbol "l(2)gl"; +2L 13064 13068 84.613630660452 0.217237382993247 0.510837471900007 0.425257337104216 0.670649090435978 0.787775400487521 gene_id "FBgn0002121"; gene_symbol "l(2)gl"; +2L 13291 13295 30.1123838825855 0.155591640049906 0.640283070812485 0.24300445715747 0.808001944619076 0.883541896449563 gene_id "FBgn0002121"; gene_symbol "l(2)gl"; +2L 13321 13325 904.966177257045 0.376461729884521 0.50000103395844 0.75292190278913 0.451496845464273 0.611195498344276 gene_id "FBgn0002121"; gene_symbol "l(2)gl"; +2L 13563 13567 866.603095816488 0.212809815158458 0.495309217028247 0.429650424103295 0.667449951314894 0.785492185048526 gene_id "FBgn0002121"; gene_symbol "l(2)gl"; +2L 13719 13723 113.070341442585 0.0255689119036721 0.508640103147958 0.0502691623122653 0.959907897598717 0.977709493692812 gene_id "FBgn0002121"; gene_symbol "l(2)gl"; 
+2L 14019 14023 327.160239956387 0.00817968469447774 0.493410798870461 0.0165778388174783 0.986773404191456 0.992823409766067 gene_id "FBgn0002121"; gene_symbol "l(2)gl"; +2L 14665 14669 109.110588808567 0.357622042095099 0.517327585875048 0.691287400593937 0.48938494804763 0.643965620035756 gene_id "FBgn0002121"; gene_symbol "l(2)gl"; +2L 14818 14822 109.589493018768 0.630819180292167 0.51523842372404 1.22432480041518 0.220829724870924 0.377953444427728 gene_id "FBgn0002121"; gene_symbol "l(2)gl"; +2L 14871 14875 15.7006038983715 0.472365868872293 0.749919525827981 0.629888744863333 0.528767377230365 0.677153275459159 gene_id "FBgn0002121"; gene_symbol "l(2)gl"; +2L 14972 14976 23.8476901187452 0.843585794286652 0.688647606374209 1.22498907493225 0.22057933683896 0.377689786125795 gene_id "FBgn0002121"; gene_symbol "l(2)gl"; +2L 15073 15077 1049.30525844711 0.9735164004558 0.481901880919704 2.02015480536796 0.0433673325255996 0.120295777588402 gene_id "FBgn0002121"; gene_symbol "l(2)gl"; +2L 15371 15375 1216.91488535489 0.863105078420247 0.47092399272887 1.83279062385163 0.0668337140515137 0.163575572623876 gene_id "FBgn0002121"; gene_symbol "l(2)gl"; +2L 15662 15666 728.708978107195 0.995657604251844 0.475779274418689 2.09268805470424 0.0363770102785434 0.106086100091789 gene_id "FBgn0002121"; gene_symbol "l(2)gl"; +2L 15837 15841 931.005092915203 1.39832486621617 0.473976801158749 2.95019685098012 0.00317571521468965 0.0178266247016122 gene_id "FBgn0002121"; gene_symbol "l(2)gl"; +2L 16091 16095 466.664835290671 1.68546447518356 0.476743856067749 3.53536695592787 0.00040720924302942 0.00388838218326285 gene_id "FBgn0002121"; gene_symbol "l(2)gl"; +2L 16173 16177 57.9667762555473 1.83956446735437 0.587556668205665 3.1308715684773 0.0017428836310417 0.0114466453224446 gene_id "FBgn0002121"; gene_symbol "l(2)gl"; +2L 16954 16958 104.269971829585 2.00754698293392 0.578809459512681 3.46840734880895 0.000523553024927028 0.00468870862799906 gene_id "FBgn0002121"; 
gene_symbol "l(2)gl"; +2L 18815 18819 22.0488120933426 1.87326623013989 0.777316254714993 2.40991516487292 0.0159562302940107 0.0585670598443906 gene_id "FBgn0002121"; gene_symbol "l(2)gl"; +2L 19786 19790 113.299800906455 2.88332490053871 0.588037152599408 4.90330396267145 9.42379868583578e-07 3.69117942073248e-05 gene_id "FBgn0002121"; gene_symbol "l(2)gl"; +2L 21641 21645 34.3500408874986 1.50597265027687 0.653596477890316 2.30413213843786 0.0212152271514627 0.0720282848419051 . +2L 22094 22098 156.619353330383 1.30715333943466 0.521853623943724 2.50482755979792 0.0122511106265175 0.0482672435893478 gene_id "FBgn0031209"; gene_symbol "Ir21a";gene_id "FBgn0263584"; gene_symbol "CR43609"; +2L 22350 22354 119.545732721289 1.13036602772727 0.532845303338328 2.12137748169199 0.0338900487038572 0.100853283788594 gene_id "FBgn0031209"; gene_symbol "Ir21a";gene_id "FBgn0263584"; gene_symbol "CR43609"; +2L 22504 22508 85.4632791613666 0.342897535321554 0.527063176431041 0.650581468512851 0.515316696087814 0.665812942096135 gene_id "FBgn0031209"; gene_symbol "Ir21a";gene_id "FBgn0263584"; gene_symbol "CR43609"; +2L 23026 23030 57.9770314290585 0.141961019055869 0.534449867988492 0.265620832857845 0.790531239499375 0.871973857457818 gene_id "FBgn0031209"; gene_symbol "Ir21a";gene_id "FBgn0263584"; gene_symbol "CR43609"; +2L 24357 24361 56.1813761950221 0.415613328757581 0.595963429784736 0.697380590798502 0.48556464487102 0.640693696805243 gene_id "FBgn0031209"; gene_symbol "Ir21a"; +2L 25083 25087 47.4292294853296 0.713596808954638 0.648352348117706 1.10063117844232 0.27105721016165 0.434420734304972 gene_id "FBgn0031209"; gene_symbol "Ir21a"; +2L 43327 43331 5.84901314262136 0.679132204154339 0.997795654791302 0.680632553262006 0.496104023308378 0.649678753077093 gene_id "FBgn0051973"; gene_symbol "Cda5"; +2L 47323 47327 32.6029541213852 1.75539500166679 0.756690748055963 2.31983145846124 0.020349996576802 0.0698998692979901 gene_id "FBgn0051973"; gene_symbol "Cda5"; +2L 
54049 54053 5.49243058011769 4.55190341736556 1.22468204021079 3.71680425441865 0.000201758637404118 0.00229429817130296 gene_id "FBgn0051973"; gene_symbol "Cda5"; +2L 55633 55637 7.66903089601476 0.101760387554123 0.930459196736503 0.109365771127888 0.912912381544608 0.949924793245379 gene_id "FBgn0051973"; gene_symbol "Cda5";gene_id "FBgn0267987"; gene_symbol "CR46254"; +2L 65315 65319 137.101857971652 1.23449113928199 0.524990063103154 2.35145620087561 0.0187000910325455 0.065785553432607 gene_id "FBgn0051973"; gene_symbol "Cda5"; +2L 65606 65610 125.605086834427 1.48437920210473 0.503182219131581 2.94998341687541 0.00317790986101209 0.017835456290875 . +2L 65671 65675 49.7172333525855 1.78672093221744 0.65335924547836 2.73466847616013 0.00624431313027083 0.0293462152487687 .
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/grouped.bed Fri Oct 26 11:58:06 2018 -0400 @@ -0,0 +1,9 @@ +3 2L 7691 7695 11.843536367488001 0.151639310614245 0.802284946799774 0.18900929304372102 0.8500855332762621 0.9110416695899171 "gene_id ""FBgn0031208""; gene_symbol ""CG11023"";" 2 +22 2L 16173 16177 57.966776255547295 1.83956446735437 0.5875566682056651 3.1308715684773 0.0017428836310417 0.0114466453224446 "gene_id ""FBgn0002121""; gene_symbol ""l(2)gl"";" 21 +23 2L 16954 16958 104.269971829585 2.00754698293392 0.578809459512681 3.46840734880895 0.0005235530249270281 0.0046887086279990605 "gene_id ""FBgn0002121""; gene_symbol ""l(2)gl"";" 21 +27 2L 22094 22098 156.619353330383 1.30715333943466 0.521853623943724 2.50482755979792 0.0122511106265175 0.0482672435893478 "gene_id ""FBgn0031209""; gene_symbol ""Ir21a"";gene_id ""FBgn0263584""; gene_symbol ""CR43609"";" 4 +31 2L 24357 24361 56.181376195022104 0.41561332875758095 0.5959634297847359 0.6973805907985021 0.48556464487102 0.6406936968052429 "gene_id ""FBgn0031209""; gene_symbol ""Ir21a"";" 2 +32 2L 25083 25087 47.4292294853296 0.713596808954638 0.648352348117706 1.1006311784423202 0.27105721016165 0.4344207343049721 "gene_id ""FBgn0031209""; gene_symbol ""Ir21a"";" 2 +36 2L 55633 55637 7.6690308960147595 0.10176038755412299 0.9304591967365029 0.109365771127888 0.9129123815446079 0.949924793245379 "gene_id ""FBgn0051973""; gene_symbol ""Cda5"";gene_id ""FBgn0267987""; gene_symbol ""CR46254"";" 1 +37 2L 65315 65319 137.10185797165198 1.23449113928199 0.524990063103154 2.3514562008756097 0.0187000910325455 0.065785553432607 "gene_id ""FBgn0051973""; gene_symbol ""Cda5"";" 4 +39 2L 65671 65675 49.717233352585495 1.78672093221744 0.65335924547836 2.73466847616013 0.006244313130270829 0.0293462152487687 . 6