Skip to content

Commit

Permalink
0.6.2
Browse files Browse the repository at this point in the history
  • Loading branch information
SunPengChuan committed Dec 7, 2022
1 parent 6ed073a commit dbf169e
Show file tree
Hide file tree
Showing 9 changed files with 97 additions and 18 deletions.
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@ WGDI (Whole-Genome Duplication Integrated analysis), a Python-based command-line

WGDI supports three main workflows (polyploid inference, hierarchical inference of genomic homology, and ancestral chromosomal karyotyping) that can improve detection of WGD and characterization of related events. It incorporates a more sensitive and accurate collinearity detection algorithm than previous software, and can accelerate WGD-related karyotype research.

WGDI outperforms similar tools in terms of efficiency, flexibility and scalability.

## Installation

Python package and command line interface (IDLE) for the analysis of whole genome duplications (WGDI). WGDI can be deployed in Windows, Linux, and Mac OS operating systems and can be installed via pip and conda.
Expand All @@ -38,9 +36,9 @@ Documentation for installation along with a user tutorial, a default parameter f

Here are some videos with simple examples of WGDI.

###### [WGDI的简单使用(一)](https://www.bilibili.com/video/BV1qK4y1U7eK)
###### [WGDI的简单使用(一)](https://www.bilibili.com/video/BV1qK4y1U7eK) or https://youtu.be/k-S6FVcBIQw

###### [WGDI的简单使用(二)](https://www.bilibili.com/video/BV195411P7L1)
###### [WGDI的简单使用(二)](https://www.bilibili.com/video/BV195411P7L1) or https://youtu.be/QiZYFYGclyE

QQ chat group: 966612552

Expand All @@ -52,6 +50,9 @@ If you use wgdi in your work, please cite:
## News

## 0.6.2
* Added a module to find shared fusions between species (-sf).

## 0.6.1

* Fixed issue with alignment (-a). Only version 0.6.0 has this bug.
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

setup(
name="wgdi",
version="0.6.1",
version="0.6.2",
author="Pengchuan Sun",
author_email="[email protected]",
description="Whole Genome Duplication Identification",
Expand Down
10 changes: 0 additions & 10 deletions wgdi/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,13 +265,3 @@ def Rectangle(ax, loc, height, width, color, alpha):
p = mpatches.Rectangle(
loc, width, height, edgecolor=None, facecolor=color, alpha=alpha)
ax.add_patch(p)


def read_calassfication(file):
    """Load a headerless, tab-separated classification table.

    The first five columns are coerced, in order, to
    ``str, int, int, str, int``; any further columns are left as parsed.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        pandas.DataFrame: The table with integer column labels.
    """
    table = pd.read_csv(file, sep="\t", header=None)
    # Column position -> dtype enforced on that column.
    column_types = ((0, str), (1, int), (2, int), (3, str), (4, int))
    for position, kind in column_types:
        table[position] = table[position].astype(kind)
    return table
1 change: 1 addition & 0 deletions wgdi/block_ks.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ def run(self):
# sc = plt.scatter(df['loc1'], df['loc2'], s=float(self.markersize), c='dimgray',
# alpha=0.7, edgecolors=None, linewidths=0, marker='o', vmin=self.area[0], vmax=self.area[1])
cbar = fig.colorbar(sc, shrink=0.5, pad=0.03, fraction=0.1)
# cbar.ax.tick_params(labelsize=15)
align = dict(family='Arial', style='normal',
horizontalalignment="center", verticalalignment="center")
cbar.set_label('Ks', labelpad=12.5, fontsize=18, **align)
Expand Down
10 changes: 10 additions & 0 deletions wgdi/example/shared_fusion.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[shared_fusion]
blockinfo = block information (*.csv)
lens1 = lens file
lens2 = lens file
ancestor_left = ancestor file
ancestor_top = ancestor file
classid = class1,class2
limit_length = 20
savefile = result file(.csv)
filtered_blockinfo = result blockinfo (.csv)
2 changes: 1 addition & 1 deletion wgdi/ksfigure.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def run(self):
plt.rcParams['ytick.major.pad'] = 0
fig, ax = plt.subplots(figsize=self.figsize)
ksfit = pd.read_csv(self.ksfit, index_col=0)
t = np.arange(self.area[0], self.area[1], 0.005)
t = np.arange(self.area[0], self.area[1], 0.0005)
col = [k for k in ksfit.columns if re.match('Unnamed:', k)]
for index, row in ksfit.iterrows():
ax.plot(t, self.Gaussian_distribution(
Expand Down
1 change: 1 addition & 0 deletions wgdi/polyploidy_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,5 +56,6 @@ def run(self):
bkinfo.loc[index1,'diff']=h
bkinfo.loc[index1, self.classid[1]] = row2[4]
bkinfo.loc[index1, self.classid[1]+'_color'] = row2[3]
# bkinfo=bkinfo[(bkinfo[self.classid[1]+'_color']==bkinfo[self.classid[0]+'_color'])]
bkinfo.to_csv(self.savefile, index=None)
sys.exit(0)
9 changes: 7 additions & 2 deletions wgdi/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from wgdi.trees import trees
from wgdi.ancestral_karyotype import ancestral_karyotype
from wgdi.ancestral_karyotype_repertoire import ancestral_karyotype_repertoire
from wgdi.shared_fusion import shared_fusion

parser = argparse.ArgumentParser(
prog='wgdi', usage='%(prog)s [options]', epilog="", formatter_class=argparse.RawDescriptionHelpFormatter,)
Expand All @@ -34,7 +35,7 @@
https://wgdi.readthedocs.io/en/latest/
-------------------------------------- '''
parser.add_argument("-v", "--version", action='version', version='0.6.1')
parser.add_argument("-v", "--version", action='version', version='0.6.2')
parser.add_argument("-d", dest="dotplot",
help="Show homologous gene dotplot")
parser.add_argument("-icl", dest="improvedcollinearity",
Expand Down Expand Up @@ -64,7 +65,9 @@
parser.add_argument("-akr", dest="ancestral_karyotype_repertoire",
help="Incorporate genes from collinearity blocks into the ancestral karyotype repertoire")
parser.add_argument("-km", dest="karyotype_mapping",
help="Mapping from the known karyotype result to this species")
help="Mapping from the known karyotype result to this species")
parser.add_argument("-sf", dest="shared_fusion",
help="Quickly find shared fusions between species")
parser.add_argument("-at", dest="alignmenttrees",
help="Collinear genes construct phylogenetic trees")
parser.add_argument("-p", dest="pindex",
Expand Down Expand Up @@ -110,6 +113,7 @@ def module_to_run(argument, conf):
'ancestral_karyotype': (ancestral_karyotype, conf, 'ancestral_karyotype'),
'karyotype_mapping': (karyotype_mapping, conf, 'karyotype_mapping'),
'ancestral_karyotype_repertoire': (ancestral_karyotype_repertoire, conf, 'ancestral_karyotype_repertoire'),
'shared_fusion': (shared_fusion, conf, 'shared_fusion'),
}
if argument == 'configure':
run_configure()
Expand Down Expand Up @@ -140,6 +144,7 @@ def main():
'ancestral_karyotype': 'ancestral_karyotype.conf',
'ancestral_karyotype_repertoire': 'ancestral_karyotype_repertoire.conf',
'karyotype_mapping': 'karyotype_mapping.conf',
'shared_fusion': 'shared_fusion.conf',
}
for arg in vars(args):
value = getattr(args, arg)
Expand Down
71 changes: 71 additions & 0 deletions wgdi/shared_fusion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import pandas as pd

import wgdi.base as base


class shared_fusion():
    """Quickly find shared fusions between species (the ``-sf`` module).

    A collinear block is reported when, on either of its two axes, more
    than ``limit_length`` positions fall inside one ancestor region and
    more than ``limit_length`` positions of the same block fall outside it.

    Options (supplied as ``(key, value)`` pairs and stored as attributes):
        blockinfo: CSV of block information with at least the columns
            ``id, chr1, chr2, start1, end1, start2, end2``.
        lens1, lens2: ``input[,output]`` lens files. The filtered lens file
            is written only when an output path is given.
        ancestor_left, ancestor_top: classification files read through
            ``base.read_calassfication``.
        classid: comma-separated names (default ``class1,class2``); stored
            but not used by ``run``.
        limit_length: integer threshold (default 20).
        savefile: CSV receiving every (block, region) hit.
        filtered_blockinfo: CSV receiving the blocks that produced a hit.
    """

    def __init__(self, options):
        """Store every option as an attribute, then normalise known ones.

        Args:
            options: Iterable of ``(key, value)`` string pairs. ``lens1``
                and ``lens2`` are required; a missing one raises
                ``AttributeError``.
        """
        for k, v in options:
            setattr(self, str(k), v)
            print(k, ' = ', v)
        if hasattr(self, 'classid'):
            self.classid = [str(k) for k in self.classid.split(',')]
        else:
            self.classid = ['class1', 'class2']
        if hasattr(self, 'limit_length'):
            self.limit_length = int(self.limit_length)
        else:
            self.limit_length = 20
        # "input,output" -> [input, output]; spaces are not significant.
        self.lens1 = self.lens1.replace(' ', '').split(',')
        self.lens2 = self.lens2.replace(' ', '').split(',')

    @staticmethod
    def _overlap(a, b, c, d):
        """Return how many integers lie in both ``range(a, b)`` and ``range(c, d)``.

        Closed-form replacement for enumerating every position of the block.
        """
        return max(0, min(b, d) - max(a, c))

    def _spanning_hits(self, bkinfo, ancestor, chr_col, start_col, end_col):
        """Collect blocks that lie partly inside and partly outside a region.

        Args:
            bkinfo: Block table (already restricted to known chromosomes).
            ancestor: Classification table for this axis; column 0 is the
                chromosome, 1-2 the region bounds, 3-4 the values reported
                as ``color`` and ``class``.
            chr_col, start_col, end_col: Block columns for this axis.

        Returns:
            list: ``[id, color, class, length_in, length_out]`` rows, in
            the order the blocks and regions are visited.
        """
        hits = []
        for name, group in bkinfo.groupby(chr_col):
            regions = ancestor[ancestor[0] == name]
            for _, block in group.iterrows():
                a, b = sorted([block[start_col], block[end_col]])
                a, b = int(a), int(b)
                for _, region in regions.iterrows():
                    c, d = sorted([region[1], region[2]])
                    length_in = self._overlap(a, b, int(c), int(d))
                    length_out = (b - a) - length_in
                    # Both sides must exceed the threshold for the block to
                    # count as spanning the region boundary.
                    if length_in > self.limit_length and length_out > self.limit_length:
                        hits.append(
                            [block['id'], region[3], region[4], length_in, length_out])
        return hits

    def run(self):
        """Scan both axes, then write the hit table and the filtered inputs."""
        ancestor_left = base.read_calassfication(self.ancestor_left)
        ancestor_top = base.read_calassfication(self.ancestor_top)
        bkinfo = pd.read_csv(self.blockinfo)
        bkinfo['chr1'] = bkinfo['chr1'].astype(str)
        bkinfo['chr2'] = bkinfo['chr2'].astype(str)
        # Keep only blocks whose chromosomes appear in both ancestor files.
        bkinfo = bkinfo[(bkinfo['chr1'].isin(ancestor_left[0].values)) & (
            bkinfo['chr2'].isin(ancestor_top[0].values))]
        lens1 = pd.read_csv(self.lens1[0], sep='\t', header=None)
        lens2 = pd.read_csv(self.lens2[0], sep='\t', header=None)
        lens1[0] = lens1[0].astype(str)
        lens2[0] = lens2[0].astype(str)

        # Left-axis hits first, then top-axis hits (same order as before).
        data = self._spanning_hits(
            bkinfo, ancestor_left, 'chr1', 'start1', 'end1')
        data += self._spanning_hits(
            bkinfo, ancestor_top, 'chr2', 'start2', 'end2')

        df = pd.DataFrame(data, columns=['id', 'color', 'class', 'in', 'out'])
        df.to_csv(self.savefile, index=False)
        df.drop_duplicates(subset=['id'], keep='first', inplace=True)
        blockinfoout = bkinfo[bkinfo['id'].isin(df['id'].values)]
        blockinfoout.to_csv(self.filtered_blockinfo, index=False)
        lens1 = lens1[lens1[0].isin(blockinfoout['chr1'].values)]
        lens2 = lens2[lens2[0].isin(blockinfoout['chr2'].values)]
        # An output path is optional: with a single lens path there is
        # nowhere to write, so skip instead of raising IndexError after all
        # the other results have already been produced.
        if len(self.lens1) > 1:
            lens1.to_csv(self.lens1[1], sep='\t', index=False, header=False)
        if len(self.lens2) > 1:
            lens2.to_csv(self.lens2[1], sep='\t', index=False, header=False)

0 comments on commit dbf169e

Please sign in to comment.