0.6.2

SunPengChuan · Dec 7, 2022 · dbf169e · dbf169e
1 parent 6ed073a
commit dbf169e
Show file tree

Hide file tree

Showing 9 changed files with 97 additions and 18 deletions.
diff --git a/README.md b/README.md
@@ -14,8 +14,6 @@ WGDI (Whole-Genome Duplication Integrated analysis), a Python-based command-line
 
 WGDI supports three main workflows (polyploid inference, hierarchical inference of genomic homology, and ancestral chromosomal karyotyping) that can improve detection of WGD and characterization of related events. It incorporates a more sensitive and accurate collinearity detection algorithm than previous softwares, and can accelerate WGD-related karyotype research.
 
-WGDI outperforms similar tools in terms of efficiency, flexibility and scalability.
-
 ## Installation
 
 Python package and command line interface (IDLE) for the analysis of whole genome duplications (WGDI). WGDI can be deployed in Windows, Linux, and Mac OS operating systems and can be installed via pip and conda.
@@ -38,9 +36,9 @@ Documentation for installation along with a user tutorial, a default parameter f
 
 Here are some videos with simple examples of WGDI.
 
-###### [WGDI的简单使用（一）](https://www.bilibili.com/video/BV1qK4y1U7eK)
+###### [WGDI的简单使用（一）](https://www.bilibili.com/video/BV1qK4y1U7eK) or https://youtu.be/k-S6FVcBIQw
 
-###### [WGDI的简单使用（二）](https://www.bilibili.com/video/BV195411P7L1)
+###### [WGDI的简单使用（二）](https://www.bilibili.com/video/BV195411P7L1) or https://youtu.be/QiZYFYGclyE
 
 chatting group QQ : 966612552
 
@@ -52,6 +50,9 @@ If you use wgdi in your work, please cite:
 
 ## News
 
+## 0.6.2
+* Added a module to find shared fusions between species (-sf).
+
 ## 0.6.1
 
 * Fixed issue with alignment (-a). Only version 0.6.0 has this bug.

diff --git a/setup.py b/setup.py
@@ -10,7 +10,7 @@
 
 setup(
     name="wgdi",
-    version="0.6.1",
+    version="0.6.2",
     author="Pengchuan Sun",
     author_email="[email protected]",
     description="Whole Genome Duplication Identification",

diff --git a/wgdi/base.py b/wgdi/base.py
@@ -265,13 +265,3 @@ def Rectangle(ax, loc, height, width, color, alpha):
     p = mpatches.Rectangle(
         loc, width, height, edgecolor=None, facecolor=color, alpha=alpha)
     ax.add_patch(p)
-
-
-def read_calassfication(file):
-    classification = pd.read_csv(file, sep="\t", header=None)
-    classification[0] = classification[0].astype(str)
-    classification[1] = classification[1].astype(int)
-    classification[2] = classification[2].astype(int)
-    classification[3] = classification[3].astype(str)
-    classification[4] = classification[4].astype(int)
-    return classification
diff --git a/wgdi/block_ks.py b/wgdi/block_ks.py
@@ -93,6 +93,7 @@ def run(self):
         # sc = plt.scatter(df['loc1'], df['loc2'], s=float(self.markersize), c='dimgray',
         #                  alpha=0.7, edgecolors=None, linewidths=0, marker='o', vmin=self.area[0], vmax=self.area[1])
         cbar = fig.colorbar(sc, shrink=0.5, pad=0.03, fraction=0.1)
+        # cbar.ax.tick_params(labelsize=15) 
         align = dict(family='Arial', style='normal',
                      horizontalalignment="center", verticalalignment="center")
         cbar.set_label('Ks', labelpad=12.5, fontsize=18, **align)

diff --git a/wgdi/example/shared_fusion.conf b/wgdi/example/shared_fusion.conf
@@ -0,0 +1,10 @@
+[shared_fusion]
+blockinfo = block information (*.csv)
+lens1 = lens file, filtered lens file (output)
+lens2 = lens file, filtered lens file (output)
+ancestor_left = ancestor file
+ancestor_top = ancestor file
+classid = class1,class2
+limit_length = 20
+savefile = result file(.csv)
+filtered_blockinfo = result blockinfo (.csv)
diff --git a/wgdi/ksfigure.py b/wgdi/ksfigure.py
@@ -44,7 +44,7 @@ def run(self):
         plt.rcParams['ytick.major.pad'] = 0
         fig, ax = plt.subplots(figsize=self.figsize)
         ksfit = pd.read_csv(self.ksfit, index_col=0)
-        t = np.arange(self.area[0], self.area[1], 0.005)
+        t = np.arange(self.area[0], self.area[1], 0.0005)
         col = [k for k in ksfit.columns if re.match('Unnamed:', k)]
         for index, row in ksfit.iterrows():
             ax.plot(t, self.Gaussian_distribution(

diff --git a/wgdi/polyploidy_classification.py b/wgdi/polyploidy_classification.py
@@ -56,5 +56,6 @@ def run(self):
                         bkinfo.loc[index1,'diff']=h
                         bkinfo.loc[index1, self.classid[1]] = row2[4]
                         bkinfo.loc[index1, self.classid[1]+'_color'] = row2[3]
+        # bkinfo=bkinfo[(bkinfo[self.classid[1]+'_color']==bkinfo[self.classid[0]+'_color'])]
         bkinfo.to_csv(self.savefile, index=None)
         sys.exit(0)
diff --git a/wgdi/run.py b/wgdi/run.py
@@ -26,6 +26,7 @@
 from wgdi.trees import trees
 from wgdi.ancestral_karyotype import ancestral_karyotype
 from wgdi.ancestral_karyotype_repertoire import ancestral_karyotype_repertoire
+from wgdi.shared_fusion import shared_fusion
 
 parser = argparse.ArgumentParser(
     prog='wgdi', usage='%(prog)s [options]', epilog="", formatter_class=argparse.RawDescriptionHelpFormatter,)
@@ -34,7 +35,7 @@
 
     https://wgdi.readthedocs.io/en/latest/
     -------------------------------------- '''
-parser.add_argument("-v", "--version", action='version', version='0.6.1')
+parser.add_argument("-v", "--version", action='version', version='0.6.2')
 parser.add_argument("-d", dest="dotplot",
                     help="Show homologous gene dotplot")
 parser.add_argument("-icl", dest="improvedcollinearity",
@@ -64,7 +65,9 @@
 parser.add_argument("-akr", dest="ancestral_karyotype_repertoire",
                     help="Incorporate genes from collinearity blocks into the ancestral karyotype repertoire")
 parser.add_argument("-km", dest="karyotype_mapping",
-                    help="Mapping from the known karyotype result to this species")                    
+                    help="Mapping from the known karyotype result to this species")
+parser.add_argument("-sf", dest="shared_fusion",
+                    help="Quickly find shared fusions between species")  
 parser.add_argument("-at", dest="alignmenttrees",
                     help="Collinear genes construct phylogenetic trees")
 parser.add_argument("-p", dest="pindex",
@@ -110,6 +113,7 @@ def module_to_run(argument, conf):
         'ancestral_karyotype': (ancestral_karyotype, conf, 'ancestral_karyotype'),
         'karyotype_mapping': (karyotype_mapping, conf, 'karyotype_mapping'),
         'ancestral_karyotype_repertoire': (ancestral_karyotype_repertoire, conf, 'ancestral_karyotype_repertoire'),
+        'shared_fusion': (shared_fusion, conf, 'shared_fusion'),
     }
     if argument == 'configure':
         run_configure()
@@ -140,6 +144,7 @@ def main():
                'ancestral_karyotype': 'ancestral_karyotype.conf',
                'ancestral_karyotype_repertoire': 'ancestral_karyotype_repertoire.conf',
                'karyotype_mapping': 'karyotype_mapping.conf',
+               'shared_fusion': 'shared_fusion.conf',
                }
     for arg in vars(args):
         value = getattr(args, arg)

diff --git a/wgdi/shared_fusion.py b/wgdi/shared_fusion.py
@@ -0,0 +1,71 @@
+import pandas as pd
+
+import wgdi.base as base
+
+
+class shared_fusion():
+    """Find blocks that straddle ancestral-segment boundaries (-sf module)."""
+
+    def __init__(self, options):
+        """Copy (key, value) options onto self and parse the typed ones.
+
+        lens1/lens2 are comma-separated paths: input lens file, then the
+        output path for the filtered lens file written by run().
+        """
+        for k, v in options:
+            setattr(self, str(k), v)
+            print(k, ' = ', v)
+        # classid is parsed here but run() never reads it.
+        self.classid = getattr(self, 'classid', 'class1,class2').split(',')
+        self.limit_length = int(getattr(self, 'limit_length', 20))
+        self.lens1 = self.lens1.replace(' ', '').split(',')
+        self.lens2 = self.lens2.replace(' ', '').split(',')
+
+    @staticmethod
+    def _overlap(a, b, c, d):
+        """Count the integers shared by range(a, b) and range(c, d)."""
+        return max(0, min(b, d) - max(a, c))
+
+    def _split_blocks(self, bkinfo, ancestor, axis):
+        """Return [id, color, class, in, out] rows for axis '1' or '2'.
+
+        A row is produced when a block has more than limit_length positions
+        inside an ancestor segment and more than limit_length outside it.
+        """
+        rows = []
+        for name, group in bkinfo.groupby('chr' + axis):
+            segments = ancestor[ancestor[0] == name]
+            for _, blk in group.iterrows():
+                a, b = sorted([int(blk['start' + axis]), int(blk['end' + axis])])
+                for _, seg in segments.iterrows():
+                    c, d = sorted([int(seg[1]), int(seg[2])])
+                    inside = self._overlap(a, b, c, d)
+                    outside = (b - a) - inside
+                    if min(inside, outside) > self.limit_length:
+                        rows.append([blk['id'], seg[3], seg[4], inside, outside])
+        return rows
+
+    def run(self):
+        """Write the fusion table, filtered block info and filtered lens files."""
+        ancestor_left = base.read_calassfication(self.ancestor_left)
+        ancestor_top = base.read_calassfication(self.ancestor_top)
+        bkinfo = pd.read_csv(self.blockinfo)
+        bkinfo['chr1'] = bkinfo['chr1'].astype(str)
+        bkinfo['chr2'] = bkinfo['chr2'].astype(str)
+        bkinfo = bkinfo[(bkinfo['chr1'].isin(ancestor_left[0].values)) & (
+            bkinfo['chr2'].isin(ancestor_top[0].values))]
+        lens1 = pd.read_csv(self.lens1[0], sep='\t', header=None)
+        lens2 = pd.read_csv(self.lens2[0], sep='\t', header=None)
+        data = self._split_blocks(bkinfo, ancestor_left, '1')
+        data += self._split_blocks(bkinfo, ancestor_top, '2')
+        df = pd.DataFrame(data, columns=['id', 'color', 'class', 'in', 'out'])
+        df.to_csv(self.savefile, index=False)
+        blockinfoout = bkinfo[bkinfo['id'].isin(df['id'].values)]
+        blockinfoout.to_csv(self.filtered_blockinfo, index=False)
+        for lens, paths, col in ((lens1, self.lens1, 'chr1'), (lens2, self.lens2, 'chr2')):
+            if len(paths) < 2:  # no output path given: skip instead of IndexError
+                print('No output lens file given after', paths[0])
+                continue
+            lens[0] = lens[0].astype(str)
+            lens = lens[lens[0].isin(blockinfoout[col].values)]
+            lens.to_csv(paths[1], sep='\t', index=False, header=False)