From ea05559dc748eafe7de4f3ea39401b2d3f21bb4e Mon Sep 17 00:00:00 2001
From: Chathura Widanage <7312649+chathurawidanage@users.noreply.github.com>
Date: Wed, 12 May 2021 10:07:30 -0400
Subject: [PATCH] DOC: Adding Cylon under ecosystem/out of core (#41402)
---
doc/source/ecosystem.rst | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst
index d53d0556dca04..bc2325f15852c 100644
--- a/doc/source/ecosystem.rst
+++ b/doc/source/ecosystem.rst
@@ -405,6 +405,35 @@ Blaze provides a standard API for doing computations with various
in-memory and on-disk backends: NumPy, pandas, SQLAlchemy, MongoDB, PyTables,
PySpark.
+`Cylon <https://cylondata.org/>`__
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Cylon is a fast, scalable, distributed memory parallel runtime with a pandas
+like Python DataFrame API. "Core Cylon" is implemented with C++ using Apache
+Arrow format to represent the data in-memory. Cylon DataFrame API implements
+most of the core operators of pandas such as merge, filter, join, concat,
+group-by, drop_duplicates, etc. These operators are designed to work across
+thousands of cores to scale applications. It can interoperate with pandas
+DataFrame by reading data from pandas or converting data to pandas so users
+can selectively scale parts of their pandas DataFrame applications.
+
+.. code:: python
+
+ from pycylon import read_csv, DataFrame, CylonEnv
+ from pycylon.net import MPIConfig
+
+ # Initialize Cylon distributed environment
+ config: MPIConfig = MPIConfig()
+ env: CylonEnv = CylonEnv(config=config, distributed=True)
+
+ df1: DataFrame = read_csv('/tmp/csv1.csv')
+ df2: DataFrame = read_csv('/tmp/csv2.csv')
+
+ # Using 1000s of cores across the cluster to compute the join
+ df3: DataFrame = df1.join(other=df2, on=[0], algorithm="hash", env=env)
+
+ print(df3)
+
`Dask <https://dask.org>`__
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~