Commit
changed name to vega. closes #27
rajasekarv committed May 3, 2020
1 parent ce6280b commit d1ef2b5
Showing 16 changed files with 65 additions and 31 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
@@ -1,13 +1,13 @@
[package]
name = "native_spark"
name = "vega"
version = "0.1.0"
authors = ["raja <[email protected]>"]
edition = "2018"

build = "build.rs"

[lib]
name = "native_spark"
name = "vega"

[features]
aws_connectors = ["rusoto_core", "rusoto_s3"]
8 changes: 5 additions & 3 deletions README.md
@@ -1,14 +1,16 @@
-# native_spark
+# vega
+
+Previously known as native_spark

[![Join the chat at https://gitter.im/fast_spark/community](https://badges.gitter.im/fast_spark/community.svg)](https://gitter.im/fast_spark/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![Build Status](https://travis-ci.org/rajasekarv/native_spark.svg?branch=master)](https://travis-ci.org/rajasekarv/native_spark)
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)

-**[Documentation](https://rajasekarv.github.io/native_spark/)**
+**[Documentation](https://rajasekarv.github.io/vega/)**

A new, arguably faster, implementation of Apache Spark from scratch in Rust. WIP

-Framework tested only on Linux, requires nightly Rust. Read how to get started in the [documentation](https://rajasekarv.github.io/native_spark/chapter_1.html).
+The framework is tested only on Linux and requires nightly Rust. Read how to get started in the [documentation](https://rajasekarv.github.io/vega/chapter_1.html).

## Contributing

4 changes: 2 additions & 2 deletions docker/Dockerfile
@@ -20,8 +20,8 @@ RUN set -e; \
# Install and set up rustup
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain $RUST_VERSION --no-modify-path;
WORKDIR /home
-COPY . native_spark
-RUN set -e; cd native_spark; \
+COPY . vega
+RUN set -e; cd vega; \
echo "PATH: ${PATH}"; \
# Build executables
cargo build --release --examples; \
2 changes: 1 addition & 1 deletion docker/build_image.sh
@@ -6,7 +6,7 @@ if [ -z $VERSION ]
then
VERSION='latest'
fi
PACKAGE="native_spark:${VERSION}"
PACKAGE="vega:${VERSION}"


cd $SCRIPT_PATH && cd ..
4 changes: 2 additions & 2 deletions docker/docker-compose.yml
@@ -5,7 +5,7 @@ networks:

services:
ns_master:
-image: native_spark:latest
+image: vega:latest
ports:
- "3000"
tty: true
@@ -21,7 +21,7 @@ services:
- ns_worker

ns_worker:
-image: native_spark:latest
+image: vega:latest
ports:
- "10500"
- "22"
4 changes: 2 additions & 2 deletions examples/file_read.rs
@@ -1,6 +1,6 @@
use chrono::prelude::*;
-use native_spark::io::*;
-use native_spark::*;
+use vega::io::*;
+use vega::*;

fn main() -> Result<()> {
let context = Context::new()?;
2 changes: 1 addition & 1 deletion examples/group_by.rs
@@ -1,4 +1,4 @@
-use native_spark::*;
+use vega::*;

fn main() -> Result<()> {
let sc = Context::new()?;
2 changes: 1 addition & 1 deletion examples/join.rs
@@ -1,4 +1,4 @@
-use native_spark::*;
+use vega::*;

fn main() -> Result<()> {
let sc = Context::new()?;
2 changes: 1 addition & 1 deletion examples/make_rdd.rs
@@ -1,4 +1,4 @@
-use native_spark::*;
+use vega::*;

fn main() -> Result<()> {
let sc = Context::new()?;
2 changes: 1 addition & 1 deletion examples/parquet_column_read.rs
@@ -1,7 +1,7 @@
#![allow(where_clauses_object_safety, clippy::single_component_path_imports)]
use chrono::prelude::*;
use itertools::izip;
-use native_spark::*;
+use vega::*;
use parquet::column::reader::get_typed_column_reader;
use parquet::data_type::{ByteArrayType, Int32Type, Int64Type};
use parquet::file::reader::{FileReader, SerializedFileReader};
32 changes: 32 additions & 0 deletions examples/top.rs
@@ -0,0 +1,32 @@
use std::time::Instant;
use vega::*;

fn main() -> Result<()> {
    let sc = Context::new()?;
    let col = sc.make_rdd(0..10000, 2);
    // Each method is timed twice, interleaved; comparing the two passes
    // exposes warm-up and ordering effects.
    let start = Instant::now();
    for _ in 0..100 {
        let _top = col.top(1000);
    }
    println!("Time elapsed in top() is: {:?}", start.elapsed());
    let start = Instant::now();
    for _ in 0..100 {
        let _top = col.top_iter(1000);
    }
    println!("Time elapsed in top_iter() is: {:?}", start.elapsed());
    let start = Instant::now();
    for _ in 0..100 {
        let _top = col.top(1000);
    }
    println!("Time elapsed in top() is: {:?}", start.elapsed());
    let start = Instant::now();
    for _ in 0..100 {
        let _top = col.top_iter(1000);
    }
    println!("Time elapsed in top_iter() is: {:?}", start.elapsed());
    Ok(())
}
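The four timing blocks above are identical except for the method under test. A small helper would keep the benchmark readable as it grows — a sketch only; `time_it` is a name introduced here for illustration and is not part of the codebase:

```doc
use std::time::Instant;

// Hypothetical helper, not part of the repository: run `f` `iters` times
// and print the elapsed wall-clock time under `label`.
fn time_it(label: &str, iters: usize, mut f: impl FnMut()) {
    let start = Instant::now();
    for _ in 0..iters {
        f();
    }
    println!("Time elapsed in {} is: {:?}", label, start.elapsed());
}
```

With it, each measurement collapses to a single call, e.g. `time_it("top()", 100, || { let _ = col.top(1000); });`.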
2 changes: 1 addition & 1 deletion src/scheduler/distributed_scheduler.rs
@@ -95,7 +95,7 @@ impl DistributedScheduler {
shuffle_to_map_stage: Arc::new(DashMap::new()),
cache_locs: Arc::new(DashMap::new()),
master,
framework_name: "native_spark".to_string(),
framework_name: "vega".to_string(),
is_registered: true, // TODO: check if it is necessary
active_jobs: HashMap::new(),
active_job_queue: Vec::new(),
2 changes: 1 addition & 1 deletion tests/test_async.rs
@@ -1,7 +1,7 @@
//! Test whether the library can be used with different running async executors.
use std::sync::Arc;

-use native_spark::*;
+use vega::*;
use once_cell::sync::Lazy;

static CONTEXT: Lazy<Arc<Context>> = Lazy::new(|| Context::new().unwrap());
2 changes: 1 addition & 1 deletion tests/test_pair_rdd.rs
@@ -1,6 +1,6 @@
use std::sync::Arc;

-use native_spark::*;
+use vega::*;
use once_cell::sync::Lazy;

static CONTEXT: Lazy<Arc<Context>> = Lazy::new(|| Context::new().unwrap());
8 changes: 4 additions & 4 deletions tests/test_rdd.rs
@@ -2,10 +2,10 @@ use std::fs::{create_dir_all, File};
use std::io::prelude::*;
use std::sync::Arc;

-use native_spark::io::*;
-use native_spark::partitioner::HashPartitioner;
-use native_spark::rdd::CoGroupedRdd;
-use native_spark::*;
+use vega::io::*;
+use vega::partitioner::HashPartitioner;
+use vega::rdd::CoGroupedRdd;
+use vega::*;
use once_cell::sync::Lazy;

static CONTEXT: Lazy<Arc<Context>> = Lazy::new(|| Context::new().unwrap());
16 changes: 8 additions & 8 deletions user_guide/src/chapter_1.md
@@ -1,6 +1,6 @@
# Introduction

-`native_spark` is a distributed computing framework inspired by Apache Spark.
+`vega` is a distributed computing framework inspired by Apache Spark.

## Getting started

@@ -12,24 +12,24 @@ In order to use the framework you have to clone the repository and add the local

```doc
[dependencies]
-native_spark = { path = "/path/to/local/git/repo" }
+vega = { path = "/path/to/local/git/repo" }
# or
-native_spark = { git = "https://github.com/rajasekarv/native_spark", branch = "master }
+vega = { git = "https://github.com/rajasekarv/vega", branch = "master" }
```

It is not recommended to use this library for any sort of production code yet, as it is under heavy development.

-Check [examples](https://github.com/rajasekarv/native_spark/tree/master/examples) and [tests](https://github.com/rajasekarv/native_spark/tree/master/tests) in the source code to get a basic idea of how the framework works.
+Check [examples](https://github.com/rajasekarv/vega/tree/master/examples) and [tests](https://github.com/rajasekarv/vega/tree/master/tests) in the source code to get a basic idea of how the framework works.
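
For a first impression, a minimal program might look like this — an editorial sketch assembled only from APIs visible in this repository's examples (`Context::new`, `make_rdd`, `top`); the exact return types are not spelled out here:

```doc
use vega::*;

fn main() -> Result<()> {
    let sc = Context::new()?;
    // Distribute the numbers 0..100 over 2 partitions.
    let nums = sc.make_rdd(0..100, 2);
    // Take the 5 largest elements and print them; `{:?}` works whether
    // `top` returns the collection directly or wraps it in a Result.
    let top5 = nums.top(5);
    println!("top 5: {:?}", top5);
    Ok(())
}
```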

## Executing an application

In order to execute application code, some preliminary setup is required. (So far this has only been tested on Linux.)

* Install [Cap'n Proto](https://capnproto.org/install.html). Required for serialization/deserialization and IPC between executors.
-* If you want to execute examples, tests or contribute to development, clone the repository `git clone https://github.com/rajasekarv/native_spark/`, if you want to use the library in your own application you can just add the depency as indicated in the installation paragraph.
-* You need to have [hosts.conf](https://github.com/rajasekarv/native_spark/blob/master/config_files/hosts.conf) in the format present inside config folder in the home directory of the user deploying executors in any of the machines.
+* If you want to execute examples, tests, or contribute to development, clone the repository with `git clone https://github.com/rajasekarv/vega/`; if you only want to use the library in your own application, add the dependency as indicated in the installation paragraph.
+* You need a [hosts.conf](https://github.com/rajasekarv/vega/blob/master/config_files/hosts.conf) file, in the format found inside the config folder, in the home directory of the user deploying executors on each of the machines.
* In `local` mode this means in your current user home, e.g.:
-> $ cp native_spark/config_files/hosts.conf $HOME
+> $ cp vega/config_files/hosts.conf $HOME
* In `distributed` mode the same file is required on each host that may deploy executors (the ones indicated in the `hosts.conf` file) and on the master. E.g.:
```doc
$ ssh [email protected] # this machine IP is in hosts.conf
@@ -79,7 +79,7 @@ In your application you can set the execution mode (`local` or `distributed`) in

1. Set it explicitly while creating the context, e.g.:
```doc
-use native_spark::DeploymentMode;
+use vega::DeploymentMode;
let context = Context::with_mode(DeploymentMode::Local)?;
```
