Skip to content

Commit

Permalink
Initial upload
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexBeischl committed Jan 11, 2022
0 parents commit 6118057
Show file tree
Hide file tree
Showing 15 changed files with 1,999,161 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

build/
compile_commands.json
.idea
.DS_Store
/cmake-build-*
80 changes: 80 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Pipeline stages, listed in the order they run: sanity checks on the
# repository first, then the unit tests, then the leaderboard benchmark.
stages:
- prerequisites
- test
- benchmark

# Prerequisite check: prints $CI_PROJECT_VISIBILITY and fails the job (exit 1)
# with a hint unless the value is exactly "private".
project_private:
stage: prerequisites
script:
- echo "CI_PROJECT_VISIBILITY = $CI_PROJECT_VISIBILITY"
- /bin/bash -c 'if [[ $CI_PROJECT_VISIBILITY = "private" ]]; then exit 0; else echo "Set your project visibility to private! Settings -> General -> Visibility -> Project visibility"; exit 1; fi'
cache:
key: "$CI_JOB_STAGE-$CI_COMMIT_REF_NAME"
tags:
- "fdedi"

# Prerequisite check: reads the first three lines of team.txt and requires that
#   line 1 starts with "Teamname:" and does not contain "YourNameHere",
#   line 2 starts with "Realname:" and does not contain "YourNameHere",
#   line 3 starts with "MatrNr:"   and does not contain "YourMatrikelNrHere".
# Every violated rule prints its own message; the job exits with 1 if any rule failed.
# NOTE(review): the single quotes inside the final echo message close and reopen
# the outer single-quoted bash -c argument, so they are not part of the printed text.
team_file_adapted:
stage: prerequisites
script:
- echo "CI_PROJECT_VISIBILITY = $CI_PROJECT_VISIBILITY"
- /bin/bash -c 'line1=$(sed -n 1p team.txt); line2=$(sed -n 2p team.txt); line3=$(sed -n 3p team.txt); error=0; [[ $line1 =~ ^Teamname:.*$ ]] && ! [[ $line1 =~ ^.*YourNameHere.*$ ]] || { echo "Enter your Teamname! You did not change the default value." && error=1; }; [[ $line2 =~ ^Realname:.*$ ]] && ! [[ $line2 =~ ^.*YourNameHere.*$ ]] || { echo "Enter your Realname! You did not change the default value."; error=1; }; [[ $line3 =~ ^MatrNr:.*$ ]] && ! [[ $line3 =~ ^.*YourMatrikelNrHere.*$ ]] || { echo "Enter your MatrNr! You did not change the default value."; error=1; }; if [ $error -ne 0 ]; then echo "Update the team.txt file. Please make sure the prefixes ('Teamname:', 'Realname:', 'MatrNr:') are still correct."; exit 1; else exit 0; fi'
cache:
key: "$CI_JOB_STAGE-$CI_COMMIT_REF_NAME"
tags:
- "fdedi"

# Test job: configures a Debug build in build/debug, compiles with 8 parallel
# make jobs and runs the test_all binary. build/debug is cached per stage and
# branch (see the cache key) so later pipelines can reuse it.
test:
stage: test
script:
- mkdir -p build/debug
- cd build/debug
- cmake -DCMAKE_BUILD_TYPE=Debug ../..
- make -j8
- ./test_all
cache:
key: "$CI_JOB_STAGE-$CI_COMMIT_REF_NAME"
paths:
- build/debug
policy: pull-push
tags:
- "fdedi"

# Benchmark job: configures a Release build in build/release, compiles with 8
# parallel make jobs and hands the resulting ./bench binary to the
# /data/delivery/measureKNN harness on the runner. build/release is cached per
# stage and branch (see the cache key).
benchmark:
stage: benchmark
script:
- mkdir -p build/release
- cd build/release
- cmake -DCMAKE_BUILD_TYPE=Release ../..
- make -j8
- /data/delivery/measureKNN ./bench
cache:
key: "$CI_JOB_STAGE-$CI_COMMIT_REF_NAME"
paths:
- build/release
policy: pull-push
tags:
- "fdedi"

# Manually triggered helper for the test stage: deletes everything below
# ./build. It uses the same cache key pattern as the test job with policy
# pull-push, presumably so that the emptied build/ directory replaces the
# cached one. Failures of this job do not fail the pipeline.
clear_cache_test:
stage: test
script:
- rm -rf ./build/*
cache:
key: "$CI_JOB_STAGE-$CI_COMMIT_REF_NAME"
paths:
- build/
policy: pull-push
when: manual
allow_failure: true

# Manually triggered helper for the benchmark stage: deletes everything below
# ./build. It uses the same cache key pattern as the benchmark job with policy
# pull-push, presumably so that the emptied build/ directory replaces the
# cached one. Failures of this job do not fail the pipeline.
clear_cache_benchmark:
stage: benchmark
script:
- rm -rf ./build/*
cache:
key: "$CI_JOB_STAGE-$CI_COMMIT_REF_NAME"
paths:
- build/
policy: pull-push
when: manual
allow_failure: true
24 changes: 24 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# CMAKE_CXX_STANDARD 17 is only understood by CMake >= 3.8, and CMake 4.x
# refuses projects that request compatibility with versions older than 3.5,
# so the former minimum of 2.8 is raised to 3.8.
cmake_minimum_required(VERSION 3.8 FATAL_ERROR)

# Toolchain and language standard; these are set before project() so that they
# are in effect when the languages are enabled.
set(CMAKE_C_COMPILER gcc)
set(CMAKE_CXX_COMPILER g++)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

project(fde20_bonusproject_3)

# Third-party dependencies used by the test and benchmark targets.
include(lib/gtest.cmake)
include(lib/benchmark.cmake)

# Warnings, frame pointers for profiling, and code generation for the build host.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -fPIC -Wall -Wextra -fno-omit-frame-pointer -march=native -Wno-unknown-pragmas")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -Wall -Wextra -fno-omit-frame-pointer -march=native")

### Tests
add_executable(test_all test/GraphTests.cpp)
target_link_libraries(test_all gtest gtest_main pthread)

### Performance Measurements for leaderboard
add_executable(bench benchmark/BenchmarkLeaderboard.cpp include/KNN.hpp include/Util.hpp)

### You can execute this binary for testing
add_executable(forTesting src/ForTesting.cpp include/KNN.hpp include/Util.hpp)
Binary file added Graph.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
130 changes: 130 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# FDE Bonus Project 3 - K Nearest Neighbors

#### Reminder

Use Linux as your operating system for this task. We cannot provide support for any Windows- or macOS-related problems.

---
## Problem Description

In this project we work on a directed graph and implement an algorithm to find the _K Nearest Neighbors (knn)_ of a start node.

Each edge of the directed graph has a weight.
The K Nearest Neighbors are the `k nodes` (`result nodes`) with the cheapest path from `start node` to `result node`.
The cost of a path is the sum of the weights of all edges on the path.
The cheapest path is the one with the lowest cost.

---
![](Graph.png)
**Figure 1:** *3-Nearest Neighbors search on a directed graph with start node 8. The result nodes are highlighted in green.*
---

For our example in **Figure 1** the 3-Nearest-Neighbors of `start node 8` are the result nodes: `(node: 6, cost: 0.2)`, `(node: 7, cost: 0.5)`, `(node: 3, cost: 0.6)`.

For this project we are only interested in the K Nearest Neighbor nodes and the cost of their path as shown in the example result.
You do not need to keep track of the nodes on the path itself.

Please make sure your K Nearest Neighbors result:
- contains the weight of the cheapest paths from start node to each result node
- contains no result node twice

---
## Your Task

Implement an efficient **K Nearest Neighbors** algorithm for a directed graph in the function **_getKNN()_**
in `include/KNN.hpp`. All solutions that compute correct results and achieve a time measurement better than **2
minutes** on the leaderboard will be accepted for the bonus. A single-threaded solution is sufficient; you
do not need any parallelization.

Your implementation of `getKNN()` should return the K Nearest Neighbors as a vector of `Matrix::Entries`, where each Entry
consists of `column = the result node` and `weight = cost of the cheapest path from start node to this result node`.

### Code base & dataset

You only need to implement the *K Nearest Neighbors* algorithm itself in the function **_getKNN()_**. We provide you the
functions to read the graph file and create a matrix in ```include/Matrix.hpp```.
The file also contains a description of the matrix structure and the dataset format. However, the only function of the
matrix you will need is ```getNeighbors()```.

#### Testing your solution

We provide a smaller but sufficient dataset (*~50MB*) in ```test/data/dataSmall.mtx``` to test your implementation,
a test suite in ```test/GraphTests.cpp```, and a program for development and experimentation in ```src/ForTesting.cpp```
(you probably won't need it).

You can run the test suite by executing ```test_all```. To execute ```forTesting``` you have to provide the correct program
arguments: [CLion Tutorial](https://www.jetbrains.com/help/objc/add-environment-variables-and-program-arguments.html#add-program-arguments)
. The binary takes three arguments: <br>
```forTesting <absolute path> <start node> <k = number of nearest neighbors>``` <br>
Please provide the absolute file path and make sure it does not contain whitespace. We provide an example in the
file ```src/ForTesting.cpp```.

Also, feel free to add tests if needed!

### Leaderboard benchmark

For the leaderboard measurement we will run the binary compiled from `BenchmarkLeaderboard.cpp` (CMake binary `bench`)
for four queries with different *start nodes* and *k* nearest neighbors. The used dataset (*500MB*) has *400000* nodes
with an average number of *50* outgoing edges per node. The overall runtime from start to end is measured.<br>

We use the same machine as for the previous projects: <br>
```Intel(R) Core(TM) i7-4770K CPU @ 3.50GHz with 4 cores, 8 hyperthreads and 32GB of memory.```

**Important:** Remove all prints on `cout` before uploading onto the server, because we use `cout` to collect the
results of the four different queries. If you don't remove them, you will receive the
output `"what(): Your result was wrong."` (which is identical to a real wrong result).

### Build

A configuration file is provided to build this project with CMake. This allows you to build the project
in the terminal but also provides the option to use Jetbrains CLion or Microsoft Visual Studio and other
IDEs.

Building from Terminal:
Start in the project directory.

```
mkdir -p build/debug
cd build/debug
cmake -DCMAKE_BUILD_TYPE=Debug ../..
make
```

This creates the binaries `test_all`, `bench` and `forTesting`.

---
## Submit
1. Change Teamname, Realname and MatrNr in `team.txt`.
The Teamname will be displayed publicly on the leaderboard.
You can choose an arbitrary name for this, but you are also welcome to use your real name.
The Realname and MatrNr will be used later to verify your submission for the grade bonus.
2. Implement the task.
3. Commit your changes in git and push to the gitlab repository.
4. The build system will pick up your changes and evaluate the submission.
If all tests succeed, your binary is handed over to the performance measurement harness.
The resulting times will be entered into the leaderboard.
5. Visit http://contest.db.in.tum.de/ to see your result.

Make sure your builds are not failing! <br/>
*Left Sidebar > CI/CD > Pipelines*


## Hints

- Take care of cycles in the graph.
- What happens if you see a node again, can it be cheaper?
- Make sure the *K Nearest Neighbors* do not contain nodes twice. They should only contain the k cheapest neighboring
nodes with their minimum costs from the start node.
- To get below 2 minutes you need to make your algorithm efficient and only process relevant nodes. Are there ways to
rule out nodes? When can you stop? ;) Which data structures are efficient? You are allowed to write the data
structures yourself or use existing ones from the standard library.
- Feel free to add files and additional classes if needed. However, you won't need them to solve this project.

## Debugging

If you fail a test you might want to use CLion's **conditional breakpoints** to investigate the cause. For example when
you fail the fourth test of `test_all` and your third node is not `1666237`, you can check what happens with the node
using conditional breakpoints. To do so, you can set a breakpoint that triggers when handling a node with column `1666237` by
setting the `Condition:` `<variable_name_you_use>.column==1666237`. Jetbrains explains the use of breakpoints in
CLion [here](https://www.jetbrains.com/help/clion/using-breakpoints.html#set-breakpoints).

44 changes: 44 additions & 0 deletions benchmark/BenchmarkLeaderboard.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#include "../include/Matrix.hpp"
#include "../include/KNN.hpp"
#include <iostream>
#include <vector>

using namespace std;
//---------------------------------------------------------------------------
int main(int argc, char *argv[]) {
if (argc != 2) {
cout << "usage: " << argv[0] << " <matrixfile>" << endl;
return 0;
}

// Read the matrix file and store it as internal matrix.
Matrix matrix = Matrix::readFile(argv[1]);
// Store the parameters for the different runs
vector<pair<unsigned, unsigned>> params;
// Read from the standard input: <start node> <knn>
for (string line; getline(cin, line);) {
// Find position to split the string
auto whitespace = line.find(' ');
// The start node
unsigned start = atoll(line.substr(0, whitespace).c_str());
// The number of neighbors we are interested in.
unsigned k = atoll(line.substr(whitespace, line.length()).c_str());
params.push_back({start, k});
}

// Computes the sum of all the weights, just to have some result to show.
auto computeSum = [](vector<Matrix::Entry> res) {
double sum = 0.0;
for (auto e : res)
sum += e.weight;
return sum;
};

// Execute the algorithm for all nodes and parameters
for (auto &param : params)
cout << computeSum(getKNN(matrix, param.first, param.second)) << "\n";

// Flush output buffer
cout.flush();
}
//---------------------------------------------------------------------------
26 changes: 26 additions & 0 deletions include/KNN.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#ifndef FDE20_BONUSPROJECT_3_KNN_HPP
#define FDE20_BONUSPROJECT_3_KNN_HPP

#include "Matrix.hpp"

//---------------------------------------------------------------------------
/// Find the k nearest neighbors of the node `start` in the directed graph
/// stored in matrix `m`. Use m.getNeighbors(node) to get all neighbors of a
/// node; a more detailed description is provided in Matrix.hpp.
///
/// @param m     the graph
/// @param start the node the search starts from
/// @param k     the number of nearest neighbors to return
/// @return the k nearest neighbors in sorted order, where Entry::column is the
///         neighbor node and Entry::weight is the cost to reach that node from
///         the start node. The current stub returns an empty vector.
///
/// Declared `inline` because the definition lives in a header: without it,
/// including this header from more than one translation unit of the same
/// target produces multiple-definition link errors.
inline std::vector<Matrix::Entry> getKNN(const Matrix &m, unsigned start, unsigned k) {

   using Entry = Matrix::Entry;
   std::vector<Entry> result;
   // At most k entries are returned, so allocate once up front.
   result.reserve(k);

   // ToDo implement your solution here

   return result;
}

//---------------------------------------------------------------------------

#endif // FDE20_BONUSPROJECT_3_KNN_HPP
Loading

0 comments on commit 6118057

Please sign in to comment.