Skip to content

Commit

Permalink
a
Browse files Browse the repository at this point in the history
  • Loading branch information
huyang1988 committed May 6, 2019
1 parent 0e52917 commit 3f3b820
Show file tree
Hide file tree
Showing 6 changed files with 267 additions and 0 deletions.
20 changes: 20 additions & 0 deletions graph_converter/pre-proc-tc/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Name of the executable produced by the link rule below.
exe = pre_proc

# Compiler: the g++ path resolved by `which` when make runs.
cc = "$(shell which g++)"
# Only -O3 is active; everything after '#' on the flags line is commented out.
flags = -O3 #-I../../lib/ -I. -laio -fopenmp

# One object file per .cpp found in ../../lib and in this directory.
objs = $(patsubst %.cpp,%.o,$(wildcard ../../lib/*.cpp)) \
$(patsubst %.cpp,%.o,$(wildcard *.cpp))

# Every header in ../../lib and here, plus this Makefile, is listed as a
# prerequisite of every object, so changing any of them rebuilds all objects.
deps = $(wildcard ../../lib/*.h) \
$(wildcard *.h) \
Makefile

# Compile each .cpp into the matching .o.
%.o:%.cpp $(deps)
$(cc) -c $< -o $@ $(flags)

# Link all objects into the executable.
$(exe):$(objs)
$(cc) $(objs) -o $(exe) $(flags)

# Remove the executable and every object file.
clean:
rm -rf $(exe) $(objs)
10 changes: 10 additions & 0 deletions graph_converter/pre-proc-tc/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Converts a text edge (tuple) list to CSR format, using int as the vertex type and long int as the index type.

In this step, we need to convert the directed graph into an undirected one.
Thus, for each edge we read, we write two directed edges, one in each direction.



Leading lines that start with '%' are ignored.

Prints the first 64 edges (one per line) as verification.
Binary file added graph_converter/pre-proc-tc/pre_proc
Binary file not shown.
18 changes: 18 additions & 0 deletions graph_converter/pre-proc-tc/toy
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
%1231234
%fffffff
0 1
0 2
1 0
1 2
1 3
1 4
2 0
2 1
2 3
3 1
3 2
3 4
4 1
4 3
5 6
6 5
219 changes: 219 additions & 0 deletions graph_converter/pre-proc-tc/tuple_text_to_bin.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
#include <iostream>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>

using namespace std;

typedef int vertex_t;
typedef long int index_t;

//typedef struct packed_edge {
// long int v0;
// long int v1;
//} packed_edge;

// Reports the size in bytes of the file at `filename`.
// Yields -1 when stat() cannot query the path.
inline off_t fsize(const char *filename) {
    struct stat info;
    const bool stat_ok = (stat(filename, &info) == 0);
    return stat_ok ? info.st_size : -1;
}


main(int argc, char** argv){
int fd,i;
char* ss_head;
char* ss;

size_t file_size = fsize(argv[1]);


fd=open( argv[1],O_CREAT|O_RDWR,00666 );



ss_head = (char*)mmap(NULL,file_size,PROT_READ|PROT_WRITE,MAP_SHARED,fd,0);

size_t head_offset=0;
while(ss_head[head_offset]=='#'){
while(ss_head[head_offset]!='\n'){
head_offset++;
}
head_offset++;
}
ss = &ss_head[head_offset];
file_size -= head_offset;



size_t curr=0;
size_t next=0;

//step 1. vert_count,edge_count,
size_t edge_count=0;
size_t vert_count;
vertex_t v_max = 0;
vertex_t v_min = 999999;//as infinity
vertex_t a;
while(next<file_size){
char* sss=ss+curr;
a = atoi(sss);

if(v_max<a){
v_max = a;
}
if(v_min>a){
v_min = a;
}

while((ss[next]!=' ')&&(ss[next]!='\n')&&(ss[next]!='\t')){
next++;
}
while((ss[next]==' ')||(ss[next]=='\n')||(ss[next]=='\t')){
next++;
}
curr = next;
edge_count++;
}
// edge_count /=2;
vert_count = v_max - v_min + 1;
cout<<"edge count: "<<edge_count<<endl;
cout<<"max vertex id: "<<v_max<<endl;
cout<<"min vertex id: "<<v_min<<endl;

cout<<"edge count: "<<edge_count<<endl;
cout<<"vert count: "<<vert_count<<endl;
//step 2. each file size
int fd4 = open( "adjacent.bin",O_CREAT|O_RDWR,00666 );
ftruncate(fd4, edge_count*sizeof(vertex_t));
vertex_t* adj = (vertex_t*)mmap(NULL,edge_count*sizeof(vertex_t),PROT_READ|PROT_WRITE,MAP_SHARED,fd4,0);

int fd5 = open( "head.bin",O_CREAT|O_RDWR,00666 );
ftruncate(fd5, edge_count*sizeof(vertex_t));
vertex_t* head = (vertex_t*)mmap(NULL,edge_count*sizeof(vertex_t),PROT_READ|PROT_WRITE,MAP_SHARED,fd5,0);

int fd2 = open( "degree.bin",O_CREAT|O_RDWR,00666 );
ftruncate(fd2, vert_count*sizeof(index_t));
index_t* degree = (index_t*)mmap(NULL,vert_count*sizeof(index_t),PROT_READ|PROT_WRITE,MAP_SHARED,fd2,0);

int fd3 = open( "begin.bin",O_CREAT|O_RDWR,00666 );
ftruncate(fd3, (vert_count+1)*sizeof(index_t));
index_t* begin = (index_t*)mmap(NULL,(vert_count+1)*sizeof(index_t),PROT_READ|PROT_WRITE,MAP_SHARED,fd3,0);

//step 3. count degree
for(int i=0; i<vert_count;i++){
degree[i]=0;
}
vertex_t index;
vertex_t index_a;
size_t offset =0;
curr=0;
next=0;
while(offset<edge_count/2){
char* sss=ss+curr;
index = atoi(sss)-v_min;
while((ss[next]!=' ')&&(ss[next]!='\n')&&(ss[next]!='\t')){
next++;
}
while((ss[next]==' ')||(ss[next]=='\n')||(ss[next]=='\t')){
next++;
}
curr = next;

char* sss1=ss+curr;
index_a = atoi(sss1)-v_min;
while((ss[next]!=' ')&&(ss[next]!='\n')&&(ss[next]!='\t')){
next++;
}
while((ss[next]==' ')||(ss[next]=='\n')||(ss[next]=='\t')){
next++;
}
curr = next;
degree[index]++;
degree[index_a]++;
// cout<<index<<" "<<degree[index]<<endl;

offset++;
}
// exit(-1);
begin[0]=0;
begin[vert_count]=edge_count;
for(size_t i=1; i<vert_count; i++){
begin[i] = begin[i-1] + degree[i-1];
// cout<<begin[i]<<" "<<degree[i]<<endl;
degree [i-1] = 0;
}
degree[vert_count-1] = 0;
//step 4: write adjacent list
vertex_t v_id;
offset =0;
next = 0;
curr = 0;
while(offset<edge_count/2){
char* sss=ss+curr;
index = atoi(sss)-v_min;
while((ss[next]!=' ')&&(ss[next]!='\n')&&(ss[next]!='\t')){
next++;
}
while((ss[next]==' ')||(ss[next]=='\n')||(ss[next]=='\t')){
next++;
}
curr = next;

char* sss1=ss+curr;
v_id = atoi(sss1)-v_min;
adj[begin[index]+degree[index]] = v_id;
head[begin[index]+degree[index]]= index;
degree[index]++;
//reverse edge
adj[begin[v_id]+degree[v_id]] = index;
head[begin[v_id]+degree[v_id]]= v_id;
degree[v_id]++;

while((ss[next]!=' ')&&(ss[next]!='\n')&&(ss[next]!='\t')){
next++;
}
while((ss[next]==' ')||(ss[next]=='\n')||(ss[next]=='\t')){
next++;
}
curr = next;

offset++;
}

//step 5
//print output as a test
// for(size_t i=0; i<vert_count; i++){
for(size_t i=0; i<8; i++){
cout<<begin[i]<<" "<<degree[i]<<" ";
for(index_t j=0; j<degree[i]; j++){
cout<<head[begin[i]+j]<<"-"<<adj[begin[i]+j]<<" ";
}
// if(degree[i]>0){
cout<<endl;
// }
}
cout<<begin[vert_count]<<endl;

// for(int i=0; i<edge_count; i++){
for(int i=0; i<64; i++){
cout<<head[i]<<" "<<adj[i]<<endl;
}

munmap( ss,sizeof(char)*file_size );
munmap( adj,sizeof(vertex_t)*edge_count );
munmap( head,sizeof(vertex_t)*edge_count );
munmap( begin,sizeof(index_t)*vert_count+1 );
munmap( degree,sizeof(index_t)*vert_count );
close(fd2);
close(fd3);
close(fd4);
close(fd5);
}
Binary file added graph_converter/pre-proc-tc/tuple_text_to_bin.o
Binary file not shown.

0 comments on commit 3f3b820

Please sign in to comment.