Skip to content

Commit

Permalink
Fix hang bug of TCPStore (PaddlePaddle#43724)
Browse files Browse the repository at this point in the history
* tmp fix

* init

* compile ok

* compile ok

* add vlogs

* add test

* fix termination error

* add testfile

* add

* fix windows compile

* fix windows compile

* fix windows compile

* fix windows compile

* fix windows compile

* fix windows compile

* fix windows compile

* fix windows compile

* fix kunlun compile

* fix compilation

* fix compilation

* fix compilation

* tmp fix

* add windows

* add windows

* add more logs

* change timeout to protected

* SB

* add

* add

* fix timeout

* add

* fix test

* fix test

* fix test

* fix ut

* fix ut

* fix ut
  • Loading branch information
gongweibao authored Jun 24, 2022
1 parent 491b87b commit 4c9330d
Show file tree
Hide file tree
Showing 13 changed files with 409 additions and 105 deletions.
9 changes: 8 additions & 1 deletion paddle/fluid/distributed/store/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
cc_library(
tcp_store
SRCS tcp_store.cc tcp_utils.cc
SRCS tcp_store.cc tcp_utils.cc socket.cpp
DEPS enforce glog)

# Build the TCP store unit test only on non-Windows platforms; it links
# against the tcp_store library defined above.
if(NOT WIN32)
cc_test(
test_c_tcp_store
SRCS test_tcp_store.cc
DEPS tcp_store)
endif()
79 changes: 79 additions & 0 deletions paddle/fluid/distributed/store/socket.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/distributed/store/socket.h"

#ifndef _WIN32
#include <arpa/inet.h>
#include <netinet/ip.h>
#include <sys/socket.h>
#include <unistd.h>
#endif
#include <errno.h>
#include <stdio.h>

namespace paddle {
namespace distributed {

#ifdef _WIN32
// Windows placeholder: address lookup is not implemented on this platform, so
// a fixed notice is written into `out` (truncated to `out_len` bytes) and the
// call always reports success (0). `sock` is not inspected.
static int _get_sockname_of_win(int sock, char* out, int out_len) {
  static_cast<void>(sock);
  static const char kNotice[] = "not support win now";
  snprintf(out, out_len, "%s", kNotice);
  return 0;
}
#else
// Writes the address of the peer that `sock` is connected to into `out` as
// "<ip>:<port>", using at most `out_len` bytes.
//
// Returns 0 on success. Returns -1 when the peer address cannot be obtained,
// belongs to an address family other than IPv4/IPv6, or cannot be converted
// to text; in those cases `out` receives a description of the failure
// instead of an address.
//
// NOTE(review): despite the name, this queries getpeername() (the remote
// end), not getsockname() (the local end) - confirm that callers expect the
// remote address.
static int _get_sockname(int sock, char *out, int out_len) {
  // sockaddr_storage is large enough for every address family. A plain
  // sockaddr_in is too small for an IPv6 address, so getpeername() would
  // truncate it and the IPv6 branch below would read past the object.
  struct sockaddr_storage addr = {};
  socklen_t s_len = sizeof(addr);

  if (::getpeername(sock, reinterpret_cast<sockaddr *>(&addr), &s_len)) {
    ::snprintf(out, out_len, "can't getpeername of %d, errno:%d", sock, errno);
    return -1;
  }

  char ip[128] = {0};
  int port = 0;
  const char *converted = nullptr;

  // deal with both IPv4 and IPv6; anything else is reported as an error
  // rather than being misinterpreted as IPv6.
  if (addr.ss_family == AF_INET) {
    const sockaddr_in *s = reinterpret_cast<const sockaddr_in *>(&addr);
    port = ntohs(s->sin_port);
    converted = ::inet_ntop(AF_INET, &s->sin_addr, ip, sizeof(ip));
  } else if (addr.ss_family == AF_INET6) {
    const sockaddr_in6 *s = reinterpret_cast<const sockaddr_in6 *>(&addr);
    port = ntohs(s->sin6_port);
    converted = ::inet_ntop(AF_INET6, &s->sin6_addr, ip, sizeof(ip));
  } else {
    ::snprintf(out,
               out_len,
               "unsupported address family %d of socket %d",
               static_cast<int>(addr.ss_family),
               sock);
    return -1;
  }

  // inet_ntop() returns nullptr on failure and leaves `ip` unspecified.
  if (converted == nullptr) {
    ::snprintf(out,
               out_len,
               "can't convert peer address of %d, errno:%d",
               sock,
               errno);
    return -1;
  }

  ::snprintf(out, out_len, "%s:%d", ip, port);
  return 0;
}
#endif

// Platform dispatch: forwards to the Windows placeholder or to the POSIX
// implementation and hands back that helper's return code unchanged.
int GetSockName(int sock, char* out, int out_len) {
#ifndef _WIN32
  const int rc = _get_sockname(sock, out, out_len);
#else
  const int rc = _get_sockname_of_win(sock, out, out_len);
#endif
  return rc;
}

// Convenience overload: formats the text for `fd` into a 256-byte stack
// buffer via the three-argument overload and returns it as a std::string.
// The helper's return code is ignored; whatever text it wrote is returned.
std::string GetSockName(int fd) {
  char name_buf[256];
  GetSockName(fd, name_buf, sizeof(name_buf));
  std::string name(name_buf);
  return name;
}

}; // namespace distributed
}; // namespace paddle
26 changes: 26 additions & 0 deletions paddle/fluid/distributed/store/socket.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>

namespace paddle {
namespace distributed {

// Writes a printable description of the peer that socket `fd` is connected
// to into `out`, using at most `out_len` bytes: "<ip>:<port>" on success, or
// an error message when the peer address cannot be obtained. Returns 0 on
// success and -1 on failure. On Windows the lookup is not implemented: a
// fixed placeholder string is written and 0 is returned.
int GetSockName(int fd, char* out, int out_len);

// Convenience overload that returns the same text as a std::string
// (formatted through a 256-byte buffer; the status code is discarded).
std::string GetSockName(int fd);
}; // namespace distributed
}; // namespace paddle
10 changes: 5 additions & 5 deletions paddle/fluid/distributed/store/store.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ namespace distributed {

class Store {
public:
Store() : _timeout(tcputils::kNoTimeout) {}
explicit Store(const std::chrono::seconds& timeout) : _timeout(timeout) {}
Store() : _timeout(900) {}
explicit Store(const int timeout) : _timeout(timeout) {}
virtual ~Store() = default;

virtual int64_t add(const std::string& key, int64_t value) {
Expand All @@ -46,10 +46,10 @@ class Store {
"Implement the add method in the subclass."));
}

virtual const std::chrono::seconds& timeout() const { return _timeout; }
virtual int timeout() { return _timeout; }

private:
std::chrono::seconds _timeout;
protected:
int _timeout;
};

} // namespace distributed
Expand Down
Loading

0 comments on commit 4c9330d

Please sign in to comment.