Skip to content

Commit

Permalink
Support HTTP/1.0 and dump after connection closed
Browse files Browse the repository at this point in the history
  • Loading branch information
dudongcheng committed Apr 24, 2020
1 parent 458df3b commit ec48990
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 36 deletions.
21 changes: 16 additions & 5 deletions http_flow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,18 @@ static bool process_tcp(struct packet_info *packet, const u_char *content, size_
packet->src_addr.assign(buff);
std::snprintf(buff, 128, "%s:%d", packet->dst_addr.c_str(), dst_port);
packet->dst_addr.assign(buff);
packet->is_fin = !!(tcp_header->th_flags & (TH_FIN | TH_RST));
packet->is_fin = tcp_header->th_flags & TH_FIN;
packet->is_rst = tcp_header->th_flags & TH_RST;

content += tcp_header_len;
packet->body = std::string(reinterpret_cast<const char *>(content), len - tcp_header_len);
packet->seq = htonl(tcp_header->th_seq);
packet->ack = htonl(tcp_header->th_ack);
if (tcp_header->th_flags & (TH_FIN | TH_SYN)) {
packet->nxtseq = packet->seq + 1;
} else {
packet->nxtseq = packet->seq + packet->body.size();
}
return true;
}

Expand Down Expand Up @@ -185,7 +192,7 @@ void process_packet(const pcre *url_filter_re, const pcre_extra *url_filter_extr
struct packet_info packet;
packet.ts_usc = ts_usc;
bool ret = process_ipv4(&packet, data, len);
if (!ret || (packet.body.empty() && !packet.is_fin)) return;
if (!ret || (packet.body.empty() && !packet.is_fin && !packet.is_rst)) return;

std::string join_addr;
get_join_addr(packet.src_addr, packet.dst_addr, join_addr);
Expand All @@ -210,9 +217,13 @@ void process_packet(const pcre *url_filter_re, const pcre_extra *url_filter_extr
}
}

if (packet.is_fin && iter != http_requests.end()) {
delete iter->second;
http_requests.erase(iter);
if (iter != http_requests.end()) {
stream_parser *parser = iter->second;
if (packet.is_rst || parser->is_stream_fin(packet)) {
parser->dump_http_request();
delete parser;
http_requests.erase(iter);
}
}
}

Expand Down
54 changes: 41 additions & 13 deletions stream_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ stream_parser::stream_parser(const pcre *url_filter_re, const pcre_extra *url_fi
url_filter_re(url_filter_re),
url_filter_extra(url_filter_extra),
output_path(output_path),
gzip_flag(false) {
gzip_flag(false),
dump_flag(-1),
fin_src(-1) {
std::memset(&next_seq, 0, sizeof next_seq);
std::memset(&ts_usc, 0, sizeof ts_usc);
http_parser_init(&parser[HTTP_REQUEST], HTTP_REQUEST);
Expand Down Expand Up @@ -74,6 +76,7 @@ int stream_parser::on_message_begin(http_parser *parser) {
if (parser->type == HTTP_REQUEST) {
self->ts_usc[parser->type] = self->last_ts_usc;
}
self->dump_flag = 0;
return 0;
}

Expand Down Expand Up @@ -117,6 +120,9 @@ int stream_parser::on_headers_complete(http_parser *parser) {
if (parser->type == HTTP_REQUEST || parser->type == HTTP_RESPONSE) {
stream_parser *self = reinterpret_cast<stream_parser *>(parser->data);
self->header[parser->type] = self->raw[parser->type].substr(0, parser->nread);
if (parser->type == HTTP_RESPONSE) {
self->ts_usc[parser->type] = self->last_ts_usc;
}
}
return 0;
}
Expand All @@ -125,21 +131,16 @@ int stream_parser::on_body(http_parser *parser, const char *at, size_t length) {
if (parser->type == HTTP_REQUEST || parser->type == HTTP_RESPONSE) {
stream_parser *self = reinterpret_cast<stream_parser *>(parser->data);
self->body[parser->type].append(at, length);
if (parser->type == HTTP_RESPONSE) {
self->ts_usc[parser->type] = self->last_ts_usc;
}
}
return 0;
}

int stream_parser::on_message_complete(http_parser *parser) {
stream_parser *self = reinterpret_cast<stream_parser *>(parser->data);
if (parser->type == HTTP_RESPONSE) {
if (self->gzip_flag && !self->body[HTTP_RESPONSE].empty()) {
std::string new_body;
if (gzip_decompress(self->body[HTTP_RESPONSE], new_body)) {
self->body[HTTP_RESPONSE].assign(new_body);
} else {
std::cerr << ANSI_COLOR_RED << "[decompress error]" << ANSI_COLOR_RESET << std::endl;
}
}
if (parser->type == HTTP_RESPONSE && parser->status_code == HTTP_STATUS_CONTINUE) {
self->header_100_continue.assign(self->header[HTTP_RESPONSE]);
self->body_100_continue.assign(self->body[HTTP_RESPONSE]);
Expand All @@ -149,7 +150,7 @@ int stream_parser::on_message_complete(http_parser *parser) {
http_parser_init(parser, HTTP_RESPONSE);
} else {
self->ts_usc[parser->type] = self->last_ts_usc;
self->save_http_request();
self->dump_http_request();
}
}
return 0;
Expand All @@ -162,14 +163,24 @@ bool stream_parser::match_url(const std::string &url) {
return rc >= 0;
}

void stream_parser::save_http_request() {
std::size_t i = url.find('?');
std::string url_no_query = i == std::string::npos ? url : url.substr(0, i);
void stream_parser::dump_http_request() {
if (dump_flag != 0) return;

if (gzip_flag && !body[HTTP_RESPONSE].empty()) {
std::string new_body;
if (gzip_decompress(body[HTTP_RESPONSE], new_body)) {
body[HTTP_RESPONSE].assign(new_body);
} else {
std::cerr << ANSI_COLOR_RED << "[decompress error]" << ANSI_COLOR_RESET << std::endl;
}
}

std::cout << ANSI_COLOR_CYAN << address[HTTP_REQUEST] << " -> " << address[HTTP_RESPONSE];
if (!host.empty()) {
std::cout << " " << ANSI_COLOR_GREEN << host << ANSI_COLOR_CYAN;
}
std::size_t i = url.find('?');
std::string url_no_query = i == std::string::npos ? url : url.substr(0, i);
std::cout << " " << url_no_query << ANSI_COLOR_RESET;

char buff[128];
Expand Down Expand Up @@ -207,6 +218,23 @@ void stream_parser::save_http_request() {
body_100_continue.clear();
host.clear();
std::memset(&ts_usc, 0, sizeof ts_usc);
dump_flag = 1;
}

// Tracks FIN packets on a connection and reports whether this packet closes the stream.
//
// Packets without the FIN flag never close the stream and leave the tracking state
// untouched. The first FIN seen, and any later FIN coming from that same side, only
// records which side sent it (fin_src) together with the packet's nxtseq (fin_nxtseq).
// A FIN arriving from the opposite side closes the stream only when its ACK number
// equals the recorded fin_nxtseq.
//
// Returns true when the stream is considered finished, false otherwise.
bool stream_parser::is_stream_fin(const struct packet_info &packet) {
    if (!packet.is_fin) {
        return false;
    }

    const int sender = is_request_address(packet.src_addr) ? HTTP_REQUEST : HTTP_RESPONSE;

    // A FIN was already recorded for the other direction: this one finishes the
    // stream only if it acknowledges that earlier FIN.
    const bool other_side_already_closed = (fin_src != -1) && (fin_src != sender);
    if (other_side_already_closed) {
        return packet.ack == fin_nxtseq;
    }

    // First FIN on this connection, or a repeated FIN from the same side:
    // (re)record it and keep waiting for the peer.
    fin_src = sender;
    fin_nxtseq = packet.nxtseq;
    return false;
}

std::ostream &operator<<(std::ostream &out, const stream_parser &parser) {
Expand Down
10 changes: 8 additions & 2 deletions stream_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,15 @@ class stream_parser {

std::string temp_header_field;
bool gzip_flag;
int dump_flag;

int fin_src;
uint32_t fin_nxtseq;

public:
stream_parser(const pcre *url_filter_re, const pcre_extra *url_filter_extra, const std::string &output_path);

bool parse(const struct packet_info &body, enum http_parser_type type);
bool parse(const struct packet_info &packet, enum http_parser_type type);

inline bool is_request_address(const std::string &addr) const {
return address[HTTP_REQUEST] == addr;
Expand All @@ -54,7 +58,9 @@ class stream_parser {

bool match_url(const std::string &url);

void save_http_request();
void dump_http_request();

bool is_stream_fin(const struct packet_info &packet);

static int on_message_begin(http_parser *parser);

Expand Down
33 changes: 17 additions & 16 deletions util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,32 @@
bool is_atty = true;

bool is_plain_text(const std::string &s) {
// The algorithm works by dividing the set of bytecodes [0..255] into three
// categories:
// - The white list of textual bytecodes:
// 9 (TAB), 10 (LF), 13 (CR), 32 (SPACE) to 255.
// - The gray list of tolerated bytecodes:
// 7 (BEL), 8 (BS), 11 (VT), 12 (FF), 26 (SUB), 27 (ESC).
// - The black list of undesired, non-textual bytecodes:
// 0 (NUL) to 6, 14 to 31.
// If a file contains at least one byte that belongs to the white list and
// no byte that belongs to the black list, then the file is categorized as
// plain text; otherwise, it is categorized as binary. (The boundary case,
// when the file is empty, automatically falls into the latter category.)
// The algorithm works by dividing the set of bytecodes [0..255] into three
// categories:
// - The white list of textual bytecodes:
// 9 (TAB), 10 (LF), 13 (CR), 32 (SPACE) to 255.
// - The gray list of tolerated bytecodes:
// 7 (BEL), 8 (BS), 11 (VT), 12 (FF), 26 (SUB), 27 (ESC).
// - The black list of undesired, non-textual bytecodes:
// 0 (NUL) to 6, 14 to 31.
// If a file contains at least one byte that belongs to the white list and
// no byte that belongs to the black list, then the file is categorized as
// plain text; otherwise, it is categorized as binary. (The boundary case,
// when the input is empty, is special-cased below and treated as plain text.)
if (s.empty()) {
return true;
}
size_t white_list_char_count = 0;
for (int i = 0; i < s.size(); ++i) {
const unsigned char c = s[i];
for (int i = 0; i < s.size(); ++i) {
const unsigned char c = s[i];
if (c == 9 || c == 10 || c == 13 || (c >= 32 && c <= 255)) {
// white list
white_list_char_count++;
} else if ((c <= 6) || (c >= 14 && c <= 31)) {
// black list
return 0;
}
}
}
return white_list_char_count >= 1 ? true : false;
}

Expand All @@ -44,7 +44,8 @@ std::string timeval2tr(const struct timeval *ts) {
struct tm *local_tm = localtime(&ts->tv_sec);
std::string time_str;
time_str.resize(15);
sprintf(&time_str[0], "%02d:%02d:%02d.%06d", local_tm->tm_hour, local_tm->tm_min, local_tm->tm_sec, (int)ts->tv_usec);
sprintf(&time_str[0], "%02d:%02d:%02d.%06d", local_tm->tm_hour, local_tm->tm_min,
local_tm->tm_sec, (int) ts->tv_usec);
return time_str;
}

Expand Down
3 changes: 3 additions & 0 deletions util.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,11 @@ struct packet_info {
std::string src_addr;
std::string dst_addr;
bool is_fin;
bool is_rst;
std::string body;
uint32_t seq;
uint32_t nxtseq;
uint32_t ack;
};

extern bool is_atty;
Expand Down

0 comments on commit ec48990

Please sign in to comment.