request parser rewrite + added tests
Some checks failed
Docker Build & Publish / build (push) Failing after 11m2s

* rewrote the request parser; it is now simpler and, in theory, faster
* added gtest and an example test to measure parser times
This commit is contained in:
Nicholas Orlowsky 2025-02-04 18:27:33 -05:00
parent f962f5796d
commit 236f7399fe
Signed by: nickorlow
GPG key ID: 838827D8C4611687
5 changed files with 184 additions and 108 deletions

View file

@ -15,7 +15,7 @@ add_custom_target(build-version
# Generates the supplemental build files: runs build_supp/error_gen.py and
# copies the default site from default_www/regular into ./www.
add_custom_target(build-supplemental
    COMMAND cd ../build_supp && python3 ./error_gen.py
    # `mkdir -p` is idempotent; a plain `mkdir www` fails on every rebuild
    # once the directory already exists, so only this form is kept.
    COMMAND mkdir -p www && cp -r ../default_www/regular/* ./www/
    DEPENDS build_supp/version.txt ../default_www/regular/* build_supp/error_gen.py build-version
    COMMENT "Generated supplemental build files (default www dir + error pages)"
)
@ -38,3 +38,38 @@ add_dependencies(anthracite-bin anthracite)
# API server executable, linked against the anthracite library.
add_executable(anthracite-api-bin src/api_main.cpp)
target_link_libraries(anthracite-api-bin anthracite)

# --- Unit tests -------------------------------------------------------------
# GoogleTest is downloaded at configure time, pinned to a fixed commit archive.
include(FetchContent)
FetchContent_Declare(
    googletest
    URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
)
FetchContent_MakeAvailable(googletest)

# Every .cpp file under tests/ is compiled into the single `tests` binary.
file(GLOB TESTS_SRC CONFIGURE_DEPENDS "tests/*.cpp")

enable_testing()

# Copies the test fixtures next to the test binary so relative paths resolve.
add_custom_target(test_files
    COMMAND cp -r ../tests/test_files .
    DEPENDS ../tests/test_files/*
    COMMENT "Copied test resource files"
)

add_executable(tests ${TESTS_SRC})
add_dependencies(tests anthracite test_files)
target_link_libraries(tests GTest::gtest_main anthracite)

# Registers each gtest case with CTest.
include(GoogleTest)
gtest_discover_tests(tests)

View file

@ -5,115 +5,109 @@
namespace anthracite::http {
// Parses a single "Name: value" header line (line terminator already
// stripped) and stores the result in _headers, keyed by the header name.
//
// - Lines without a ':' are not valid header fields and are ignored.
// - Optional whitespace (spaces / horizontal tabs) between the ':' and the
//   value is skipped.
// - A header with nothing after the ':' (e.g. "X-Empty:") is stored with an
//   empty value; the previous implementation called substr(npos) in that
//   case, which throws std::out_of_range.
void request::parse_header(std::string& raw_line) {
    const auto delim_pos = raw_line.find(':');
    if (delim_pos == std::string::npos) {
        return;
    }

    const std::string header_name = raw_line.substr(0, delim_pos);

    const auto value_pos = raw_line.find_first_not_of(" \t", delim_pos + 1);
    const std::string header_val =
        (value_pos == std::string::npos) ? std::string() : raw_line.substr(value_pos);

    _headers[header_name] = header(header_name, header_val);
}
// Parses one "name=value" segment of a query string and stores it in
// _query_params, keyed by the parameter name.
//
// - Empty segments (as produced by "a=1&&b=2") are ignored.
// - A segment without '=' (e.g. "?debug") is stored with an empty value.
//   The previous implementation computed npos + 1, which wraps around to 0,
//   so the value ended up being a copy of the name.
void request::parse_query_param(std::string& raw_param) {
    if (raw_param.empty()) {
        return;
    }

    const auto delim_pos = raw_param.find('=');
    const std::string query_name = raw_param.substr(0, delim_pos);
    const std::string query_val =
        (delim_pos == std::string::npos) ? std::string() : raw_param.substr(delim_pos + 1);

    _query_params[query_name] = query_param(query_name, query_val);
}
// Splits a request target into the path (everything before the first '?')
// and its '&'-separated query parameters. The path is written to _path and
// each parameter segment is handed to parse_query_param().
void request::parse_path(std::string& raw_path) {
    std::stringstream target(raw_path);
    std::string piece;

    // Nothing could be extracted: leave _path untouched, no parameters either.
    if (!std::getline(target, piece, '?')) {
        return;
    }
    _path = piece;

    // Whatever follows the '?' is the query string.
    for (; std::getline(target, piece, '&');) {
        parse_query_param(piece);
    }
}
void request::parse_request_line(std::string& raw_line) {
request_line_parser_state state = METHOD;
std::stringstream ss(raw_line);
std::string tok;
while(getline(ss, tok, ' ')){
switch(state) {
case METHOD: {
auto search = method_map.find(tok);
if (search != method_map.end()) {
_method = search->second;
} else {
_method = method::UNKNOWN;
}
state = PATH;
break;
};
case PATH: {
parse_path(tok);
state = VERSION;
break;
};
case VERSION: {
auto search = version_map.find(tok);
if (search != version_map.end()) {
_http_version = search->second;
} else {
_http_version = version::HTTP_1_0;
}
return;
};
}
}
}
// Builds a request from the raw bytes received from a client.
//
// raw_data   the complete HTTP request text (request line, headers, body)
// client_ip  textual address of the peer, stored as-is
//
// The input is consumed line by line: the first line is the request line,
// the following lines are headers up to the first empty line, and everything
// after that empty line is the body.
request::request(std::string& raw_data, const std::string& client_ip)
    : _method(method::UNKNOWN)        // same fallbacks parse_request_line() uses,
    , _http_version(version::HTTP_1_0) // so an empty request is never left uninitialised
    , _path("")
    , _client_ipaddr(client_ip)
{
    parser_state state = REQUEST_LINE;

    std::stringstream line_stream(raw_data);
    std::string line;

    // The state is tested BEFORE reading: once the blank separator line has
    // been seen, no further line may be pulled from the stream, otherwise the
    // first line of the body would be read and thrown away.
    while (state != BODY_CONTENT && getline(line_stream, line, '\n')) {
        // HTTP lines end in \r\n; drop the \r only if it is really there.
        // An unconditional pop_back() is undefined on an empty line and
        // eats a real character when the input uses bare \n.
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }

        switch (state) {
        case REQUEST_LINE:
            parse_request_line(line);
            state = HEADERS;
            break;
        case HEADERS:
            if (line.empty()) {
                state = BODY_CONTENT; // blank line separates headers from body
            } else {
                parse_header(line);
            }
            break;
        case BODY_CONTENT:
            break;
        }
    }

    // Remainder of the stream is the body.
    // NOTE(review): reading up to '\0' truncates a body containing NUL bytes.
    if (getline(line_stream, line, '\0')) {
        _body_content = line;
    }
}
std::string request::path() { return _path; }
@ -129,7 +123,6 @@ version request::get_http_version()
// Reports whether the parsed protocol version is one this server handles
// (HTTP/1.0 or HTTP/1.1).
bool request::is_supported_version()
{
    const bool is_http_1_1 = (_http_version == HTTP_1_1);
    const bool is_http_1_0 = (_http_version == HTTP_1_0);
    return is_http_1_1 || is_http_1_0;
}

View file

@ -9,14 +9,16 @@ namespace anthracite::http {
class request {
private:
enum parser_state { METHOD,
PATH,
QUERY_PARAM_NAME,
QUERY_PARAM_VALUE,
VERSION,
HEADER_NAME,
HEADER_VALUE,
BODY_CONTENT };
enum request_line_parser_state {
METHOD, PATH, VERSION
};
enum parser_state {
REQUEST_LINE,
HEADERS,
BODY_CONTENT
};
method _method;
version _http_version;
std::string _path;
@ -24,6 +26,11 @@ private:
std::string _body_content;
std::unordered_map<std::string, header> _headers; // kinda goofy, whatever
std::unordered_map<std::string, query_param> _query_params; // kinda goofy, whatever
//
void parse_request_line(std::string& raw_line);
void parse_header(std::string& raw_line);
void parse_path(std::string& raw_path);
void parse_query_param(std::string& raw_param);
public:
request(std::string& raw_data, const std::string& client_ip);

28
tests/speed_tests.cpp Normal file
View file

@ -0,0 +1,28 @@
#include <gtest/gtest.h>
#include <fstream>
#include <chrono>
#include "../lib/http/request.hpp"
// Rough timing check: parses the same sample request one million times and
// prints the elapsed wall time. It asserts nothing about the parse result.
TEST(speed_tests, request_parse) {
    using std::chrono::duration_cast;
    using std::chrono::milliseconds;
    using std::chrono::steady_clock; // monotonic, the right clock for intervals

    std::ifstream t("./test_files/test_request.http");
    // Without this check a missing fixture would silently benchmark the
    // parsing of an empty string.
    ASSERT_TRUE(t.is_open()) << "could not open ./test_files/test_request.http";

    std::stringstream buffer;
    buffer << t.rdbuf();
    std::string raw_req = buffer.str();
    ASSERT_FALSE(raw_req.empty()) << "test request fixture is empty";

    const auto t1 = steady_clock::now();
    for (int i = 0; i < 1000000; ++i) {
        // volatile keeps the otherwise unused object from being optimised away
        volatile anthracite::http::request req(raw_req, "0.0.0.0");
    }
    const auto t2 = steady_clock::now();

    // Elapsed time as a whole number of milliseconds.
    const auto ms_int = duration_cast<milliseconds>(t2 - t1);

    // Print the raw tick count: streaming the duration itself needs C++20 and
    // already appends the unit, which produced "<n>msms".
    std::cout << "Parsed 1 Million requests in " << ms_int.count() << "ms" << std::endl;
}

View file

@ -0,0 +1,13 @@
GET /foo/bar?test=a&test2=b HTTP/1.1
Host: example.org
User-Agent: Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; fr; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8
Accept: */*
Accept-Language: fr,fr-fr;q=0.8,en-us;q=0.5,en;q=0.3
Accept-Encoding: gzip,deflate
Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7
Keep-Alive: 115
Connection: keep-alive
Content-Type: application/x-www-form-urlencoded
X-Requested-With: XMLHttpRequest
Referer: http://example.org/test
Cookie: foo=bar; lorem=ipsum;