diff options
author | Andrew Scull <ascull@google.com> | 2018-07-18 15:17:33 +0100 |
---|---|---|
committer | Andrew Scull <ascull@google.com> | 2018-07-18 17:13:51 +0100 |
commit | 4ecaac1e9e8441491ed8e399bed9c49e00b9d5a0 (patch) | |
tree | 1e7adfbf150997dcebc7565c4180c421a5bed674 | |
parent | 856ac5db1a6d9615f83bd3c812299da5867aa8f0 (diff) | |
download | generic-4ecaac1e9e8441491ed8e399bed9c49e00b9d5a0.tar.gz |
Transport: timeout after 60 seconds
If the app doesn't respond within 60 seconds then we give up on it. It
may finish at some point in the future but we'll just have to see next
time we call in as there isn't a way to have the app abort the call.
Bug: 110758319
Test: TransportTest.Timeout
Change-Id: I1448df0a266515c813868f5f9327d25ecb7e933f
-rw-r--r-- | libnos/debug.cpp | 1 | ||||
-rw-r--r-- | libnos_transport/test/test.cpp | 23 | ||||
-rw-r--r-- | libnos_transport/transport.c | 44 | ||||
-rw-r--r-- | nugget/include/application.h | 1 |
4 files changed, 59 insertions, 10 deletions
diff --git a/libnos/debug.cpp b/libnos/debug.cpp index 2bd49bd..0398d54 100644 --- a/libnos/debug.cpp +++ b/libnos/debug.cpp @@ -34,6 +34,7 @@ std::string StatusCodeString(uint32_t code) { ErrorString_helper(APP_ERROR_RPC) ErrorString_helper(APP_ERROR_CHECKSUM) ErrorString_helper(APP_ERROR_BUSY) + ErrorString_helper(APP_ERROR_TIMEOUT) default: if (code >= APP_LINE_NUMBER_BASE && code < MAX_APP_STATUS) { return "APP_LINE_NUMBER " + std::to_string(code - APP_LINE_NUMBER_BASE); diff --git a/libnos_transport/test/test.cpp b/libnos_transport/test/test.cpp index c6b4a9c..ac7a884 100644 --- a/libnos_transport/test/test.cpp +++ b/libnos_transport/test/test.cpp @@ -635,3 +635,26 @@ int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } + +#ifdef TEST_TIMEOUT +TEST_F(TransportTest, Timeout) { + const uint8_t app_id = 49; + const uint16_t param = 64; + + InSequence please; + EXPECT_GET_STATUS_IDLE(app_id); + EXPECT_SEND_DATA(app_id, nullptr, 0); + EXPECT_GO_COMMAND(app_id, param, nullptr, 0, 0); + + // Keep saying we're working on it + const uint32_t command = CMD_ID((app_id)) | CMD_IS_READ | CMD_TRANSPORT; + EXPECT_CALL(mock_dev(), Read(command, _, STATUS_MAX_LENGTH)) + .WillRepeatedly(DoAll(ReadStatusV1_Working(), Return(0))); + + // We'll still try and clean up + EXPECT_CLEAR_STATUS(app_id); + + uint32_t res = nos_call_application(dev(), app_id, param, nullptr, 0, nullptr, nullptr); + EXPECT_THAT(res, Eq(APP_ERROR_TIMEOUT)); +} +#endif diff --git a/libnos_transport/transport.c b/libnos_transport/transport.c index 71d4b56..7dad955 100644 --- a/libnos_transport/transport.c +++ b/libnos_transport/transport.c @@ -23,6 +23,7 @@ #include <stdint.h> #include <stdlib.h> #include <string.h> +#include <time.h> #include <unistd.h> #include <application.h> @@ -75,9 +76,8 @@ extern int usleep (uint32_t usec); /* In case of CRC error, try to retransmit */ #define CRC_RETRY_COUNT 3 -/* The number of times to poll before giving up */ -/* TODO: review this limit, maybe replace with a timeout */ -#define POLL_LIMIT 1000000 +/* How long to poll before giving up */ +#define POLL_LIMIT_SECONDS 60 struct transport_context { const struct nos_device *dev; @@ -334,12 +334,31 @@ static uint32_t send_command(const struct transport_context *ctx) { return APP_SUCCESS; } +static bool timespec_before(const struct timespec *lhs, const struct timespec *rhs) { + if (lhs->tv_sec == rhs->tv_sec) { + return lhs->tv_nsec < rhs->tv_nsec; + } else { + return lhs->tv_sec < rhs->tv_sec; + } +} + /* * Keep polling until the app says it is done. */ -uint32_t poll_until_done(const struct transport_context *ctx, - struct transport_status *status) { +static uint32_t poll_until_done(const struct transport_context *ctx, + struct transport_status *status) { uint32_t poll_count = 0; + + /* Start the timer */ + struct timespec now; + struct timespec abort_at; + if (clock_gettime(CLOCK_MONOTONIC, &now) != 0) { + NLOGE("clock_gettime() failing: %s", strerror(errno)); + return APP_ERROR_IO; + } + abort_at.tv_sec = now.tv_sec + POLL_LIMIT_SECONDS; + abort_at.tv_nsec = now.tv_nsec; + NLOGD("Polling app %d", ctx->app_id); do { /* Poll the status */ @@ -370,17 +389,22 @@ uint32_t poll_until_done(const struct transport_context *ctx, NLOGE("App %d just stopped working", ctx->app_id); return APP_ERROR_INTERNAL; } - } while (poll_count != POLL_LIMIT); + if (clock_gettime(CLOCK_MONOTONIC, &now) != 0) { + NLOGE("clock_gettime() failing: %s", strerror(errno)); + return APP_ERROR_IO; + } + } while (timespec_before(&now, &abort_at)); - NLOGE("App %d not done after polling %d times", ctx->app_id, poll_count); - return APP_ERROR_INTERNAL; + NLOGE("App %d not done after polling %d times in %d seconds", + ctx->app_id, poll_count, POLL_LIMIT_SECONDS); + return APP_ERROR_TIMEOUT; } /* * Reconstruct the reply data from datagram stream. */ -uint32_t receive_reply(const struct transport_context *ctx, - const struct transport_status *status) { +static uint32_t receive_reply(const struct transport_context *ctx, + const struct transport_status *status) { int retries = CRC_RETRY_COUNT; while (retries--) { NLOGD("Read app %d reply data (%d bytes)", ctx->app_id, status->reply_len); diff --git a/nugget/include/application.h b/nugget/include/application.h index 8b7eaf2..6dd402a 100644 --- a/nugget/include/application.h +++ b/nugget/include/application.h @@ -309,6 +309,7 @@ enum app_status { APP_ERROR_RPC, /* problem during RPC communication */ APP_ERROR_CHECKSUM, /* checksum failed, only used within protocol */ APP_ERROR_BUSY, /* the app is already working on a commnad */ + APP_ERROR_TIMEOUT, /* the app took too long to respond */ /* more? */ APP_SPECIFIC_ERROR = 0x20, /* "should be enough for anybody" */ |