liblog: Instrument logging of logd write drops

- If logger system is prostrated, send an event message with the
  liblog tag from the associated UID and PID with a count of
  dropped messages once logging is resumed.
- Added to the README a description of the error return values.
- Describe in the README the appropriate mitigations for dropped
  messages.
- If the caller sees this message, then
  /proc/sys/net/unix/max_dgram_qlen is likely too small

Change-Id: Iaf387b9e5e1b6aa93bebc7481f9e8353732e3229
This commit is contained in:
Mark Salyzyn 2015-02-12 15:14:26 -08:00
parent 29eb57066c
commit d45d36e011
4 changed files with 96 additions and 5 deletions

View file

@ -16,6 +16,8 @@
LOCAL_PATH := $(my-dir)
include $(CLEAR_VARS)
liblog_cflags := $(shell sed -n 's/^\([0-9]*\)[ \t]*liblog[ \t].*/-DLIBLOG_LOG_TAG=\1/p' $(LOCAL_PATH)/event.logtags)
ifneq ($(TARGET_USES_LOGD),false)
liblog_sources := logd_write.c
else
@ -45,7 +47,7 @@ else
uio.c
endif
liblog_host_sources := $(liblog_sources) fake_log_device.c
liblog_host_sources := $(liblog_sources) fake_log_device.c event.logtags
liblog_target_sources := $(liblog_sources) log_time.cpp log_is_loggable.c
ifneq ($(TARGET_USES_LOGD),false)
liblog_target_sources += log_read.c
@ -57,7 +59,7 @@ endif
# ========================================================
LOCAL_MODULE := liblog
LOCAL_SRC_FILES := $(liblog_host_sources)
LOCAL_CFLAGS := -DFAKE_LOG_DEVICE=1 -Werror
LOCAL_CFLAGS := -DFAKE_LOG_DEVICE=1 -Werror $(liblog_cflags)
LOCAL_MULTILIB := both
include $(BUILD_HOST_STATIC_LIBRARY)
@ -76,13 +78,13 @@ include $(BUILD_HOST_SHARED_LIBRARY)
include $(CLEAR_VARS)
LOCAL_MODULE := liblog
LOCAL_SRC_FILES := $(liblog_target_sources)
LOCAL_CFLAGS := -Werror
LOCAL_CFLAGS := -Werror $(liblog_cflags)
include $(BUILD_STATIC_LIBRARY)
include $(CLEAR_VARS)
LOCAL_MODULE := liblog
LOCAL_WHOLE_STATIC_LIBRARIES := liblog
LOCAL_CFLAGS := -Werror
LOCAL_CFLAGS := -Werror $(liblog_cflags)
include $(BUILD_SHARED_LIBRARY)
include $(call first-makefiles-under,$(LOCAL_PATH))

View file

@ -131,6 +131,33 @@ DESCRIPTION
when opening the sub-log. It is recommended to open the log
ANDROID_LOG_RDONLY in these cases.
ERRORS
If messages fail, a negative error code will be returned to the caller.
The -ENOTCONN return code indicates that the logger daemon is stopped.
The -EBADF return code indicates that the log access point can not be
opened, or the log buffer id is out of range.
For the -EAGAIN return code, this means that the logging message was
temporarily backed-up either because of Denial Of Service (DOS) logging
pressure from some chatty application or service in the Android system,
or if too small of a value is set in /proc/sys/net/unix/max_dgram_qlen.
To aid in diagnosing the occurence of this, a binary event from liblog
will be sent to the log daemon once a new message can get through
indicating how many messages were dropped as a result. Please take
action to resolve the structural problems at the source.
It is generally not advised for the caller to retry the -EAGAIN return
code as this will only make the problem(s) worse and cause your
application to temporarily drop to the logger daemon priority, BATCH
scheduling policy and background task cgroup. If you require a group of
messages to be passed atomically, merge them into one message with
embedded newlines to the maximum length LOGGER_ENTRY_MAX_PAYLOAD.
Other return codes from writing operation can be returned. Since the
library retries on EINTR, -EINTR should never be returned.
SEE ALSO
syslogd(8)

36
liblog/event.logtags Normal file
View file

@ -0,0 +1,36 @@
# The entries in this file map a sparse set of log tag numbers to tag names.
# This is installed on the device, in /system/etc, and parsed by logcat.
#
# Tag numbers are decimal integers, from 0 to 2^31. (Let's leave the
# negative values alone for now.)
#
# Tag names are one or more ASCII letters and numbers or underscores, i.e.
# "[A-Z][a-z][0-9]_". Do not include spaces or punctuation (the former
# impacts log readability, the latter makes regex searches more annoying).
#
# Tag numbers and names are separated by whitespace. Blank lines and lines
# starting with '#' are ignored.
#
# Optionally, after the tag names can be put a description for the value(s)
# of the tag. Description are in the format
# (<name>|data type[|data unit])
# Multiple values are separated by commas.
#
# The data type is a number from the following values:
# 1: int
# 2: long
# 3: string
# 4: list
#
# The data unit is a number taken from the following list:
# 1: Number of objects
# 2: Number of bytes
# 3: Number of milliseconds
# 4: Number of allocations
# 5: Id
# 6: Percent
# Default value for data of type int/long is 2 (bytes).
#
# TODO: generate ".java" and ".h" files with integer constants from this file.
1005 liblog (dropped|1)

View file

@ -13,12 +13,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#if !defined(_WIN32)
#include <pthread.h>
#endif
#include <stdarg.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -172,6 +174,7 @@ static int __write_to_log_daemon(log_id_t log_id, struct iovec *vec, size_t nr)
size_t i, payload_size;
static uid_t last_uid = AID_ROOT; /* logd *always* starts up as AID_ROOT */
static pid_t last_pid = (pid_t) -1;
static atomic_int_fast32_t dropped;
if (last_uid == AID_ROOT) { /* have we called to get the UID yet? */
last_uid = getuid();
@ -206,7 +209,6 @@ static int __write_to_log_daemon(log_id_t log_id, struct iovec *vec, size_t nr)
pmsg_header.uid = last_uid;
pmsg_header.pid = last_pid;
header.id = log_id;
header.tid = gettid();
header.realtime.tv_sec = ts.tv_sec;
header.realtime.tv_nsec = ts.tv_nsec;
@ -216,6 +218,28 @@ static int __write_to_log_daemon(log_id_t log_id, struct iovec *vec, size_t nr)
newVec[1].iov_base = (unsigned char *) &header;
newVec[1].iov_len = sizeof(header);
if (logd_fd > 0) {
int32_t snapshot = atomic_exchange_explicit(&dropped, 0, memory_order_relaxed);
if (snapshot) {
android_log_event_int_t buffer;
header.id = LOG_ID_EVENTS;
buffer.header.tag = htole32(LIBLOG_LOG_TAG);
buffer.payload.type = EVENT_TYPE_INT;
buffer.payload.data = htole32(snapshot);
newVec[2].iov_base = &buffer;
newVec[2].iov_len = sizeof(buffer);
ret = TEMP_FAILURE_RETRY(writev(logd_fd, newVec + 1, 2));
if (ret != (ssize_t)(sizeof(header) + sizeof(buffer))) {
atomic_fetch_add_explicit(&dropped, snapshot, memory_order_relaxed);
}
}
}
header.id = log_id;
for (payload_size = 0, i = header_length; i < nr + header_length; i++) {
newVec[i].iov_base = vec[i - header_length].iov_base;
payload_size += newVec[i].iov_len = vec[i - header_length].iov_len;
@ -281,6 +305,8 @@ static int __write_to_log_daemon(log_id_t log_id, struct iovec *vec, size_t nr)
if (ret > (ssize_t)sizeof(header)) {
ret -= sizeof(header);
} else if (ret == -EAGAIN) {
atomic_fetch_add_explicit(&dropped, 1, memory_order_relaxed);
}
#endif