[minicoredumper] [PATCH 8/8] minicoredumper: add coredump rotate support

Juergen Borleis jbe at pengutronix.de
Mon Mar 11 11:23:18 CET 2019


If only a small amount of space is available for coredumps (like on
embedded devices) it is handy to limit the space used by coredumps.

This change brings in some kind of coredump rotation, which removes older
coredumps in favour of a new one. This new feature is disabled by default,
and will be compiled-in on demand only.

It adds a new 'max_taken' json variable to define the max space (in kiB) used
by all stored coredumps. If this variable is omitted or set to zero, no
rotation will happen and 'minicoredumper' won't change its current
behaviour.

In order to handle something like rotation of stored coredumps,
the calculation when to rotate must be atomic from the 'minicoredumper'
point of view. So this change adds an advisory lock, to let only one
instance run at a time. The lock defaults to the '/var/run/lock/'
directory. If more instances are running at the same time, the second
waits up to a configurable time to do its job. If this times out, this
instance terminates in order to free resources. Its coredump in this case
is lost. The timeout defaults to 10 seconds.

Signed-off-by: Juergen Borleis <jbe at pengutronix.de>
---
 configure.ac                                 |  23 +++
 src/minicoredumper/Makefile.am               |   3 +
 src/minicoredumper/core-rotate.c             | 294 +++++++++++++++++++++++++++
 src/minicoredumper/corestripper.c            |   5 +-
 src/minicoredumper/minicoredumper.cfg.json.5 |   7 +
 src/minicoredumper/prog_config.c             |   8 +-
 src/minicoredumper/prog_config.h             |   7 +
 7 files changed, 345 insertions(+), 2 deletions(-)
 create mode 100644 src/minicoredumper/core-rotate.c

diff --git a/configure.ac b/configure.ac
index 7eb85f4..f90034c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -151,6 +151,29 @@ init_ddir="${sysconfdir}/init.d"
 initdefaultsdir="${sysconfdir}/default"
 AS_IF([test "x$runstatedir" = x], [runstatedir="${localstatedir}/run"], [])
 
+AC_ARG_ENABLE([rotate],
+	AS_HELP_STRING([--enable-rotate], [enable coredump rotation to limit used filesystem space @<:@default=disabled@:>@]),
+	[], [enable_rotate=no])
+AM_CONDITIONAL([COND_MINICOREDUMPER_ROTATE],
+	       [test "${enable_rotate}" = yes])
+AS_IF([test "${enable_rotate}" = yes],
+	AC_DEFINE(WANT_MINICOREDUMPER_ROTATE, [1], [enable coredump rotation]), [])
+
+mcd_lock_dir=$(eval echo ${runstatedir}/lock)
+AC_SUBST(mcd_lock_dir)
+AC_DEFINE_UNQUOTED(MCD_LOCK_DIR, ["${mcd_lock_dir}"], [where to store/expect the lock file.])
+
+AC_ARG_WITH([temper],
+	[AS_HELP_STRING([--with-temper],
+		[temper with concurrent instance in seconds @<:@default=10@:>@])],
+	[],[with_temper=10])
+AS_CASE(["${with_temper}"],
+	[yes], [with_temper=10
+		AC_MSG_NOTICE([@<:@--with-temper@:>@ only detected, defaults to 10 seconds])],
+	[no], [AC_MSG_ERROR([@<:@--without-temper@:>@ detected, use @<:@--disable-rotate@:>@ instead])],
+	[])
+AC_DEFINE_UNQUOTED(MCD_TEMPER_SECS, [${with_temper}], [time to wait for a concurrent instance])
+
 AC_SUBST([init_ddir])
 AC_SUBST([initdefaultsdir])
 AC_SUBST([runstatedir])
diff --git a/src/minicoredumper/Makefile.am b/src/minicoredumper/Makefile.am
index 4cb45cf..1b3c78b 100644
--- a/src/minicoredumper/Makefile.am
+++ b/src/minicoredumper/Makefile.am
@@ -12,6 +12,9 @@ EXTRA_DIST = $(man_MANS)
 
 minicoredumper_SOURCES = corestripper.c corestripper.h \
 			 prog_config.c prog_config.h
+if COND_MINICOREDUMPER_ROTATE
+minicoredumper_SOURCES += core-rotate.c
+endif
 minicoredumper_CPPFLAGS = $(MCD_CPPFLAGS) \
 			  -include $(top_builddir)/config.h \
 			  -I$(top_srcdir)/lib \
diff --git a/src/minicoredumper/core-rotate.c b/src/minicoredumper/core-rotate.c
new file mode 100644
index 0000000..e79aa70
--- /dev/null
+++ b/src/minicoredumper/core-rotate.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 2019 Pengutronix e.K. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <stdlib.h>
+#include <sys/file.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <dirent.h>
+
+#include "prog_config.h"
+
+void info(const char *fmt, ...);
+void fatal(const char *fmt, ...);
+
+struct coredump_entry {
+	char *name; /* directory's name (heap copy, released with the list) */
+	unsigned long long ts; /* directory's time stamp (mtime in seconds) */
+	unsigned size; /* size of directory's content in kiB */
+};
+
+static struct coredump_entry *coredump_list;
+static size_t coredump_count;
+static size_t coredump_used;
+
+static void clean_dirent_list(size_t cnt, struct dirent **entries)
+{
+	while(cnt--)
+		free(entries[cnt]); /* release the 'cnt' entries, last one first */
+	free(entries); /* then the list which held them */
+}
+
+/* free the names of the first 'cnt' entries of 'list', then 'list' itself */
+static void clean_coredump_list(size_t cnt, struct coredump_entry *list)
+{
+	/* the order of release does not matter, so simply count down */
+	while (cnt--)
+		free(list[cnt].name);
+
+	free(list);
+}
+
+/* scandir() filter: accept every entry except the "." and ".." ones */
+static int dir_entry_filter(const struct dirent *entry)
+{
+	const char *name = entry->d_name;
+
+	if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
+		return 0; /* ignore */
+	return 1;
+}
+
+/* ***** remove a directory hierarchy **** */
+
+static void unlink_directory(int dir, const char *sub_dir);
+
+/* remove all 'entries' found in 'dir'; sub directories are removed recursively */
+static void unlink_directory_content(int dir, size_t entry_cnt, struct dirent **entries)
+{
+	size_t idx;
+
+	for (idx = entry_cnt; idx-- > 0; ) {
+		const char *entry = entries[idx]->d_name;
+		struct stat st;
+
+		if (fstatat(dir, entry, &st, AT_SYMLINK_NOFOLLOW) < 0) {
+			/* TODO silently ignore? */
+			fatal("Failed to stat '%s': %m\n", entry);
+			exit(EXIT_FAILURE);
+		}
+		if (S_ISDIR(st.st_mode))
+			unlink_directory(dir, entry);
+		else
+			unlinkat(dir, entry, 0);
+	}
+}
+
+/* remove the directory 'sub_dir' (relative to 'dir') with all its content */
+static void unlink_directory(int dir, const char *sub_dir)
+{
+	struct dirent **content;
+	int cnt, fd;
+
+	cnt = scandirat(dir, sub_dir, &content, dir_entry_filter, alphasort);
+	if (cnt < 0)
+		return;  /* ignore directories we cannot scan */
+
+	fd = openat(dir, sub_dir, O_RDONLY | O_DIRECTORY | O_PATH);
+	if (fd < 0) {
+		/* TODO silently ignore? */
+		fatal("Failed to open sub directory '%s': %m\n", sub_dir);
+		exit(EXIT_FAILURE);
+	}
+	unlink_directory_content(fd, cnt, content);
+	clean_dirent_list(cnt, content);
+	close(fd);
+
+	/* the subdirectory is empty now. Remove it as well */
+	unlinkat(dir, sub_dir, AT_REMOVEDIR);
+}
+
+/* ***** summarize size of a directory hierarchy **** */
+
+static unsigned summarize_directory(int dir, const char *sub_dir);
+
+/* sum up the size in kiB of all 'entries' of 'dir', descending recursively */
+static unsigned summarize_directory_content(int dir, size_t entry_cnt, struct dirent **entries)
+{
+	unsigned kib = 0;
+	size_t idx;
+
+	for (idx = entry_cnt; idx-- > 0; ) {
+		const char *entry = entries[idx]->d_name;
+		struct stat st;
+
+		if (fstatat(dir, entry, &st, AT_SYMLINK_NOFOLLOW) < 0) {
+			/* TODO silently ignore? */
+			fatal("Failed to stat '%s': %m\n", entry);
+			exit(EXIT_FAILURE);
+		}
+		if (S_ISDIR(st.st_mode))
+			kib += summarize_directory(dir, entry);
+		else if (S_ISREG(st.st_mode))
+			kib += st.st_size >> 10; /* kiB, each file rounded down */
+	}
+
+	return kib;
+}
+
+/* size in kiB of 'sub_dir' (relative to 'dir'), 0 if it cannot be scanned */
+static unsigned summarize_directory(int dir, const char *sub_dir)
+{
+	struct dirent **content;
+	unsigned kib;
+	int cnt, fd;
+
+	cnt = scandirat(dir, sub_dir, &content, dir_entry_filter, alphasort);
+	if (cnt < 0)
+		return 0; /* ignore directories we cannot scan */
+
+	fd = openat(dir, sub_dir, O_RDONLY | O_DIRECTORY | O_PATH);
+	if (fd < 0) {
+		fatal("Failed to open sub directory '%s': %m\n", sub_dir);
+		exit(EXIT_FAILURE);
+	}
+	kib = summarize_directory_content(fd, cnt, content);
+	clean_dirent_list(cnt, content);
+	close(fd);
+
+	return kib;
+}
+
+/*
+ * qsort() callback: order coredump entries by ascending time stamp
+ */
+static int ts_compare(const void *s1, const void *s2)
+{
+	const struct coredump_entry *lhs = s1;
+	const struct coredump_entry *rhs = s2;
+
+	/* yields -1, 0 or 1 without the risk of a truncating subtraction */
+	return (lhs->ts > rhs->ts) - (lhs->ts < rhs->ts);
+}
+
+/* fills 'coredump_list' from the sub directories of 'dir', returns the opened 'dir'
+ * note: does not return in case of failure, negative return means nothing to be done */
+static int collect_coredump_info(const char *dir)
+{
+	struct dirent **namelist;
+	int entries, dir_fd, u;
+	const char *cur_name;
+	struct stat stat;
+
+	entries = scandir(dir, &namelist, dir_entry_filter, alphasort);
+	if (entries < 0) {
+		fatal("Failed to scan directory '%s': %m\n", dir);
+		exit(EXIT_FAILURE);
+	}
+	if (entries == 0) {
+		free(namelist); /* scandir() may hand out a list even if it is empty */
+		return -1;	/* nothing to be done here */
+	}
+
+	coredump_list = calloc(entries, sizeof(struct coredump_entry));
+	if (coredump_list == NULL)
+		exit(EXIT_FAILURE);
+	coredump_count = entries;
+
+	dir_fd = openat(AT_FDCWD, dir, O_RDONLY | O_DIRECTORY | O_PATH);
+	if (dir_fd < 0) {
+		fatal("Failed to open directory '%s': %m\n", dir);
+		exit(EXIT_FAILURE);
+	}
+
+	for (u = 0; u < entries; u++) {
+		cur_name = namelist[u]->d_name;
+		if (fstatat(dir_fd, cur_name, &stat, AT_SYMLINK_NOFOLLOW) < 0) {
+			fatal("Failed to stat '%s/%s': %m\n", dir, cur_name);
+			exit(EXIT_FAILURE);
+		}
+
+		if (!S_ISDIR(stat.st_mode))
+			continue; /* skip non directory entries */
+
+		if (coredump_used >= coredump_count) {
+			fatal("Directory content changed while atomic\n");
+			exit(EXIT_FAILURE);
+		}
+		coredump_list[coredump_used].name = strdup(cur_name);
+		if (coredump_list[coredump_used].name == NULL)
+			exit(EXIT_FAILURE);
+		coredump_list[coredump_used].ts = stat.st_mtim.tv_sec;
+		coredump_list[coredump_used].size =
+				summarize_directory(dir_fd, cur_name);
+		coredump_used++;
+	}
+
+	clean_dirent_list(entries, namelist);
+	return dir_fd;
+}
+
+/*
+ * Take the exclusive lock on 'lock_file' and return its file descriptor.
+ * Retries once per second for up to 'MCD_TEMPER_SECS' seconds while another
+ * instance holds the lock. Does not return on failure or on timeout.
+ */
+static int ensure_single_instance(const char *lock_file)
+{
+	int lock, i;
+
+	lock = openat(AT_FDCWD, lock_file, O_CREAT | O_RDWR, 0600);
+	if (lock < 0) {
+		fatal("Failed to open lock file '%s': %m\n", lock_file);
+		exit(EXIT_FAILURE);
+	}
+
+	for (i = 0; i < MCD_TEMPER_SECS; i++) {
+		if (flock(lock, LOCK_EX | LOCK_NB) == 0)
+			return lock; /* we own the lock now, stop polling */
+		if (errno != EWOULDBLOCK) {
+			fatal("Failed to lock '%s': %m\n", lock_file);
+			close(lock);
+			exit(EXIT_FAILURE);
+		}
+		sleep(1);
+	}
+	/* still locked by another instance: give up instead of running unlocked */
+	fatal("Timeout while waiting for lock '%s'\n", lock_file);
+	close(lock);
+	exit(EXIT_FAILURE);
+}
+
+/* remove the oldest coredumps until at most 'cfg->max_space' kiB are in use */
+void rotate_coredumps(const struct config *cfg)
+{
+	int lock, base;
+	unsigned long long size_sum = 0;
+	size_t u;
+
+	if (cfg->max_space == 0)
+		return; /* no limit */
+
+	lock = ensure_single_instance(MCD_LOCK_DIR "/minicoredumper.lock");
+	base = collect_coredump_info(cfg->base_dir);
+	if (base < 0 || coredump_used == 0)
+		goto finished; /* nothing to be done here */
+
+	for (u = 0; u < coredump_used; u++)
+		size_sum += coredump_list[u].size;
+	if (size_sum < cfg->max_space)
+		goto finished; /* still within the limit, nothing to be done here */
+
+	/* sort the list to have the oldest entry first */
+	qsort(coredump_list, coredump_used, sizeof(struct coredump_entry), ts_compare);
+	/* remove oldest directories until we meet the space limit */
+	for (u = 0; u < coredump_used && size_sum > cfg->max_space; u++) {
+		unlink_directory(base, coredump_list[u].name);
+		size_sum -= coredump_list[u].size;
+	}
+
+finished:
+	clean_coredump_list(coredump_used, coredump_list);
+	if (base >= 0)
+		close(base); /* negative: no directory was opened */
+	flock(lock, LOCK_UN);
+	close(lock);
+}
diff --git a/src/minicoredumper/corestripper.c b/src/minicoredumper/corestripper.c
index d96d1df..e1b3bc5 100644
--- a/src/minicoredumper/corestripper.c
+++ b/src/minicoredumper/corestripper.c
@@ -3631,7 +3631,10 @@ static int do_all_dumps(struct dump_info *di, int argc, char *argv[])
 		fatal("unable to init config");
 
 	check_config(cfg);
-
+#if WANT_MINICOREDUMPER_ROTATE
+	/* remove old dumps on demand */
+	rotate_coredumps(cfg);
+#endif
 	core_pid = strtol(argv[1], &p, 10);
 	if (*p != 0)
 		return 1;
diff --git a/src/minicoredumper/minicoredumper.cfg.json.5 b/src/minicoredumper/minicoredumper.cfg.json.5
index 7d9b6f0..83e8cde 100644
--- a/src/minicoredumper/minicoredumper.cfg.json.5
+++ b/src/minicoredumper/minicoredumper.cfg.json.5
@@ -33,6 +33,12 @@ The dump files will be stored in a sub-directory using the template:
 .br
 <command_basename>.<timestamp>.<pid>
 .TP
+.B max_taken
+(number) Maximum filesystem space in kiB that stored coredumps may occupy.
+Honored only if coredump rotation is compiled in.
+If this limit is exceeded, older core dumps are removed until the limit is met
+again. Useful for embedded devices with restricted filesystem space.
+.TP
 .B watch
 (array) A set of conditions, where each condition can specify its own
 recept file. See
@@ -103,6 +109,7 @@ Here is an example configuration file:
 .nf
 {
     "base_dir": "/tmp",
+    "max_taken": 4096,
     "watch": [
         {
             "exe": "*/my_example_app",
diff --git a/src/minicoredumper/prog_config.c b/src/minicoredumper/prog_config.c
index 88721d5..6ab82f1 100644
--- a/src/minicoredumper/prog_config.c
+++ b/src/minicoredumper/prog_config.c
@@ -627,7 +627,13 @@ static int read_base_config(struct json_object *root, struct config *cfg)
 			cfg->base_dir = alloc_json_string(v);
 			if (!cfg->base_dir)
 				return -1;
-
+#if WANT_MINICOREDUMPER_ROTATE
+		} else if (strcmp(n, "max_taken") == 0) {
+			int i;
+			if (get_json_int(v, &i, true) != 0)
+				return -1;
+			cfg->max_space = i;
+#endif
 		} else {
 			info("WARNING: ignoring unknown config item: %s", n);
 		}
diff --git a/src/minicoredumper/prog_config.h b/src/minicoredumper/prog_config.h
index fcd11b8..f7cb13d 100644
--- a/src/minicoredumper/prog_config.h
+++ b/src/minicoredumper/prog_config.h
@@ -57,6 +57,9 @@ struct prog_config {
 
 struct config {
 	char *base_dir;
+#if WANT_MINICOREDUMPER_ROTATE
+	unsigned max_space; /* from corefiles max taken space on the filesystem in kiB, 0 means unlimited */
+#endif
 	struct interesting_prog *ilist;
 	struct prog_config prog_config;
 };
@@ -68,4 +71,8 @@ int init_prog_config(struct config *cfg, const char *cfg_file);
 int simple_match(const char *pattern, const char *string);
 void free_config(struct config *cfg);
 
+#if WANT_MINICOREDUMPER_ROTATE
+void rotate_coredumps(const struct config *cfg);
+#endif
+
 #endif /* CONFIG_H */
-- 
2.11.0




More information about the minicoredumper mailing list