This is the mail archive of the systemtap@sourceware.org mailing list for the systemtap project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [RFC] Crash extension for SystemTap


Hi,

Sorry, I forgot to attach my source code.
Here is the source code.

Satoru MORIYA wrote:

Hi,

Here is an extension (shared object) for the crash utility that retrieves
the trace data recorded by SystemTap scripts.

I'd like to analyze what caused the kernel panic by using the systemtap.
However, currently the systemtap's trace data can't be retrieved from a
dumped image easily. So, I developed a crash's extension which retrieves
the data recorded by systemtap from the dumped image.
Here is a brief document of this extension. This extension supports the new
utt-based buffer as well as the bulk-mode buffer of old systemtap module.

I have tested this extension on the following systems.
  * FC6, i386, kernel-2.6.21, systemtap-0.5.14, crash-4.0-1.1
  * FC6, i386, kernel-2.6.20, systemtap-0.5.13/14, crash-4.0-1.1
  * RHEL5, i386, kernel-2.6.18-8.el5, systemtap-0.5.12, crash-4.0-3.14


Preparation
==============
(A) Build the shared-object (stplog.so).

1. Put Makefile and stplog.c into a directory ($DIR)
    $ cd $DIR

2. Make the symbolic link to the crash source code directory
    $ ln -s $WHERE_CRASH_PLACED crash

3. Build
    $ make

(B) Make the crash dump which includes SystemTap trace data.
    (*)If you analyze the live system memory, ignore this section.

1. Install kdump
     If you use FC6, see following URL.
     http://fedoraproject.org/wiki/FC6KdumpKexecHowTo?highlight=%28kdump%29

2. Use SystemTap
    $ stap foo.stp

3. Panic
    $ echo c > /proc/sysrq-trigger

How to use
==============
1. start crash
    $ crash vmlinux vmcore
    (*) If you analyze the live system memory, you don't need "vmcore".
         $ crash vmlinux

2. load the shared-object
    crash> extend $(WHERE_OBJ_PLACED)/stplog.so

3. retrieve the data
    crash> stplog -m <mod_name>
    (*) <mod_name> is the name of trace module from which you retrieve data.

4. You can get output files under the directory whose name is <mod_name>.

Output
==============
The stplog command creates one file per relayfs channel buffer (equivalent to one file per CPU).
It also removes the padding bytes.


I believe this command is very useful for system administrators if they monitor their systems with SystemTap.

Best Regards,

---
Satoru MORIYA
Linux Technology Center
Hitachi, Ltd., Systems Development Laboratory
E-mail: satoru.moriya.br@hitachi.com




-- --- Satoru MORIYA Linux Technology Center Hitachi, Ltd., Systems Development Laboratory E-mail: satoru.moriya.br@hitachi.com
# Build rules for stplog.so, the crash extension built from stplog.c.
# Expects ./crash to be a symbolic link to the crash source tree so that
# "defs.h" can be found through -I./crash.
TARGET=stplog.so
CFILE=stplog.c

# -fPIC: the output is a shared object that crash loads at run time.
# -DX86: presumably selects the i386 definitions in crash's defs.h -- confirm
#        before building for another architecture.
CFLAGS= -shared -rdynamic -fPIC -DX86 
CFLAGS+= -I./crash -Wall

PRJNAME=libcrash_for_systemtap
VERSION=`date +%Y%m%d`

# These targets do not produce files of the same name; without .PHONY a stray
# file called "clean", "dist" or "distclean" would silently disable them.
.PHONY: clean dist distclean

$(TARGET):$(CFILE)
	gcc $(CFLAGS) -o $@ $(CFILE)

# Remove the build product and editor backup files.
clean:
	rm -f -r $(TARGET) *~

# Create a dated source tarball; runs distclean first, which also removes
# the ./crash symbolic link.
dist:distclean
	mkdir $(PRJNAME)-$(VERSION)
	cp $(CFILE) Makefile README $(PRJNAME)-$(VERSION)
	tar cvjf $(PRJNAME)-$(VERSION).tar.bz2 $(PRJNAME)-$(VERSION)
	rm -f -r $(PRJNAME)-$(VERSION)

# Like clean, and additionally removes the ./crash symbolic link.
distclean:
	rm -f -r $(TARGET) *~ crash
/*
 crash shared object for retrieving systemtap buffer
 Copyright (c) 2007 Hitachi,Ltd.,
 Created by Satoru Moriya <satoru.moriya.br@hitachi.com>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include "defs.h"

/*
 * Failure codes returned by get_symbol_addr().  That function returns a
 * ulong, so callers see these values converted to unsigned.
 */
#define STPLOG_NO_MOD  -1	/* the named module is not loaded */
#define STPLOG_NO_SYM  -2	/* the module has no symbol of that name */

/*
 * Member offsets of the target's "rchan" and "rchan_buf" structures,
 * looked up once by get_rchan_offsets() via MEMBER_OFFSET().
 */
struct rchan_offsets {
	long	subbuf_size;		/* rchan.subbuf_size */
	long	n_subbufs;		/* rchan.n_subbufs */
	long	buf;			/* rchan.buf */
	long	buf_start;		/* rchan_buf.start */
	long	buf_offset;		/* rchan_buf.offset */
	long	buf_subbufs_produced;	/* rchan_buf.subbufs_produced */
	long	buf_padding;		/* rchan_buf.padding */
};

/*
 * Local copy of the rchan_buf members this extension needs, filled in by
 * get_rchan_buf().  'start' and 'padding' hold addresses in the target's
 * memory: they are only ever passed to readmem(), never dereferenced.
 */
struct fake_rchan_buf {
	void	*start;			/* target address of the buffer data */
	size_t	offset;			/* rchan_buf.offset as read from the target */
	size_t	subbufs_produced;	/* rchan_buf.subbufs_produced as read */
	size_t	*padding;		/* target address of the padding array */
};

/*
 * Local copy of the rchan members this extension needs, filled in by
 * get_rchan().
 */
struct fake_rchan {
	size_t	subbuf_size;	/* rchan.subbuf_size as read from the target */
	size_t	n_subbufs;	/* rchan.n_subbufs as read from the target */
};

/* Per-CPU state: the copy of that CPU's rchan_buf (see get_rchan_buf()). */
struct per_cpu_data {
	struct fake_rchan_buf	buf;
};

/* File-scope state shared by the functions below. */
static struct rchan_offsets rchan_offsets;	/* set by get_rchan_offsets() */
static struct fake_rchan chan;			/* set by get_rchan() */
static struct per_cpu_data per_cpu[NR_CPUS];	/* set by get_rchan_buf() */
static FILE *outfp;		/* log file being written; closed by cleanup() */
static char *subbuf;		/* sub-buffer scratch space; freed by cleanup() */
static jmp_buf saved_env;	/* copy of pc->main_loop_env made by cmd_systemtaplog() */
static int is_global;		/* result of check_global_buffer(): 1 stops output after the first CPU */
static int old_format;		/* 1 when the module exposes "_stp_chan" rather than "_stp_utt" */

/* Forward declarations needed by command_table; both are defined below. */
void cmd_systemtaplog(void);
char *help_systemtaplog[];

/*
 * Commands provided by this extension; handed to register_extension() in
 * systemtaplog_init().  The {NULL} entry terminates the table.
 */
static struct command_table_entry command_table[] = {
	{"stplog", cmd_systemtaplog, help_systemtaplog, 0},
	{NULL},
};

/*
 * Release whatever the last command left behind: close the log file if
 * one is still open and free the sub-buffer scratch space.  Both
 * file-scope pointers are reset to NULL, so calling this twice is safe.
 */
static void cleanup(void)
{
	if (outfp)
		fclose(outfp);
	outfp = NULL;

	free(subbuf);		/* free(NULL) is a no-op */
	subbuf = NULL;
}

/*
 * Look up the offsets of the "rchan" and "rchan_buf" members used by this
 * extension and store them in the file-scope 'rchan_offsets'.
 *
 * The lookups run in the order listed and stop at the first member whose
 * offset comes back negative.
 *
 * Returns 0 when every offset was found, -1 (after a warning) otherwise.
 */
static int get_rchan_offsets(void)
{
	struct {
		char *type;
		char *member;
		long *slot;
	} wanted[] = {
		{ "rchan",     "subbuf_size",      &rchan_offsets.subbuf_size },
		{ "rchan",     "n_subbufs",        &rchan_offsets.n_subbufs },
		{ "rchan",     "buf",              &rchan_offsets.buf },
		{ "rchan_buf", "start",            &rchan_offsets.buf_start },
		{ "rchan_buf", "offset",           &rchan_offsets.buf_offset },
		{ "rchan_buf", "subbufs_produced", &rchan_offsets.buf_subbufs_produced },
		{ "rchan_buf", "padding",          &rchan_offsets.buf_padding },
	};
	unsigned int k;

	for (k = 0; k < sizeof(wanted) / sizeof(wanted[0]); k++) {
		*wanted[k].slot = MEMBER_OFFSET(wanted[k].type, wanted[k].member);
		if (*wanted[k].slot < 0) {
			error(WARNING, "cannot get rchan offset\n");
			return -1;
		}
	}
	return 0;
}

/*
 * Read the pointer stored at 'chan_addr' in the target, then copy the
 * subbuf_size and n_subbufs members found behind that pointer into the
 * file-scope 'chan'.
 *
 * Returns the pointer value that was read (the target address of the
 * rchan).  All reads use FAULT_ON_ERROR.
 */
static ulong get_rchan(ulong chan_addr) 
{
	ulong base;

	readmem(chan_addr, KVADDR, &base, sizeof(void*), "stp_channel",
		FAULT_ON_ERROR);

	readmem(base + rchan_offsets.subbuf_size, KVADDR,
		&chan.subbuf_size, sizeof(size_t),
		"stp_channel.subbuf_size", FAULT_ON_ERROR);

	readmem(base + rchan_offsets.n_subbufs, KVADDR,
		&chan.n_subbufs, sizeof(size_t),
		"stp_channel.n_subbufs", FAULT_ON_ERROR);

	return base;
}

/*
 * Fill per_cpu[cpu].buf from the target.
 *
 * 'rchan' is the target address of the rchan.  Entry 'cpu' of its buf[]
 * pointer array is read first, then the start, offset, subbufs_produced
 * and padding members behind that pointer.  All reads use FAULT_ON_ERROR.
 */
static void get_rchan_buf(int cpu, ulong rchan) 
{
	struct fake_rchan_buf *dst = &per_cpu[cpu].buf;
	ulong buf_addr;

	readmem(rchan + rchan_offsets.buf + sizeof(void*) * cpu, KVADDR,
		&buf_addr, sizeof(void*),
		"stp_channel.buf", FAULT_ON_ERROR);

	readmem(buf_addr + rchan_offsets.buf_start, KVADDR,
		&dst->start, sizeof(void*),
		"stp_channel.buf.start", FAULT_ON_ERROR);

	readmem(buf_addr + rchan_offsets.buf_offset, KVADDR,
		&dst->offset, sizeof(size_t),
		"stp_channel.buf.offset", FAULT_ON_ERROR);

	readmem(buf_addr + rchan_offsets.buf_subbufs_produced, KVADDR,
		&dst->subbufs_produced, sizeof(size_t),
		"stp_channel.buf.subbufs_produced", FAULT_ON_ERROR);

	readmem(buf_addr + rchan_offsets.buf_padding, KVADDR,
		&dst->padding, sizeof(size_t*),
		"stp_channel.buf.padding", FAULT_ON_ERROR);
}

static ulong get_symbol_addr(char *module, char *symbol)
{
	int i;
	struct syment *sym, *sym_end;
	struct load_module *lm;

        for (i = 0; i < kt->mods_installed; i++) {
                lm = &st->load_modules[i];
                if (!STREQ(module, lm->mod_name))
                        continue;
                sym = lm->mod_symtable;
                sym_end = lm->mod_symend;		
                for ( ; sym <= sym_end; sym++) {
			if(STREQ(sym->name, symbol))
				return ((ulong)sym->value);
                }
		error(WARNING, "'%s' doesn't have the symbol named '%s'.\n", 
		      module, symbol);
		return STPLOG_NO_SYM;
        }
	error(WARNING, "'%s' is not loaded.\n", module);
	return STPLOG_NO_MOD;
}

/*
 * Read the pointer stored at 'stp_utt_addr' in the target and return the
 * address sizeof(int) bytes past the location it points to.
 *
 * NOTE(review): the fixed sizeof(int) step presumably skips a leading int
 * member so that the result addresses the rchan pointer inside the utt
 * structure -- confirm against the module's structure layout, especially
 * on targets where pointers are more strictly aligned than int.
 */
static ulong get_rchan_addr(ulong stp_utt_addr)
{
	ulong utt_base;

	readmem(stp_utt_addr, KVADDR, &utt_base, sizeof(void*), "stp_utt",
		FAULT_ON_ERROR);
	utt_base += sizeof(int);
	return utt_base;
}

/*
 * Compare the first two entries of the rchan's buf[] pointer array.
 *
 * Returns 1 when both entries hold the same pointer value, 0 otherwise.
 * The caller stores the result in 'is_global'.
 */
static int check_global_buffer(ulong rchan)
{
	ulong slot = rchan + rchan_offsets.buf;
	ulong first, second;

	readmem(slot, KVADDR, &first, sizeof(void*),
		"stp_channel.buf", FAULT_ON_ERROR);
	readmem(slot + sizeof(void*), KVADDR, &second, sizeof(void*),
		"stp_channel.buf", FAULT_ON_ERROR);

	return first == second;
}

static int setup_global_data(char *module) 
{
	int i;
	ulong stp_utt_addr = 0;
	ulong stp_rchan_addr = 0;
	ulong rchan;

	stp_utt_addr = get_symbol_addr(module, "_stp_utt");
	if (stp_utt_addr == STPLOG_NO_MOD) {
		return -1;
	} else if (stp_utt_addr == STPLOG_NO_SYM) {
		stp_rchan_addr = get_symbol_addr(module, "_stp_chan");
		if (stp_rchan_addr == STPLOG_NO_SYM)
			return -1;
		old_format = 1;
	} else {
		stp_rchan_addr = get_rchan_addr(stp_utt_addr);
	}
	rchan = get_rchan(stp_rchan_addr);
	for (i = 0; i < kt->cpus; i++)
		get_rchan_buf(i, rchan);

	if (kt->cpus > 1) {
		is_global = check_global_buffer(rchan);
	}
	
	return 0;
}

static int output_cpu_logs(char *module)
{
	int i, max = 256;
	struct per_cpu_data *pcd;
	size_t n, idx, start, end, ready, len;
	unsigned padding;
	char fname[max + 1], *source;
	DIR *dir;

	/* check and create log directory */
	dir = opendir(module);
	if (dir) {
		closedir(dir);
	} else {
		if (mkdir(module, S_IRWXU) < 0) {
			error(WARNING, "cannot create log directory '%s\n'", module);
			return -1;
		}
	}

	/* allocate subbuf memory */
	subbuf = malloc(chan.subbuf_size);
	if (!subbuf) {
		error(WARNING, "cannot allocate memory\n");
		return -1;
	}

	fname[max] = '\0';
	for (i = 0; i < kt->cpus; i++) {
		pcd = &per_cpu[i];
		ready = pcd->buf.subbufs_produced + (pcd->buf.offset ? 1 : 0);
		if (ready > chan.n_subbufs) {
			start = ready % chan.n_subbufs;
			end = start + chan.n_subbufs;
		} else {
			start = 0;
			end = ready;
		}
		/* print information */
		fprintf(fp, "--- generating 'cpu%d' ---\n", i);
		fprintf(fp, "  subbufs ready on relayfs:%d\n", ready);
		fprintf(fp, "    n_subbufs:%d, read from:%d to:%d (offset:%d)\n\n",
		       chan.n_subbufs, start, end, pcd->buf.offset);

		/* create log file */
		snprintf(fname, max, "%s/cpu%d", module, i);
		outfp = fopen(fname, "w");
		if (!outfp) {
			error(WARNING, "cannot create log file '%s'\n", fname);
			return -1;
		}
		for (n = start; n < end; n++) {
			/* read relayfs subbufs and write to log file */
			idx = n % chan.n_subbufs;
			source = pcd->buf.start + idx * chan.subbuf_size;
			readmem((ulong)pcd->buf.padding + sizeof(padding) * idx,
				KVADDR, &padding, sizeof(padding),
				"padding", FAULT_ON_ERROR);
			if (n == end - 1 && pcd->buf.offset) {
				len = pcd->buf.offset;
			} else {
				len = chan.subbuf_size;
			}			
			if (old_format == 1) {
				source += sizeof(padding);
				len -= sizeof(padding) + padding;
			} else {
				len -= padding;
			}
			if (len) {
				readmem((ulong)source, KVADDR, subbuf, len,
					"subbuf", FAULT_ON_ERROR);
				if (fwrite(subbuf, len, 1, outfp) != 1) {
					error(WARNING, "cannot write log data\n");
					return -1;
				}
			}
		}
		fclose(outfp);
		outfp = NULL;
		if (is_global == 1)
			break;
	}
	return 0;
}

/*
 * Run the two phases of the command for 'module': load the channel
 * description, then write the log files.  The second phase is skipped when
 * the first one fails; failures are reported by the callees.
 */
static void do_systemtaplog(char *module)
{
	if (setup_global_data(module) >= 0)
		output_cpu_logs(module);
}

/*
 * Entry point of the "stplog" command (see command_table).
 *
 * Parses "-m <module>" from the command arguments, runs
 * do_systemtaplog() for that module and finally calls cleanup().
 */
void cmd_systemtaplog(void)
{

	int c;
	char *module = NULL;

	while ((c = getopt(argcnt, args, "m:")) != EOF) {
		switch (c) {
		case 'm':
			module = optarg;
			break;
		default:
			argerrs++;
			break;
		}
	}

	/* NOTE(review): presumably cmd_usage() does not return here -- confirm */
	if (!module || argerrs)
		cmd_usage(pc->curcmd, SYNOPSIS);

	/*
	 * Temporarily replace pc->main_loop_env with a jump target inside
	 * this function so that a longjmp taken while the command runs
	 * arrives at EXIT and cleanup() still gets called.  Presumably this
	 * is how a failing FAULT_ON_ERROR read unwinds -- verify against
	 * crash's error handling.  The original environment is restored
	 * before returning.
	 */
	saved_env[0] = pc->main_loop_env[0];
	if (setjmp(pc->main_loop_env))
		goto EXIT;

	do_systemtaplog(module);
EXIT:
	cleanup();
	pc->main_loop_env[0] = saved_env[0];
}

/*
 * Help text registered for the command through command_table; the list is
 * NULL-terminated.
 *
 * NOTE(review): the first entry reads "systemtaplog" while the command is
 * registered as "stplog" -- confirm which name the help output should show.
 */
char *help_systemtaplog[] = {
	"systemtaplog",
	"Retrieve SystemTap log data",
	"-m module_name",
	"  Retrieve SystemTap's log data and write them to files.\n",
	"    -m module_name  All valid SystemTap log data made by the trace",
	"                    module which name is 'module_name' are written",
	"                    into log files in `module_name` directory. The",
	"                    name of each log file is cpu0, cpu1...cpuN. ",
	"                    They have same format data as channel buffer",
	"                    except padding(This command removes padding). ",
	NULL,
};

/*
 * Constructor, run when the shared object is loaded: registers the command
 * table, but only if every required structure offset could be resolved.
 */
void __attribute__ ((constructor)) systemtaplog_init(void) 
{
	if (get_rchan_offsets() >= 0)
		register_extension(command_table);
}

/* Destructor, run when the shared object is unloaded: nothing to undo. */
void __attribute__ ((destructor)) systemtaplog_fini(void)
{
}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]