ppc64-diag
|
Main header for rtas_errd. More...
#include <signal.h>
#include <librtasevent.h>
#include <servicelog-1/servicelog.h>
#include "fru_prev6.h"
#include "config.h"
Go to the source code of this file.
Data Structures | |
struct | errdata |
struct | diag_vpd |
struct | event |
struct to track and handle RTAS events in rtas_errd. More... | |
Macros | |
#define | RTAS_ERRD_ARGS "dh" |
standard args for rtas_errd More... | |
#define | RTAS_ERROR_LOG_MAX 4096 |
#define | ADDL_TEXT_MAX 256 |
#define | MAX(x, y) ((x) > (y) ? (x) : (y)) |
#define | TRUE 1 |
#define | FALSE 0 |
#define | RE_SCANLOG_AVAIL 0x00000001 |
#define | RE_SERVICEABLE 0x00000002 |
#define | RE_PLATDUMP_AVAIL 0x00000004 |
#define | RE_PREDICTIVE 0x00000008 |
#define | RE_HMC_TAGGED 0x40000000 |
#define | RE_ALREADY_REPORTED 0x20000000 |
#define | RE_RECOVERED_ERROR 0x10000000 |
#define | dbg(_f, _a...) _dbg("%s(): "_f, __FUNCTION__, ##_a) |
Functions | |
void | log_msg (struct event *, char *,...) |
Log messages to rtas_errd_log. More... | |
void | cfg_log (char *,...) |
dummy interface for calls to diag_cfg More... | |
int | init_files (void) |
Initialize files used by rtas_errd. More... | |
void | close_files (void) |
Close all the files used by rtas_errd. More... | |
void | _dbg (const char *,...) |
Write debug messages to stdout. More... | |
int | print_rtas_event (struct event *) |
Print an RTAS event to the platform log. More... | |
int | platform_log_write (char *,...) |
Write messages to the platform log. More... | |
void | update_epow_status_file (int) |
Update the epow status file. More... | |
int | read_proc_error_log (char *, int) |
Read data from proc_error_log. More... | |
void | check_scanlog_dump (void) |
Check for new scanlog dumps. More... | |
void | check_platform_dump (struct event *) |
Check RTAS event for a platform dump. More... | |
void | check_eeh (struct event *) |
Check an RTAS event for EEH event notification. More... | |
void | handle_resource_dealloc (struct event *) |
Parse RTAS event for CPU guard information. More... | |
int | handle_rtas_event (struct event *) |
Main routine for processing RTAS events. More... | |
void | update_rtas_msgs (void) |
Update RTAS messages in the platform log. More... | |
int | process_pre_v6 (struct event *) |
Handle older (pre-v6) style events. More... | |
int | get_error_fmt (struct event *) |
Extract the error log format indicator from the chrp error log. More... | |
int | process_v6 (struct event *) |
char * | get_dt_status (char *) |
char * | diag_get_fru_pn (struct event *, char *) |
void | free_diag_vpd (struct event *) |
int | menugoal (struct event *, char *) |
void | epow_timer_handler (int, siginfo_t, void *) |
Routine to handle SIGALRM timer interrupts. More... | |
int | check_epow (struct event *) |
Check an RTAS event for EPOW data. More... | |
time_t | get_event_date (struct event *event) |
Retrieve the timestamp from an event. More... | |
int | servicelog_sev (int rtas_sev) |
convert RTAS severity to servicelog severity More... | |
void | add_callout (struct event *event, char pri, int type, char *proc, char *loc, char *pn, char *sn, char *ccin) |
Add a new FRU callout to the list for this event. More... | |
void | log_event (struct event *) |
log the event in the servicelog DB More... | |
void | sighup_handler (int, siginfo_t, void *) |
signal handler for SIGHUP More... | |
void | restore_sigchld_default (void) |
restore child signal handler More... | |
void | setup_sigchld_handler (void) |
setup child signal handler More... | |
void | handle_prrn_event (struct event *) |
void | handle_hotplug_event (struct event *) |
Variables | |
char * | platform_log |
char * | messages_log |
char * | proc_error_log1 |
File to read RTAS events from. More... | |
char * | proc_error_log2 |
Alternate file to read RTAS events from. More... | |
char * | rtas_errd_log |
Message log for the rtas_errd daemon. More... | |
char * | rtas_errd_log0 |
Saved ("rolled over") messages log for rtas_errd daemon. More... | |
char * | test_file |
int | platform_log_fd |
int | debug |
Debug level to run at for rtas_errd daemon. More... | |
char * | scanlog |
buffer to hold scanlog dump path More... | |
struct servicelog * | slog |
servicelog struct for libservicelog use More... | |
char * | epow_status_file |
Copyright (C) 2004 IBM Corporation.
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#define ADDL_TEXT_MAX 256 |
Referenced by check_eeh(), check_platform_dump(), guard_cpu(), guard_memlmb(), guard_spcpu(), log_epow(), and log_event().
#define dbg | ( | _f, | |
_a... | |||
) | _dbg("%s(): "_f, __FUNCTION__, ##_a) |
Referenced by _log_msg(), add_more_descrs(), add_std_phandles(), check_platform_dump(), devtree_update(), do_node_update(), do_update(), get_diag_vpd(), get_dt_status(), get_refcode(), get_register_data(), handle_hotplug_event(), handle_prrn_event(), handle_rtas_event(), is_integrated(), lsvpd_init(), lsvpd_read(), main(), open_prrn_log(), print_rtas_event(), process_pre_v6(), process_refcodes(), process_v2_sp(), process_v6(), read_rtas_events(), report_menugoal(), report_srn(), run_drmgr(), sensor_epow(), set_srn_and_callouts(), update_nodes(), update_properties(), and update_rtas_msgs().
#define FALSE 0 |
Referenced by guard_memlmb(), and process_pre_v6().
#define MAX | ( | x, | |
y | |||
) | ((x) > (y) ? (x) : (y)) |
Referenced by find_event().
#define RE_ALREADY_REPORTED 0x20000000 |
Referenced by handle_rtas_event().
#define RE_HMC_TAGGED 0x40000000 |
#define RE_PLATDUMP_AVAIL 0x00000004 |
Referenced by check_platform_dump(), and log_event().
#define RE_PREDICTIVE 0x00000008 |
Referenced by handle_rtas_event().
#define RE_RECOVERED_ERROR 0x10000000 |
Referenced by handle_rtas_event().
#define RE_SCANLOG_AVAIL 0x00000001 |
Referenced by print_rtas_event(), and read_rtas_events().
#define RE_SERVICEABLE 0x00000002 |
#define RTAS_ERRD_ARGS "dh" |
Referenced by main().
#define RTAS_ERROR_LOG_MAX 4096 |
Referenced by _dbg(), _log_msg(), extract_platform_dump(), read_rtas_events(), reformat_msg(), and update_rtas_msgs().
#define TRUE 1 |
Referenced by process_pre_v6().
void _dbg | ( | const char * | fmt, |
... | |||
) |
dbg Provide utility to print debug statements if the debug flag is specified.
fmt | format string a la printf() |
... | args a la printf() |
References debug, reformat_msg(), and RTAS_ERROR_LOG_MAX.
Referenced by _log_msg().
void add_callout | ( | struct event * | event, |
char | pri, | ||
int | type, | ||
char * | proc, | ||
char * | loc, | ||
char * | pn, | ||
char * | sn, | ||
char * | ccin | ||
) |
add_callout
event | event to which to add the callout |
pri | priority |
type | type |
proc | procedure ID |
loc | location code |
pn | FRU part number |
sn | FRU serial number |
ccin | FRU ccin |
References log_msg().
Referenced by add_more_descrs(), report_src(), and set_srn_and_callouts().
void cfg_log | ( | char * | fmt, |
... | |||
) |
cfg_log
fmt | formatted string a la printf() |
... | additional args a la printf() |
References _log_msg().
Referenced by main(), read_rtas_events(), and sighup_handler().
void check_eeh | ( | struct event * | event | ) |
check_eeh Parse an RTAS event to see if this is an EEH event notification. If so, then update the platform log file with additional information about the EEH event.
event | pointer to the RTAS event |
References event::addl_text, ADDL_TEXT_MAX, event_desc::desc, log_msg(), platform_log_write(), event::rtas_event, and event_desc::src_code.
Referenced by handle_rtas_event().
int check_epow | ( | struct event * | event | ) |
check_epow Parses error information to determine if it represents an EPOW event. If it is, the epow_status_file is updated with the appropriate condition, and EPOW_PROGRAM is invoked to take the appropriate system action (shutdown, etc).
event | pointer to the RTAS event |
References EPOW_PROGRAM, EPOW_PROGRAM_NOPATH, log_msg(), parse_epow(), and update_epow_status_file().
Referenced by handle_rtas_event().
void check_platform_dump | ( | struct event * | event | ) |
check_platform_dump Parses error information to determine if it indicates the availability of a platform dump. The platform dump is copied to the filesystem, and the error log is updated to indicate the path to the dump.
This should be invoked before the error information is written to LOG_FILE, because the error may need to be updated with the path to the dump.
event | pointer to struct event |
References event::addl_text, ADDL_TEXT_MAX, d_cfg, dbg, DUMP_MAX_FNAME_LEN, EXTRACT_PLATDUMP_CMD, log_msg(), ppc64_diag_config::platform_dump_path, platform_log_write(), RE_PLATDUMP_AVAIL, restore_sigchld_default(), event::rtas_event, and setup_sigchld_handler().
Referenced by handle_rtas_event().
void check_scanlog_dump | ( | void | ) |
check_scanlog_dump This routine checks to see if a new scanlog dump is available, and if so, copies it to the filesystem. The routine itself returns void; the filename of the new scanlog dump is made available through the global scanlog buffer, which is allocated and filled by this routine (presumably left NULL if no dump is copied). Because the space for the string is malloc'ed, it is up to the code that consumes scanlog to free it.
This routine should be invoked once when the daemon is started.
References d_cfg, DUMP_BUF_SZ, get_machine_serial(), load_scanlog_module(), log_msg(), scanlog, SCANLOG_DUMP_EXISTS, SCANLOG_DUMP_FILE, and ppc64_diag_config::scanlog_dump_path.
Referenced by main().
void close_files | ( | void | ) |
close_files Perform any file cleanup (i.e. close()) and possibly free() any buffers needed by rtas_errd before exiting.
References epow_status_fd, platform_log_fd, proc_error_log_fd, and rtas_errd_log_fd.
Referenced by main().
char* diag_get_fru_pn | ( | struct event * | event, |
char * | phyloc | ||
) |
diag_fru_pn_by_ploc
Returns the FRU part number from VPD, as defined by the "FN" vpd keyword, for the FRU given by the physical location code
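RETURNS: 0 if the information is not found, 1 if the information is found, -1 if an error occurs during the search. (NOTE: the prototype returns char *, so these integer codes appear to describe an earlier interface; confirm the actual return value against the source.)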
References event::diag_vpd, diag_vpd::fn, get_base_loc(), get_diag_vpd(), and is_integrated().
Referenced by add_more_descrs(), and set_srn_and_callouts().
void epow_timer_handler | ( | int | sig, |
siginfo_t | siginfo, | ||
void * | context | ||
) |
epow_timer_handler
sig | unused |
siginfo | unused |
context | unused |
References SENSOR_TOKEN_EPOW_SENSOR, time_remaining, and update_epow_status_file().
Referenced by main().
void free_diag_vpd | ( | struct event * | ) |
References event::diag_vpd, diag_vpd::ds, diag_vpd::fn, diag_vpd::se, diag_vpd::sn, diag_vpd::tm, and diag_vpd::yl.
Referenced by get_diag_vpd(), lsvpd_read(), and read_rtas_events().
char* get_dt_status | ( | char * | ) |
References dbg, log_msg(), and target_status.
Referenced by process_pre_v6().
int get_error_fmt | ( | struct event * | event | ) |
get_error_fmt
References event::event_buf, and I_FORMAT.
Referenced by process_pre_v6().
time_t get_event_date | ( | struct event * | event | ) |
get_event_date
event | the event from which to retrieve the timestamp |
References bcd_2b_toint, bcd_4b_toint, log_msg(), event::rtas_event, and event::rtas_hdr.
Referenced by process_pre_v6(), and process_v6().
void handle_hotplug_event | ( | struct event * | ) |
References dbg, DRMGR_PROGRAM, DRMGR_PROGRAM_NOPATH, log_msg(), event::rtas_event, and event::rtas_hdr.
Referenced by handle_rtas_event().
void handle_prrn_event | ( | struct event * | ) |
References close_prrn_log(), dbg, devtree_update(), open_prrn_log(), prrn_filename, prrn_log_fd, and event::rtas_hdr.
Referenced by handle_rtas_event().
void handle_resource_dealloc | ( | struct event * | event | ) |
handle_resource_dealloc Parses error information to determine if it represents a predictive CPU failure, which should cause a CPU Guard operation. DRMGR_PROGRAM is forked to actually remove the CPU from the system.
event | rtas event |
References guard_cpu(), guard_memlmb(), guard_mempage(), guard_spcpu(), log_msg(), event::rtas_event, and RTAS_V6_TYPE_RESOURCE_DEALLOC.
Referenced by handle_rtas_event().
int handle_rtas_event | ( | struct event * | event | ) |
handle_rtas_event
event | RTAS event structure to be handled |
References check_eeh(), check_epow(), check_platform_dump(), dbg, event::flags, handle_hotplug_event(), handle_prrn_event(), handle_resource_dealloc(), log_event(), log_msg(), platform_log, platform_log_write(), print_rtas_event(), process_pre_v6(), process_v6(), RE_ALREADY_REPORTED, RE_PREDICTIVE, RE_RECOVERED_ERROR, event::rtas_event, event::rtas_hdr, and event::seq_num.
Referenced by read_rtas_events(), and update_rtas_msgs().
int init_files | ( | void | ) |
init_files Open the following files needed by the rtas_errd daemon: rtas_errd_log, proc_error_log, platform_log, and epow_status.
Note: This should only be called once before any rtas_events are read.
References debug, log_msg(), platform_log, platform_log_fd, proc_error_log1, proc_error_log2, proc_error_log_fd, rtas_errd_log, rtas_errd_log_fd, and update_epow_status_file().
Referenced by main().
void log_event | ( | struct event * | event | ) |
log_event
event | RTAS event structure |
References event::addl_text, ADDL_TEXT_MAX, d_cfg, event::flags, log_msg(), ppc64_diag_config::platform_dump_path, RE_PLATDUMP_AVAIL, event::rtas_event, event::sl_entry, and slog.
Referenced by handle_rtas_event().
void log_msg | ( | struct event * | event, |
char * | fmt, | ||
... | |||
) |
log_msg
event | reference to event |
fmt | formatted string a la printf() |
... | additional args a la printf() |
References _log_msg().
Referenced by _log_msg(), add_callout(), can_delete_lmb(), check_eeh(), check_epow(), check_platform_dump(), check_scanlog_dump(), diag_cfg(), get_dt_status(), get_event_date(), get_machine_serial(), guard_cpu(), guard_memlmb(), guard_mempage(), guard_spcpu(), handle_hotplug_event(), handle_resource_dealloc(), handle_rtas_event(), init_d_cfg(), init_files(), load_scanlog_module(), log_epow(), log_event(), main(), parse_epow(), parse_lparcfg(), print_rtas_event(), process_pre_v6(), process_v6(), read_proc_error_log(), read_rtas_events(), report_menugoal(), report_src(), report_srn(), restore_sigchld_default(), retrieve_drc_name(), run_drmgr(), set_srn_and_callouts(), setup_sigchld_handler(), update_epow_status_file(), and update_rtas_msgs().
int menugoal | ( | struct event * | , |
char * | |||
) |
int platform_log_write | ( | char * | fmt, |
... | |||
) |
platform_log_write Provide a printf() style interface to write messages to platform_log. All messages are prepended with "ppc64-diag" to match the expected format of the platform log.
fmt | format string a la printf() |
... | additional args a la printf() |
References platform_log_fd.
Referenced by check_eeh(), check_platform_dump(), guard_cpu(), guard_memlmb(), guard_spcpu(), and handle_rtas_event().
int print_rtas_event | ( | struct event * | event | ) |
print_rtas_event Prints the binary hexdump of an RTAS event to the PLATFORM_LOG file.
event | pointer to the struct event to print |
References dbg, event::event_buf, event::flags, log_msg(), platform_log, platform_log_fd, RE_SCANLOG_AVAIL, scanlog, and event::seq_num.
Referenced by handle_rtas_event().
int process_pre_v6 | ( | struct event * | event | ) |
process_pre_v6
event | the event to be parsed |
References add_cpu_id(), analyze_io_bus_error(), bypass_errdscr, cpu610, cpu611, cpu612, cpu613, cpu614, cpu615, cpu619, cpu710, cpu711, cpu712, cpu713, cpu714, cpu715, CPUALLZERO, CPUB12b0, CPUB12b1, CPUB12b2, CPUB12b3, CPUB12b4, CPUB12b5, CPUB12b6, CPUB12b7, dbg, event::event_buf, FALSE, FIRST_LOC, fru_callout_pre_v6::floc, event_description_pre_v6::frus, get_cpu_frus(), get_dt_status(), get_error_fmt(), get_error_type(), get_event_date(), get_loc_code(), get_register_data(), I_BYTE24, I_CPU, I_IBM, I_POSTCODE, io630, io631, io632, io633, io634, io639, io730, io731, io732, io733, io734, io735, io736, io770, io771, io772, io773, io832, io_error_type, IOALLZERO, IOB12b0, IOB12b1, IOB12b2, IOB12b3, IOB12b3B13b2, IOB12b4, IOB12b5, IOB12b5B13b1, IOB12b5B13b2, IOB12b6, IOB12b6B13b1, IOB12b6B13b2, IOB12b6B13b3, IOB12b7, IOB12b7B13b1, IOB12b7B13b2, IOB13b4, IOB13b5, IOB13b6, IOB13b7, device_ela::led, event::length, event::loc_codes, LOCSIZE, log_msg(), mem624, mem625, mem626, mem627, mem628, mem629, mem722, mem723, mem724, mem725, MEMALLZERO, MEMB12b0, MEMB12b1, MEMB12b2, MEMB12b3, MEMB12b4, MEMB12b4B13b3, MEMB12b5, MEMB12b6, MEMB12b7, MEMB13b0, MEMB13b1, MEMB13b2, MEMB13b3, MEMB13b4, memtest600, MSGMENUG174, NEXT_LOC, pct_index, post, POSTALLZERO, POSTB12b0, POSTB12b1, POSTB12b2, POSTB12b3, POSTB12b4, POSTB12b5, POSTB12b6, POSTB12b7, POSTB13b0, POSTB13b1, POSTB13b2, POSTB13b3, POSTB13b4, POSTB13b5, POSTB13b7, PREDICT_REPAIR_PENDING, PREDICT_UNRECOV, process_epow(), process_refcodes(), process_v2_sp(), process_v3_logs(), RC_PLANAR, RC_PLANAR_2CPU, RC_PLANAR_CPU, event_description_pre_v6::rcode, report_menugoal(), report_srn(), event_description_pre_v6::rmsg, event::rtas_event, event::rtas_hdr, servicelog_sev(), set_fru_percentages(), set_srn_and_callouts(), event::sl_entry, event_description_pre_v6::sn, SN_V3ELA, sp640, sp641, sp642, sp643, sp644, sp740, sp741, sp742, sp743, sp744, sp745, sp746, sp747, sp748, sp749, sp750, sp751, sp752, sp753, sp760, SPB16b0, SPB16b1, 
SPB16b2, SPB16b3, SPB16b4, SPB16b5, SPB16b6, SPB16b7, SPB17b0, SPB17b1, SPB17b2, SPB17b3, SPB17b4, SPB17b5, SPB18b0, SPB18b1, SPB18b2, SPB18b3, SPB18b4, SPB18b7, SRC_REG_ID_02, TRUE, and v3_errdscr.
Referenced by handle_rtas_event().
int process_v6 | ( | struct event * | ) |
References dbg, event::event_buf, get_event_date(), event::length, log_msg(), report_menugoal(), report_src(), event::rtas_event, event::rtas_hdr, servicelog_sev(), and event::sl_entry.
Referenced by handle_rtas_event().
int read_proc_error_log | ( | char * | buf, |
int | buflen | ||
) |
read_proc_error_log Read the data in from the /proc error log file. This routine also handles the debug case of reading in a test file that contains an ASCII representation of an RTAS event.
buf | buffer to read the RTAS event into. |
buflen | length of buffer parameter |
References log_msg(), proc_error_log1, proc_error_log2, and proc_error_log_fd.
Referenced by read_rtas_events().
void restore_sigchld_default | ( | void | ) |
restore_sigchld_default Restores child signal handler to default action.
References log_msg().
Referenced by check_platform_dump(), get_diag_vpd(), and retrieve_drc_name().
int servicelog_sev | ( | int | rtas_sev | ) |
servicelog_sev
rtas_sev | RTAS severity to be converted |
Referenced by process_pre_v6(), and process_v6().
void setup_sigchld_handler | ( | void | ) |
setup_sigchld_handler Set up a custom child signal handler to clean up child processes when they exit.
References log_msg(), and sigchld_handler().
Referenced by check_platform_dump(), get_diag_vpd(), main(), and retrieve_drc_name().
void sighup_handler | ( | int | sig, |
siginfo_t | siginfo, | ||
void * | context | ||
) |
sighup_handler The SIGHUP signal will cause the rtas_errd daemon to re-read the configuration file. If it is currently safe to re-configure ourselves we do; otherwise we set a flag to indicate that a re-configuration needs to occur at the next "safe" place.
References cfg_log(), d_cfg, diag_cfg(), ppc64_diag_config::flags, RE_CFG_RECEIVED_SIGHUP, and RE_CFG_RECFG_SAFE.
Referenced by main().
void update_epow_status_file | ( | int | status | ) |
update_epow_status_file Used to write the current EPOW status (as defined in the parse_epow() function (epow.c) comment) to the epow_status file.
status | value to update epow_status file to. |
References epow_status_fd, epow_status_file, and log_msg().
Referenced by check_epow(), epow_timer_handler(), and init_files().
void update_rtas_msgs | ( | void | ) |
update_rtas_msgs Update the file /var/log/platform with any RTAS events found in syslog that have not been handled by rtas_errd.
References dbg, event::event_buf, find_rtas_end(), find_rtas_start(), get_rtas_no(), handle_rtas_event(), log_msg(), messages_log, msgs_log_fd, platform_log, platform_log_fd, RTAS_ERROR_LOG_MAX, event::rtas_event, and RTAS_START.
Referenced by main().
int debug |
Referenced by _dbg(), init_files(), and main().
char* epow_status_file |
Referenced by main(), print_usage(), and update_epow_status_file().
char* messages_log |
Used by update_rtas_msgs() to bring the platform log up to date with current RTAS events.
Referenced by main(), and update_rtas_msgs().
char* platform_log |
Referenced by handle_rtas_event(), init_files(), main(), print_rtas_event(), print_usage(), and update_rtas_msgs().
int platform_log_fd |
Referenced by close_files(), init_files(), platform_log_write(), print_rtas_event(), and update_rtas_msgs().
char* proc_error_log1 |
Referenced by init_files(), main(), and read_proc_error_log().
char* proc_error_log2 |
Referenced by init_files(), main(), and read_proc_error_log().
char* rtas_errd_log |
Referenced by _log_msg(), init_files(), main(), and print_usage().
char* rtas_errd_log0 |
Referenced by _log_msg().
char* scanlog |
This is a buffer that is allocated and filled when rtas_errd is initially exec()'ed via check_scanlog_dump(). The buffer will contain the path to a scanlog dump and is reported with the first RTAS event we receive from the kernel.
Referenced by check_scanlog_dump(), print_rtas_event(), and read_rtas_events().
struct servicelog* slog |
Referenced by log_event(), and main().
char* test_file |