watchdog と hang_detect の分析

12563 ワード

役割: system_server の watchdog スレッドが正常に動作しているかどうかを監視する。実装: MTK は、上位層と通信して watchdog が正常かどうかを監視するためのデバイス (/dev/RT_Monitor) を kernel に特別に登録している。概要: watchdog が /dev/RT_Monitor に値を設定し、hang_detect はその値をもとにタイムアウト時間を計算する。watchdog はこの時間内にもう一度 hang_detect に通知して、タイムアウトしていないことを示す必要がある。
コードフロー:kernel-3.18/drivers/misc/mediatek/aee/aed/monitor_hang.c
/*
 * Misc device descriptor for the "RT_Monitor" node (/dev/RT_Monitor).
 * The minor number is assigned dynamically; file operations are taken
 * from aed_wdt_RT_Monitor_fops.
 */
static struct miscdevice aed_wdt_RT_Monitor_dev = {
    .name  = "RT_Monitor",
    .fops  = &aed_wdt_RT_Monitor_fops,
    .minor = MISC_DYNAMIC_MINOR,
};
/*
 * File operations backing the /dev/RT_Monitor misc device.
 *
 * The watchdog in system_server talks to this device through ioctl(),
 * which is routed to monitor_hang_ioctl() (also used for the compat
 * ioctl entry point when CONFIG_COMPAT is enabled).
 */
static const struct file_operations aed_wdt_RT_Monitor_fops = {
    .owner = THIS_MODULE,
    .open = monitor_hang_open,
    .release = monitor_hang_release,
    .poll = monitor_hang_poll,
    .read = monitor_hang_read,
    .write = monitor_hang_write,
    /* ioctl() on RT_Monitor from the watchdog ends up in monitor_hang_ioctl(). */
    .unlocked_ioctl = monitor_hang_ioctl,
#ifdef CONFIG_COMPAT
    .compat_ioctl = monitor_hang_ioctl,
#endif
};
/*
 * Module init: registers the /dev/RT_Monitor misc device and then starts
 * the hang_detect monitoring thread.
 *
 * Returns 0 on success or the error code from misc_register(). The
 * detector thread is not started when the device cannot be registered,
 * because the device is the channel through which the detector is kicked.
 */
static int __init monitor_hang_init(void)
{
    int err;

    /* Register the misc device that shows up as /dev/RT_Monitor. */
    err = misc_register(&aed_wdt_RT_Monitor_dev);
    if (err)
        return err;

    /* Start the hang_detect kernel thread. */
    hang_detect_init();
    return 0;
}
/*
 * Creates and starts the "hang_detect" kernel thread, whose entry point is
 * hang_detect_thread().
 *
 * Returns 0 on success, or a negative errno when the thread could not be
 * created. kthread_create() reports failure through an ERR_PTR value, so
 * the result must be checked before it is handed to wake_up_process().
 */
int hang_detect_init(void)
{
    struct task_struct *hd_thread;

    /* Thread function: hang_detect_thread; thread name: "hang_detect". */
    hd_thread = kthread_create(hang_detect_thread, NULL, "hang_detect");
    if (IS_ERR(hd_thread))
        return PTR_ERR(hd_thread);

    /* kthread_create() leaves the new thread stopped; wake it to run. */
    wake_up_process(hd_thread);
    return 0;
}
//          ,   watchdog
static int hang_detect_thread(void *arg)                                                                            
{
    struct sched_param param = {.sched_priority = 99 };
    //      
    sched_setscheduler(current, SCHED_FIFO, &param);

    //        watchdog
    while (!kthread_should_stop()) {
        if ((1 == hd_detect_enabled) && (FindTaskByName("system_server") != -1)) {

            if (hang_detect_counter <= 0)
                ShowStatus();

            if (hang_detect_counter == 0) {
                //            ,
                //   user  ,   aee_kernel_warning_api   oops,   warning
                if (aee_mode != AEE_MODE_CUSTOMER_USER) {
                    aee_kernel_warning_api
                        (__FILE__, __LINE__,
                         DB_OPT_NE_JBT_TRACES | DB_OPT_DISPLAY_HANG_DUMP,
                         "
CRDISPATCH_KEY:SS Hang
"
, "we triger HWT "); msleep(30 * 1000); } else { // user , kernel bug, BUG(); } } // ,hang_detect_counter , 30s // ioctl RT_Monitor , hang_detect_counter hang_detect_counter--; } else { // system_server , , hang_detect_counter*30s if (1 == hd_detect_enabled) { hang_detect_counter = hd_timeout + 4; hd_detect_enabled = 0; } } // 30s msleep((HD_INTER) * 1000); } return 0; }
// 非 user モード(aee_mode != AEE_MODE_CUSTOMER_USER)の場合に呼ばれる処理:
kernel-3.18/drivers/misc/mediatek/aee/common/aee-common.c
    /*
     * Builds a "<file:line> message" report string and hands it to the
     * registered AEE backend as a warning-level defect.
     *
     * @file, @line: source location of the caller, used as the prefix.
     * @db_opt:      option flags forwarded unchanged to the backend.
     * @module:      module string forwarded unchanged to the backend.
     * @msg, ...:    printf-style format and arguments for the message body.
     *
     * If no backend with a kernel_reportAPI hook is registered in g_aee_api,
     * the message is only logged through LOGE().
     */
    void aee_kernel_warning_api(const char *file, const int line, const int db_opt, const char *module, const char *msg, ...)
{
    char msgbuf[KERNEL_REPORT_LENGTH];
    int offset;
    va_list args;

    /* Prefix with the caller's location; the conversions consume file/line. */
    offset = snprintf(msgbuf, KERNEL_REPORT_LENGTH, "<%s:%d> ", file, line);
    if (offset < 0) {
        msgbuf[0] = '\0';
        offset = 0;
    } else if (offset >= KERNEL_REPORT_LENGTH) {
        /*
         * snprintf() returns the untruncated length; clamp it so the
         * remaining-size argument below can never become negative.
         */
        offset = KERNEL_REPORT_LENGTH - 1;
    }

    va_start(args, msg);
    vsnprintf(msgbuf + offset, KERNEL_REPORT_LENGTH - offset, msg, args);
    va_end(args);

    if (g_aee_api && g_aee_api->kernel_reportAPI)
        /* A backend is registered: call its kernel_reportAPI hook. */
        g_aee_api->kernel_reportAPI(AE_DEFECT_WARNING, db_opt, module, msgbuf);
    else
        LOGE("AEE kernel warning: %s", msgbuf);
}
EXPORT_SYMBOL(aee_kernel_warning_api);

// g_aee_api の初期化は drivers/misc/mediatek/aee/aed/aed-main.c の module_init(aed_init); で行われる
/*
 * Module init for the AED driver (excerpt; the surrounding code is elided
 * with "......"). The part shown registers kernel_api through
 * aee_register_api().
 */
static int __init aed_init(void)    
{
    ......
    /* Register this file's kernel_api table with the AEE common layer. */
    aee_register_api(&kernel_api);
    ......
}

この kernel_api は同じファイル内で定義されており、その kernel_reportAPI 関数も同じファイル内の関数です。
/*
 * AEE kernel API table handed to aee_register_api(): one report hook plus
 * four exception hooks that all share external_exception.
 */
static struct aee_kernel_api kernel_api = {
    .kernel_reportAPI = kernel_reportAPI,
    .md_exception     = external_exception,
    .md32_exception   = external_exception,
    .scp_exception    = external_exception,
    .combo_exception  = external_exception,
};

drivers/misc/mediatek/aee/common/aee-common.c の aee_register_api 関数で、kernel_api が g_aee_api に代入されます。
 /*
  * Installs the given AEE kernel API table as the global g_aee_api.
  * A NULL table is treated as a kernel bug.
  */
 void aee_register_api(struct aee_kernel_api *aee_api)
 {
     if (aee_api == NULL) {
         BUG();
     }

     g_aee_api = aee_api;
 }

上位レベルと下位レベルの通信は次のとおりです.
frameworks/base/services/core/java/com/android/server/Watchdog.java
/**
 * Watchdog thread loop (excerpt): on every pass, if an ExceptionLog hook is
 * present, kicks the kernel hang detector with a value of 300.
 */
@Override
public void run() {
    for (;;) {
        if (exceptionHWT == null) {
            continue;
        }
        exceptionHWT.WDTMatterJava(300);
    }
}
frameworks/base/core/java/com/mediatek/aee/ExceptionLog.java
/**
 * Java-side entry point: forwards the value unchanged to the native
 * WDTMatter method.
 */
public void WDTMatterJava(long lParam) {
    WDTMatter(lParam);
}

/** Native method that receives the value passed to WDTMatterJava. */
private static native void WDTMatter(long lParam);
frameworks/base/core/java/com/mediatek/aee/jni/com_mediatek_aee_exceptionlog.cpp
// JNI implementation of WDTMatter (excerpt; part of the body is elided with
// "......"). Opens the RT_Monitor device, issues the AEEIOCTL_RT_MON_Kick
// ioctl with lParam truncated to int, then closes the descriptor.
// NOTE(review): `ret` is declared outside this excerpt, and the result of
// open() is not checked in the lines shown here — confirm against the full
// source.
static void com_mediatek_exceptionlog_WDTMatter (JNIEnv* env, jobject clazz, jlong lParam){ 
    // AE_WDT_DEVICE_PATH is "/dev/RT_Monitor" (per the original note).
    int fd = open(AE_WDT_DEVICE_PATH, O_RDONLY);
    // Pass the value (300 in the watchdog flow) to RT_Monitor through ioctl;
    // a non-zero result enters the elided branch below.
    if (ret = ioctl(fd, AEEIOCTL_RT_MON_Kick, (int)(lParam)))
    ......
    close (fd) ;
    return;
}

その後、monitor_hang.c の monitor_hang_ioctl 関数が呼び出されます(このデバイスを登録するときに指定されており、このデバイスに対する ioctl でこの関数が呼ばれます)。
 //   /dev/RT_Monitor   ioctl          。
 //cmd   AEEIOCTL_RT_MON_Kick
 // arg   300
 static long monitor_hang_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
     ......
     //      
     if (cmd == AEEIOCTL_RT_MON_Kick) {
         LOGE("AEEIOCTL_RT_MON_Kick ( %d)
"
, (int)arg); aee_kernel_RT_Monitor_api((int)arg); return ret; } ...... } // lParam(300), hang_detect_counter void aee_kernel_RT_Monitor_api(int lParam) { ...... hd_detect_enabled = 1; //HD_INTER 30 //300 10, // 10*30s ,watchdog ioctl RT_monitor, watchdog 。 hang_detect_counter = hd_timeout = ((long)lParam + HD_INTER - 1) / (HD_INTER); ...... } }

参照先:http://blog.csdn.net/ldinvicible/article/details/51042811