Supplement

A ROP (retinopathy of prematurity) Screening Classifier for Retinal Fundus Photograph based on Transfer Learning

DNN Model Test Results

Human Experts Performance Data

Go to Download Page * Including all related supplemental files

Source Codes and Scripts for DNN Batch Inference

binaryproto2npy.py . . . . . . 395 B . . . . . . 12/18/2017 3:37:32 PM

# convert mean file from binaryproto to npy

import caffe
import numpy as np
import sys

if len(sys.argv) != 3:
   print "Usage: python convert_protomean.py proto.mean out.npy"
   sys.exit()

blob = caffe.proto.caffe_pb2.BlobProto()
data = open( sys.argv[1] , 'rb' ).read()
blob.ParseFromString(data)
arr = np.array( caffe.io.blobproto_to_array(blob) )
out = arr[0]
np.save( sys.argv[2] , out )

digits_classify_folder.py . . . . . . 5 KB . . . . . . 12/18/2017 3:37:32 PM

# Using digits to classify images under a specific folder

# Copyright (c) 2017, zhangys@zjgsu.edu.cn

# requires NVidia Digits to be installed

SOURCE_FOLDER = "/home/zys/seagate/Extra_20170723/"
SOURCE_FILE_LIST = "/home/zys/seagate/Extra_20170723.txt"
REF_FOLDER = '' #"/home/zys/seagate/ROP_Classified_20170603_Combined/images"+"/"  # the files with the same name in this folder will not be processed
TARGET_FOLDER = '/home/zys/seagate/Extra_Selected_20170723/' #"/home/zys/seagate/ROP_201707/A"+"/"


JOBS_DIR = "/home/zys/DIGITS-master/digits/jobs"  # "/var/lib/digits/jobs/"


import os
import sys
import shutil

caffe_root = os.environ['CAFFE_ROOT']
sys.path.insert(0,caffe_root+'python')

# Add path for DIGITS package
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
import digits.config
from digits.inference.errors import InferenceError
from digits.job import Job
from digits import utils

# To solve error "Check failed: error == cudaSuccess (10 vs. 0)  invalid device ordinal"
import caffe
caffe.set_device(0)

"""
Perform inference on a list of images using the specified model
"""
def nv_digits_infer(input_list,
          output_dir,
          jobs_dir,
          model_id,
          epoch,
          batch_size,
          layers,
          gpu):
    """
    Perform inference on a list of images using the specified model
    """
    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load model job
    model_dir = os.path.join(jobs_dir, model_id)
    assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
    model = Job.load(model_dir)

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, model.dataset_id)
    assert os.path.isdir(dataset_dir), "Dataset dir %s does not exist" % dataset_dir
    dataset = Job.load(dataset_dir)
    for task in model.tasks:
        task.dataset = dataset

    # retrieve snapshot file
    task = model.train_task()
    snapshot_filename = None
    epoch = float(epoch)
    if epoch == -1 and len(task.snapshots):
        # use last epoch
        epoch = task.snapshots[-1][1]
        snapshot_filename = task.snapshots[-1][0]
    else:
        for f, e in task.snapshots:
            if e == epoch:
                snapshot_filename = f
                break
    if not snapshot_filename:
        raise InferenceError("Unable to find snapshot for epoch=%s" % repr(epoch))

    # retrieve image dimensions and resize mode
    image_dims = dataset.get_feature_dims()
    height = image_dims[0]
    width = image_dims[1]
    channels = image_dims[2]
    resize_mode = dataset.resize_mode if hasattr(dataset, 'resize_mode') else 'squash'

    n_input_samples = 0  # number of samples we were able to load
    input_ids = []       # indices of samples within file list
    input_data = []      # sample data

    
    # load paths from file
    paths = None
    with open(input_list) as infile:
        paths = infile.readlines()
    # load and resize images
    for idx, path in enumerate(paths):
        path = path.strip()
        try:
            image = utils.image.load_image(path.strip())            
            image = utils.image.resize_image(
                image,
                height,
                width,
                channels=channels,
                resize_mode=resize_mode)
           
            input_ids.append(idx)
            input_data.append(image)
            n_input_samples = n_input_samples + 1
        except utils.errors.LoadImageError as e:
            print e

    # perform inference

    if layers != 'none':
        raise InferenceError("Layer visualization is not supported for multiple inference")
    outputs = model.train_task().infer_many(
        input_data,
        snapshot_epoch=epoch,
        gpu=gpu,
        resize=True)
    
    return outputs["softmax"]

def nv_digits_classify(filelist, model):
    
    DIGITS_JOB_ID = DICT[model]
    softmax = nv_digits_infer(filelist, "/home/zys/data/C3R/test/tmp/",
    		  JOBS_DIR,
    		  DIGITS_JOB_ID,
    		  -1,
    		  1,
    		  'none',
    		  0)
        
    cls = []
    for idx, p in enumerate(softmax):   
        cls.append(p.argmax())
        
    i = 0
    with open(filelist) as f:
        for line in f:
            line = line.strip()            
            c = str(cls[i])
            i+=1
            directory = TARGET_FOLDER+"/" + c;
            if not os.path.exists(directory):
                os.makedirs(directory)
            shutil.copyfile(line, directory + "/" + os.path.basename(line))


def split_filelist(filelist, MAX_LINE_PER_FILE):
     i=0     
     fidx=0
     files=[]
     currentfile = None
     with open(filelist) as f:
        for line in f:
            if(i%MAX_LINE_PER_FILE == 0):
                if (currentfile is not None):
                    currentfile.close()
                currentfile = open(filelist+str(fidx), 'w')
                files.append(filelist+str(fidx))
                fidx+=1
            line = line.strip()            
            currentfile.write("%s\n" % line)
            i+=1            
     
     return files

# nv_digits_classify(SOURCE_FILE_LIST, "VGG-16")
files = split_filelist(SOURCE_FILE_LIST, 500)
i=0
import time
caffe.set_mode_gpu()

# call in the shell: Every 3 rounds, will throw error. So use for loop to handle it.
# for((i=30;i<=132;i=i+3));do sudo python ...py $i;done
print ("------",sys.argv[1],"------")
for f in files:        
    if(i >= int( sys.argv[1])):
        nv_digits_classify(f, "VGG-16")
    print(i, "*************", f, " Finished *************")
    i+=1;
    #if(i%5==0):       # In GPU mode, CUDA handle is not released immediately
    #    caffe.set_mode_gpu()
    #else:
    #    caffe.set_mode_cpu()
    #i+=1    
    time.sleep(0)

digits_predict_folder.py . . . . . . 12 KB . . . . . . 12/18/2017 3:37:32 PM

# Using digits to classify images in the test data set

# Copyright (c) 2017, zhangys@zjgsu.edu.cn

# requires NVidia Digits to be installed

DICT = {}
DICT["VGG-16_transfer_learning"] = "20170719-171742-5c31"
DICT["VGG-16_from_scratch"] = '20170722-143332-f077'
DICT["GoogLeNet_from_scratch"] = '20170722-113606-c59c'
DICT["GoogLeNet_transfer_learning"] = '20170722-190454-1af1' # '20170719-185942-bf65'
DICT["AlexNet_transfer_learning"] = '20170803-121318-b153'#'20170802-164043-96f9'# '20170722-205930-3ac4' # '20170720-094939-b665'
DICT["AlexNet_from_scratch"]= '20170802-163356-7f3d'#'20170802-162531-3531' #'20170722-152531-da8d'

def GetDictKeys():
     return DICT.keys()

FILE_LIST="/home/zys/Desktop/test2/data/FileList.txt"
LABEL_LIST="/home/zys/Desktop/test2/data/LabelList.txt"

JOBS_DIR = "/home/zys/DIGITS-master/digits/jobs"  # "/var/lib/digits/jobs/"


import os
import sys

caffe_root = os.environ['CAFFE_ROOT'] # '/home/zys/nv-caffe-0.15' #  # When use sudo, the environ var may be different than the current terminal 
sys.path.insert(0,caffe_root+'python')

import numpy
import pandas
from sklearn.metrics import roc_curve, auc, precision_recall_curve
from pandas_ml import ConfusionMatrix
import matplotlib.pyplot as plt
import shutil
import inspect

# Add path for DIGITS package
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
import digits.config
from digits.inference.errors import InferenceError
from digits.job import Job
from digits import utils

# To solve error "Check failed: error == cudaSuccess (10 vs. 0)  invalid device ordinal"
import caffe
print (caffe.__file__)
caffe.set_device(0)

"""
Perform inference on a list of images using the specified model
"""
def nv_digits_infer(input_list,
          output_dir,
          jobs_dir,
          model_id,
          epoch,
          batch_size,
          layers,
          gpu):
    """
    Perform inference on a list of images using the specified model
    """
    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load model job
    model_dir = os.path.join(jobs_dir, model_id)
    assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
    model = Job.load(model_dir)

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, model.dataset_id)
    assert os.path.isdir(dataset_dir), "Dataset dir %s does not exist" % dataset_dir
    dataset = Job.load(dataset_dir)
    for task in model.tasks:
        task.dataset = dataset

    # retrieve snapshot file
    task = model.train_task()
    snapshot_filename = None
    epoch = float(epoch)
    if epoch == -1 and len(task.snapshots):
        # use last epoch
        epoch = task.snapshots[-1][1]
        snapshot_filename = task.snapshots[-1][0]
    else:
        for f, e in task.snapshots:
            if e == epoch:
                snapshot_filename = f
                break
    if not snapshot_filename:
        raise InferenceError("Unable to find snapshot for epoch=%s" % repr(epoch))

    # retrieve image dimensions and resize mode
    image_dims = dataset.get_feature_dims()
    height = image_dims[0]
    width = image_dims[1]
    channels = image_dims[2]
    resize_mode = dataset.resize_mode if hasattr(dataset, 'resize_mode') else 'squash'

    n_input_samples = 0  # number of samples we were able to load
    input_ids = []       # indices of samples within file list
    input_data = []      # sample data

    
    # load paths from file
    paths = None
    with open(input_list) as infile:
        paths = infile.readlines()
    # load and resize images
    for idx, path in enumerate(paths):
        path = path.strip()
        try:
            image = utils.image.load_image(path.strip())            
            image = utils.image.resize_image(
                image,
                height,
                width,
                channels=channels,
                resize_mode=resize_mode)
           
            input_ids.append(idx)
            input_data.append(image)
            n_input_samples = n_input_samples + 1
        except utils.errors.LoadImageError as e:
            print e

    # perform inference

    if layers != 'none':
        raise InferenceError("Layer visualization is not supported for multiple inference")
    outputs = model.train_task().infer_many(
        input_data,
        snapshot_epoch=epoch,
        gpu=gpu,
        resize=True)
    
    return outputs["softmax"]

def nv_digits_infer_one_round(threshold, softmax1, softmax2):
        
    predicts1 = []
    for idx, probs in enumerate(softmax1):      
        # print paths[idx], probs[0], probs[1], probs[2]

        prediction = False
        if(probs[0]>threshold):
            prediction = True
        # predictions.append(probs.argmax())        
        predicts1.append(prediction)               
            		  
    predicts2 = []
    for idx, probs in enumerate(softmax2):      
        # print paths[idx], probs[0], probs[1], probs[2]

        prediction = False
        if(probs[0]>threshold):
            prediction = True
        # predictions.append(probs.argmax())        
        predicts2.append(prediction)        
    
    P= len(predicts1)
    N= len(predicts2)    
           
    tp = predicts1.count(True) 
    fn = predicts1.count(False)
    tn = predicts2.count(False)
    fp = predicts2.count(True)
    
    precision = 1.0*tp/(tp+fp) #ppv
    recall = 1.0*tp/(tp+fn)
    
    f1 = 2.0*precision*recall/(precision+recall)
       
    accuracy1 = (predicts1.count(0) * 1.0 / P)
    accuracy2 = (predicts2.count(1) * 1.0 / N)
    accuracy = (tp + tn)*1.0/(P+N)
    
    sensativity = 1.0*tp/(tp+fn) #tpr, recall
    specificity = 1.0*tn/(fp+tn) #tnr
    fpr = 1.0*fp/N # fp/(fp+tn) = 1.0-specificity
    fpr = 1.0*fp/(fp+tn)
    fpr = 1-specificity
    tpr = sensativity
    recall = sensativity
    
    print P, N, threshold, accuracy1, accuracy2, accuracy, tp, tn, fp, fn, precision, recall, f1, sensativity, specificity, fpr
    return tpr, fpr

def nv_digits_infer_test_model(model):    

    print ('####### ' + model + ' #########')

    labels = []
    with open(LABEL_LIST) as f:
        for line in f:
            if('0' in line):
                labels.append(True)
            elif('1' in line):
                labels.append(False)
            else:
                print "Unrecognized label!"

    DIGITS_JOB_ID = DICT[model]
    softmax = nv_digits_infer(FILE_LIST, "/home/zys/data/C3R/test/tmp/",  JOBS_DIR,  DIGITS_JOB_ID,  -1,  1,  'none',  0)

    lines=[]    

    probs = []
    for idx, p in enumerate(softmax):      
        probs.append(p[0])
        lines.append(str(p[0])+" "+str(p[1])+" "+str(p[2]))
            
    with open(model+'_softmax_probs.txt', 'w') as the_file:
        for line in lines:
            the_file.write(line+'\n')
            
    nv_digits_infer_test_model_analyze_result(probs, labels, model)
    

def nv_digits_infer_test_model2(fname1, fname2, model):
    
    probs=[]
    
    with open(fname1) as f:
        for line in f:
            fields = line.split()
            if(len(fields) == 8):
                ind = fields.index("Disease")
                
                v = float(fields[ind+1].strip('%'))/100
                probs.append(v)

    labels = []
    with open(fname2) as f:
        for line in f:
            if('0' in line):
                labels.append(True)
            elif('1' in line):
                labels.append(False)
            else:
                print "Unrecognized label!"        
        
    nv_digits_infer_test_model_analyze_result(probs, labels, model)       
    

    
def nv_digits_infer_test_model_analyze_result(probs, labels, model):
            
    
    # fprs = []
    # tprs = []    
    # #for threshold in range(1,1000):
    #    tpr, fpr = nv_digits_infer_one_round(threshold*0.001, softmax1, softmax2)
    #    fprs.append(fpr)
    #    tprs.append(tpr)

    y_actu = labels
    y_pred = probs

    #### CONFUSINO MATRIX ####    
    #cm = ConfusionMatrix(y_actu, y_pred)
    #print cm
    #cm.print_stats()
    
    #### ROC Curve #####
    fpr, tpr, thresholds = roc_curve(y_actu, y_pred, pos_label=True)    
    
    # The Youden index J (Youden, 1950) is defined as:   
    # J = max { sensitivityc + specificityc - 1 }
    # where c ranges over all possible criterion values.
    # Graphically, J is the maximum vertical distance between the ROC curve and the diagonal line.
    # The criterion value corresponding with the Youden index J is the optimal criterion value only when disease prevalence is 50%, equal weight is given to sensitivity and specificity, and costs of various decisions are ignored.

    youden =  tpr - fpr # = tpr + (1-fpr) -1 = tpr - fpr  # Sensitivity + Specificity - 1
    best_ind =  youden.argmax()
    best_threshold = thresholds[best_ind]

    orig_stdout = sys.stdout
    f = open(model + '.txt', 'w')
    sys.stdout = f
    
    print 'ROC curve optimal cutoff ', model, ' = ', best_threshold    
    
    # Find prediction to the dataframe applying threshold
    y_pred_labels = pandas.Series( y_pred).map(lambda x: True if x > best_threshold else False)
    cm = ConfusionMatrix(y_actu, y_pred_labels)
    # print cm
    cm.print_stats()
    
    # Get FP and FN samples      
    cwd = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
    directoryFP = cwd + '/' + model+'_FP'
    if os.path.exists(directoryFP):
        shutil.rmtree(directoryFP)
    os.mkdir(directoryFP)
    
    directoryFN = cwd + '/' +  model+'_FN'
    if os.path.exists(directoryFN):
        shutil.rmtree(directoryFN)
    os.mkdir(directoryFN)
    
    i=0
    with open(FILE_LIST) as f:
        for line in f:
            line = line.strip()
            pred_label = y_pred_labels[i]
            actu_label = y_actu[i]
            basename = os.path.basename(line)            
            new_file_name="{0:.3f}".format(y_pred[i]) + "_" +basename
            if(actu_label == True and pred_label == False):                
                shutil.copyfile(line, directoryFN+"/"+ new_file_name)
            if(actu_label == False and pred_label == True):                
                shutil.copyfile(line, directoryFP+"/"+ new_file_name)
            i+=1

    roc_auc = auc(fpr, tpr)
    print 'ROC AUC = ', roc_auc
    
    # Use precision recall curve for skewed / unbalanced data
    precision, recall, pr_thresholds = precision_recall_curve(y_actu, y_pred, pos_label=True)
    f1 = 2.0*precision*recall/(precision+recall)
    pr_best_ind = f1.argmax()
    pr_best_threshold = pr_thresholds[pr_best_ind]
    print 'precision_recall_curve optimal cutoff ', model, ' = ', pr_best_threshold
    
    sys.stdout = orig_stdout
    f.close()

    plt.figure()
    plt.plot(fpr, tpr, lw=1, label='ROC (area = %0.3f)' % (roc_auc))
    plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')
    plt.plot(fpr[best_ind], tpr[best_ind], 'ro', label='optimal cutoff') # (probability = %0.3f)' % (best_threshold))
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve (' + model + ') ' )
    plt.legend(loc="lower right")
    plt.savefig(model + "_roc_curve.png")
    #plt.show()
    plt.close()
    
    # pr_auc=auc(precision, recall)  
    plt.figure()
    plt.plot(recall, precision, lw=1, label='P-R Curve')
    # plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')
    # plt.plot(recall[pr_best_ind], precision[pr_best_ind], 'ro') #, label='best threshold (%0.3f)' % (best_threshold))
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.ylabel('Precision')
    plt.xlabel('Recall')
    plt.title('Precision-Recall Curve (' + model + ') ' )
    plt.legend(loc="lower right")
    plt.savefig(model + "_pr_curve.png")
    #plt.show()
    plt.close()
       

def nv_digits_infer_test_all_models():
    i =0
    for key in DICT:
        plt.figure(i)        
        i += 1
        nv_digits_infer_test_model(key)
            
# nv_digits_infer_test_all_models()

# special treatment for GoogLeNet_transfer_learning. It requires nv-caffe-0.15
# nv_digits_infer_test_model2("/home/zys/Desktop/scripts/GoogLeNet_ResultsFromDigits.txt", LABEL_LIST, "GoogLeNet_Transfer_Learning")

# nv_digits_infer_test_model2("AlexNet_T1.txt","AlexNet_T2.txt", "AlexNet")

# nv_digits_infer_test_model(DICT.keys()[int(sys.argv[1])])

# nv_digits_infer_test_model2("/home/zys/Desktop/scripts/GoogLeNet_ResultsFromDigits.txt", "/home/zys/Desktop/test2/data/LabelList.txt", "GoogLeNet_Transfer_Learning")

digits_predict_folder.sh . . . . . . 1 KB . . . . . . 12/18/2017 3:37:32 PM

# This is a linux shell script, which calls digits_predict_folder.py to batch process mutliple DNN models.

# sudo python -c 'import digits_predict_folder; digits_predict_folder.nv_digits_infer_test_model("AlexNet_from_scratch")'
# sudo python -c 'import digits_predict_folder; digits_predict_folder.nv_digits_infer_test_model("AlexNet_transfer_learning")'
sudo python -c 'import digits_predict_folder; keys=digits_predict_folder.GetDictKeys(); digits_predict_folder.nv_digits_infer_test_model(keys[0])'
sudo python -c 'import digits_predict_folder; keys=digits_predict_folder.GetDictKeys(); digits_predict_folder.nv_digits_infer_test_model(keys[1])'
sudo python -c 'import digits_predict_folder; keys=digits_predict_folder.GetDictKeys(); digits_predict_folder.nv_digits_infer_test_model(keys[2])'
sudo python -c 'import digits_predict_folder; keys=digits_predict_folder.GetDictKeys(); digits_predict_folder.nv_digits_infer_test_model(keys[4])'
sudo python -c 'import digits_predict_folder; keys=digits_predict_folder.GetDictKeys(); digits_predict_folder.nv_digits_infer_test_model(keys[5])'
'

# special treatment for GoogLeNet_transfer_learning. It requires nv-caffe-0.15
python -c 'import digits_predict_folder; digits_predict_folder.nv_digits_infer_test_model2("/home/zys/Desktop/scripts/GoogLeNet_ResultsFromDigits.txt", "/home/zys/Desktop/test2/data/LabelList.txt", "GoogLeNet_transfer_learning")'

sudo chown $USER -R .

digits_predict_folder_with_human_experts.py . . . . . . 16 KB . . . . . . 12/18/2017 3:37:32 PM

# Analyze DNN performance with human experts

# Copyright (c) 2017, zhangys@zjgsu.edu.cn

# requires NVidia Digits to be installed

DICT = {}
DICT["DNN_vs_Human"] = "20170719-171742-5c31"
DICT["VGG-16_transfer_learning"] = "20170719-171742-5c31"
DICT["VGG-16_from_scratch"] = '20170722-143332-f077'
DICT["GoogLeNet_from_scratch"] = '20170722-113606-c59c'
DICT["GoogLeNet_transfer_learning"] = '20170722-190454-1af1' # '20170719-185942-bf65'
DICT["AlexNet_transfer_learning"] = '20170803-121318-b153'#'20170802-164043-96f9'# '20170722-205930-3ac4' # '20170720-094939-b665'
DICT["AlexNet_from_scratch"]= '20170802-163356-7f3d'#'20170802-162531-3531' #'20170722-152531-da8d'

def GetDictKeys():
     return DICT.keys()

FILE_LIST="/home/zys/Desktop/test2/data/FileList.txt"
LABEL_LIST="/home/zys/Desktop/test2/data/LabelList.txt"
JOBS_DIR = "/home/zys/DIGITS-master/digits/jobs"  # "/var/lib/digits/jobs/"


import os
import sys

caffe_root = os.environ['CAFFE_ROOT'] # '/home/zys/nv-caffe-0.15' #  # When use sudo, the environ var may be different than the current terminal 
sys.path.insert(0,caffe_root+'python')

import numpy
import pandas
from sklearn.metrics import roc_curve, auc, precision_recall_curve
from pandas_ml import ConfusionMatrix
import matplotlib.pyplot as plt
import shutil
import inspect

# Add path for DIGITS package
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
import digits.config
from digits.inference.errors import InferenceError
from digits.job import Job
from digits import utils

# To solve error "Check failed: error == cudaSuccess (10 vs. 0)  invalid device ordinal"
import caffe
print (caffe.__file__)
caffe.set_device(0)
"""
Perform inference on a list of images using the specified model
"""
def nv_digits_infer(input_list,
          output_dir,
          jobs_dir,
          model_id,
          epoch,
          batch_size,
          layers,
          gpu):
    """
    Perform inference on a list of images using the specified model
    """
    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load model job
    model_dir = os.path.join(jobs_dir, model_id)
    assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
    model = Job.load(model_dir)

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, model.dataset_id)
    assert os.path.isdir(dataset_dir), "Dataset dir %s does not exist" % dataset_dir
    dataset = Job.load(dataset_dir)
    for task in model.tasks:
        task.dataset = dataset

    # retrieve snapshot file
    task = model.train_task()
    snapshot_filename = None
    epoch = float(epoch)
    if epoch == -1 and len(task.snapshots):
        # use last epoch
        epoch = task.snapshots[-1][1]
        snapshot_filename = task.snapshots[-1][0]
    else:
        for f, e in task.snapshots:
            if e == epoch:
                snapshot_filename = f
                break
    if not snapshot_filename:
        raise InferenceError("Unable to find snapshot for epoch=%s" % repr(epoch))

    # retrieve image dimensions and resize mode
    image_dims = dataset.get_feature_dims()
    height = image_dims[0]
    width = image_dims[1]
    channels = image_dims[2]
    resize_mode = dataset.resize_mode if hasattr(dataset, 'resize_mode') else 'squash'

    n_input_samples = 0  # number of samples we were able to load
    input_ids = []       # indices of samples within file list
    input_data = []      # sample data

    
    # load paths from file
    paths = None
    with open(input_list) as infile:
        paths = infile.readlines()
    # load and resize images
    for idx, path in enumerate(paths):
        path = path.strip()
        try:
            image = utils.image.load_image(path.strip())            
            image = utils.image.resize_image(
                image,
                height,
                width,
                channels=channels,
                resize_mode=resize_mode)
           
            input_ids.append(idx)
            input_data.append(image)
            n_input_samples = n_input_samples + 1
        except utils.errors.LoadImageError as e:
            print e

    # perform inference

    if layers != 'none':
        raise InferenceError("Layer visualization is not supported for multiple inference")
    outputs = model.train_task().infer_many(
        input_data,
        snapshot_epoch=epoch,
        gpu=gpu,
        resize=True)
    
    return outputs["softmax"]

def nv_digits_infer_test_model(model, human_results_dir):    

    print ('####### ' + model + ' #########')

    labels = []
    with open(LABEL_LIST) as f:
        for line in f:
            if('0' in line):
                labels.append(True)
            elif('1' in line):
                labels.append(False)
            else:
                print "Unrecognized label!"

    DIGITS_JOB_ID = DICT[model]
    softmax = nv_digits_infer(FILE_LIST, "/home/zys/data/C3R/test/tmp/",  JOBS_DIR,  DIGITS_JOB_ID,  -1,  1,  'none',  0)

    lines=[]    

    probs = []
    for idx, p in enumerate(softmax):      
        probs.append(p[0])
        lines.append(str(p[0])+" "+str(p[1])+" "+str(p[2]))
            
    with open(model+'_softmax_probs.txt', 'w') as the_file:
        for line in lines:
            the_file.write(line+'\n')
            
            
    files=[]
    with open(FILE_LIST) as f:
        for line in f:
            line = line.strip()    
            files.append(line)
            
    dic = {}
    i=0
    for f in files:
        dic[os.path.basename(f)] = labels[i]
        i+=1
        
    humanTitles = []
    humanTPR = []
    humanFPR = []
    humanPrecision=[]
    humanRecall = []
    
    for fn in os.listdir(human_results_dir):
        ks=[]
        ls=[]        
        print "\n\n******", fn, "******"
        humanTitles.append(fn)
        with open(os.path.join(human_results_dir, fn)) as f:            
            for line in f:                
                line = line.strip()
                k = line.split(',')[0]
                l = line.split(',')[1]
                if(k not in ks):                        
                    ks.append(k)
                    if(l=='1'):
                        ls.append(True)
                    else:
                        ls.append(False)
        rs=[]
        for k in ks:
            rs.append(dic[k])
        
        cm = ConfusionMatrix(rs, ls)
        # print cm
        cm.print_stats()
        s= cm.stats()        
        humanTPR.append(s['TPR'])
        humanFPR.append(s['FPR']) # 1- TNR
        humanPrecision.append(s['PPV'])
        humanRecall.append(s['TPR'])        
    
    nv_digits_infer_test_model_analyze_result(probs, labels, model, humanTPR, humanFPR, humanPrecision, humanRecall)       
    
 
def nv_digits_infer_test_model3(fname1, fname2, model, human_results_dir):
    
    probs=[]
    
    with open(fname1) as f:
        for line in f:
            fields = line.split()
            if(len(fields) == 8):
                ind = fields.index("Disease")
                
                v = float(fields[ind+1].strip('%'))/100
                probs.append(v)

    labels = []
    with open(fname2) as f:
        for line in f:
            if('0' in line):
                labels.append(True)
            elif('1' in line):
                labels.append(False)
            else:
                print "Unrecognized label!"        


    files=[]
    with open(FILE_LIST) as f:
        for line in f:
            line = line.strip()    
            files.append(line)
            
    dic = {}
    i=0
    for f in files:
        dic[os.path.basename(f)] = labels[i]
        i+=1

    orig_stdout = sys.stdout
    log = open(model + '_humans.txt', 'w')
    sys.stdout = log

    humanTitles = []
    humanTPR = []
    humanFPR = []
    humanPrecision=[]
    humanRecall = []
    humanTNR=[]
    humanF1=[]
    humanMCC=[]
    humanInformedness=[]
    humanPPV=[]
    
    for fn in os.listdir(human_results_dir):
        ks=[]
        ls=[]        
        rs=[]
        log.write( "\n\n******" + fn + "******\n")
        log.write( "\n---- Misclassified Files (filename, truth, label)---\n")
        humanTitles.append(fn)
        with open(os.path.join(human_results_dir, fn)) as f:            
            for line in f:                
                line = line.strip()
                k = line.split(',')[0]
                l = line.split(',')[1]
                if(k not in ks):                        
                    ks.append(k)
                    b=(l=='1')
                    ls.append(b)
                    rs.append(dic[k])                    
                    if(b!=dic[k]):
                        log.write(k + '\t' + str( dic[k]) + '\t' +  str(b) + '\n')
        
        cm = ConfusionMatrix(rs, ls)
        # print cm
        cm.print_stats()
        s= cm.stats()        
        humanTPR.append(s['TPR'])
        humanFPR.append(s['FPR']) # 1- TNR
        humanPrecision.append(s['PPV'])
        humanRecall.append(s['TPR'])
        humanTNR.append(s['TNR'])
        humanF1.append(s['F1_score'])
        humanMCC.append(s['MCC'])
        humanInformedness.append(s['informedness'])
        humanPPV.append(s['PPV'])
    
    log.write( '\n\n ******** Summary *******\n')
    log.write( "\nExpert\tTPR\t\FPR\tPPV\tTNR\tF1_score\tMCC\tinformednessn\n")
    i=0
    for human in humanTitles:
       print human, '\t', humanTPR[i], '\t', humanFPR[i], '\t', humanPPV[i], '\t', humanTNR[i], '\t', humanF1[i], '\t', humanMCC[i], '\t',humanInformedness[i], '\n\n'
       i+=1
        
    sys.stdout = orig_stdout    
    log.close()
    
    nv_digits_infer_test_model_analyze_result(probs, labels, model, humanTPR, humanFPR, humanPrecision, humanRecall)       
    
def nv_digits_infer_test_model_analyze_result(probs, labels, model, humanTPR, humanFPR, humanPrecision, humanRecall):
            
    
    # fprs = []
    # tprs = []    
    # #for threshold in range(1,1000):
    #    tpr, fpr = nv_digits_infer_one_round(threshold*0.001, softmax1, softmax2)
    #    fprs.append(fpr)
    #    tprs.append(tpr)

    y_actu = labels
    y_pred = probs

    #### CONFUSINO MATRIX ####    
    #cm = ConfusionMatrix(y_actu, y_pred)
    #print cm
    #cm.print_stats()
    
    #### ROC Curve #####
    fpr, tpr, thresholds = roc_curve(y_actu, y_pred, pos_label=True)    
    
    # The Youden index J (Youden, 1950) is defined as:   
    # J = max { sensitivityc + specificityc - 1 }
    # where c ranges over all possible criterion values.
    # Graphically, J is the maximum vertical distance between the ROC curve and the diagonal line.
    # The criterion value corresponding with the Youden index J is the optimal criterion value only when disease prevalence is 50%, equal weight is given to sensitivity and specificity, and costs of various decisions are ignored.

    youden =  tpr - fpr # = tpr + (1-fpr) -1 = tpr - fpr  # Sensitivity + Specificity - 1
    best_ind =  youden.argmax()
    best_threshold = thresholds[best_ind]

    orig_stdout = sys.stdout
    f = open(model + '.txt', 'w')
    sys.stdout = f


    thresh = 0.164
    print '\nP-R curve cutoff = ', thresh    
    
    # Find prediction to the dataframe applying threshold
    y_pred_labels = pandas.Series( y_pred).map(lambda x: True if x > thresh else False)
    cm = ConfusionMatrix(y_actu, y_pred_labels)
    # print cm
    cm.print_stats()
    
    thresh = 0.174
    print '\nP-R curve cutoff = ', thresh    
    
    # Find prediction to the dataframe applying threshold
    y_pred_labels = pandas.Series( y_pred).map(lambda x: True if x > thresh else False)
    cm = ConfusionMatrix(y_actu, y_pred_labels)
    # print cm
    cm.print_stats()
    
    
    thresh = 0.379
    print '\nP-R curve cutoff = ', thresh    
    
    # Find prediction to the dataframe applying threshold
    y_pred_labels = pandas.Series( y_pred).map(lambda x: True if x > thresh else False)
    cm = ConfusionMatrix(y_actu, y_pred_labels)
    # print cm
    cm.print_stats()
    
    
    thresh = 0.519 # argmax(F1 score) point for VGG
    print '\nP-R curve cutoff = ', thresh    
    
    # Find prediction to the dataframe applying threshold
    y_pred_labels = pandas.Series( y_pred).map(lambda x: True if x > thresh else False)
    cm = ConfusionMatrix(y_actu, y_pred_labels)
    # print cm
    cm.print_stats()
    
    thresh = 0.535
    print '\nP-R curve cutoff = ', thresh    
    
    # Find prediction to the dataframe applying threshold
    y_pred_labels = pandas.Series( y_pred).map(lambda x: True if x > thresh else False)
    cm = ConfusionMatrix(y_actu, y_pred_labels)
    # print cm
    cm.print_stats()
    
    print '\nROC curve optimal cutoff = ', best_threshold    
    
    # Find prediction to the dataframe applying threshold
    y_pred_labels = pandas.Series( y_pred).map(lambda x: True if x > best_threshold else False)
    cm = ConfusionMatrix(y_actu, y_pred_labels)
    # print cm
    cm.print_stats()
    
    
    # Get FP and FN samples      
    thresh = 0.519
    y_pred_labels = pandas.Series( y_pred).map(lambda x: True if x > thresh else False)
    cwd = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
    directoryFP = cwd + '/' + model+'_FP'
    if os.path.exists(directoryFP):
        shutil.rmtree(directoryFP)
    os.mkdir(directoryFP)
    
    directoryFN = cwd + '/' +  model+'_FN'
    if os.path.exists(directoryFN):
        shutil.rmtree(directoryFN)
    os.mkdir(directoryFN)
    
    i=0
    with open(FILE_LIST) as f:
        for line in f:
            line = line.strip()
            pred_label = y_pred_labels[i]
            actu_label = y_actu[i]
            basename = os.path.basename(line)            
            new_file_name="{0:.3f}".format(y_pred[i]) + "_" +basename
            if(actu_label == True and pred_label == False):                
                shutil.copyfile(line, directoryFN+"/"+ new_file_name)
            if(actu_label == False and pred_label == True):                
                shutil.copyfile(line, directoryFP+"/"+ new_file_name)
            i+=1

    roc_auc = auc(fpr, tpr)
    print 'ROC AUC = ', roc_auc
    
    # Use precision recall curve for skewed / unbalanced data
    precision, recall, pr_thresholds = precision_recall_curve(y_actu, y_pred, pos_label=True)
    f1 = 2.0*precision*recall/(precision+recall)
    pr_best_ind = f1.argmax()
    pr_best_threshold = pr_thresholds[pr_best_ind]
    print 'precision_recall_curve optimal cutoff ', model, ' = ', pr_best_threshold
    
    sys.stdout = orig_stdout
    f.close()

    plt.figure()
    plt.plot(fpr, tpr, lw=1, label='ROC (area = %0.3f)' % (roc_auc))
    plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')
    # plt.plot(fpr[best_ind], tpr[best_ind], 'ro', label='optimal cutoff') # (probability = %0.3f)' % (best_threshold))
    plt.plot(humanFPR, humanTPR, 'bo', label='human experts')
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve (' + model + ') ' )
    plt.legend(loc="lower right")
    plt.savefig(model + "_roc_curve.png")
    #plt.show()
    plt.close()
    
    # pr_auc=auc(precision, recall)  
    plt.figure()
    plt.plot(recall, precision, lw=1, label='P-R Curve')
    plt.plot(humanRecall, humanPrecision, 'bo', label='human experts')    
    # plt.plot(recall[pr_best_ind], precision[pr_best_ind], 'ro') #, label='best threshold (%0.3f)' % (best_threshold))
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.ylabel('Precision')
    plt.xlabel('Recall')
    plt.title('Precision-Recall Curve (' + model + ') ' )
    plt.legend(loc="lower right")
    plt.savefig(model + "_pr_curve.png")
    #plt.show()
    plt.close()
    
    myfile = open(model + "_pr_curve.txt", "w")
    myfile.write('threshold\trecall\tprecision\n\n')
    i=0
    for th in pr_thresholds:
        myfile.write(str(th) + str(recall[i]) + '\t'+ str(precision[i]) + '\n')
        i+=1
       
nv_digits_infer_test_model('DNN_vs_Human',"/home/zys/Desktop/scripts/human") # Batch inference
# nv_digits_infer_test_model3("/home/zys/Desktop/scripts/GoogLeNet_ResultsFromDigits.txt", "/home/zys/Desktop/test2/data/LabelList.txt", "GoogLeNet vs Human", "/home/zys/Desktop/scripts/human") # special treatment for GoogLeNet_transfer_learning. It requires nv-caffe-0.15. Use provided probs file