Segmentation is essential for image interpretation tasks, so let's not fall behind the trend: let's implement it and become experts in no time!

What is semantic segmentation?

It is the process of associating each pixel of an image with a class label such as flower, person, road, sky, sea, or car. In other words, we take an image as input and output a class decision for every pixel of that image. Take, for example, the input image below, a dog sitting on a bed:

In the output we want to assign a class to every pixel: dog, bed, the table behind, and the cupboard. After semantic segmentation, the image looks like this:

One interesting property of semantic segmentation is that it does not distinguish instances: if there were two dogs in this image, both would simply be labelled dog, not dog1 and dog2. Semantic segmentation has a wide range of practical applications.
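To make the idea of per-pixel class labels concrete, here is a tiny standalone sketch (not part of the tutorial code) that builds a hypothetical label map with made-up class ids: 0 = background, 1 = dog, 2 = bed.

import numpy as np

# Hypothetical 4 x 6 label map: every entry is the class id of one pixel.
label_map = np.zeros((4, 6), dtype=np.uint8)   # all pixels start as background (0)
label_map[1:3, 0:2] = 1                        # first dog
label_map[1:3, 4:6] = 1                        # second dog gets the same class id
label_map[3, :] = 2                            # bed along the bottom row

print(label_map)
# Both dogs are labelled 1: semantic segmentation does not separate instances.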
Implementing semantic segmentation:
Let's implement this in code:
# SSCV IIITH 2K19
import random
import time
import numpy as np
import torch
print(torch.__version__)
import math

from PIL import Image, ImageOps
from torch.optim import SGD, Adam, lr_scheduler
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision.transforms import Resize
from torchvision.transforms import ToTensor, ToPILImage

from dataset import cityscapes
from dataset import idd_lite

import sys
print(sys.executable)

from transform import Relabel, ToLabel, Colorize

import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline

import importlib
from iouEval import iouEval, getColorEntry  # importing the iouEval class from iouEval.py
from shutil import copyfile
NUM_CHANNELS = 3   # RGB images
NUM_CLASSES = 8    # IDD Lite has 8 labels (Level 1 of the label hierarchy)
USE_CUDA = torch.cuda.is_available()
IMAGE_HEIGHT = 160
DATA_ROOT = '/tmp/school/6-segmentation/user/1/6-segmentation/idd1_lite'
BATCH_SIZE = 2
NUM_WORKERS = 4
NUM_EPOCHS = 100
ENCODER_ONLY = True
device = torch.device("cuda")
#device = 'cuda'

color_transform = Colorize(NUM_CLASSES)
image_transform = ToPILImage()

IOUTRAIN = False
IOUVAL = True
class MyCoTransform(object):
    def __init__(self, enc, augment=True, height=160):
        self.enc = enc
        self.augment = augment
        self.height = height

    def __call__(self, input, target):
        # Resize data to the required size
        input = Resize((self.height, 320), Image.BILINEAR)(input)
        target = Resize((self.height, 320), Image.NEAREST)(target)

        if self.augment:
            # Random horizontal flip
            hflip = random.random()
            if hflip < 0.5:
                input = input.transpose(Image.FLIP_LEFT_RIGHT)
                target = target.transpose(Image.FLIP_LEFT_RIGHT)

            # Random translation of 0-2 pixels (fill the rest with padding)
            transX = random.randint(0, 2)
            transY = random.randint(0, 2)
            input = ImageOps.expand(input, border=(transX, transY, 0, 0), fill=0)
            target = ImageOps.expand(target, border=(transX, transY, 0, 0), fill=7)  # pad label, filling with 7
            input = input.crop((0, 0, input.size[0] - transX, input.size[1] - transY))
            target = target.crop((0, 0, target.size[0] - transX, target.size[1] - transY))

        input = ToTensor()(input)
        target = ToLabel()(target)
        target = Relabel(255, 7)(target)
        return input, target
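As an optional sanity check (a sketch that is not part of the original walkthrough, using a pair of dummy PIL images in place of a real IDD Lite sample and assuming the project's ToLabel and Relabel transforms behave as used above), we can apply the transform and inspect the tensors it returns:

# Optional sanity check with dummy data; real samples come from idd_lite below.
dummy_img = Image.new('RGB', (640, 320), color=(128, 128, 128))
dummy_lbl = Image.new('L', (640, 320), color=7)

co_t = MyCoTransform(ENCODER_ONLY, augment=True, height=IMAGE_HEIGHT)
img_tensor, lbl_tensor = co_t(dummy_img, dummy_lbl)
print(img_tensor.shape, lbl_tensor.shape)  # roughly [3, 160, 320] and [1, 160, 320]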
best_acc = 0

co_transform = MyCoTransform(ENCODER_ONLY, augment=True, height=IMAGE_HEIGHT)
co_transform_val = MyCoTransform(ENCODER_ONLY, augment=False, height=IMAGE_HEIGHT)

# Training data
dataset_train = idd_lite(DATA_ROOT, co_transform, 'train')
print(len(dataset_train))

# Validation data
dataset_val = idd_lite(DATA_ROOT, co_transform_val, 'val')
print(len(dataset_val))

loader_train = DataLoader(dataset_train, num_workers=NUM_WORKERS, batch_size=BATCH_SIZE, shuffle=True)
loader_val = DataLoader(dataset_val, num_workers=NUM_WORKERS, batch_size=BATCH_SIZE, shuffle=False)
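If the IDD Lite files are actually present under DATA_ROOT, a quick peek at one batch (again an optional check, not from the original article) confirms what the training loop below expects: image tensors plus label tensors holding class indices 0-7.

# Optional: inspect one training batch (shapes assume the transform above).
sample_images, sample_labels = next(iter(loader_train))
print(sample_images.shape)     # expected to be about [BATCH_SIZE, 3, 160, 320]
print(sample_labels.shape)     # expected to be about [BATCH_SIZE, 1, 160, 320]
print(sample_labels.unique())  # which class ids appear in this batch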
The answer is the negative log: the loss for each pixel is the negative log of the probability the network assigns to the correct class, which is large when that probability is small and close to zero when it is high. Summing this over the correct class of every pixel, the loss is low whenever the network assigns high confidence to the correct class and high whenever it assigns low confidence to the correct class. In PyTorch this is the cross-entropy loss:

criterion = torch.nn.CrossEntropyLoss()
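As a rough standalone illustration of that behaviour (not part of the training script, using made-up logits for a single pixel over three classes):

import torch.nn.functional as F

# Made-up logits for one pixel over three classes; the correct class is 0.
logits_confident = torch.tensor([[4.0, 0.0, 0.0]])   # strongly favours class 0
logits_unsure    = torch.tensor([[0.1, 0.0, 0.0]])   # barely favours class 0
target = torch.tensor([0])

print(F.cross_entropy(logits_confident, target).item())  # small loss, about 0.04
print(F.cross_entropy(logits_unsure, target).item())     # larger loss, about 1.03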
model_file = importlib.import_module('erfnet')
model = model_file.Net(NUM_CLASSES).to(device)
optimizer = Adam(model.parameters(), 5e-4, (0.9, 0.999), eps=1e-08, weight_decay=1e-4)
start_epoch = 1
import os

steps_loss = 50
my_start_time = time.time()

for epoch in range(start_epoch, NUM_EPOCHS + 1):
    print("----- TRAINING - EPOCH", epoch, "-----")

    epoch_loss = []
    time_train = []

    doIouTrain = IOUTRAIN
    doIouVal = IOUVAL

    if doIouTrain:
        iouEvalTrain = iouEval(NUM_CLASSES)

    model.train()
    for step, (images, labels) in enumerate(loader_train):
        start_time = time.time()
        inputs = images.to(device)
        targets = labels.to(device)

        outputs = model(inputs, only_encode=ENCODER_ONLY)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        loss = criterion(outputs, targets[:, 0])
        loss.backward()
        optimizer.step()

        epoch_loss.append(loss.item())
        time_train.append(time.time() - start_time)

        if doIouTrain:
            #start_time_iou = time.time()
            iouEvalTrain.addBatch(outputs.max(1)[1].unsqueeze(1).data, targets.data)
            #print("Time to add confusion matrix: ", time.time() - start_time_iou)

        # print statistics
        if steps_loss > 0 and step % steps_loss == 0:
            average = sum(epoch_loss) / len(epoch_loss)
            print(f'loss: {average:0.4} (epoch: {epoch}, step: {step})',
                  "// Avg time/img: %.4f s" % (sum(time_train) / len(time_train) / BATCH_SIZE))

    average_epoch_loss_train = sum(epoch_loss) / len(epoch_loss)

    iouTrain = 0
    if doIouTrain:
        iouTrain, iou_classes = iouEvalTrain.getIoU()
        iouStr = getColorEntry(iouTrain) + '{:0.2f}'.format(iouTrain * 100) + '\033[0m'
        print("EPOCH IoU on TRAIN set: ", iouStr, "%")

my_end_time = time.time()
print(my_end_time - my_start_time)

After training for 100 epochs, we will see:
# Validate on val images after each epoch of training
print("----- VALIDATING - EPOCH", epoch, "-----")

model.eval()
epoch_loss_val = []
time_val = []

if doIouVal:
    iouEvalVal = iouEval(NUM_CLASSES)

for step, (images, labels) in enumerate(loader_val):
    start_time = time.time()

    inputs = images.to(device)
    targets = labels.to(device)

    with torch.no_grad():
        outputs = model(inputs, only_encode=ENCODER_ONLY)
        #outputs = model(inputs)
        loss = criterion(outputs, targets[:, 0])

    epoch_loss_val.append(loss.item())
    time_val.append(time.time() - start_time)

    # Add batch to calculate TP, FP and FN for IoU estimation
    if doIouVal:
        #start_time_iou = time.time()
        iouEvalVal.addBatch(outputs.max(1)[1].unsqueeze(1).data, targets.data)
        #print("Time to add confusion matrix: ", time.time() - start_time_iou)

    if steps_loss > 0 and step % steps_loss == 0:
        average = sum(epoch_loss_val) / len(epoch_loss_val)
        print(f'VAL loss: {average:0.4} (epoch: {epoch}, step: {step})',
              "// Avg time/img: %.4f s" % (sum(time_val) / len(time_val) / BATCH_SIZE))

average_epoch_loss_val = sum(epoch_loss_val) / len(epoch_loss_val)

iouVal = 0
if doIouVal:
    iouVal, iou_classes = iouEvalVal.getIoU()
    print(iou_classes)
    iouStr = getColorEntry(iouVal) + '{:0.2f}'.format(iouVal * 100) + '\033[0m'
    print("EPOCH IoU on VAL set: ", iouStr, "%")
# Qualitative analysis
dataiter = iter(loader_val)
images, labels = next(dataiter)

if USE_CUDA:
    images = images.to(device)

inputs = images.to(device)

with torch.no_grad():
    outputs = model(inputs, only_encode=ENCODER_ONLY)

label = outputs[0].max(0)[1].byte().cpu().data
label_color = Colorize()(label.unsqueeze(0))
label_save = ToPILImage()(label_color)

plt.figure()
plt.imshow(ToPILImage()(images[0].cpu()))
plt.figure()
plt.imshow(label_save)

And just like that, our model is ready!
So, in short, we can now easily associate each pixel of an image with a class label, and we can tune the hyperparameters to see how the results change. This article covered the basics of semantic segmentation; to classify individual object instances, we need instance segmentation, which is a more advanced version of semantic segmentation.