From d2ebb6da11ef8b820041ed392dd4f39463cafec8 Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Mon, 11 Nov 2024 11:24:06 +1000
Subject: [PATCH 01/28] Update README.md
Testing branch
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 3a10f6515..8973ff481 100644
--- a/README.md
+++ b/README.md
@@ -16,4 +16,4 @@ In the recognition folder, you will find many recognition problems solved includ
* StyleGAN
* Stable diffusion
* transformers
-etc.
+etc
From 575ce21fd848cd43b033cdb69c54cb818e7c8086 Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Mon, 11 Nov 2024 11:38:18 +1000
Subject: [PATCH 02/28] Add files via upload
Reupload of files to the correct branch (topic-recognition)
---
recognition/dataset.py | 76 +++++++++++++
recognition/modules.py | 237 +++++++++++++++++++++++++++++++++++++++++
recognition/predict.py | 66 ++++++++++++
recognition/train.py | 114 ++++++++++++++++++++
4 files changed, 493 insertions(+)
create mode 100644 recognition/dataset.py
create mode 100644 recognition/modules.py
create mode 100644 recognition/predict.py
create mode 100644 recognition/train.py
diff --git a/recognition/dataset.py b/recognition/dataset.py
new file mode 100644
index 000000000..73ad0c7a9
--- /dev/null
+++ b/recognition/dataset.py
@@ -0,0 +1,76 @@
+import torch
+from torch.utils.data import Dataset
+import pandas as pd
+import os
+import cv2
+import numpy as np
+
+class ISICDataset(Dataset):
+ """Custom Dataset class for YOLO model with ISIC data."""
+
+ def __init__(self, image_dir, mask_dir, labels_path, image_size):
+ self.image_size = image_size
+ self.image_dir = image_dir
+ self.mask_dir = mask_dir
+ self.labels = pd.read_csv(labels_path)
+
+ # Load all image file names in the directory
+ self.image_files = [f for f in os.listdir(image_dir) if f.endswith('.jpg')]
+ self.samples = [self._process_sample(i) for i in range(len(self.image_files))]
+
+ def __len__(self):
+ return len(self.image_files)
+
+ def __getitem__(self, idx):
+ return self.samples[idx]
+
+ def _process_sample(self, idx):
+ """Helper function to process and return a single sample (image and target vector)."""
+ # Load image and mask
+ image = self._load_image(idx)
+ mask = self._load_mask(idx)
+
+ # Resize image and mask to the target size
+ image = cv2.resize(image, (self.image_size, self.image_size)).astype(np.float32) / 255.0
+ mask = cv2.resize(mask, (self.image_size, self.image_size))
+
+ # Obtain bounding box coordinates from the mask
+ x, y, w, h = self._extract_bounding_box(mask)
+
+ # Retrieve label probabilities
+ label1, label2 = self.labels.iloc[idx, 1:3]
+ total_prob = label1 + label2
+
+ # Create target vector
+ target_vector = np.array(
+ [x + w / 2, y + h / 2, w, h, total_prob, label1, label2],
+ dtype=np.float32
+ )
+
+ # Convert image to tensor format (C, H, W)
+ image_tensor = torch.tensor(image.transpose(2, 0, 1), dtype=torch.float32)
+ target_tensor = torch.tensor(target_vector, dtype=torch.float32)
+
+ return image_tensor, target_tensor
+
+ def _load_image(self, idx):
+ """Loads an image given an index."""
+ img_name = os.path.join(self.image_dir, self.image_files[idx])
+ return cv2.imread(img_name)
+
+ def _load_mask(self, idx):
+ """Loads the mask corresponding to the image at the given index."""
+ mask_name = os.path.join(
+ self.mask_dir, self.image_files[idx].replace('.jpg', '_segmentation.png')
+ )
+ return cv2.imread(mask_name, cv2.IMREAD_GRAYSCALE)
+
+ def _extract_bounding_box(self, mask):
+ """Extracts the bounding box from the mask image."""
+ _, thresh = cv2.threshold(mask, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+ contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+ if contours:
+ x, y, w, h = cv2.boundingRect(contours[0])
+ return x, y, w, h
+ return 0, 0, 0, 0 # Return zero box if no contours are found
diff --git a/recognition/modules.py b/recognition/modules.py
new file mode 100644
index 000000000..3ea4365ac
--- /dev/null
+++ b/recognition/modules.py
@@ -0,0 +1,237 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+
+# Device configuration
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+if torch.cuda.is_available():
+ print("cuda")
+if not torch.cuda.is_available():
+ print("cpu")
+
+class YOLO(nn.Module):
+
+ #REFERENCE: yolov3-tiny.cfg from https://github.com/pjreddie/darknet/blob/master/cfg
+ #Used as basis for what layers were needed
+ def __init__(self, num_classes):
+ super(YOLO, self).__init__()
+ self.num_classes = num_classes
+ layers = []
+ filters = [16,32,64,128,256,512]
+ in_channels = 3
+        #Convolution layers and max pooling
+ for i in filters:
+ layers.append(nn.Conv2d(in_channels, i, kernel_size=3, stride=1, padding=1, bias=False))
+ in_channels = i
+ layers.append(nn.BatchNorm2d(i))
+            layers.append(nn.LeakyReLU(0.1, True)) #inplace=True; set to False if in-place ops cause autograd errors
+            layers.append(nn.MaxPool2d(kernel_size=2, stride=2)) #Halves the spatial resolution after each block
+ layers.append(nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1, bias=False))
+ layers.append(nn.BatchNorm2d(1024))
+ layers.append(nn.LeakyReLU(0.1, True))
+
+ layers.append(nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=1, bias=False))
+ layers.append(nn.BatchNorm2d(256))
+ layers.append(nn.LeakyReLU(0.1, True))
+
+ layers.append(nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1, bias=False))
+ layers.append(nn.BatchNorm2d(512))
+ layers.append(nn.LeakyReLU(0.1, True))
+
+ layers.append(nn.Conv2d(512, 255, kernel_size=1, stride=1, padding=1, bias=True))
+ self.conv_start = nn.Sequential(*layers)
+
+ #Detection layer - given anchors
+        self.anchor1 = [(81,82), (135,169), (344,319)] #Coarse-scale anchors from yolov3-tiny; may need retuning for this dataset
+
+ #Route layer could go here
+ self.conv_mid = nn.Sequential(
+ nn.Conv2d(255, 128, kernel_size=1, stride=1, padding=1, bias=False),
+ nn.BatchNorm2d(128),
+ nn.LeakyReLU(0.1, True),
+ nn.Upsample(scale_factor=2, mode="bilinear"))
+ #Another route layer maybe
+ self.conv_end = nn.Sequential(
+ nn.Conv2d(128,256,kernel_size=3,stride=1,padding=1,bias=False),
+ nn.BatchNorm2d(256),
+ nn.LeakyReLU(0.1, True),
+ nn.Conv2d(256, 255, kernel_size=1, stride=1, padding=1, bias=True))
+
+ #Another detection layer
+ self.anchor2 = [(10,14), (23,27), (37,58)]
+
+ def forward(self, x):
+ out = self.conv_start(x)
+ out = out.data
+ a = self.predict_transform(out, 416, self.anchor1, self.num_classes)
+ out = self.conv_mid(out)
+ out = self.conv_end(out)
+ out = out.data
+ b = self.predict_transform(out, 416, self.anchor2, self.num_classes)
+ return torch.cat((a, b), 1)
+
+ def predict_transform(self, prediction, inp_dim, anchors, num_classes):
+ """
+ Decodes the output from the convolution layers and arranges the information into a usable format.
+ The below reference was used for a base for this function.
+ REFERENCE: refer to reference 2 in README.
+ """
+ batch_size = prediction.size(0)
+ stride = inp_dim // prediction.size(2)
+ grid_size = inp_dim // stride
+ bbox_attrs = 5 + num_classes
+ num_anchors = len(anchors)
+
+ #Rearranges the feature map to (batch_size, number of boxes, box_attributes)
+ prediction = prediction.view(batch_size, bbox_attrs*num_anchors, grid_size*grid_size)
+ prediction = prediction.transpose(1,2).contiguous()
+ prediction = prediction.view(batch_size, grid_size*grid_size*num_anchors, bbox_attrs)
+ anchors = [(a[0]/stride, a[1]/stride) for a in anchors]
+ #Get the centre_X, centre_Y and object confidence between 1 and 0
+ prediction[:,:,0] = torch.sigmoid(prediction[:,:,0])
+ prediction[:,:,1] = torch.sigmoid(prediction[:,:,1])
+ prediction[:,:,4] = torch.sigmoid(prediction[:,:,4])
+ #Add the center offsets
+ grid = np.arange(grid_size)
+ a,b = np.meshgrid(grid, grid)
+
+ x_offset = torch.FloatTensor(a).view(-1,1)
+ y_offset = torch.FloatTensor(b).view(-1,1)
+
+ x_offset = x_offset.to(device)
+ y_offset = y_offset.to(device)
+
+ x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1,num_anchors).view(-1,2).unsqueeze(0)
+
+ prediction[:,:,:2] += x_y_offset
+ #log space transform height and the width
+ #so that all boxes are on the same scale
+ anchors = torch.FloatTensor(anchors)
+ anchors = anchors.to(device)
+
+ #arrange the probabilities of the classes
+ anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0)
+ prediction[:,:,2:4] = torch.exp(prediction[:,:,2:4])*anchors
+ prediction[:,:,5: 5 + num_classes] = torch.sigmoid((prediction[:,:, 5 : 5 + num_classes]))
+ prediction[:,:,:4] *= stride
+ return prediction
+
+
+def calculate_iou(pred, label):
+ """
+    Calculates the IoUs of a given list of boxes.
+ Used to determine accuracy of given bounding boxes.
+ Also is a key part of the loss function.
+ """
+ px, py, pw, ph = pred[:,0], pred[:,1], pred[:,2], pred[:,3]
+ lx, ly, lw, lh = label[0], label[1], label[2], label[3]
+ box_a = [px-(pw/2), py-(ph/2), px+(pw/2), py+(ph/2)]
+ box_b = [lx-(lw/2), ly-(lh/2), lx+(lw/2), ly+(lh/2)]
+
+ # determine the (x, y) of the corners of intersection area
+ ax = torch.clamp(box_a[0], min=box_b[0])
+ ay = torch.clamp(box_a[1], min=box_b[1])
+ bx = torch.clamp(box_a[2], max=box_b[2])
+ by = torch.clamp(box_a[3], max=box_b[3])
+
+ # compute the area of intersection
+ intersect = torch.abs(torch.clamp((bx - ax), min=0) * torch.clamp((by - ay), min=0))
+
+ # compute the area of both the prediction and ground-truth
+ area_a = torch.abs((box_a[2] - box_a[0]) * (box_a[3] - box_a[1]))
+ area_b = torch.abs((box_b[2] - box_b[0]) * (box_b[3] - box_b[1]))
+
+ # compute the iou
+ iou = intersect / (area_a + area_b - intersect)
+ iou = torch.reshape(iou, (776, 3))
+ return iou
+
+class YOLO_loss(nn.Module):
+ """
+ Given one batch at a time, the loss of the predictions is calculated.
+ The formulas used to calculate loss are from the reference below.
+ REFERENCE: refer to reference 3 in README.
+ """
+ def __init__(self):
+ super(YOLO_loss, self).__init__()
+
+    def forward(self, pred, label):
+ #Constants
+ no_object = 0.5 #Puts less emphasis on loss from boxes with no object
+ #Rearrange predictions to have one box shape on each line
+ boxes = torch.reshape(pred, (776, 3))
+
+ #IoU
+ iou = calculate_iou(pred, label)
+ iou, best_boxes = torch.max(iou, dim=1)
+
+ #Loss set up
+ class_loss = torch.zeros(776)
+ coord_loss = torch.zeros(776)
+ conf_loss = torch.zeros(776)
+
+ #Calculate loss
+ i = 0
+ for idx in best_boxes:
+ box = boxes[i][idx]
+ #coordinate loss
+ xy_loss = (label[0]-box[0])**2 + (label[1]-box[1])**2
+ wh_loss = ((label[0])**(1/2)-(box[0])**(1/2))**2 + ((label[1])**(1/2)-(box[1])**(1/2))**2
+ coord_loss[i] = (xy_loss + wh_loss)
+ #Check if there was a detection
+ if box[4] > 0.8: #There was
+ #classification loss
+ class_loss[i] = (label[5] - box[5])**2 + (label[6] - box[6])**2
+ #confidence loss
+ conf_loss[i] = (label[4] - box[4])**2
+ else: #There wasn't
+ conf_loss[i] = no_object*((label[4] - box[4])**2)
+ i += 1
+
+ #Final count
+ total_loss = 0
+ total_loss += torch.sum(coord_loss)
+ total_loss += torch.sum(class_loss)
+ total_loss += torch.sum(conf_loss)
+
+ return total_loss
+
+def single_iou(pred, label):
+ """
+ Calculates the IoU of a single box
+ """
+ px, py, pw, ph = pred[:,0], pred[:,1], pred[:,2], pred[:,3]
+ lx, ly, lw, lh = label[0], label[1], label[2], label[3]
+ box_a = [px-(pw/2), py-(ph/2), px+(pw/2), py+(ph/2)]
+ box_b = [lx-(lw/2), ly-(lh/2), lx+(lw/2), ly+(lh/2)]
+
+ # determine the (x, y) of the corners of intersection area
+ ax = torch.clamp(box_a[0], min=box_b[0])
+ ay = torch.clamp(box_a[1], min=box_b[1])
+ bx = torch.clamp(box_a[2], max=box_b[2])
+ by = torch.clamp(box_a[3], max=box_b[3])
+
+ # compute the area of intersection
+ intersect = torch.abs(torch.clamp((bx - ax), min=0) * torch.clamp((by - ay), min=0))
+
+ # compute the area of both the prediction and ground-truth
+ area_a = torch.abs((box_a[2] - box_a[0]) * (box_a[3] - box_a[1]))
+ area_b = torch.abs((box_b[2] - box_b[0]) * (box_b[3] - box_b[1]))
+
+ # compute the iou
+ iou = intersect / (area_a + area_b - intersect)
+ return iou
+
+def filter_boxes(pred):
+ """
+ Returns highest confidence box that has detected something
+ """
+ best_box = None
+ highest_conf = 0
+ for i in range(pred.size(0)):
+ box = pred[i,:]
+ if box[4] >= highest_conf:
+ best_box = box
+ highest_conf = box[4]
+ return best_box
\ No newline at end of file
diff --git a/recognition/predict.py b/recognition/predict.py
new file mode 100644
index 000000000..16053b3bd
--- /dev/null
+++ b/recognition/predict.py
@@ -0,0 +1,66 @@
+from modules import YOLO, filter_boxes, device
+from dataset import ISICDataset
+import matplotlib.pyplot as plt
+import matplotlib.patches as patches
+import cv2
+import torch
+import numpy as np
+
+def plot_boxes(image_tensor, bounding_box):
+ """
+ Plots the bounding box and label on an image.
+
+ Args:
+ image_tensor (torch.Tensor): The image tensor of shape (3, 416, 416).
+ bounding_box (torch.Tensor): The bounding box tensor with format [center_x, center_y, width, height, score, label1, label2].
+ """
+ image_tensor = image_tensor.cpu().permute(1, 2, 0) # Reshape for plotting
+ fig, ax = plt.subplots()
+ ax.imshow(image_tensor)
+
+ if bounding_box is not None:
+ box_coords = bounding_box.cpu()
+ x, y, w, h = box_coords[0] - box_coords[2] / 2, box_coords[1] - box_coords[3] / 2, box_coords[2], box_coords[3]
+ rect = patches.Rectangle((x, y), w, h, linewidth=1, edgecolor='r', facecolor='none')
+
+ # Determine label based on probabilities
+ label = "melanoma" if box_coords[5] > box_coords[6] else "seborrheic keratosis"
+
+ # Add rectangle patch and label text
+ ax.add_patch(rect)
+ plt.text(x, y, label, bbox=dict(facecolor='red', alpha=0.5), color='white')
+
+ plt.axis("off")
+ plt.show()
+
+def predict(image_path, model):
+ """
+ Predicts the bounding box and class label for an image using the model.
+
+ Args:
+ image_path (str): Path to the input image.
+ model (YOLO): Trained YOLO model.
+ """
+ # Load and preprocess the image
+ image = cv2.imread(image_path)
+ image = cv2.resize(image, (416, 416))
+ image = torch.from_numpy(image.transpose((2, 0, 1))).float().div(255).unsqueeze(0).to(device)
+
+ # Model prediction
+ predictions = model(image)
+ best_box = filter_boxes(predictions[0])
+
+ # Display the image with the predicted bounding box
+ plot_boxes(image.squeeze(0), best_box)
+
+# Load model and weights
+model = YOLO(num_classes=2)
+checkpoint_path = "/content/drive/MyDrive/Uni/COMP3710/model.pt"
+checkpoint = torch.load(checkpoint_path, map_location=device)
+model.load_state_dict(checkpoint['model_state_dict'])
+model.to(device)
+model.eval()
+
+# Run prediction on an image
+image_path = "/path/to/your/image.jpg" # Specify the image path here
+predict(image_path, model)
diff --git a/recognition/train.py b/recognition/train.py
new file mode 100644
index 000000000..fc681c4d3
--- /dev/null
+++ b/recognition/train.py
@@ -0,0 +1,114 @@
+import torch
+import torch.nn as nn
+from torch.utils.data import DataLoader
+import time
+
+from dataset import *
+from modules import *
+
+
+# Device configuration
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+if torch.cuda.is_available():
+ print("cuda")
+if not torch.cuda.is_available():
+ print("cpu")
+
+#hyperparameters
+epochs = 10
+learning_rate=0.001
+image_size = 416
+batch_size = 10
+
+#Train data - change directories as needed
+mask_dir = '/Users/mariam/Downloads/COMP3710_YOLO/ISIC-2017_Training_Part1_GroundTruth/'
+image_dir = '/Users/mariam/Downloads/COMP3710_YOLO/ISIC-2017_Training_Data/'
+labels = '/Users/mariam/Downloads/COMP3710_YOLO/ISIC-2017_Training_Part3_GroundTruth.csv'
+train_dataset = ISICDataset(image_dir, mask_dir, labels, image_size)
+train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
+
+#Model
+model = YOLO(2)
+model.to(device)
+checkpoint_path = "model.pt"
+
+#optimizer and loss
+optimizer = torch.optim.Adam(model.parameters(), learning_rate)
+criterion = YOLO_loss()
+
+#Learning rate schedule; Adam already adapts per-parameter step sizes, but OneCycleLR shapes the overall rate over training
+total_step = len(train_dataloader)
+scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer,max_lr=learning_rate,
+ steps_per_epoch=total_step, epochs=epochs)
+
+#Train
+model.train()
+start = time.time()
+for epoch in range(epochs):
+ for i, (images, labels) in enumerate(train_dataloader):
+ images = images.to(device)
+ labels = labels.to(device)
+
+ #Forward pass
+ outputs = model(images)
+ total_loss = 0
+ for a in range(batch_size):
+ loss = criterion(outputs[a], labels[a])
+ total_loss += loss
+
+ #Backwards and optimize
+ optimizer.zero_grad()
+ total_loss.requires_grad = True
+ total_loss.backward()
+ optimizer.step()
+
+ if (i+1) % 50 == 0:
+ print("Epoch [{}/{}], Step[{},{}] Loss: {:.5f}".format(epoch+1, epochs, i+1, total_step, total_loss.item()))
+ torch.save({
+ 'epoch': epoch,
+ 'model_state_dict': model.state_dict(),
+ 'optimizer_state_dict': optimizer.state_dict(),
+ 'loss': total_loss,
+ }, checkpoint_path)
+
+ scheduler.step()
+end = time.time()
+elapsed = end - start
+print("Training took {} secs or {} mins.".format(elapsed, elapsed/60))
+
+#Test data
+mask_dir = '/Users/mariam/Downloads/COMP3710_YOLO/ISIC-2017_Test_v2_Part1_GroundTruth/'
+image_dir = '/Users/mariam/Downloads/COMP3710_YOLO/ISIC-2017_Test_v2_Data/'
+labels = '/Users/mariam/Downloads/COMP3710_YOLO/ISIC-2017_Test_v2_Part3_GroundTruth.csv'
+test_dataset = ISICDataset(image_dir, mask_dir, labels, 416)
+test_dataloader = DataLoader(test_dataset, batch_size, shuffle=True)
+
+#Test
+model.eval()
+torch.set_grad_enabled(True)
+start = time.time()
+total = 0
+total_step = len(test_dataloader)
+
+for i, (images, labels) in enumerate(test_dataloader):
+ images = images.to(device)
+ labels = labels.to(device)
+ outputs = model(images)
+
+ #Calculate IoU
+ for a in range(batch_size):
+ best_box = filter_boxes(outputs[a])
+ if best_box is not None:
+ best_box = torch.reshape(best_box, (1, 7))
+ iou = single_iou(best_box, labels[a,:])
+ total += iou[0]
+
+ #Keep track of average
+ average = total/(i+1)
+
+ if (i+1) % 50 == 0:
+ print("Step[{},{}] IoU average: {:.5f}".format(i+1, total_step, average))
+
+end = time.time()
+elapsed = end - start
+print("Testing took {} secs or {} mins.".format(elapsed, elapsed/60))
\ No newline at end of file
From 3480acdcd864ce80fc11fd93812257b24353130b Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Mon, 11 Nov 2024 11:42:23 +1000
Subject: [PATCH 03/28] Add files via upload
---
recognition/s4612960_YOLO/dataset.py | 76 +++++++++
recognition/s4612960_YOLO/modules.py | 237 +++++++++++++++++++++++++++
recognition/s4612960_YOLO/predict.py | 66 ++++++++
recognition/s4612960_YOLO/train.py | 114 +++++++++++++
4 files changed, 493 insertions(+)
create mode 100644 recognition/s4612960_YOLO/dataset.py
create mode 100644 recognition/s4612960_YOLO/modules.py
create mode 100644 recognition/s4612960_YOLO/predict.py
create mode 100644 recognition/s4612960_YOLO/train.py
diff --git a/recognition/s4612960_YOLO/dataset.py b/recognition/s4612960_YOLO/dataset.py
new file mode 100644
index 000000000..73ad0c7a9
--- /dev/null
+++ b/recognition/s4612960_YOLO/dataset.py
@@ -0,0 +1,76 @@
+import torch
+from torch.utils.data import Dataset
+import pandas as pd
+import os
+import cv2
+import numpy as np
+
+class ISICDataset(Dataset):
+ """Custom Dataset class for YOLO model with ISIC data."""
+
+ def __init__(self, image_dir, mask_dir, labels_path, image_size):
+ self.image_size = image_size
+ self.image_dir = image_dir
+ self.mask_dir = mask_dir
+ self.labels = pd.read_csv(labels_path)
+
+ # Load all image file names in the directory
+ self.image_files = [f for f in os.listdir(image_dir) if f.endswith('.jpg')]
+ self.samples = [self._process_sample(i) for i in range(len(self.image_files))]
+
+ def __len__(self):
+ return len(self.image_files)
+
+ def __getitem__(self, idx):
+ return self.samples[idx]
+
+ def _process_sample(self, idx):
+ """Helper function to process and return a single sample (image and target vector)."""
+ # Load image and mask
+ image = self._load_image(idx)
+ mask = self._load_mask(idx)
+
+ # Resize image and mask to the target size
+ image = cv2.resize(image, (self.image_size, self.image_size)).astype(np.float32) / 255.0
+ mask = cv2.resize(mask, (self.image_size, self.image_size))
+
+ # Obtain bounding box coordinates from the mask
+ x, y, w, h = self._extract_bounding_box(mask)
+
+ # Retrieve label probabilities
+ label1, label2 = self.labels.iloc[idx, 1:3]
+ total_prob = label1 + label2
+
+ # Create target vector
+ target_vector = np.array(
+ [x + w / 2, y + h / 2, w, h, total_prob, label1, label2],
+ dtype=np.float32
+ )
+
+ # Convert image to tensor format (C, H, W)
+ image_tensor = torch.tensor(image.transpose(2, 0, 1), dtype=torch.float32)
+ target_tensor = torch.tensor(target_vector, dtype=torch.float32)
+
+ return image_tensor, target_tensor
+
+ def _load_image(self, idx):
+ """Loads an image given an index."""
+ img_name = os.path.join(self.image_dir, self.image_files[idx])
+ return cv2.imread(img_name)
+
+ def _load_mask(self, idx):
+ """Loads the mask corresponding to the image at the given index."""
+ mask_name = os.path.join(
+ self.mask_dir, self.image_files[idx].replace('.jpg', '_segmentation.png')
+ )
+ return cv2.imread(mask_name, cv2.IMREAD_GRAYSCALE)
+
+ def _extract_bounding_box(self, mask):
+ """Extracts the bounding box from the mask image."""
+ _, thresh = cv2.threshold(mask, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+ contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+ if contours:
+ x, y, w, h = cv2.boundingRect(contours[0])
+ return x, y, w, h
+ return 0, 0, 0, 0 # Return zero box if no contours are found
diff --git a/recognition/s4612960_YOLO/modules.py b/recognition/s4612960_YOLO/modules.py
new file mode 100644
index 000000000..3ea4365ac
--- /dev/null
+++ b/recognition/s4612960_YOLO/modules.py
@@ -0,0 +1,237 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+
+# Device configuration
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+if torch.cuda.is_available():
+ print("cuda")
+if not torch.cuda.is_available():
+ print("cpu")
+
+class YOLO(nn.Module):
+
+ #REFERENCE: yolov3-tiny.cfg from https://github.com/pjreddie/darknet/blob/master/cfg
+ #Used as basis for what layers were needed
+ def __init__(self, num_classes):
+ super(YOLO, self).__init__()
+ self.num_classes = num_classes
+ layers = []
+ filters = [16,32,64,128,256,512]
+ in_channels = 3
+        #Convolution layers and max pooling
+ for i in filters:
+ layers.append(nn.Conv2d(in_channels, i, kernel_size=3, stride=1, padding=1, bias=False))
+ in_channels = i
+ layers.append(nn.BatchNorm2d(i))
+            layers.append(nn.LeakyReLU(0.1, True)) #inplace=True; set to False if in-place ops cause autograd errors
+            layers.append(nn.MaxPool2d(kernel_size=2, stride=2)) #Halves the spatial resolution after each block
+ layers.append(nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1, bias=False))
+ layers.append(nn.BatchNorm2d(1024))
+ layers.append(nn.LeakyReLU(0.1, True))
+
+ layers.append(nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=1, bias=False))
+ layers.append(nn.BatchNorm2d(256))
+ layers.append(nn.LeakyReLU(0.1, True))
+
+ layers.append(nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1, bias=False))
+ layers.append(nn.BatchNorm2d(512))
+ layers.append(nn.LeakyReLU(0.1, True))
+
+ layers.append(nn.Conv2d(512, 255, kernel_size=1, stride=1, padding=1, bias=True))
+ self.conv_start = nn.Sequential(*layers)
+
+ #Detection layer - given anchors
+        self.anchor1 = [(81,82), (135,169), (344,319)] #Coarse-scale anchors from yolov3-tiny; may need retuning for this dataset
+
+ #Route layer could go here
+ self.conv_mid = nn.Sequential(
+ nn.Conv2d(255, 128, kernel_size=1, stride=1, padding=1, bias=False),
+ nn.BatchNorm2d(128),
+ nn.LeakyReLU(0.1, True),
+ nn.Upsample(scale_factor=2, mode="bilinear"))
+ #Another route layer maybe
+ self.conv_end = nn.Sequential(
+ nn.Conv2d(128,256,kernel_size=3,stride=1,padding=1,bias=False),
+ nn.BatchNorm2d(256),
+ nn.LeakyReLU(0.1, True),
+ nn.Conv2d(256, 255, kernel_size=1, stride=1, padding=1, bias=True))
+
+ #Another detection layer
+ self.anchor2 = [(10,14), (23,27), (37,58)]
+
+ def forward(self, x):
+ out = self.conv_start(x)
+ out = out.data
+ a = self.predict_transform(out, 416, self.anchor1, self.num_classes)
+ out = self.conv_mid(out)
+ out = self.conv_end(out)
+ out = out.data
+ b = self.predict_transform(out, 416, self.anchor2, self.num_classes)
+ return torch.cat((a, b), 1)
+
+ def predict_transform(self, prediction, inp_dim, anchors, num_classes):
+ """
+ Decodes the output from the convolution layers and arranges the information into a usable format.
+ The below reference was used for a base for this function.
+ REFERENCE: refer to reference 2 in README.
+ """
+ batch_size = prediction.size(0)
+ stride = inp_dim // prediction.size(2)
+ grid_size = inp_dim // stride
+ bbox_attrs = 5 + num_classes
+ num_anchors = len(anchors)
+
+ #Rearranges the feature map to (batch_size, number of boxes, box_attributes)
+ prediction = prediction.view(batch_size, bbox_attrs*num_anchors, grid_size*grid_size)
+ prediction = prediction.transpose(1,2).contiguous()
+ prediction = prediction.view(batch_size, grid_size*grid_size*num_anchors, bbox_attrs)
+ anchors = [(a[0]/stride, a[1]/stride) for a in anchors]
+ #Get the centre_X, centre_Y and object confidence between 1 and 0
+ prediction[:,:,0] = torch.sigmoid(prediction[:,:,0])
+ prediction[:,:,1] = torch.sigmoid(prediction[:,:,1])
+ prediction[:,:,4] = torch.sigmoid(prediction[:,:,4])
+ #Add the center offsets
+ grid = np.arange(grid_size)
+ a,b = np.meshgrid(grid, grid)
+
+ x_offset = torch.FloatTensor(a).view(-1,1)
+ y_offset = torch.FloatTensor(b).view(-1,1)
+
+ x_offset = x_offset.to(device)
+ y_offset = y_offset.to(device)
+
+ x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1,num_anchors).view(-1,2).unsqueeze(0)
+
+ prediction[:,:,:2] += x_y_offset
+ #log space transform height and the width
+ #so that all boxes are on the same scale
+ anchors = torch.FloatTensor(anchors)
+ anchors = anchors.to(device)
+
+ #arrange the probabilities of the classes
+ anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0)
+ prediction[:,:,2:4] = torch.exp(prediction[:,:,2:4])*anchors
+ prediction[:,:,5: 5 + num_classes] = torch.sigmoid((prediction[:,:, 5 : 5 + num_classes]))
+ prediction[:,:,:4] *= stride
+ return prediction
+
+
+def calculate_iou(pred, label):
+ """
+    Calculates the IoUs of a given list of boxes.
+ Used to determine accuracy of given bounding boxes.
+ Also is a key part of the loss function.
+ """
+ px, py, pw, ph = pred[:,0], pred[:,1], pred[:,2], pred[:,3]
+ lx, ly, lw, lh = label[0], label[1], label[2], label[3]
+ box_a = [px-(pw/2), py-(ph/2), px+(pw/2), py+(ph/2)]
+ box_b = [lx-(lw/2), ly-(lh/2), lx+(lw/2), ly+(lh/2)]
+
+ # determine the (x, y) of the corners of intersection area
+ ax = torch.clamp(box_a[0], min=box_b[0])
+ ay = torch.clamp(box_a[1], min=box_b[1])
+ bx = torch.clamp(box_a[2], max=box_b[2])
+ by = torch.clamp(box_a[3], max=box_b[3])
+
+ # compute the area of intersection
+ intersect = torch.abs(torch.clamp((bx - ax), min=0) * torch.clamp((by - ay), min=0))
+
+ # compute the area of both the prediction and ground-truth
+ area_a = torch.abs((box_a[2] - box_a[0]) * (box_a[3] - box_a[1]))
+ area_b = torch.abs((box_b[2] - box_b[0]) * (box_b[3] - box_b[1]))
+
+ # compute the iou
+ iou = intersect / (area_a + area_b - intersect)
+ iou = torch.reshape(iou, (776, 3))
+ return iou
+
+class YOLO_loss(nn.Module):
+ """
+ Given one batch at a time, the loss of the predictions is calculated.
+ The formulas used to calculate loss are from the reference below.
+ REFERENCE: refer to reference 3 in README.
+ """
+ def __init__(self):
+ super(YOLO_loss, self).__init__()
+
+    def forward(self, pred, label):
+ #Constants
+ no_object = 0.5 #Puts less emphasis on loss from boxes with no object
+ #Rearrange predictions to have one box shape on each line
+ boxes = torch.reshape(pred, (776, 3))
+
+ #IoU
+ iou = calculate_iou(pred, label)
+ iou, best_boxes = torch.max(iou, dim=1)
+
+ #Loss set up
+ class_loss = torch.zeros(776)
+ coord_loss = torch.zeros(776)
+ conf_loss = torch.zeros(776)
+
+ #Calculate loss
+ i = 0
+ for idx in best_boxes:
+ box = boxes[i][idx]
+ #coordinate loss
+ xy_loss = (label[0]-box[0])**2 + (label[1]-box[1])**2
+ wh_loss = ((label[0])**(1/2)-(box[0])**(1/2))**2 + ((label[1])**(1/2)-(box[1])**(1/2))**2
+ coord_loss[i] = (xy_loss + wh_loss)
+ #Check if there was a detection
+ if box[4] > 0.8: #There was
+ #classification loss
+ class_loss[i] = (label[5] - box[5])**2 + (label[6] - box[6])**2
+ #confidence loss
+ conf_loss[i] = (label[4] - box[4])**2
+ else: #There wasn't
+ conf_loss[i] = no_object*((label[4] - box[4])**2)
+ i += 1
+
+ #Final count
+ total_loss = 0
+ total_loss += torch.sum(coord_loss)
+ total_loss += torch.sum(class_loss)
+ total_loss += torch.sum(conf_loss)
+
+ return total_loss
+
+def single_iou(pred, label):
+ """
+ Calculates the IoU of a single box
+ """
+ px, py, pw, ph = pred[:,0], pred[:,1], pred[:,2], pred[:,3]
+ lx, ly, lw, lh = label[0], label[1], label[2], label[3]
+ box_a = [px-(pw/2), py-(ph/2), px+(pw/2), py+(ph/2)]
+ box_b = [lx-(lw/2), ly-(lh/2), lx+(lw/2), ly+(lh/2)]
+
+ # determine the (x, y) of the corners of intersection area
+ ax = torch.clamp(box_a[0], min=box_b[0])
+ ay = torch.clamp(box_a[1], min=box_b[1])
+ bx = torch.clamp(box_a[2], max=box_b[2])
+ by = torch.clamp(box_a[3], max=box_b[3])
+
+ # compute the area of intersection
+ intersect = torch.abs(torch.clamp((bx - ax), min=0) * torch.clamp((by - ay), min=0))
+
+ # compute the area of both the prediction and ground-truth
+ area_a = torch.abs((box_a[2] - box_a[0]) * (box_a[3] - box_a[1]))
+ area_b = torch.abs((box_b[2] - box_b[0]) * (box_b[3] - box_b[1]))
+
+ # compute the iou
+ iou = intersect / (area_a + area_b - intersect)
+ return iou
+
+def filter_boxes(pred):
+ """
+ Returns highest confidence box that has detected something
+ """
+ best_box = None
+ highest_conf = 0
+ for i in range(pred.size(0)):
+ box = pred[i,:]
+ if box[4] >= highest_conf:
+ best_box = box
+ highest_conf = box[4]
+ return best_box
\ No newline at end of file
diff --git a/recognition/s4612960_YOLO/predict.py b/recognition/s4612960_YOLO/predict.py
new file mode 100644
index 000000000..16053b3bd
--- /dev/null
+++ b/recognition/s4612960_YOLO/predict.py
@@ -0,0 +1,66 @@
+from modules import YOLO, filter_boxes, device
+from dataset import ISICDataset
+import matplotlib.pyplot as plt
+import matplotlib.patches as patches
+import cv2
+import torch
+import numpy as np
+
+def plot_boxes(image_tensor, bounding_box):
+ """
+ Plots the bounding box and label on an image.
+
+ Args:
+ image_tensor (torch.Tensor): The image tensor of shape (3, 416, 416).
+ bounding_box (torch.Tensor): The bounding box tensor with format [center_x, center_y, width, height, score, label1, label2].
+ """
+ image_tensor = image_tensor.cpu().permute(1, 2, 0) # Reshape for plotting
+ fig, ax = plt.subplots()
+ ax.imshow(image_tensor)
+
+ if bounding_box is not None:
+ box_coords = bounding_box.cpu()
+ x, y, w, h = box_coords[0] - box_coords[2] / 2, box_coords[1] - box_coords[3] / 2, box_coords[2], box_coords[3]
+ rect = patches.Rectangle((x, y), w, h, linewidth=1, edgecolor='r', facecolor='none')
+
+ # Determine label based on probabilities
+ label = "melanoma" if box_coords[5] > box_coords[6] else "seborrheic keratosis"
+
+ # Add rectangle patch and label text
+ ax.add_patch(rect)
+ plt.text(x, y, label, bbox=dict(facecolor='red', alpha=0.5), color='white')
+
+ plt.axis("off")
+ plt.show()
+
+def predict(image_path, model):
+ """
+ Predicts the bounding box and class label for an image using the model.
+
+ Args:
+ image_path (str): Path to the input image.
+ model (YOLO): Trained YOLO model.
+ """
+ # Load and preprocess the image
+ image = cv2.imread(image_path)
+ image = cv2.resize(image, (416, 416))
+ image = torch.from_numpy(image.transpose((2, 0, 1))).float().div(255).unsqueeze(0).to(device)
+
+ # Model prediction
+ predictions = model(image)
+ best_box = filter_boxes(predictions[0])
+
+ # Display the image with the predicted bounding box
+ plot_boxes(image.squeeze(0), best_box)
+
+# Load model and weights
+model = YOLO(num_classes=2)
+checkpoint_path = "/content/drive/MyDrive/Uni/COMP3710/model.pt"
+checkpoint = torch.load(checkpoint_path, map_location=device)
+model.load_state_dict(checkpoint['model_state_dict'])
+model.to(device)
+model.eval()
+
+# Run prediction on an image
+image_path = "/path/to/your/image.jpg" # Specify the image path here
+predict(image_path, model)
diff --git a/recognition/s4612960_YOLO/train.py b/recognition/s4612960_YOLO/train.py
new file mode 100644
index 000000000..fc681c4d3
--- /dev/null
+++ b/recognition/s4612960_YOLO/train.py
@@ -0,0 +1,114 @@
+import torch
+import torch.nn as nn
+from torch.utils.data import DataLoader
+import time
+
+from dataset import *
+from modules import *
+
+
+# Device configuration
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+if torch.cuda.is_available():
+ print("cuda")
+if not torch.cuda.is_available():
+ print("cpu")
+
+#hyperparameters
+epochs = 10
+learning_rate=0.001
+image_size = 416
+batch_size = 10
+
+#Train data - change directories as needed
+mask_dir = '/Users/mariam/Downloads/COMP3710_YOLO/ISIC-2017_Training_Part1_GroundTruth/'
+image_dir = '/Users/mariam/Downloads/COMP3710_YOLO/ISIC-2017_Training_Data/'
+labels = '/Users/mariam/Downloads/COMP3710_YOLO/ISIC-2017_Training_Part3_GroundTruth.csv'
+train_dataset = ISICDataset(image_dir, mask_dir, labels, image_size)
+train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
+
+#Model
+model = YOLO(2)
+model.to(device)
+checkpoint_path = "model.pt"
+
+#optimizer and loss
+optimizer = torch.optim.Adam(model.parameters(), learning_rate)
+criterion = YOLO_loss()
+
+#Learning rate schedule; Adam already adapts per-parameter step sizes, but OneCycleLR shapes the overall rate over training
+total_step = len(train_dataloader)
+scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer,max_lr=learning_rate,
+ steps_per_epoch=total_step, epochs=epochs)
+
+#Train
+model.train()
+start = time.time()
+for epoch in range(epochs):
+ for i, (images, labels) in enumerate(train_dataloader):
+ images = images.to(device)
+ labels = labels.to(device)
+
+ #Forward pass
+ outputs = model(images)
+ total_loss = 0
+ for a in range(batch_size):
+ loss = criterion(outputs[a], labels[a])
+ total_loss += loss
+
+ #Backwards and optimize
+ optimizer.zero_grad()
+ total_loss.requires_grad = True
+ total_loss.backward()
+ optimizer.step()
+
+ if (i+1) % 50 == 0:
+ print("Epoch [{}/{}], Step[{},{}] Loss: {:.5f}".format(epoch+1, epochs, i+1, total_step, total_loss.item()))
+ torch.save({
+ 'epoch': epoch,
+ 'model_state_dict': model.state_dict(),
+ 'optimizer_state_dict': optimizer.state_dict(),
+ 'loss': total_loss,
+ }, checkpoint_path)
+
+ scheduler.step()
+end = time.time()
+elapsed = end - start
+print("Training took {} secs or {} mins.".format(elapsed, elapsed/60))
+
+#Test data
+mask_dir = '/Users/mariam/Downloads/COMP3710_YOLO/ISIC-2017_Test_v2_Part1_GroundTruth/'
+image_dir = '/Users/mariam/Downloads/COMP3710_YOLO/ISIC-2017_Test_v2_Data/'
+labels = '/Users/mariam/Downloads/COMP3710_YOLO/ISIC-2017_Test_v2_Part3_GroundTruth.csv'
+test_dataset = ISICDataset(image_dir, mask_dir, labels, 416)
+test_dataloader = DataLoader(test_dataset, batch_size, shuffle=True)
+
+#Test
+model.eval()
+torch.set_grad_enabled(True)
+start = time.time()
+total = 0
+total_step = len(test_dataloader)
+
+for i, (images, labels) in enumerate(test_dataloader):
+ images = images.to(device)
+ labels = labels.to(device)
+ outputs = model(images)
+
+ #Calculate IoU
+ for a in range(batch_size):
+ best_box = filter_boxes(outputs[a])
+ if best_box is not None:
+ best_box = torch.reshape(best_box, (1, 7))
+ iou = single_iou(best_box, labels[a,:])
+ total += iou[0]
+
+ #Keep track of average
+ average = total/(i+1)
+
+ if (i+1) % 50 == 0:
+ print("Step[{},{}] IoU average: {:.5f}".format(i+1, total_step, average))
+
+end = time.time()
+elapsed = end - start
+print("Testing took {} secs or {} mins.".format(elapsed, elapsed/60))
\ No newline at end of file
From 7847e1528cf5a1a3a160fdc972cc637b93deaddc Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Mon, 11 Nov 2024 11:43:22 +1000
Subject: [PATCH 04/28] Delete recognition/dataset.py
Adjusting folder structure, deleting this file
---
recognition/dataset.py | 76 ------------------------------------------
1 file changed, 76 deletions(-)
delete mode 100644 recognition/dataset.py
diff --git a/recognition/dataset.py b/recognition/dataset.py
deleted file mode 100644
index 73ad0c7a9..000000000
--- a/recognition/dataset.py
+++ /dev/null
@@ -1,76 +0,0 @@
-import torch
-from torch.utils.data import Dataset
-import pandas as pd
-import os
-import cv2
-import numpy as np
-
-class ISICDataset(Dataset):
- """Custom Dataset class for YOLO model with ISIC data."""
-
- def __init__(self, image_dir, mask_dir, labels_path, image_size):
- self.image_size = image_size
- self.image_dir = image_dir
- self.mask_dir = mask_dir
- self.labels = pd.read_csv(labels_path)
-
- # Load all image file names in the directory
- self.image_files = [f for f in os.listdir(image_dir) if f.endswith('.jpg')]
- self.samples = [self._process_sample(i) for i in range(len(self.image_files))]
-
- def __len__(self):
- return len(self.image_files)
-
- def __getitem__(self, idx):
- return self.samples[idx]
-
- def _process_sample(self, idx):
- """Helper function to process and return a single sample (image and target vector)."""
- # Load image and mask
- image = self._load_image(idx)
- mask = self._load_mask(idx)
-
- # Resize image and mask to the target size
- image = cv2.resize(image, (self.image_size, self.image_size)).astype(np.float32) / 255.0
- mask = cv2.resize(mask, (self.image_size, self.image_size))
-
- # Obtain bounding box coordinates from the mask
- x, y, w, h = self._extract_bounding_box(mask)
-
- # Retrieve label probabilities
- label1, label2 = self.labels.iloc[idx, 1:3]
- total_prob = label1 + label2
-
- # Create target vector
- target_vector = np.array(
- [x + w / 2, y + h / 2, w, h, total_prob, label1, label2],
- dtype=np.float32
- )
-
- # Convert image to tensor format (C, H, W)
- image_tensor = torch.tensor(image.transpose(2, 0, 1), dtype=torch.float32)
- target_tensor = torch.tensor(target_vector, dtype=torch.float32)
-
- return image_tensor, target_tensor
-
- def _load_image(self, idx):
- """Loads an image given an index."""
- img_name = os.path.join(self.image_dir, self.image_files[idx])
- return cv2.imread(img_name)
-
- def _load_mask(self, idx):
- """Loads the mask corresponding to the image at the given index."""
- mask_name = os.path.join(
- self.mask_dir, self.image_files[idx].replace('.jpg', '_segmentation.png')
- )
- return cv2.imread(mask_name, cv2.IMREAD_GRAYSCALE)
-
- def _extract_bounding_box(self, mask):
- """Extracts the bounding box from the mask image."""
- _, thresh = cv2.threshold(mask, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
- contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-
- if contours:
- x, y, w, h = cv2.boundingRect(contours[0])
- return x, y, w, h
- return 0, 0, 0, 0 # Return zero box if no contours are found
From ed81b195ea9549b4d3fba9c680113b54e47a6f81 Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Mon, 11 Nov 2024 11:43:45 +1000
Subject: [PATCH 05/28] Delete recognition/modules.py
Adjusting folder structure, deleting this file
---
recognition/modules.py | 237 -----------------------------------------
1 file changed, 237 deletions(-)
delete mode 100644 recognition/modules.py
diff --git a/recognition/modules.py b/recognition/modules.py
deleted file mode 100644
index 3ea4365ac..000000000
--- a/recognition/modules.py
+++ /dev/null
@@ -1,237 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-import numpy as np
-
-# Device configuration
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-if torch.cuda.is_available():
- print("cuda")
-if not torch.cuda.is_available():
- print("cpu")
-
-class YOLO(nn.Module):
-
- #REFERENCE: yolov3-tiny.cfg from https://github.com/pjreddie/darknet/blob/master/cfg
- #Used as basis for what layers were needed
- def __init__(self, num_classes):
- super(YOLO, self).__init__()
- self.num_classes = num_classes
- layers = []
- filters = [16,32,64,128,256,512]
- in_channels = 3
- #Convulution layers and maxpooling
- for i in filters:
- layers.append(nn.Conv2d(in_channels, i, kernel_size=3, stride=1, padding=1, bias=False))
- in_channels = i
- layers.append(nn.BatchNorm2d(i))
- layers.append(nn.LeakyReLU(0.1, True)) #might be false
- layers.append(nn.MaxPool2d(kernel_size=2, stride=2)) #Hopefully works
- layers.append(nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1, bias=False))
- layers.append(nn.BatchNorm2d(1024))
- layers.append(nn.LeakyReLU(0.1, True))
-
- layers.append(nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=1, bias=False))
- layers.append(nn.BatchNorm2d(256))
- layers.append(nn.LeakyReLU(0.1, True))
-
- layers.append(nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1, bias=False))
- layers.append(nn.BatchNorm2d(512))
- layers.append(nn.LeakyReLU(0.1, True))
-
- layers.append(nn.Conv2d(512, 255, kernel_size=1, stride=1, padding=1, bias=True))
- self.conv_start = nn.Sequential(*layers)
-
- #Detection layer - given anchors
- self.anchor1 = [(81,82), (135,169), (344,319)] #Anchors depends on image?
-
- #Route layer could go here
- self.conv_mid = nn.Sequential(
- nn.Conv2d(255, 128, kernel_size=1, stride=1, padding=1, bias=False),
- nn.BatchNorm2d(128),
- nn.LeakyReLU(0.1, True),
- nn.Upsample(scale_factor=2, mode="bilinear"))
- #Another route layer maybe
- self.conv_end = nn.Sequential(
- nn.Conv2d(128,256,kernel_size=3,stride=1,padding=1,bias=False),
- nn.BatchNorm2d(256),
- nn.LeakyReLU(0.1, True),
- nn.Conv2d(256, 255, kernel_size=1, stride=1, padding=1, bias=True))
-
- #Another detection layer
- self.anchor2 = [(10,14), (23,27), (37,58)]
-
- def forward(self, x):
- out = self.conv_start(x)
- out = out.data
- a = self.predict_transform(out, 416, self.anchor1, self.num_classes)
- out = self.conv_mid(out)
- out = self.conv_end(out)
- out = out.data
- b = self.predict_transform(out, 416, self.anchor2, self.num_classes)
- return torch.cat((a, b), 1)
-
- def predict_transform(self, prediction, inp_dim, anchors, num_classes):
- """
- Decodes the output from the convolution layers and arranges the information into a usable format.
- The below reference was used for a base for this function.
- REFERENCE: refer to reference 2 in README.
- """
- batch_size = prediction.size(0)
- stride = inp_dim // prediction.size(2)
- grid_size = inp_dim // stride
- bbox_attrs = 5 + num_classes
- num_anchors = len(anchors)
-
- #Rearranges the feature map to (batch_size, number of boxes, box_attributes)
- prediction = prediction.view(batch_size, bbox_attrs*num_anchors, grid_size*grid_size)
- prediction = prediction.transpose(1,2).contiguous()
- prediction = prediction.view(batch_size, grid_size*grid_size*num_anchors, bbox_attrs)
- anchors = [(a[0]/stride, a[1]/stride) for a in anchors]
- #Get the centre_X, centre_Y and object confidence between 1 and 0
- prediction[:,:,0] = torch.sigmoid(prediction[:,:,0])
- prediction[:,:,1] = torch.sigmoid(prediction[:,:,1])
- prediction[:,:,4] = torch.sigmoid(prediction[:,:,4])
- #Add the center offsets
- grid = np.arange(grid_size)
- a,b = np.meshgrid(grid, grid)
-
- x_offset = torch.FloatTensor(a).view(-1,1)
- y_offset = torch.FloatTensor(b).view(-1,1)
-
- x_offset = x_offset.to(device)
- y_offset = y_offset.to(device)
-
- x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1,num_anchors).view(-1,2).unsqueeze(0)
-
- prediction[:,:,:2] += x_y_offset
- #log space transform height and the width
- #so that all boxes are on the same scale
- anchors = torch.FloatTensor(anchors)
- anchors = anchors.to(device)
-
- #arrange the probabilities of the classes
- anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0)
- prediction[:,:,2:4] = torch.exp(prediction[:,:,2:4])*anchors
- prediction[:,:,5: 5 + num_classes] = torch.sigmoid((prediction[:,:, 5 : 5 + num_classes]))
- prediction[:,:,:4] *= stride
- return prediction
-
-
-def calculate_iou(pred, label):
- """
- Caculates the IoUs of a given list of boxes.
- Used to determine accuracy of given bounding boxes.
- Also is a key part of the loss function.
- """
- px, py, pw, ph = pred[:,0], pred[:,1], pred[:,2], pred[:,3]
- lx, ly, lw, lh = label[0], label[1], label[2], label[3]
- box_a = [px-(pw/2), py-(ph/2), px+(pw/2), py+(ph/2)]
- box_b = [lx-(lw/2), ly-(lh/2), lx+(lw/2), ly+(lh/2)]
-
- # determine the (x, y) of the corners of intersection area
- ax = torch.clamp(box_a[0], min=box_b[0])
- ay = torch.clamp(box_a[1], min=box_b[1])
- bx = torch.clamp(box_a[2], max=box_b[2])
- by = torch.clamp(box_a[3], max=box_b[3])
-
- # compute the area of intersection
- intersect = torch.abs(torch.clamp((bx - ax), min=0) * torch.clamp((by - ay), min=0))
-
- # compute the area of both the prediction and ground-truth
- area_a = torch.abs((box_a[2] - box_a[0]) * (box_a[3] - box_a[1]))
- area_b = torch.abs((box_b[2] - box_b[0]) * (box_b[3] - box_b[1]))
-
- # compute the iou
- iou = intersect / (area_a + area_b - intersect)
- iou = torch.reshape(iou, (776, 3))
- return iou
-
-class YOLO_loss(nn.Module):
- """
- Given one batch at a time, the loss of the predictions is calculated.
- The formulas used to calculate loss are from the reference below.
- REFERENCE: refer to reference 3 in README.
- """
- def __init__(self):
- super(YOLO_loss, self).__init__()
-
- def forward(pred, label):
- #Constants
- no_object = 0.5 #Puts less emphasis on loss from boxes with no object
- #Rearrange predictions to have one box shape on each line
- boxes = torch.reshape(pred, (776, 3))
-
- #IoU
- iou = calculate_iou(pred, label)
- iou, best_boxes = torch.max(iou, dim=1)
-
- #Loss set up
- class_loss = torch.zeros(776)
- coord_loss = torch.zeros(776)
- conf_loss = torch.zeros(776)
-
- #Calculate loss
- i = 0
- for idx in best_boxes:
- box = boxes[i][idx]
- #coordinate loss
- xy_loss = (label[0]-box[0])**2 + (label[1]-box[1])**2
- wh_loss = ((label[0])**(1/2)-(box[0])**(1/2))**2 + ((label[1])**(1/2)-(box[1])**(1/2))**2
- coord_loss[i] = (xy_loss + wh_loss)
- #Check if there was a detection
- if box[4] > 0.8: #There was
- #classification loss
- class_loss[i] = (label[5] - box[5])**2 + (label[6] - box[6])**2
- #confidence loss
- conf_loss[i] = (label[4] - box[4])**2
- else: #There wasn't
- conf_loss[i] = no_object*((label[4] - box[4])**2)
- i += 1
-
- #Final count
- total_loss = 0
- total_loss += torch.sum(coord_loss)
- total_loss += torch.sum(class_loss)
- total_loss += torch.sum(conf_loss)
-
- return total_loss
-
-def single_iou(pred, label):
- """
- Calculates the IoU of a single box
- """
- px, py, pw, ph = pred[:,0], pred[:,1], pred[:,2], pred[:,3]
- lx, ly, lw, lh = label[0], label[1], label[2], label[3]
- box_a = [px-(pw/2), py-(ph/2), px+(pw/2), py+(ph/2)]
- box_b = [lx-(lw/2), ly-(lh/2), lx+(lw/2), ly+(lh/2)]
-
- # determine the (x, y) of the corners of intersection area
- ax = torch.clamp(box_a[0], min=box_b[0])
- ay = torch.clamp(box_a[1], min=box_b[1])
- bx = torch.clamp(box_a[2], max=box_b[2])
- by = torch.clamp(box_a[3], max=box_b[3])
-
- # compute the area of intersection
- intersect = torch.abs(torch.clamp((bx - ax), min=0) * torch.clamp((by - ay), min=0))
-
- # compute the area of both the prediction and ground-truth
- area_a = torch.abs((box_a[2] - box_a[0]) * (box_a[3] - box_a[1]))
- area_b = torch.abs((box_b[2] - box_b[0]) * (box_b[3] - box_b[1]))
-
- # compute the iou
- iou = intersect / (area_a + area_b - intersect)
- return iou
-
-def filter_boxes(pred):
- """
- Returns highest confidence box that has detected something
- """
- best_box = None
- highest_conf = 0
- for i in range(pred.size(0)):
- box = pred[i,:]
- if box[4] >= highest_conf:
- best_box = box
- highest_conf = box[4]
- return best_box
\ No newline at end of file
From 71c72ebf8bf87e73677fae90ae3ce25c3c7e3c01 Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Mon, 11 Nov 2024 11:44:04 +1000
Subject: [PATCH 06/28] Delete recognition/predict.py
---
recognition/predict.py | 66 ------------------------------------------
1 file changed, 66 deletions(-)
delete mode 100644 recognition/predict.py
diff --git a/recognition/predict.py b/recognition/predict.py
deleted file mode 100644
index 16053b3bd..000000000
--- a/recognition/predict.py
+++ /dev/null
@@ -1,66 +0,0 @@
-from modules import YOLO, filter_boxes
-from dataset import ISICDataset
-import matplotlib.pyplot as plt
-import matplotlib.patches as patches
-import cv2
-import torch
-import numpy as np
-
-def plot_boxes(image_tensor, bounding_box):
- """
- Plots the bounding box and label on an image.
-
- Args:
- image_tensor (torch.Tensor): The image tensor of shape (3, 416, 416).
- bounding_box (torch.Tensor): The bounding box tensor with format [center_x, center_y, width, height, score, label1, label2].
- """
- image_tensor = image_tensor.cpu().permute(1, 2, 0) # Reshape for plotting
- fig, ax = plt.subplots()
- ax.imshow(image_tensor)
-
- if bounding_box is not None:
- box_coords = bounding_box.cpu()
- x, y, w, h = box_coords[0] - box_coords[2] / 2, box_coords[1] - box_coords[3] / 2, box_coords[2], box_coords[3]
- rect = patches.Rectangle((x, y), w, h, linewidth=1, edgecolor='r', facecolor='none')
-
- # Determine label based on probabilities
- label = "melanoma" if box_coords[5] > box_coords[6] else "seborrheic keratosis"
-
- # Add rectangle patch and label text
- ax.add_patch(rect)
- plt.text(x, y, label, bbox=dict(facecolor='red', alpha=0.5), color='white')
-
- plt.axis("off")
- plt.show()
-
-def predict(image_path, model):
- """
- Predicts the bounding box and class label for an image using the model.
-
- Args:
- image_path (str): Path to the input image.
- model (YOLO): Trained YOLO model.
- """
- # Load and preprocess the image
- image = cv2.imread(image_path)
- image = cv2.resize(image, (416, 416))
- image = torch.from_numpy(image.transpose((2, 0, 1))).float().div(255).unsqueeze(0).to(device)
-
- # Model prediction
- predictions = model(image)
- best_box = filter_boxes(predictions[0])
-
- # Display the image with the predicted bounding box
- plot_boxes(image.squeeze(0), best_box)
-
-# Load model and weights
-model = YOLO(num_classes=2)
-checkpoint_path = "/content/drive/MyDrive/Uni/COMP3710/model.pt"
-checkpoint = torch.load(checkpoint_path, map_location=device)
-model.load_state_dict(checkpoint['model_state_dict'])
-model.to(device)
-model.eval()
-
-# Run prediction on an image
-image_path = "/path/to/your/image.jpg" # Specify the image path here
-predict(image_path, model)
From 12c0b1661b4a4dd32b4d41e1fc19f157936a742e Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Mon, 11 Nov 2024 11:44:23 +1000
Subject: [PATCH 07/28] Delete recognition/train.py
---
recognition/train.py | 114 -------------------------------------------
1 file changed, 114 deletions(-)
delete mode 100644 recognition/train.py
diff --git a/recognition/train.py b/recognition/train.py
deleted file mode 100644
index fc681c4d3..000000000
--- a/recognition/train.py
+++ /dev/null
@@ -1,114 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-import time
-
-from dataset import *
-from modules import *
-
-
-# Device configuration
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-if torch.cuda.is_available():
- print("cuda")
-if not torch.cuda.is_available():
- print("cpu")
-
-#hyperparameters
-epochs = 10
-learning_rate=0.001
-image_size = 416
-batch_size = 10
-
-#Train data - change directories as needed
-mask_dir = '/Users/mariam/Downloads/COMP3710_YOLO/ISIC-2017_Training_Part1_GroundTruth/'
-image_dir = '/Users/mariam/Downloads/COMP3710_YOLO/ISIC-2017_Training_Data/'
-labels = '/Users/mariam/Downloads/COMP3710_YOLO/ISIC-2017_Training_Part3_GroundTruth.csv'
-train_dataset = ISICDataset(image_dir, mask_dir, labels, image_size)
-train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
-
-#Model
-model = YOLO(2)
-model.to(device)
-checkpoint_path = "model.pt"
-
-#optimizer and loss
-optimizer = torch.optim.Adam(model.parameters(), learning_rate)
-criterion = YOLO_loss()
-
-#learning rate schedule, using because SGD is dumb, adam has its own learning rate
-total_step = len(train_dataloader)
-scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer,max_lr=learning_rate,
- steps_per_epoch=total_step, epochs=epochs)
-
-#Train
-model.train()
-start = time.time()
-for epoch in range(epochs):
- for i, (images, labels) in enumerate(train_dataloader):
- images = images.to(device)
- labels = labels.to(device)
-
- #Forward pass
- outputs = model(images)
- total_loss = 0
- for a in range(batch_size):
- loss = criterion(outputs[a], labels[a])
- total_loss += loss
-
- #Backwards and optimize
- optimizer.zero_grad()
- total_loss.requires_grad = True
- total_loss.backward()
- optimizer.step()
-
- if (i+1) % 50 == 0:
- print("Epoch [{}/{}], Step[{},{}] Loss: {:.5f}".format(epoch+1, epochs, i+1, total_step, total_loss.item()))
- torch.save({
- 'epoch': epoch,
- 'model_state_dict': model.state_dict(),
- 'optimizer_state_dict': optimizer.state_dict(),
- 'loss': total_loss,
- }, checkpoint_path)
-
- scheduler.step()
-end = time.time()
-elapsed = end - start
-print("Training took {} secs or {} mins.".format(elapsed, elapsed/60))
-
-#Test data
-mask_dir = '/Users/mariam/Downloads/COMP3710_YOLO/ISIC-2017_Test_v2_Part1_GroundTruth/'
-image_dir = '/Users/mariam/Downloads/COMP3710_YOLO/ISIC-2017_Test_v2_Data/'
-labels = '/Users/mariam/Downloads/COMP3710_YOLO/ISIC-2017_Test_v2_Part3_GroundTruth.csv'
-test_dataset = ISICDataset(image_dir, mask_dir, labels, 416)
-test_dataloader = DataLoader(test_dataset, batch_size, shuffle=True)
-
-#Test
-model.eval()
-torch.set_grad_enabled(True)
-start = time.time()
-total = 0
-total_step = len(test_dataloader)
-
-for i, (images, labels) in enumerate(test_dataloader):
- images = images.to(device)
- labels = labels.to(device)
- outputs = model(images)
-
- #Calculate IoU
- for a in range(batch_size):
- best_box = filter_boxes(outputs[a])
- if best_box is not None:
- best_box = torch.reshape(best_box, (1, 7))
- iou = single_iou(best_box, labels[a,:])
- total += iou[0]
-
- #Keep track of average
- average = total/(i+1)
-
- if (i+1) % 50 == 0:
- print("Step[{},{}] IoU average: {:.5f}".format(i+1, total_step, average))
-
-end = time.time()
-elapsed = end - start
-print("Testing took {} secs or {} mins.".format(elapsed, elapsed/60))
\ No newline at end of file
From 4b55702c79177684ae80fd5d0077bbaf625e5baa Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Mon, 11 Nov 2024 11:44:58 +1000
Subject: [PATCH 08/28] Create README.md
Creating new ReadMe in correct directory
---
recognition/s4612960_YOLO/README.md | 1 +
1 file changed, 1 insertion(+)
create mode 100644 recognition/s4612960_YOLO/README.md
diff --git a/recognition/s4612960_YOLO/README.md b/recognition/s4612960_YOLO/README.md
new file mode 100644
index 000000000..345e6aef7
--- /dev/null
+++ b/recognition/s4612960_YOLO/README.md
@@ -0,0 +1 @@
+Test
From 9068706661a4cc887aef7dbafc5e3a3a9789a08f Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Mon, 11 Nov 2024 11:46:16 +1000
Subject: [PATCH 09/28] Update README.md
Moving ReadMe work into this new file with the correct directory and branch as specified
---
recognition/s4612960_YOLO/README.md | 125 +++++++++++++++++++++++++++-
1 file changed, 124 insertions(+), 1 deletion(-)
diff --git a/recognition/s4612960_YOLO/README.md b/recognition/s4612960_YOLO/README.md
index 345e6aef7..079ff60ba 100644
--- a/recognition/s4612960_YOLO/README.md
+++ b/recognition/s4612960_YOLO/README.md
@@ -1 +1,124 @@
-Test
+# Melanoma Detection using YOLO11
+
+## Overview
+
+Melanoma is one of the most aggressive forms of skin cancer, and early detection can significantly increase survival rates. This project uses YOLO11 (You Only Look Once), a state-of-the-art object detection model from Ultralytics, to automatically detect melanoma in dermoscopic images and distinguish it from benign lesions such as nevi and seborrheic keratoses.
+
+
+
+*Figure: Sample output of YOLO11 detecting a lesion in a dermoscopic image*
+
+## How it Works
+
+YOLO11 is a single-stage object detection model that processes the entire image in a single forward pass, predicting bounding boxes and classification scores simultaneously. It divides the input image into a grid, with each grid cell responsible for detecting an object within its bounds. Using anchor boxes, the model generates bounding box coordinates and confidence scores, optimized for melanoma detection by training on a labeled dataset of dermoscopic images. The final model can localize and classify skin lesions as either melanoma or benign in real time.
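+As an illustration of this decoding step, the sketch below converts one grid cell's raw outputs for a single anchor into an absolute box. The shapes, anchor size, and stride are illustrative assumptions only; the repository's `predict_transform` in `modules.py` implements the batched version of the same idea.
+```python
+import torch
+
+def decode_cell(raw, cx, cy, anchor_w, anchor_h, stride):
+    """Decode one anchor's raw outputs (tx, ty, tw, th, conf) for the grid cell
+    at column cx, row cy into an absolute box in input-image pixels."""
+    tx, ty, tw, th, conf = raw
+    bx = (torch.sigmoid(tx) + cx) * stride      # box centre x, offset inside the cell
+    by = (torch.sigmoid(ty) + cy) * stride      # box centre y
+    bw = anchor_w * torch.exp(tw)               # width scaled from the anchor prior
+    bh = anchor_h * torch.exp(th)               # height scaled from the anchor prior
+    return bx, by, bw, bh, torch.sigmoid(conf)  # objectness squashed to [0, 1]
+
+# A 416x416 input with a 13x13 grid has stride 32 (illustrative values)
+box = decode_cell(torch.randn(5), cx=6, cy=4, anchor_w=81, anchor_h=82, stride=32)
+```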
+
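+To make the grid idea concrete, the sketch below decodes one grid cell's raw outputs into a box. It is a schematic, anchor-based (YOLOv3-style) decode with assumed argument names; YOLO11's actual detection head differs in its details.
+
+```python
+import math
+
+def _sigmoid(v):
+    return 1.0 / (1.0 + math.exp(-v))
+
+def decode_cell(tx, ty, tw, th, t_obj, cell_x, cell_y, grid_size, anchor_w, anchor_h):
+    """Decode one grid cell's raw outputs into a normalised box (schematic only)."""
+    cx = (cell_x + _sigmoid(tx)) / grid_size   # sigmoid keeps the centre inside the responsible cell
+    cy = (cell_y + _sigmoid(ty)) / grid_size
+    w = anchor_w * math.exp(tw)                # anchor dimensions are scaled by exp of the raw outputs
+    h = anchor_h * math.exp(th)
+    return cx, cy, w, h, _sigmoid(t_obj)       # last value is the objectness score
+```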
+
+## Dependencies
+
+To run this project, the following dependencies are required:
+
+- **Python**: 3.10
+- **Ultralytics**: 8.3.2 (includes YOLO11)
+- **PyTorch**: 2.4.1+cu121
+- **OpenCV**: 4.5.3
+- **Matplotlib**: 3.4.2
+
+Ensure you install the dependencies via:
+```bash
+pip install ultralytics opencv-python-headless matplotlib
+```
+
+To reproduce the results, a GPU with CUDA support is recommended. The model was trained on an NVIDIA Tesla T4 GPU for optimal performance.
+
+## Dataset Preparation and Pre-Processing
+
+### Dataset
+
+The model was trained on the ISIC (International Skin Imaging Collaboration) dataset, a comprehensive collection of dermoscopic images labeled for melanoma and benign conditions. The dataset was divided as follows:
+
+- **Training Set**: 80% of the data
+- **Validation Set**: 10% of the data
+- **Testing Set**: 10% of the data
+
+This split ensures the model has a sufficient amount of data for learning while keeping a balanced validation and testing set for evaluating performance.
+
+### Pre-Processing
+
+The preprocessing pipeline prepares the melanoma dataset for efficient and consistent model training. First, a metadata CSV file is generated for each dataset split (train, validation, and test). This metadata file serves as an index, listing each image path along with its corresponding class label (nevus, seborrheic keratosis, or melanoma). Labels are mapped to integers, with benign classes (nevus and seborrheic keratosis) labeled as 0 and malignant (melanoma) as 1. This structure allows for efficient data loading and simplifies referencing images during training. See below.
+
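+As an illustration of this metadata step, here is a minimal sketch; the folder layout, CSV columns, and helper names are assumptions based on the description above, not the project's exact script.
+
+```python
+import os
+import pandas as pd
+
+# Assumed layout: <split_dir>/<class_name>/<image>.jpg; benign classes map to 0, melanoma to 1
+LABEL_MAP = {'nevus': 0, 'seborrheic_keratosis': 0, 'melanoma': 1}
+
+def build_metadata_csv(split_dir, out_csv):
+    rows = []
+    for class_name, label in LABEL_MAP.items():
+        class_dir = os.path.join(split_dir, class_name)
+        if not os.path.isdir(class_dir):
+            continue
+        for fname in sorted(os.listdir(class_dir)):
+            if fname.endswith('.jpg'):
+                rows.append({'image_path': os.path.join(class_dir, fname), 'label': label})
+    pd.DataFrame(rows).to_csv(out_csv, index=False)
+
+# Example: build_metadata_csv('train', 'train_metadata.csv')
+```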
+
+
+Each image is then processed by the following steps (a sketch of the pipeline follows this list):
+- **Decoding and resizing**: each JPEG is decoded and resized to a standardized 299x299 pixels, ensuring consistent model input dimensions.
+- **Normalization**: pixel values are scaled to the [0, 1] range for optimized training.
+- **Caching and shuffling**: the dataset is cached to reduce I/O bottlenecks, and the training data is shuffled with a buffer size of 1000 to ensure varied batches.
+- **Batching and prefetching**: images are batched into sets of 64, and prefetch is used to load data in the background, preventing delays and ensuring data availability during model training.
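+
+The steps above read like a tf.data-style input pipeline; the sketch below illustrates them under that assumption (the file list and helper names are placeholders, not the project's code).
+
+```python
+import tensorflow as tf
+
+def load_and_preprocess(image_path, label):
+    image = tf.io.read_file(image_path)
+    image = tf.image.decode_jpeg(image, channels=3)        # decode from JPEG
+    image = tf.image.resize(image, (299, 299))             # standardise the input size
+    image = tf.cast(image, tf.float32) / 255.0             # scale pixel values to [0, 1]
+    return image, label
+
+def build_dataset(image_paths, labels, training=True):
+    ds = tf.data.Dataset.from_tensor_slices((image_paths, labels))
+    ds = ds.map(load_and_preprocess, num_parallel_calls=tf.data.AUTOTUNE)
+    ds = ds.cache()                                         # avoid repeated disk I/O
+    if training:
+        ds = ds.shuffle(buffer_size=1000)                   # varied training batches
+    return ds.batch(64).prefetch(tf.data.AUTOTUNE)          # batch and load data ahead of the model
+```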
+
+For more details on the dataset and augmentation methods, refer to the [ISIC Archive](https://www.isic-archive.com/).
+
+## Training the Model
+
+To train the YOLO11 model, we use transfer learning from a pre-trained checkpoint, fine-tuning it on the melanoma dataset for 50 epochs. The training configuration is specified in the `melanoma.yaml` file, where the dataset paths and class names are defined.
+
+In the training set, these images are associated with label files describing the lesion bounding boxes.
+
+
+### Example Training Command
+
+```python
+from ultralytics import YOLO
+
+# Load a pre-trained YOLO11 model
+model = YOLO('yolo11n.pt')
+
+# Train the model
+model.train(data='melanoma.yaml', epochs=50, imgsz=640)
+```
+
+The model’s performance is evaluated using mean Average Precision (mAP), precision, and recall metrics on the validation set.
+
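+For reference, these metrics can also be pulled programmatically after training; this is a hedged example, and the exact attribute names depend on the installed Ultralytics version.
+
+```python
+from ultralytics import YOLO
+
+model = YOLO('runs/detect/train/weights/best.pt')  # path to trained weights (placeholder)
+metrics = model.val(data='melanoma.yaml')          # evaluate on the validation split
+print(metrics.box.map50)                           # mAP at IoU 0.50
+print(metrics.box.map)                             # mAP averaged over IoU 0.50-0.95
+```
+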
+## Example Inputs and Outputs
+
+### Input
+The dataset used for melanoma detection consists of dermoscopic images from the ISIC archive. The image dataset includes three main types of lesions: nevus, seborrheic keratosis, and melanoma. Each lesion type is stored in separate folders, and each image has an associated label to identify the type of lesion. The dataset follows the structure required for machine learning tasks, ensuring that each image file name is unique and follows a standardized naming convention (e.g., ISIC_0000000.jpg).
+
+
+
+In the provided dataset folder structure, each lesion type is represented by high-resolution .jpg images. Additionally, there are auxiliary files with names ending in _superpixels.png or _perpixels.png, which appear to contain data that may be used for other types of analysis, such as texture segmentation or pixel intensity mapping. However, for the purpose of training a melanoma detection model, only the main dermoscopic images in .jpg format are used.
+
+
+
+
+
+
+### Output
+The model outputs bounding boxes and classification labels.
+
+
+
+
+
+
+
+## Results Visualization
+
+After training, the model can detect melanoma with high accuracy.
+
+
+
+*Figure: Training and validation loss over epochs. This was from an earlier test; 31 epochs were eventually chosen.*
+
+
+## Conclusion
+
+This project demonstrates the power of YOLO11 for real-time melanoma detection in dermoscopic images. With proper training and pre-processing, YOLO11 achieves high accuracy, making it a valuable tool for early skin cancer diagnosis.
+
+## References
+
+- ISIC Archive: [ISIC 2018: Skin Lesion Analysis Towards Melanoma Detection](https://www.isic-archive.com/)
+- Ultralytics YOLO Documentation: [YOLO Docs](https://docs.ultralytics.com/)
+
+---
+
+This README provides comprehensive guidance on setup, training, and usage of YOLO11 for melanoma detection. Adjust paths and parameters as necessary for optimal performance on your dataset.
From 6083cd8c81e8d82e541992d5842979be3b4bcd76 Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Tue, 12 Nov 2024 09:18:58 +1000
Subject: [PATCH 10/28] Adding Results Visualisations to ReadMe.md
As per GITHUB feedback
---
recognition/s4612960_YOLO/README.md | 31 ++++++++++++++++++++++++++---
1 file changed, 28 insertions(+), 3 deletions(-)
diff --git a/recognition/s4612960_YOLO/README.md b/recognition/s4612960_YOLO/README.md
index 079ff60ba..9a413b958 100644
--- a/recognition/s4612960_YOLO/README.md
+++ b/recognition/s4612960_YOLO/README.md
@@ -103,22 +103,47 @@ The model outputs bounding boxes and classification labels.
## Results Visualization
-After training, the model can detect melanoma with high accuracy.
+After training, the model can detect lesions with high accuracy.
*Figure: Training and validation loss over epochs. This was from an earlier test; 31 epochs were eventually chosen.*
+TRAIN BATCH:
+
+
+
+VAL BATCH
+
+
+
+
+
+Normalised Confusion Matrix
+
+
+
+
+
+
+
+
+## Testing
+
+
## Conclusion
This project demonstrates the power of YOLO11 for real-time melanoma detection in dermoscopic images. With proper training and pre-processing, YOLO11 achieves high accuracy, making it a valuable tool for early skin cancer diagnosis.
+## Future Improvements
+
+
+
## References
- ISIC Archive: [ISIC 2018: Skin Lesion Analysis Towards Melanoma Detection](https://www.isic-archive.com/)
- Ultralytics YOLO Documentation: [YOLO Docs](https://docs.ultralytics.com/)
----
-This README provides comprehensive guidance on setup, training, and usage of YOLO11 for melanoma detection. Adjust paths and parameters as necessary for optimal performance on your dataset.
+
From 962f6c197cabcda9a79a7c46694eb6b095a0b577 Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Tue, 12 Nov 2024 09:26:48 +1000
Subject: [PATCH 11/28] Update README to add details on file structure and
labels.md
---
recognition/s4612960_YOLO/README.md | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/recognition/s4612960_YOLO/README.md b/recognition/s4612960_YOLO/README.md
index 9a413b958..b852f2c64 100644
--- a/recognition/s4612960_YOLO/README.md
+++ b/recognition/s4612960_YOLO/README.md
@@ -57,6 +57,16 @@ Batching and Prefetching: Images are batched into sets of 64, and prefetch is us
For more details on the dataset and augmentation methods, refer to the [ISIC Archive](https://www.isic-archive.com/).
+## File Structure
+The file structure should be organised as follows, with the labels folders being generated from the dataset code.
+
+
+
+Label files look as follows, giving the location of the lesion bounding box.
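+
+For reference, each label file contains one line per bounding box in YOLO format: the class index followed by the normalised centre-x, centre-y, width, and height. The values below are illustrative only.
+
+```
+0 0.512437 0.438291 0.310122 0.275406
+```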
+
+
+
+
## Training the Model
To train the YOLO11 model, we use transfer learning from a pre-trained checkpoint, fine-tuning it on the melanoma dataset for 50 epochs. The training configuration is specified in the `melanoma.yaml` file, where the dataset paths and class names are defined.
From f7a9144b3c300372769ac379c24d649bd0edd1f9 Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Tue, 12 Nov 2024 09:34:36 +1000
Subject: [PATCH 12/28] Update README.md with future improvements
---
recognition/s4612960_YOLO/README.md | 25 ++++++++++++++++---------
1 file changed, 16 insertions(+), 9 deletions(-)
diff --git a/recognition/s4612960_YOLO/README.md b/recognition/s4612960_YOLO/README.md
index b852f2c64..b22c3793f 100644
--- a/recognition/s4612960_YOLO/README.md
+++ b/recognition/s4612960_YOLO/README.md
@@ -105,10 +105,14 @@ In the provided dataset folder structure, each lesion type is represented by hig
### Output
The model outputs bounding boxes and classification labels.
-
+TRAIN BATCH:
+
+
+
+VAL BATCH
+
-
## Results Visualization
@@ -120,13 +124,6 @@ After training, the model can detect lesions with high accuracy.
*Figure: Training and validation loss over epochs. This was from an earlier test; 31 epochs were eventually chosen.*
-TRAIN BATCH:
-
-
-
-VAL BATCH
-
-
@@ -140,6 +137,9 @@ Normalised Confusion Matrix
## Testing
+All detections have a minimum Intersection Over Union of 0.8 on the test set. The following are some pictures of lesions identified from the test set.
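+
+Here, IoU is the area of overlap between a predicted box and its ground-truth box divided by the area of their union. A minimal sketch of that computation on (x1, y1, x2, y2) boxes follows; it is illustrative, not the project's evaluation code.
+
+```python
+def iou(box_a, box_b):
+    """Intersection over Union for two boxes given as (x1, y1, x2, y2)."""
+    ix1, iy1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
+    ix2, iy2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
+    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
+    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
+    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
+    return inter / (area_a + area_b - inter)
+```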
+
+
## Conclusion
@@ -148,6 +148,13 @@ This project demonstrates the power of YOLO11 for real-time melanoma detection i
## Future Improvements
+In future, this model may also be used not just in classifying lesions, but in differentiating melanoma and benign lesions. Much training is required, but see the following test output as an example of this in action.
+
+
+
+
+
+
## References
From 2874d836f64ac52b6e76d3bf245e27bbabf018f7 Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Tue, 12 Nov 2024 09:35:51 +1000
Subject: [PATCH 13/28] Update README.md with formatting
---
recognition/s4612960_YOLO/README.md | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/recognition/s4612960_YOLO/README.md b/recognition/s4612960_YOLO/README.md
index b22c3793f..100ce233f 100644
--- a/recognition/s4612960_YOLO/README.md
+++ b/recognition/s4612960_YOLO/README.md
@@ -106,11 +106,13 @@ In the provided dataset folder structure, each lesion type is represented by hig
The model outputs bounding boxes and classification labels.
-TRAIN BATCH:
+From the TRAIN BATCH:
+

-VAL BATCH
+From the VALIDATION BATCH:
+

From 3f39af6cf24feb70b95b736b09404ca920c0e4b4 Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Tue, 12 Nov 2024 10:12:56 +1000
Subject: [PATCH 14/28] Edit to specify YOLO's classification of lesions
---
recognition/s4612960_YOLO/README.md | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/recognition/s4612960_YOLO/README.md b/recognition/s4612960_YOLO/README.md
index 100ce233f..b7294715a 100644
--- a/recognition/s4612960_YOLO/README.md
+++ b/recognition/s4612960_YOLO/README.md
@@ -2,7 +2,7 @@
## Overview
-Melanoma is one of the most aggressive forms of skin cancer, and early detection can significantly increase survival rates. This project leverages the YOLO11 (You Only Look Once) deep learning algorithm by Ultralytics to automatically detect melanoma in dermoscopic images, distinguishing it from other skin conditions like benign lesions and nevus. YOLO11 is a state-of-the-art object detection model.
+Melanoma is one of the most aggressive forms of skin cancer, and early detection can significantly increase survival rates. This project leverages the YOLO11 (You Only Look Once) deep learning algorithm by Ultralytics to automatically detect skin lesions in dermoscopic images, including melanoma, benign lesions, and nevus. YOLO11 is a state-of-the-art object detection model.
@@ -10,8 +10,7 @@ Melanoma is one of the most aggressive forms of skin cancer, and early detection
## How it Works
-YOLO11 is a single-stage object detection model that processes the entire image in a single forward pass, predicting bounding boxes and classification scores simultaneously. It divides the input image into a grid, with each grid cell responsible for detecting an object within its bounds. Using anchor boxes, the model generates bounding box coordinates and confidence scores, optimized for melanoma detection by training on a labeled dataset of dermoscopic images. The final model can localize and classify skin lesions as either melanoma or benign in real time.
-
+YOLO11 is a single-stage object detection model that processes the entire image in a single forward pass, predicting bounding boxes and classification scores simultaneously. It divides the input image into a grid, with each grid cell responsible for detecting an object within its bounds. Using anchor boxes, the model generates bounding box coordinates and confidence scores, optimized for melanoma detection by training on a labeled dataset of dermoscopic images. The final model can localize and classify skin lesions.
## Dependencies
From b159937939feb435cb1e89e4f956f804aa975f08 Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Wed, 13 Nov 2024 12:55:39 +1000
Subject: [PATCH 15/28] Update dataset to create label files.py
---
recognition/s4612960_YOLO/dataset.py | 120 ++++++++++-----------------
1 file changed, 46 insertions(+), 74 deletions(-)
diff --git a/recognition/s4612960_YOLO/dataset.py b/recognition/s4612960_YOLO/dataset.py
index 73ad0c7a9..fa92983a3 100644
--- a/recognition/s4612960_YOLO/dataset.py
+++ b/recognition/s4612960_YOLO/dataset.py
@@ -1,76 +1,48 @@
-import torch
-from torch.utils.data import Dataset
-import pandas as pd
-import os
import cv2
-import numpy as np
-
-class ISICDataset(Dataset):
- """Custom Dataset class for YOLO model with ISIC data."""
-
- def __init__(self, image_dir, mask_dir, labels_path, image_size):
- self.image_size = image_size
- self.image_dir = image_dir
- self.mask_dir = mask_dir
- self.labels = pd.read_csv(labels_path)
-
- # Load all image file names in the directory
- self.image_files = [f for f in os.listdir(image_dir) if f.endswith('.jpg')]
- self.samples = [self._process_sample(i) for i in range(len(self.image_files))]
-
- def __len__(self):
- return len(self.image_files)
-
- def __getitem__(self, idx):
- return self.samples[idx]
-
- def _process_sample(self, idx):
- """Helper function to process and return a single sample (image and target vector)."""
- # Load image and mask
- image = self._load_image(idx)
- mask = self._load_mask(idx)
-
- # Resize image and mask to the target size
- image = cv2.resize(image, (self.image_size, self.image_size)).astype(np.float32) / 255.0
- mask = cv2.resize(mask, (self.image_size, self.image_size))
-
- # Obtain bounding box coordinates from the mask
- x, y, w, h = self._extract_bounding_box(mask)
-
- # Retrieve label probabilities
- label1, label2 = self.labels.iloc[idx, 1:3]
- total_prob = label1 + label2
-
- # Create target vector
- target_vector = np.array(
- [x + w / 2, y + h / 2, w, h, total_prob, label1, label2],
- dtype=np.float32
- )
-
- # Convert image to tensor format (C, H, W)
- image_tensor = torch.tensor(image.transpose(2, 0, 1), dtype=torch.float32)
- target_tensor = torch.tensor(target_vector, dtype=torch.float32)
-
- return image_tensor, target_tensor
-
- def _load_image(self, idx):
- """Loads an image given an index."""
- img_name = os.path.join(self.image_dir, self.image_files[idx])
- return cv2.imread(img_name)
-
- def _load_mask(self, idx):
- """Loads the mask corresponding to the image at the given index."""
- mask_name = os.path.join(
- self.mask_dir, self.image_files[idx].replace('.jpg', '_segmentation.png')
- )
- return cv2.imread(mask_name, cv2.IMREAD_GRAYSCALE)
+import os
- def _extract_bounding_box(self, mask):
- """Extracts the bounding box from the mask image."""
- _, thresh = cv2.threshold(mask, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
- contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-
- if contours:
- x, y, w, h = cv2.boundingRect(contours[0])
- return x, y, w, h
- return 0, 0, 0, 0 # Return zero box if no contours are found
+SRC_DIR = r'/Users/mariam/Downloads/COMP3710_YOLO/ISIC-2017_Validation_Part1_GroundTruth'
+DEST_DIR = r'/Users/mariam/Downloads/COMP3710_YOLO/val/labels'
+
+def create_directory(path):
+ os.makedirs(path, exist_ok=True)
+
+def load_image_as_grayscale(path):
+ grayscale_img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
+ if grayscale_img is None:
+ raise FileNotFoundError(f"Could not locate image: {path}")
+ return grayscale_img
+
+def locate_bounding_box(image):
+ contours, _ = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+ if not contours:
+ return None
+ return cv2.boundingRect(contours[0])
+
+def save_as_yolo_format(filepath, coordinates):
+ with open(filepath, 'w') as file:
+ file.write(f"0 {coordinates[0]:.6f} {coordinates[1]:.6f} {coordinates[2]:.6f} {coordinates[3]:.6f}")
+
+def process_segmentation_files(source, destination):
+ create_directory(destination)
+
+ for file in os.listdir(source):
+ if file.endswith('_segmentation.png'):
+ img_path = os.path.join(source, file)
+ label_file = os.path.join(destination, file.replace("_segmentation.png", ".txt"))
+
+ try:
+ img = load_image_as_grayscale(img_path)
+ bbox = locate_bounding_box(img)
+
+ if bbox:
+ normalized_coords = normalize_coordinates(*bbox, img.shape)
+ save_as_yolo_format(label_file, normalized_coords)
+ print(f"Processed: {file}")
+ else:
+ print(f"No bounding box found in {file}")
+ except Exception as error:
+ print(f"Failed to process {file}: {error}")
+
+if __name__ == "__main__":
+ process_segmentation_files(SRC_DIR, DEST_DIR)
From 361aa3f83e7f1de25a9cb86d1cc2f3cae2ca7403 Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Wed, 13 Nov 2024 12:55:59 +1000
Subject: [PATCH 16/28] Ensure bounding box coordinates are normalised.py
---
recognition/s4612960_YOLO/dataset.py | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/recognition/s4612960_YOLO/dataset.py b/recognition/s4612960_YOLO/dataset.py
index fa92983a3..3f71f27eb 100644
--- a/recognition/s4612960_YOLO/dataset.py
+++ b/recognition/s4612960_YOLO/dataset.py
@@ -23,6 +23,16 @@ def save_as_yolo_format(filepath, coordinates):
with open(filepath, 'w') as file:
file.write(f"0 {coordinates[0]:.6f} {coordinates[1]:.6f} {coordinates[2]:.6f} {coordinates[3]:.6f}")
+
+def normalize_coordinates(x, y, width, height, image_size):
+ img_height, img_width = image_size[:2]
+ center_x = (x + width / 2) / img_width
+ center_y = (y + height / 2) / img_height
+ norm_width = width / img_width
+ norm_height = height / img_height
+ return center_x, center_y, norm_width, norm_height
+
+
def process_segmentation_files(source, destination):
create_directory(destination)
From 313acfd4a571ae4c197404c0d1086ec784777d27 Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Wed, 13 Nov 2024 12:59:40 +1000
Subject: [PATCH 17/28] Conventional comments to all functions on dataset.py
---
recognition/s4612960_YOLO/dataset.py | 34 ++++++++++++++++++++++++++++
1 file changed, 34 insertions(+)
diff --git a/recognition/s4612960_YOLO/dataset.py b/recognition/s4612960_YOLO/dataset.py
index 3f71f27eb..8717dacc2 100644
--- a/recognition/s4612960_YOLO/dataset.py
+++ b/recognition/s4612960_YOLO/dataset.py
@@ -5,26 +5,55 @@
DEST_DIR = r'/Users/mariam/Downloads/COMP3710_YOLO/val/labels'
def create_directory(path):
+ """
+ Creates the specified directory if it does not exist.
+ :param path: Directory path to check/create.
+ """
os.makedirs(path, exist_ok=True)
def load_image_as_grayscale(path):
+ """
+ Loads an image from the specified path in grayscale.
+ :param path: Path to the image file.
+ :return: Grayscale image array.
+ :raises FileNotFoundError: If the image does not exist at the specified path.
+ """
grayscale_img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
if grayscale_img is None:
raise FileNotFoundError(f"Could not locate image: {path}")
return grayscale_img
def locate_bounding_box(image):
+ """
+ Identifies the bounding box of the largest contour in the image.
+ :param image: Input grayscale image.
+ :return: (x, y, w, h) representing the bounding box, or None if no contours are found.
+ """
contours, _ = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
if not contours:
return None
return cv2.boundingRect(contours[0])
def save_as_yolo_format(filepath, coordinates):
+ """
+ Writes normalized bounding box coordinates in YOLO format to a text file.
+ :param filepath: Path for the output text file.
+ :param coordinates: Normalized bounding box (center_x, center_y, norm_width, norm_height).
+ """
with open(filepath, 'w') as file:
file.write(f"0 {coordinates[0]:.6f} {coordinates[1]:.6f} {coordinates[2]:.6f} {coordinates[3]:.6f}")
def normalize_coordinates(x, y, width, height, image_size):
+ """
+ Normalizes bounding box coordinates relative to image dimensions.
+ :param x: X-coordinate of the bounding box's top-left corner.
+ :param y: Y-coordinate of the bounding box's top-left corner.
+ :param width: Width of the bounding box.
+ :param height: Height of the bounding box.
+ :param image_size: (height, width) of the image.
+ :return: (center_x, center_y, norm_width, norm_height) normalized coordinates.
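+    Example (illustrative values): normalize_coordinates(50, 100, 200, 150, (600, 800))
+    returns approximately (0.1875, 0.2917, 0.25, 0.25) for an 800x600 (width x height) image.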
+ """
img_height, img_width = image_size[:2]
center_x = (x + width / 2) / img_width
center_y = (y + height / 2) / img_height
@@ -34,6 +63,11 @@ def normalize_coordinates(x, y, width, height, image_size):
def process_segmentation_files(source, destination):
+ """
+ Converts each segmentation mask in the source directory to YOLO-format labels and saves them.
+ :param source: Path to the directory containing segmentation mask images.
+ :param destination: Directory where YOLO format labels will be saved.
+ """
create_directory(destination)
for file in os.listdir(source):
From c1438cc007e859dfce268c284af51bcf7b0e0654 Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Wed, 13 Nov 2024 13:02:22 +1000
Subject: [PATCH 18/28] Update README.md, mention of Google Colab usage
---
recognition/s4612960_YOLO/README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/recognition/s4612960_YOLO/README.md b/recognition/s4612960_YOLO/README.md
index b7294715a..86b2e1b4f 100644
--- a/recognition/s4612960_YOLO/README.md
+++ b/recognition/s4612960_YOLO/README.md
@@ -27,7 +27,7 @@ Ensure you install the dependencies via:
pip install ultralytics opencv-python-headless matplotlib
```
-To reproduce the results, a GPU with CUDA support is recommended. The model was trained on an NVIDIA Tesla T4 GPU for optimal performance.
+To reproduce the results, a GPU with CUDA support is recommended. The model was trained on an NVIDIA Tesla T4 GPU using Google Colab for optimal performance.
## Dataset Preparation and Pre-Processing
From 3dacbbd78bb24b40fa9bf3bb4a4bed0b2e3f3858 Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Wed, 13 Nov 2024 13:07:48 +1000
Subject: [PATCH 19/28] Specific detail of ISIC image count in README.md
---
recognition/s4612960_YOLO/README.md | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/recognition/s4612960_YOLO/README.md b/recognition/s4612960_YOLO/README.md
index 86b2e1b4f..1591a0b8e 100644
--- a/recognition/s4612960_YOLO/README.md
+++ b/recognition/s4612960_YOLO/README.md
@@ -33,13 +33,13 @@ To reproduce the results, a GPU with CUDA support is recommended. The model was
### Dataset
-The model was trained on the ISIC (International Skin Imaging Collaboration) dataset, a comprehensive collection of dermoscopic images labeled for melanoma and benign conditions. The dataset was divided as follows:
+The model was trained on the ISIC (International Skin Imaging Collaboration) dataset, a comprehensive collection of dermoscopic images labeled for melanoma and benign conditions. The dataset was already divided by ISIC as follows:
-- **Training Set**: 80% of the data
-- **Validation Set**: 10% of the data
-- **Testing Set**: 10% of the data
+- **Training Set**: 2000 images - roughly 72%
+- **Validation Set**: 150 images - roughly 6%
+- **Testing Set**: 600 images - roughly 22%
-This split ensures the model has a sufficient amount of data for learning while keeping a balanced validation and testing set for evaluating performance.
+This split follows the ISIC competition guidelines, and ensures the model has a sufficient amount of data for learning while keeping a validation and testing set for evaluating performance.
### Pre-Processing
From 572d96a8567bfb85f5e85307cba3a3f94fef0dec Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Wed, 13 Nov 2024 13:56:22 +1000
Subject: [PATCH 20/28] Simplified modules.py for YOLO version 8
---
recognition/s4612960_YOLO/modules.py | 237 ++-------------------------
1 file changed, 13 insertions(+), 224 deletions(-)
diff --git a/recognition/s4612960_YOLO/modules.py b/recognition/s4612960_YOLO/modules.py
index 3ea4365ac..1afd092cf 100644
--- a/recognition/s4612960_YOLO/modules.py
+++ b/recognition/s4612960_YOLO/modules.py
@@ -10,228 +10,17 @@
if not torch.cuda.is_available():
print("cpu")
-class YOLO(nn.Module):
-
- #REFERENCE: yolov3-tiny.cfg from https://github.com/pjreddie/darknet/blob/master/cfg
- #Used as basis for what layers were needed
- def __init__(self, num_classes):
- super(YOLO, self).__init__()
- self.num_classes = num_classes
- layers = []
- filters = [16,32,64,128,256,512]
- in_channels = 3
- #Convulution layers and maxpooling
- for i in filters:
- layers.append(nn.Conv2d(in_channels, i, kernel_size=3, stride=1, padding=1, bias=False))
- in_channels = i
- layers.append(nn.BatchNorm2d(i))
- layers.append(nn.LeakyReLU(0.1, True)) #might be false
- layers.append(nn.MaxPool2d(kernel_size=2, stride=2)) #Hopefully works
- layers.append(nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1, bias=False))
- layers.append(nn.BatchNorm2d(1024))
- layers.append(nn.LeakyReLU(0.1, True))
-
- layers.append(nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=1, bias=False))
- layers.append(nn.BatchNorm2d(256))
- layers.append(nn.LeakyReLU(0.1, True))
-
- layers.append(nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1, bias=False))
- layers.append(nn.BatchNorm2d(512))
- layers.append(nn.LeakyReLU(0.1, True))
-
- layers.append(nn.Conv2d(512, 255, kernel_size=1, stride=1, padding=1, bias=True))
- self.conv_start = nn.Sequential(*layers)
-
- #Detection layer - given anchors
- self.anchor1 = [(81,82), (135,169), (344,319)] #Anchors depends on image?
-
- #Route layer could go here
- self.conv_mid = nn.Sequential(
- nn.Conv2d(255, 128, kernel_size=1, stride=1, padding=1, bias=False),
- nn.BatchNorm2d(128),
- nn.LeakyReLU(0.1, True),
- nn.Upsample(scale_factor=2, mode="bilinear"))
- #Another route layer maybe
- self.conv_end = nn.Sequential(
- nn.Conv2d(128,256,kernel_size=3,stride=1,padding=1,bias=False),
- nn.BatchNorm2d(256),
- nn.LeakyReLU(0.1, True),
- nn.Conv2d(256, 255, kernel_size=1, stride=1, padding=1, bias=True))
-
- #Another detection layer
- self.anchor2 = [(10,14), (23,27), (37,58)]
-
- def forward(self, x):
- out = self.conv_start(x)
- out = out.data
- a = self.predict_transform(out, 416, self.anchor1, self.num_classes)
- out = self.conv_mid(out)
- out = self.conv_end(out)
- out = out.data
- b = self.predict_transform(out, 416, self.anchor2, self.num_classes)
- return torch.cat((a, b), 1)
-
- def predict_transform(self, prediction, inp_dim, anchors, num_classes):
- """
- Decodes the output from the convolution layers and arranges the information into a usable format.
- The below reference was used for a base for this function.
- REFERENCE: refer to reference 2 in README.
- """
- batch_size = prediction.size(0)
- stride = inp_dim // prediction.size(2)
- grid_size = inp_dim // stride
- bbox_attrs = 5 + num_classes
- num_anchors = len(anchors)
-
- #Rearranges the feature map to (batch_size, number of boxes, box_attributes)
- prediction = prediction.view(batch_size, bbox_attrs*num_anchors, grid_size*grid_size)
- prediction = prediction.transpose(1,2).contiguous()
- prediction = prediction.view(batch_size, grid_size*grid_size*num_anchors, bbox_attrs)
- anchors = [(a[0]/stride, a[1]/stride) for a in anchors]
- #Get the centre_X, centre_Y and object confidence between 1 and 0
- prediction[:,:,0] = torch.sigmoid(prediction[:,:,0])
- prediction[:,:,1] = torch.sigmoid(prediction[:,:,1])
- prediction[:,:,4] = torch.sigmoid(prediction[:,:,4])
- #Add the center offsets
- grid = np.arange(grid_size)
- a,b = np.meshgrid(grid, grid)
-
- x_offset = torch.FloatTensor(a).view(-1,1)
- y_offset = torch.FloatTensor(b).view(-1,1)
-
- x_offset = x_offset.to(device)
- y_offset = y_offset.to(device)
-
- x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1,num_anchors).view(-1,2).unsqueeze(0)
-
- prediction[:,:,:2] += x_y_offset
- #log space transform height and the width
- #so that all boxes are on the same scale
- anchors = torch.FloatTensor(anchors)
- anchors = anchors.to(device)
-
- #arrange the probabilities of the classes
- anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0)
- prediction[:,:,2:4] = torch.exp(prediction[:,:,2:4])*anchors
- prediction[:,:,5: 5 + num_classes] = torch.sigmoid((prediction[:,:, 5 : 5 + num_classes]))
- prediction[:,:,:4] *= stride
- return prediction
-
-
-def calculate_iou(pred, label):
- """
- Caculates the IoUs of a given list of boxes.
- Used to determine accuracy of given bounding boxes.
- Also is a key part of the loss function.
- """
- px, py, pw, ph = pred[:,0], pred[:,1], pred[:,2], pred[:,3]
- lx, ly, lw, lh = label[0], label[1], label[2], label[3]
- box_a = [px-(pw/2), py-(ph/2), px+(pw/2), py+(ph/2)]
- box_b = [lx-(lw/2), ly-(lh/2), lx+(lw/2), ly+(lh/2)]
-
- # determine the (x, y) of the corners of intersection area
- ax = torch.clamp(box_a[0], min=box_b[0])
- ay = torch.clamp(box_a[1], min=box_b[1])
- bx = torch.clamp(box_a[2], max=box_b[2])
- by = torch.clamp(box_a[3], max=box_b[3])
-
- # compute the area of intersection
- intersect = torch.abs(torch.clamp((bx - ax), min=0) * torch.clamp((by - ay), min=0))
-
- # compute the area of both the prediction and ground-truth
- area_a = torch.abs((box_a[2] - box_a[0]) * (box_a[3] - box_a[1]))
- area_b = torch.abs((box_b[2] - box_b[0]) * (box_b[3] - box_b[1]))
-
- # compute the iou
- iou = intersect / (area_a + area_b - intersect)
- iou = torch.reshape(iou, (776, 3))
- return iou
-
-class YOLO_loss(nn.Module):
- """
- Given one batch at a time, the loss of the predictions is calculated.
- The formulas used to calculate loss are from the reference below.
- REFERENCE: refer to reference 3 in README.
- """
- def __init__(self):
- super(YOLO_loss, self).__init__()
-
- def forward(pred, label):
- #Constants
- no_object = 0.5 #Puts less emphasis on loss from boxes with no object
- #Rearrange predictions to have one box shape on each line
- boxes = torch.reshape(pred, (776, 3))
-
- #IoU
- iou = calculate_iou(pred, label)
- iou, best_boxes = torch.max(iou, dim=1)
-
- #Loss set up
- class_loss = torch.zeros(776)
- coord_loss = torch.zeros(776)
- conf_loss = torch.zeros(776)
-
- #Calculate loss
- i = 0
- for idx in best_boxes:
- box = boxes[i][idx]
- #coordinate loss
- xy_loss = (label[0]-box[0])**2 + (label[1]-box[1])**2
- wh_loss = ((label[0])**(1/2)-(box[0])**(1/2))**2 + ((label[1])**(1/2)-(box[1])**(1/2))**2
- coord_loss[i] = (xy_loss + wh_loss)
- #Check if there was a detection
- if box[4] > 0.8: #There was
- #classification loss
- class_loss[i] = (label[5] - box[5])**2 + (label[6] - box[6])**2
- #confidence loss
- conf_loss[i] = (label[4] - box[4])**2
- else: #There wasn't
- conf_loss[i] = no_object*((label[4] - box[4])**2)
- i += 1
-
- #Final count
- total_loss = 0
- total_loss += torch.sum(coord_loss)
- total_loss += torch.sum(class_loss)
- total_loss += torch.sum(conf_loss)
-
- return total_loss
-
-def single_iou(pred, label):
- """
- Calculates the IoU of a single box
- """
- px, py, pw, ph = pred[:,0], pred[:,1], pred[:,2], pred[:,3]
- lx, ly, lw, lh = label[0], label[1], label[2], label[3]
- box_a = [px-(pw/2), py-(ph/2), px+(pw/2), py+(ph/2)]
- box_b = [lx-(lw/2), ly-(lh/2), lx+(lw/2), ly+(lh/2)]
-
- # determine the (x, y) of the corners of intersection area
- ax = torch.clamp(box_a[0], min=box_b[0])
- ay = torch.clamp(box_a[1], min=box_b[1])
- bx = torch.clamp(box_a[2], max=box_b[2])
- by = torch.clamp(box_a[3], max=box_b[3])
-
- # compute the area of intersection
- intersect = torch.abs(torch.clamp((bx - ax), min=0) * torch.clamp((by - ay), min=0))
-
- # compute the area of both the prediction and ground-truth
- area_a = torch.abs((box_a[2] - box_a[0]) * (box_a[3] - box_a[1]))
- area_b = torch.abs((box_b[2] - box_b[0]) * (box_b[3] - box_b[1]))
-
- # compute the iou
- iou = intersect / (area_a + area_b - intersect)
- return iou
+from ultralytics import YOLO  # assumed import for the wrapper below, unless already imported at the top of the file
+
+class YOLOv8Model:
+    def __init__(self, weights_path='yolov8n.pt', device=None):
+        self.device = device or ('cuda' if torch.cuda.is_available() else 'cpu')
+        self.model = YOLO(weights_path)
+        self.model.to(self.device)
+        print("Model load completed")
+
+ def train(self, **kwargs):
+ self.model.train(**kwargs)
+ print("Finished training")
+
+def load_yolov8_model(weights_path='yolov8n.pt', device=None):
+ return YOLOv8Model(weights_path, device)
-def filter_boxes(pred):
- """
- Returns highest confidence box that has detected something
- """
- best_box = None
- highest_conf = 0
- for i in range(pred.size(0)):
- box = pred[i,:]
- if box[4] >= highest_conf:
- best_box = box
- highest_conf = box[4]
- return best_box
\ No newline at end of file
From 881d82b488fc40d3aff00a8066afe9a10257999e Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Wed, 13 Nov 2024 14:00:49 +1000
Subject: [PATCH 21/28] adjusted train.py parameters
---
recognition/s4612960_YOLO/train.py | 129 ++++++++---------------------
1 file changed, 33 insertions(+), 96 deletions(-)
diff --git a/recognition/s4612960_YOLO/train.py b/recognition/s4612960_YOLO/train.py
index fc681c4d3..ef537564e 100644
--- a/recognition/s4612960_YOLO/train.py
+++ b/recognition/s4612960_YOLO/train.py
@@ -2,7 +2,6 @@
import torch.nn as nn
import torch.nn.functional as F
import time
-
from dataset import *
from modules import *
@@ -13,102 +12,40 @@
print("cuda")
if not torch.cuda.is_available():
print("cpu")
-
-#hyperparameters
-epochs = 10
-learning_rate=0.001
-image_size = 416
-batch_size = 10
-
-#Train data - change directories as needed
-mask_dir = '/Users/mariam/Downloads/COMP3710_YOLO/ISIC-2017_Training_Part1_GroundTruth/'
-image_dir = '/Users/mariam/Downloads/COMP3710_YOLO/ISIC-2017_Training_Data/'
-labels = '/Users/mariam/Downloads/COMP3710_YOLO/ISIC-2017_Training_Part3_GroundTruth.csv'
-train_dataset = ISICDataset(image_dir, mask_dir, labels, image_size)
-train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
-
-#Model
-model = YOLO(2)
-model.to(device)
-checkpoint_path = "model.pt"
-
-#optimizer and loss
-optimizer = torch.optim.Adam(model.parameters(), learning_rate)
-criterion = YOLO_loss()
-
-#learning rate schedule, using because SGD is dumb, adam has its own learning rate
-total_step = len(train_dataloader)
-scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer,max_lr=learning_rate,
- steps_per_epoch=total_step, epochs=epochs)
-
-#Train
-model.train()
-start = time.time()
-for epoch in range(epochs):
- for i, (images, labels) in enumerate(train_dataloader):
- images = images.to(device)
- labels = labels.to(device)
-
- #Forward pass
- outputs = model(images)
- total_loss = 0
- for a in range(batch_size):
- loss = criterion(outputs[a], labels[a])
- total_loss += loss
- #Backwards and optimize
- optimizer.zero_grad()
- total_loss.requires_grad = True
- total_loss.backward()
- optimizer.step()
- if (i+1) % 50 == 0:
- print("Epoch [{}/{}], Step[{},{}] Loss: {:.5f}".format(epoch+1, epochs, i+1, total_step, total_loss.item()))
- torch.save({
- 'epoch': epoch,
- 'model_state_dict': model.state_dict(),
- 'optimizer_state_dict': optimizer.state_dict(),
- 'loss': total_loss,
- }, checkpoint_path)
- scheduler.step()
-end = time.time()
-elapsed = end - start
-print("Training took {} secs or {} mins.".format(elapsed, elapsed/60))
-
-#Test data
-mask_dir = '/Users/mariam/Downloads/COMP3710_YOLO/ISIC-2017_Test_v2_Part1_GroundTruth/'
-image_dir = '/Users/mariam/Downloads/COMP3710_YOLO/ISIC-2017_Test_v2_Data/'
-labels = '/Users/mariam/Downloads/COMP3710_YOLO/ISIC-2017_Test_v2_Part3_GroundTruth.csv'
-test_dataset = ISICDataset(image_dir, mask_dir, labels, 416)
-test_dataloader = DataLoader(test_dataset, batch_size, shuffle=True)
-
-#Test
-model.eval()
-torch.set_grad_enabled(True)
-start = time.time()
-total = 0
-total_step = len(test_dataloader)
-
-for i, (images, labels) in enumerate(test_dataloader):
- images = images.to(device)
- labels = labels.to(device)
- outputs = model(images)
-
- #Calculate IoU
- for a in range(batch_size):
- best_box = filter_boxes(outputs[a])
- if best_box is not None:
- best_box = torch.reshape(best_box, (1, 7))
- iou = single_iou(best_box, labels[a,:])
- total += iou[0]
-
- #Keep track of average
- average = total/(i+1)
-
- if (i+1) % 50 == 0:
- print("Step[{},{}] IoU average: {:.5f}".format(i+1, total_step, average))
+# hyperparameters
+epochs = 10
+image_size = 640
+batch_size = 16
-end = time.time()
-elapsed = end - start
-print("Testing took {} secs or {} mins.".format(elapsed, elapsed/60))
\ No newline at end of file
+#Train data - change directories as needed
+MODEL_WEIGHTS_PATH = r'/content/drive/MyDrive/COMP3710_YOLO/yolov8n.pt'
+YAML_CONFIG_PATH = r'/content/drive/MyDrive/COMP3710_YOLO/yolov8n.yaml'
+OUTPUT_PATH = r'/content/drive/MyDrive/COMP3710_YOLO/results'
+
+def main():
+ # Load the YOLOv8 model with pre-trained weights
+ model = load_yolov8_model(MODEL_WEIGHTS_PATH)
+
+ # Prepare the overrides dictionary for training parameters
+ overrides = {
+ 'data': YAML_CONFIG_PATH,
+ 'epochs': epochs,
+ 'imgsz': image_size,
+ 'batch': batch_size,
+ 'optimizer': 'AdamW',
+ 'lr0': 0.001,
+ 'momentum': 0.9,
+ 'weight_decay': 0.0005,
+ 'project': OUTPUT_PATH,
+ 'save_period': 10
+ }
+
+ # Start training the model
+ print("Starting training...")
+ model.train(**overrides)
+
+if __name__ == "__main__":
+ main()
From 507fa8e1009d7ba079abde7bcc1bb20ee61ca0c2 Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Wed, 13 Nov 2024 14:01:04 +1000
Subject: [PATCH 22/28] Update train.py to 31 Epochs
---
recognition/s4612960_YOLO/train.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/recognition/s4612960_YOLO/train.py b/recognition/s4612960_YOLO/train.py
index ef537564e..f59c55ae7 100644
--- a/recognition/s4612960_YOLO/train.py
+++ b/recognition/s4612960_YOLO/train.py
@@ -16,7 +16,7 @@
# hyperparameters
-epochs = 10
+epochs = 31
image_size = 640
batch_size = 16
From 382a6e9ccfa4fcf35f9a5b634732e15b8a2dc19d Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Wed, 13 Nov 2024 14:01:58 +1000
Subject: [PATCH 23/28] Update train.py
Specifications on directories in Colab
---
recognition/s4612960_YOLO/train.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/recognition/s4612960_YOLO/train.py b/recognition/s4612960_YOLO/train.py
index f59c55ae7..24480aeb9 100644
--- a/recognition/s4612960_YOLO/train.py
+++ b/recognition/s4612960_YOLO/train.py
@@ -20,7 +20,7 @@
image_size = 640
batch_size = 16
-#Train data - change directories as needed
+#Train data - change directories as needed - the following would be used if your Google Drive is linked to Colab
MODEL_WEIGHTS_PATH = r'/content/drive/MyDrive/COMP3710_YOLO/yolov8n.pt'
YAML_CONFIG_PATH = r'/content/drive/MyDrive/COMP3710_YOLO/yolov8n.yaml'
OUTPUT_PATH = r'/content/drive/MyDrive/COMP3710_YOLO/results'
From 3cba2ad5dad9e5e697982719065a37fc34d2dbf1 Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Wed, 13 Nov 2024 14:03:42 +1000
Subject: [PATCH 24/28] Add files via upload
---
recognition/s4612960_YOLO/yolov8n.yaml | 14 ++++++++++++++
1 file changed, 14 insertions(+)
create mode 100644 recognition/s4612960_YOLO/yolov8n.yaml
diff --git a/recognition/s4612960_YOLO/yolov8n.yaml b/recognition/s4612960_YOLO/yolov8n.yaml
new file mode 100644
index 000000000..5dc4929ed
--- /dev/null
+++ b/recognition/s4612960_YOLO/yolov8n.yaml
@@ -0,0 +1,14 @@
+#train: C:/Users/mariam/Downloads/COMP3710_YOLO/train
+#val: C:/Users/mariam/Downloads/COMP3710_YOLO/val
+#test: C:/Users/mariam/Downloads/COMP3710_YOLO/test
+
+
+#nc: 1 # Number of classes (lesion detection)
+#names: ['lesion']
+
+train: /content/drive/MyDrive/COMP3710_YOLO/train
+val: /content/drive/MyDrive/COMP3710_YOLO/val
+test: /content/drive/MyDrive/COMP3710_YOLO/test
+
+nc: 1 # Number of classes (lesion detection)
+names: ['lesion']
From c59893142363452be95a50513fca7943a876ee3a Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Wed, 13 Nov 2024 14:04:30 +1000
Subject: [PATCH 25/28] Update yolov8n.yaml to be consistent with Colab
directories
---
recognition/s4612960_YOLO/yolov8n.yaml | 10 +---------
1 file changed, 1 insertion(+), 9 deletions(-)
diff --git a/recognition/s4612960_YOLO/yolov8n.yaml b/recognition/s4612960_YOLO/yolov8n.yaml
index 5dc4929ed..b8d30c38b 100644
--- a/recognition/s4612960_YOLO/yolov8n.yaml
+++ b/recognition/s4612960_YOLO/yolov8n.yaml
@@ -1,14 +1,6 @@
-#train: C:/Users/mariam/Downloads/COMP3710_YOLO/train
-#val: C:/Users/mariam/Downloads/COMP3710_YOLO/val
-#test: C:/Users/mariam/Downloads/COMP3710_YOLO/test
-
-
-#nc: 1 # Number of classes (lesion detection)
-#names: ['lesion']
-
train: /content/drive/MyDrive/COMP3710_YOLO/train
val: /content/drive/MyDrive/COMP3710_YOLO/val
test: /content/drive/MyDrive/COMP3710_YOLO/test
-nc: 1 # Number of classes (lesion detection)
+nc: 1
names: ['lesion']
From da6667c56c9900c8f1b9d00ab51623c3062170d6 Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Wed, 13 Nov 2024 14:05:03 +1000
Subject: [PATCH 26/28] Update yolov8n.yaml format
---
recognition/s4612960_YOLO/yolov8n.yaml | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/recognition/s4612960_YOLO/yolov8n.yaml b/recognition/s4612960_YOLO/yolov8n.yaml
index b8d30c38b..eca4ff455 100644
--- a/recognition/s4612960_YOLO/yolov8n.yaml
+++ b/recognition/s4612960_YOLO/yolov8n.yaml
@@ -1,6 +1,5 @@
+nc: 1
+names: ['lesion']
train: /content/drive/MyDrive/COMP3710_YOLO/train
val: /content/drive/MyDrive/COMP3710_YOLO/val
test: /content/drive/MyDrive/COMP3710_YOLO/test
-
-nc: 1
-names: ['lesion']
From 051ebb41899d1a154ca13c8e691dfb4c73e2ebd6 Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Wed, 13 Nov 2024 14:14:02 +1000
Subject: [PATCH 27/28] Update predict.py
Print bounding box details while training
Fix model paths to be Colab Consistent
Commented out the code for specific melanoma classification to focus on lesion detection as per scope.
---
recognition/s4612960_YOLO/predict.py | 55 +++++++++++++++++++++-------
1 file changed, 41 insertions(+), 14 deletions(-)
diff --git a/recognition/s4612960_YOLO/predict.py b/recognition/s4612960_YOLO/predict.py
index 16053b3bd..e0e082171 100644
--- a/recognition/s4612960_YOLO/predict.py
+++ b/recognition/s4612960_YOLO/predict.py
@@ -5,15 +5,46 @@
import cv2
import torch
import numpy as np
+from ultralytics import YOLO
-def plot_boxes(image_tensor, bounding_box):
- """
- Plots the bounding box and label on an image.
+MODEL_PATH = '/content/drive/MyDrive/COMP3710_YOLO/results/train4/weights/best.pt'
+TEST_IMAGES_PATH = '/content/drive/MyDrive/COMP3710_YOLO/test/images'
+OUTPUT_DIR = '/content/drive/MyDrive/COMP3710_YOLO/predictions'
+
+# Create output directory if it doesn't exist
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+def run_predictions():
+ # Load the trained YOLOv8 model
+ model = YOLO(MODEL_PATH)
+ print("Model loaded successfully.")
+
+ # Run predictions on the test images
+ print("Running predictions on the test images...")
+ results = model.predict(source=TEST_IMAGES_PATH, imgsz=640, conf=0.5, iou=0.8, save=True, project=OUTPUT_DIR, stream=True)
+
+
+ # Process and print results
+ for result in results:
+ # Display the image path and number of detections
+ print(f"Processed image: {result.path}")
+ print(f"Number of detections: {len(result.boxes)}")
+
+ # Print bounding box details
+ for box in result.boxes.data:
+ x1, y1, x2, y2, conf, cls = box[:6]
+ print(f"Bounding Box - x1: {x1}, y1: {y1}, x2: {x2}, y2: {y2}, Confidence: {conf}, Class: {cls}")
+
+ print(f"Predictions complete! Results are saved in: {OUTPUT_DIR}")
- Args:
- image_tensor (torch.Tensor): The image tensor of shape (3, 416, 416).
- bounding_box (torch.Tensor): The bounding box tensor with format [center_x, center_y, width, height, score, label1, label2].
- """
+# Run the predictions function
+if __name__ == "__main__":
+ run_predictions()
+
+
+
+"""
+def plot_boxes(image_tensor, bounding_box):
image_tensor = image_tensor.cpu().permute(1, 2, 0) # Reshape for plotting
fig, ax = plt.subplots()
ax.imshow(image_tensor)
@@ -34,13 +65,6 @@ def plot_boxes(image_tensor, bounding_box):
plt.show()
def predict(image_path, model):
- """
- Predicts the bounding box and class label for an image using the model.
-
- Args:
- image_path (str): Path to the input image.
- model (YOLO): Trained YOLO model.
- """
# Load and preprocess the image
image = cv2.imread(image_path)
image = cv2.resize(image, (416, 416))
@@ -62,5 +86,8 @@ def predict(image_path, model):
model.eval()
# Run prediction on an image
+
+
image_path = "/path/to/your/image.jpg" # Specify the image path here
predict(image_path, model)
+"""
From 70e16a242854fec1863978d9b2c2c839f85a25da Mon Sep 17 00:00:00 2001
From: Mariam <112161752+mermalade0325@users.noreply.github.com>
Date: Wed, 13 Nov 2024 14:16:03 +1000
Subject: [PATCH 28/28] Output prediction images predict.py
Also output 5 example images from the test set (pngs) with predicted lesion bounding boxes
---
recognition/s4612960_YOLO/predict.py | 41 ++++++++++++++++++++++++++++
1 file changed, 41 insertions(+)
diff --git a/recognition/s4612960_YOLO/predict.py b/recognition/s4612960_YOLO/predict.py
index e0e082171..628d0b38e 100644
--- a/recognition/s4612960_YOLO/predict.py
+++ b/recognition/s4612960_YOLO/predict.py
@@ -43,6 +43,47 @@ def run_predictions():
+# ----------
+
+import matplotlib.pyplot as plt
+import os
+from PIL import Image
+
+# Path to the directory where predictions are saved
+OUTPUT_DIR = '/content/drive/MyDrive/COMP3710_YOLO/predictions/predict5'
+
+# Number of images to display
+num_images = 5
+
+# Get the list of image files in the output directory, excluding files ending with "_superpixels.jpg"
+image_files = [f for f in os.listdir(OUTPUT_DIR) if (f.endswith('.jpg') or f.endswith('.png')) and not f.endswith('_superpixels.jpg')]
+image_files = sorted(image_files)[:num_images] # Select the first 'num_images' files
+
+# Check if there are any images to display
+if not image_files:
+ print("No images found in the output directory.")
+else:
+ # Display the images
+ plt.figure(figsize=(15, 10))
+ for i, image_file in enumerate(image_files):
+ image_path = os.path.join(OUTPUT_DIR, image_file)
+
+ # Open and display each image
+ try:
+ image = Image.open(image_path)
+ plt.subplot(1, num_images, i + 1)
+ plt.imshow(image)
+ plt.axis('off')
+ plt.title(f"Prediction {i + 1}")
+ except Exception as e:
+ print(f"Error loading image {image_file}: {e}")
+
+ plt.tight_layout()
+ plt.show()
+
+
+
+
"""
def plot_boxes(image_tensor, bounding_box):
image_tensor = image_tensor.cpu().permute(1, 2, 0) # Reshape for plotting