Find circles

Finding circles is crucial in my previous project of studying bacterial suspensions confined in droplets, as well as for my current project about condensation. I’ve been using the HoughCircle function in OpenCV to detect circles, but the results are always not very satisfactory. Even more annoying, Matlab has a function imfindcircles, always gives better results than HoughCircle, making a Python user very frustrated. Is there an equivalence in Python that can find circle with comparable performance as imfindcircle in Matlab?

After a brief search, I conclude that such equivalence does not exist. I start to read the documentation of the Matlab function, and notice the paper based on which the Matlab function is built. The paper is Atherton and Kerbyson 1999.

The key idea of this is to use the edge orientation information, besides the edge location information, to calculate the accumulator for the circle detection.

I haven’t understood the full content of the paper yet, but with the key idea, I’m able to build a circle detection algorithm that improves the performance of the HoughCircle method. The new method also has comparable performance than the imfindcircles.

In this notebook, I document the algorithm. Further development is possible.

0 Packages

[1]:

import sys
import os
sys.path.insert(0, os.path.abspath(os.path.join("..", "..", "..", "src")))
from myimagelib import readdata, imfindcircles, bestcolor
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from skimage.io import imread
import matplotlib
matplotlib.rcParams["font.family"] = "STIXGeneral"
matplotlib.rcParams["mathtext.fontset"] = "stix"
matplotlib.rcParams["xtick.direction"] = "in"
matplotlib.rcParams["ytick.direction"] = "in"
plt.rcParams['xtick.major.size'] = 2  # Length of major ticks
plt.rcParams['ytick.major.size'] = 2  # Length of major ticks
plt.rcParams['xtick.minor.size'] = 1  # Length of minor ticks
plt.rcParams['ytick.minor.size'] = 1  # Length of minor ticks
from scipy.ndimage import gaussian_filter
from skimage.feature import peak_local_max
# try to detect cluster
from sklearn.cluster import DBSCAN

1 Step by step

1.1 Preprocessing

[8]:

import requests
from io import BytesIO
from skimage.io import imread
url = r"https://drive.google.com/uc?export=download&id=1LFRt5ozQjJ_WVrWBoPQOFVNl5ZWb8ytP"
response = requests.get(url, stream=True)
# read image
img = imread(BytesIO(response.content))
# convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
enhanced_gray = clahe.apply(gray)
# blur the image
blurred_frame = cv2.GaussianBlur(enhanced_gray, (11, 11), 0)

[12]:

plt.imshow(blurred_frame, cmap='gray')

[12]:

<matplotlib.image.AxesImage at 0x1e683421640>

1.2 Detect edge

1.2.1 Use gradient directly

[9]:

# Compute image gradients (Sobel)
grad_x = cv2.Sobel(blurred_frame, cv2.CV_64F, 1, 0, ksize=3)
grad_y = cv2.Sobel(blurred_frame, cv2.CV_64F, 0, 1, ksize=3)

# smooth the gradient images
grad_x = cv2.GaussianBlur(grad_x, (11, 11), 0)
grad_y = cv2.GaussianBlur(grad_y, (11, 11), 0)

# Compute gradient magnitude and direction
magnitude = np.sqrt(grad_x**2 + grad_y**2)
direction = np.arctan2(grad_y, grad_x) + np.pi

# Normalize magnitude
magnitude /= (magnitude.max() + 1e-5)

[10]:

plt.imshow(magnitude)

[10]:

<matplotlib.image.AxesImage at 0x19f862cc9d0>

[11]:

plt.imshow(direction)

[11]:

<matplotlib.image.AxesImage at 0x19f8389e220>

[12]:

[12]:

<matplotlib.image.AxesImage at 0x19f838ee6d0>

1.3 Construct accumulator

[ ]:

# Thresholding for strong edges using Canny edge
canny_magnitude = cv2.Canny(blurred_frame, 100, 150)
strong_edges = canny_magnitude > 0

[189]:

strong_edges = magnitude > magnitude.mean() + 3 * magnitude.std()

[190]:

plt.imshow(strong_edges, cmap='gray')

[190]:

<matplotlib.image.AxesImage at 0x1a00d2cfe50>

[191]:

# Loop through radius values, updating accumulator
min_radius, max_radius = 50.5, 60

# Create accumulator using vectorized voting
height, width = gray.shape
accumulator = np.zeros_like(gray, dtype=np.float32)

# Get indices of strong edge points
y_idx, x_idx = np.where(strong_edges)
theta = direction[y_idx, x_idx]

# Precompute cosine and sine values for efficiency
cos_theta = np.cos(theta)
sin_theta = np.sin(theta)


for r in np.linspace(min_radius, max_radius, 10):  # Step size of 5 for performance
    x_shift = (r * cos_theta).astype(np.int32)
    y_shift = (r * sin_theta).astype(np.int32)
    xc = x_idx - x_shift
    yc = y_idx - y_shift

    # Validity check
    valid = (xc >= 0) & (xc < width) & (yc >= 0) & (yc < height)
    accumulator[yc[valid], xc[valid]] += magnitude[y_idx[valid], x_idx[valid]]

plt.imshow(accumulator, cmap='hot')

[191]:

<matplotlib.image.AxesImage at 0x1a011e8c100>

[192]:

# Apply Gaussian smoothing
accumulator_smooth = gaussian_filter(accumulator, sigma=(min_radius//10)*2+1)

[193]:

plt.imshow(accumulator_smooth, cmap='hot')

[193]:

<matplotlib.image.AxesImage at 0x19f90eb2970>

1.4 Find peaks in accumulator

[195]:

# Find local maxima
centers = peak_local_max(accumulator_smooth, min_distance=int(min_radius), threshold_abs=0.1)

[196]:

plt.figure()
plt.imshow(gray, cmap='gray')
for (x, y) in centers:
    circ = plt.Circle((y, x), 60, color='r', fill=False)
    plt.gca().add_patch(circ)

1.5 Estimate radii

[197]:

score_thres = magnitude[strong_edges].max()
sensitivity = .88

[198]:

# Estimate radii
radii = []
angles = np.linspace(0, 2*np.pi, 50) # 8 angles
for y, x in centers:
    radius_votes = []
    scores = []
    for r in np.linspace(min_radius, max_radius, 100):
        dx, dy = (r * np.cos(angles)).astype(int), (r * np.sin(angles)).astype(int)
        if (0 <= x + dx).all() and (x + dx < width).all() \
            and (0 <= y + dy).all and (y + dy < height).all(): # validity check
            score = magnitude[y + dy, x + dx].mean()
            if score > (1-sensitivity) * score_thres: # if shifted point is strong edge
                radius_votes.append(r)
                scores.append(score)
    if scores:
        best_radius = radius_votes[np.argmax(scores)]
        radii.append(best_radius)
    else:
        radii.append(0)

[199]:

# Visualize results
fig, ax = plt.subplots(dpi=300)
ax.imshow(gray, cmap='gray')
for (y, x), r in zip(centers, radii):
    circle = plt.Circle((x, y), r, color='r', fill=False, linewidth=1)
    ax.add_patch(circle)
plt.show()

2 Combine the steps

Let’s now combine all the 5 steps in a function. First, let’s plan the parameters.

A very important parameter is the initial smoothing size, which I believe should be considered as preprocessing. However, to simplify the use of the function, it is desired that we minimize the number of lines. So, I’m going to include the smoothing to the function, and provide an optional argument for the smoothing window size.

For the edge detection, let’s fix the gradient kernel to 3, i.e. do not allow user to adjust it.

We need to determine a threshold for strong edge. Only the strong edges will be used to construct the accumulator. Later in step 5, the strong edges will also be used to determine a threshold for edge brightness. Here, tentatively, we fix the threshold for strong edge as the mean of the magnitude map.

To construct the accumulator, min_radius and max_radius are needed. They are given in a 2-array.

The accumulator is smoothed with a Gaussian filter, whose size is determined as (min_radius//10)*2+1.

peak_local_max has two optional arguments: min_distance and threshold_abs. We determine the min_distance as the min_radius, as the actual minimum distance used is 2*min_dist+1. We also fix threshold_abs at 0.1, as the result is not very sensitive to it.

Lastly, we choose to implement a sensitivity parameter, which determines how bright of a pixel that would consider it as an edge during the radius voting process. The sensitivity takes values in (0, 1], and higher value leads to more detected circle.

The results will be given as a DataFrame of x,y,r. r=0 entries will be trimmed.

[ ]:

def imfindcircles_new(img, radius, edge_width=10, smooth_window=11, nIter=1):
    """
    Find circles in images. The algorithm is based on Hough Transform and the idea of `Atherton and Kerbyson 1999 <https://www.sciencedirect.com/science/article/pii/S0262885698001607>`_, using the edge orientation information to improve the performance on noisy images. See `step-by-step phase code find circles <../tests/find_circles.html>`_ for more details about this method.

    :param img: Input image. Note that it has to be grayscale and 8-bit, otherwise the function will raise an error.
    :type img: numpy array
    :param radius: Radius of circles to detect. A list of 2 values, [minimum, maximum].
    :type radius: list
    :param edge_width: An estimated with of the edge in pixels. Default is 10.
    :type edge_width: int
    :param smooth_window: Size of the Gaussian window for smoothing the image / gradient field / accumulator. Default is 11.
    :type smooth_window: int
    :param nIter: Number of iterations for multi-stage detection. Default is 1. When nIter > 1, the function will run the detection multiple times with different radius ranges. This is useful when the circles have different sizes.
    :type nIter: int
    :return: DataFrame with columns [x, y, r] for the centers and radii of detected circles.
    :rtype: pandas.DataFrame

    >>> from myimagelib import imfindcircles
    >>> circles = imfindcircles(img, [50, 120])
    >>> circles = imfindcircles(img, [50, 120], edge_width=5)
    >>> circles = imfindcircles(img, [50, 120], edge_width=5, smooth_window=5)
    >>> circles = imfindcircles(img, [50, 120], edge_width=5, smooth_window=5, nIter=4)

    .. rubric:: Edit

    * Feb 27, 2025 -- Initial commit.
    * Feb 28, 2025 -- (i) Add `smooth_window` argument, include the preprocessing step in this function to simplify the code on the user side; (ii) determine step size adaptively.
    * Mar 04, 2025 -- Smooth the score before locating the peak.
    * Mar 05, 2025 -- (i) Implement multi-stage detection. Add `nIter` argument to determine the number of iterations. (ii) Use threshold of magnitude (magnitude > mean + 2*std) to determine the strong edges.
    """
    assert(img.ndim == 2), "Input image must be grayscale"
    assert(img.dtype == np.uint8), "Input image must be 8-bit"

    # Loop through radius values, updating accumulator
    min_radius, max_radius = radius
    # estimate smooth sigma for score
    sigma = edge_width / 100 * (max_radius - min_radius) / 3
    # make sure the window size is odd
    s = smooth_window // 2 * 2 + 1

    # preprocess the image
    # clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(5, 5))
    # img = clahe.apply(img)
    img = cv2.GaussianBlur(img, (s, s), 0)

    # Compute image gradients (Sobel)
    grad_x = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=3)
    grad_y = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=3)

    # smooth the gradient images
    grad_x = cv2.GaussianBlur(grad_x, (s, s), 0)
    grad_y = cv2.GaussianBlur(grad_y, (s, s), 0)

    # Compute gradient magnitude and direction
    magnitude = np.sqrt(grad_x**2 + grad_y**2)
    direction = np.arctan2(grad_y, grad_x) + np.pi

    # Thresholding for strong edges
    # strong_edges = cv2.Canny(img, 100, 150) > 0
    strong_edges = magnitude > magnitude.mean() + 2 * magnitude.std()

    def imfindcircles_one(magnitude, direction, strong_edges, min_radius, max_radius, sigma):
        # Create accumulator using vectorized voting
        height, width = magnitude.shape
        accumulator = np.zeros_like(img, dtype=np.float32)
        # Get indices of strong edge points
        y_idx, x_idx = np.where(strong_edges)
        theta = direction[y_idx, x_idx]
        # Precompute cosine and sine values for efficiency
        cos_theta = np.cos(theta)
        sin_theta = np.sin(theta)

        for r in np.linspace(min_radius, max_radius, 10):  # Step size of 5 for performance
            x_shift = (r * cos_theta).astype(np.int32)
            y_shift = (r * sin_theta).astype(np.int32)
            xc = x_idx - x_shift
            yc = y_idx - y_shift
            # Validity check
            valid = (xc >= 0) & (xc < width) & (yc >= 0) & (yc < height)
            accumulator[yc[valid], xc[valid]] += magnitude[y_idx[valid], x_idx[valid]]

        # Apply Gaussian smoothing to accumulator
        accumulator_smooth = gaussian_filter(accumulator, sigma=(min_radius//10)*2+1)
        # Find local maxima
        centers = peak_local_max(accumulator_smooth, min_distance=int(min_radius), threshold_abs=0.1)

        radii = []
        scores = []
        angles = np.linspace(0, 2*np.pi, 20) # 8 angles
        for y, x in centers:
            radius_votes = []
            scores_tmp = []
            for r in np.linspace(min_radius, max_radius, 100):
                dx, dy = (r * np.cos(angles)).astype(int), (r * np.sin(angles)).astype(int)
                if (0 <= x + dx).all() and (x + dx < width).all() \
                    and (0 <= y + dy).all and (y + dy < height).all(): # validity check
                    score = magnitude[y + dy, x + dx].mean()
                    radius_votes.append(r)
                    scores_tmp.append(score)
            if scores_tmp:
                scores_smooth = gaussian_filter(scores_tmp, sigma=sigma)
                best_radius = radius_votes[np.argmax(scores_smooth)]
                radii.append(best_radius)
                scores.append(np.max(scores_smooth))
            else:
                radii.append(0)
                scores.append(0)

        # construct the dataframe for centers and radii
        df = pd.DataFrame(centers, columns=["y", "x"])
        df["r"] = radii
        df["score"] = scores

        # keep only the circles with radius > 0
        df = df[df["r"] > 0]

        return df

    if nIter > 1:
        min_r, max_r = min_radius, max_radius
        ranges, step = np.linspace(min_r, max_r, num=nIter, endpoint=False, retstep=True)
        df_list = []
        for min_radius in ranges:
            max_radius = min_radius + step
            df_tmp = imfindcircles_one(magnitude, direction, strong_edges, \
                min_radius, max_radius, sigma)
            df_list.append(df_tmp)
        df = pd.concat(df_list).reset_index(drop=True)
    else:
        df = imfindcircles_one(magnitude, direction, strong_edges, min_radius, max_radius
        , sigma)
        return df

    # filter overlapping circles
    # Define the threshold distance
    threshold_distance = 60
    # Apply DBSCAN clustering
    db = DBSCAN(eps=threshold_distance, min_samples=1).fit(df[['x', 'y']])
    df['cluster'] = db.labels_
    # construct a new dataframe to store the final result
    g_list = []
    for c, g in df.groupby("cluster"):
        g = g.loc[[g['score'].idxmax()]]
        if len(g) > 1:
            print(c, len(g))
        g_list.append(g)
    df_final = pd.concat(g_list)

    return df_final.drop(columns=["cluster", "score"])

[147]:

df = imfindcircles_new(enhanced_gray, [50, 100], smooth_window=11)

[149]:

plt.figure(dpi=300)
plt.imshow(gray, cmap='gray')
for _, row in df.iterrows():
    circ = plt.Circle((row["x"], row["y"]), row["r"], color='r', fill=False)
    plt.gca().add_patch(circ)

3 Development

This section documents the new developments.

use gradient magnitude in evaluating the radius_vote (done)
smooth the score as a function of radius (done)
multi-stage

3.1 Radius voting

Currently, the radius voting only considers the highest score. This means all other lower scores are completely ignored. This is not a good use of the information. In this section, we observe more closely how the score varies with radius. Theoretically, an edge has some finite thickness, so that adjacent radii would all give rise to high scores. In contrast, a “fake” circle would have less consistent score in the vicinity. This idea is tested in this section.

[135]:

y, x = centers[180]

[143]:

plt.imshow(gray, cmap='gray')
plt.scatter(x, y)
plt.xlim(x-100, x+100)
plt.ylim(y-100, y+100)

[143]:

(2273.0, 2473.0)

[161]:

radius_votes = []
scores = []
for r in np.linspace(min_radius, max_radius, 100):
    dx, dy = (r * np.cos(angles)).astype(int), (r * np.sin(angles)).astype(int)
    if (0 <= x + dx).all() and (x + dx < width).all() \
        and (0 <= y + dy).all and (y + dy < height).all(): # validity check
        score = magnitude[y + dy, x + dx].mean()
        # if score > (1-sensitivity) * score_thres: # if shifted point is strong edge
        radius_votes.append(r)
        scores.append(score)

[166]:

plt.plot(radius_votes, scores)
edge_width = 10
sigma = edge_width / 100 * (max_radius - min_radius) / 3
scores_smooth = gaussian_filter(scores, sigma=sigma)
plt.plot(radius_votes, scores_smooth)

[166]:

[<matplotlib.lines.Line2D at 0x20e80050a30>]

[167]:

best_radius = radius_votes[np.argmax(scores_smooth)]

[165]:

best_radius

[165]:

68.38383838383838

3.2 Multi-stage find circle

The algorithm works best when the circle size range is narrow. It is naturally to think that, if we can use small range multiple times to detect the circles, the detection rate and accuracy may be improved. In this section, we implement this idea.

[ ]:

min_r, max_r = 50, 100
nIter = 5
ranges, step = np.linspace(min_r, max_r, num=nIter, endpoint=False, retstep=True)
df_list = []
for min_radius in ranges:
    max_radius = min_radius + step
    df_tmp = imfindcircles_new(enhanced_gray, [min_radius, max_radius], smooth_window=11)
    df_list.append(df_tmp)
df = pd.concat(df_list).reset_index(drop=True)

[132]:

df

[132]:

	index	y	x	r	score
0	0	2401	1533	60.000000	78.829622
1	1	1104	1782	53.838384	100.467745
2	2	1655	1822	60.000000	82.237878
3	3	855	1905	57.070707	99.095707
4	4	389	1880	58.888889	89.835546
...	...	...	...	...	...
1473	173	1680	3796	90.101010	51.813414
1474	174	160	1085	94.343434	38.015646
1475	175	1354	3795	95.353535	49.895891
1476	176	2509	771	95.959596	37.865775
1477	177	106	854	90.303030	17.886952

1478 rows × 5 columns

[133]:

# Define the threshold distance
threshold_distance = 60

# Apply DBSCAN clustering
db = DBSCAN(eps=threshold_distance, min_samples=1).fit(df[['x', 'y']])
df['cluster'] = db.labels_

[134]:

g_list = []
for c, g in df.groupby("cluster"):
    g = g.loc[[g['score'].idxmax()]]
    if len(g) > 1:
        print(c, len(g))
    g_list.append(g)
df_final = pd.concat(g_list)

[135]:

sub = df[df["cluster"] == 401]

[136]:

plt.figure(dpi=300)
plt.imshow(gray, cmap='gray')
plt.scatter(sub["x"], sub["y"], c='b', s=2)
for _, row in df_final.iterrows():
    circ = plt.Circle((row["x"], row["y"]), row["r"], color='r', fill=False)
    plt.gca().add_patch(circ)

4 Compare new and old algorithm

[13]:

### INPUTS ########
radii = [50, 120]
edge_width = 10
smooth_window = 11
###################

plt.figure(dpi=300)
plt.imshow(img, cmap='gray')
df_new = imfindcircles_new(enhanced_gray, radii, edge_width=edge_width, smooth_window=smooth_window, nIter=2)
df = imfindcircles(gray, radii, edge_width=edge_width, smooth_window=smooth_window, nIter=2)
xlim = [2000, 3000]
ylim = [750, 1750]
df = df.loc[(df["x"] > xlim[0]) & (df["x"] < xlim[1]) & (df["y"] > ylim[0]) & (df["y"] < ylim[1])]
df_new = df_new.loc[(df_new["x"] > xlim[0]) & (df_new["x"] < xlim[1]) & (df_new["y"] > ylim[0]) & (df_new["y"] < ylim[1])]
for _, row in df.iterrows():
    circ = plt.Circle((row["x"], row["y"]), row["r"], color=bestcolor(0), fill=False, lw=1)
    plt.gca().add_patch(circ)
for _, row in df_new.iterrows():
    circ = plt.Circle((row["x"], row["y"]), row["r"], color=bestcolor(1), fill=False, lw=1)
    plt.gca().add_patch(circ)
plt.xlim(xlim)
plt.ylim(ylim)
plt.axis("off")

[13]:

(2000.0, 3000.0, 750.0, 1750.0)