
Triangulate points from 2 images to estimate the pose of a third

I want to estimate the pose of the current image relative to a matched image in a database. The images come from a moving robot with reasonably accurate odometry, so I can take two consecutive images that are both similar to the database image and have a good estimate of the relative pose between them. I want to use that information to estimate the 3D positions of the matched keypoints in the two current images with triangulatePoints, and then use solvePnPRansac to estimate the pose of the database image relative to the first current image from the keypoints that also match it.
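To make that last step concrete, the follow-up I have in mind would look roughly like the sketch below. It is not implemented yet, so kp_db, des_db and idx_to_3d are only placeholder names for the database image's keypoints/descriptors and for a lookup from keypoint index to triangulated 3D point; bf and K are the same matcher and camera matrix as in the code further down.

matches_db = bf.knnMatch(des0, des_db, k=2)
good_db = [m for m, n in matches_db if m.distance < 0.8 * n.distance]

# Keep only matches whose keypoint in the first current image already has a
# triangulated 3D point, then solve PnP for the database image's pose.
obj_pts, img_pts = [], []
for m in good_db:
    if m.queryIdx in idx_to_3d:
        obj_pts.append(idx_to_3d[m.queryIdx])
        img_pts.append(kp_db[m.trainIdx].pt)

_, rvec_db, tvec_db, _ = cv2.solvePnPRansac(np.float32(obj_pts), np.float32(img_pts), K, None)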

I have tried to implement this in OpenCV with Python as shown below. For now I do not use the database image at all; I am simply checking whether I can get back the odometry that I enforced, but unfortunately the current output is garbage (translation on the order of 1e+10).

import numpy as np
import cv2

# Fisheye camera matrix and distortion coefficients
K=np.array([[455.5000196386718, 0.0, 482.65324003911945], [0.0, 340.6409393462825, 254.5063795692748], [0.0, 0.0, 1.0]])
D=np.array([[-0.018682808343432777], [-0.044315351694893736], [0.047678551616171246], [-0.018283908577445218]])

orb = cv2.ORB_create(nfeatures=1000)
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=False)

img0 = cv2.imread("0cm.png")
img2 = cv2.imread("2cm.png")

# Find keypoints and match them up
kp0, des0 = orb.detectAndCompute(img0, None)
kp2, des2 = orb.detectAndCompute(img2, None)
matches = bf.knnMatch(des0, des2, k=2)

# Find good matches using the ratio test
ratio_thresh = 0.8
good_matches = []
for m,n in matches:
    if m.distance < ratio_thresh * n.distance:
        good_matches.append(m)

# Convert from keypoints to points
pts0 = np.float32([kp0[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
pts2 = np.float32([kp2[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)

# Remove the fisheye distortion from the points
pts0 = cv2.fisheye.undistortPoints(pts0, K, D, P=K)
pts2 = cv2.fisheye.undistortPoints(pts2, K, D, P=K)

# Keep only the points that make geometric sense
# TODO: find a more efficient way to apply the mask
E, mask = cv2.findEssentialMat(pts0, pts2, K, cv2.RANSAC, 0.999, 1, None)
_, R, t, mask = cv2.recoverPose(E, pts0, pts2, cameraMatrix=K, mask=mask)
pts0_m = []
pts2_m = []
for i in range(len(mask)):
    if mask[i] == 1:
        pts0_m.append(pts0[i])
        pts2_m.append(pts2[i])
pts0 = np.array(pts0_m).T.reshape(2, -1)
pts2 = np.array(pts2_m).T.reshape(2, -1)
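# (Re the TODO above: instead of the loop, the mask could probably be applied
#  with boolean indexing, something like
#      inl = mask.ravel() != 0
#      pts0 = pts0[inl].reshape(-1, 2).T
#      pts2 = pts2[inl].reshape(-1, 2).T
#  I have kept the loop since that is what I actually ran.)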

# Setup the projection matrices
R = np.eye(3)
t0 = np.array([[0], [0], [0]])
t2 = np.array([[0], [0], [2]])
P0 = np.dot(K, np.concatenate((R, t0), axis=1))
P2 = np.dot(K, np.concatenate((R, t2), axis=1))

# Find the keypoint world homogeneous coordinates assuming img0 is the world origin
X = cv2.triangulatePoints(P0, P2, pts0, pts2)

# Convert from homogeneous coordinates, TODO: find a more efficient way
X /= X[3]
objPts = X.T[:,:3]
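# (Re the TODO above: cv2.convertPointsFromHomogeneous(np.ascontiguousarray(X.T))
#  reshaped to (-1, 3) should give the same objPts, if I am reading the docs right.)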

# Find the pose of the second frame
_, rvec, tvec, inliers = cv2.solvePnPRansac(objPts, pts2.T, K, None)
print(rvec)
print(tvec)

Is there something wrong with my code or my approach (or both)?

EDIT: I tested this with an image that is 6 cm away instead of just 2 cm away, and then it seems to work just fine. I guess a small translation purely in the forward direction is causing some kind of numerical instability somewhere. There is a slight difference in the results depending on the units I use to construct the projection matrices, though (0.06 for metres vs 6 for centimetres), and I have no idea which units I should actually use to get the most accurate results, since the choice clearly is not irrelevant. I thought it could somehow be related to the camera matrix, but I obtained mine using the guide at https://medium.com/@kennethjiang/calibrate-fisheye-lens-using-opencv-333b05afa0b0 and scaling the dimensions of the checkerboard in the calibration code has no effect on the resulting matrix, so now I have no idea.
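For reference, this is roughly the comparison I mean, with the triangulation/PnP part above pulled into a helper. It is only a sketch: baseline_z is whatever value (and unit) I put into the second projection matrix, and pts0/pts2 are the filtered 2xN point arrays from the code above.

def pose_for_baseline(baseline_z, pts0, pts2, K):
    # Triangulate with an assumed forward-only baseline, then recover the
    # second camera's pose with PnP; tvec should come back in the same
    # units as baseline_z if everything is consistent.
    R = np.eye(3)
    P0 = K @ np.hstack((R, np.zeros((3, 1))))
    P2 = K @ np.hstack((R, np.array([[0.0], [0.0], [baseline_z]])))
    X = cv2.triangulatePoints(P0, P2, pts0, pts2)
    X /= X[3]
    objPts = X.T[:, :3]
    _, rvec, tvec, _ = cv2.solvePnPRansac(objPts, pts2.T, K, None)
    return rvec, tvec

rvec_m, tvec_m = pose_for_baseline(0.06, pts0, pts2, K)   # baseline in metres
rvec_cm, tvec_cm = pose_for_baseline(6.0, pts0, pts2, K)  # baseline in centimetres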
