RANSAC原理及二次/三次多项式曲线拟合

RANSAC（RANdom SAmple Consensus）是一种经典的模型拟合算法，用于从一组杂乱的数据中找出最佳的模型。它的基本思想是随机选取一定数量的数据点，使用这些数据点来拟合模型，然后将所有数据点带入模型中，统计符合模型的数据点数量，如果符合数量超过阈值，则认为这些数据点符合这个模型，即它们是局内点（inlier）。重复以上过程，多次迭代之后，找到的最佳模型是拟合最优的模型，符合该模型

ywfwyht

6305人浏览 · 2023-05-27 12:49:50

ywfwyht · 2023-05-27 12:49:50 发布

RANSAC原理

RANSAC的算法流程如下：

随机选取一定数量（例如 n 个）的样本点作为内点，计算模型参数；
遍历所有数据点，并计算它们到模型的距离；
将距离小于阈值的数据点划分为内点，其余点划分为外点；
如果内点数量大于一定比例（例如一半）且比当前记录中的内点数量大，则重新计算模型参数并记录内点数量；
重复迭代上述步骤，直到达到指定的迭代次数或者内点数量超过一定比例。

RANSAC算法通常用于处理含有噪声或者异常数据的拟合问题，例如点云配准、图像匹配等问题。它的优点是可以对噪声数据进行过滤，提高拟合准确性，缺点是需要设置阈值和迭代次数等参数，调节过程相对比较复杂。

拟合二次多项式曲线

以下是一个简单的RANSAC拟合二次多项式的代码示例，供参考：

import numpy as np
import random

def ransac2d(x, y, n, k, t, d):
    """
    RANSAC算法拟合二次多项式
    :param x: x坐标值
    :param y: y坐标值
    :param n: 最小拟合数据量
    :param k: 迭代次数
    :param t: 阈值
    :param d: 拟合数据量偏差
    :return: 拟合出的二次多项式系数
    """
    bestfit = None
    besterr = np.inf
    for i in range(k):
        # 随机选择n个点
        indices = random.sample(range(len(x)), n)
        # 使用这n个点进行拟合
        p = np.polyfit(x[indices], y[indices], 2)
        # 计算距离小于阈值t的点的数量
        err = np.sum(np.abs(np.polyval(p, x) - y) < t)
        # 如果符合条件的点数超过了d个，认为该次拟合比之前的更好
        if err > d and err < besterr:
            bestfit = p
            besterr = err
    return bestfit

# 生成测试数据
x = np.linspace(-10, 10, 100)
y = 2 * x ** 2 - 3 * x + 1 + np.random.randn(x.shape[0]) * 10

# 使用RANSAC算法拟合二次多项式
p = ransac2d(x, y, 10, 100, 3, 20)

# 绘制拟合曲线
import matplotlib.pyplot as plt
plt.plot(x, y, 'b.')
xp = np.linspace(-10, 10, 100)
plt.plot(xp, np.polyval(p, xp), 'r-')
plt.show()

在此代码中，我们先定义了一个ransac2d函数来实现RANSAC算法拟合二次多项式的功能。该函数接受x和y坐标值，最小拟合数据量n，迭代次数k，阈值t和拟合数据量偏差d作为参数，并返回拟合的二次多项式系数。

在函数内部，我们循环k次来执行RANSAC算法。在每次循环中，我们随机选择n个点，并使用这些点进行二次多项式拟合。然后，我们计算距离小于阈值t的点的数量，并将其与拟合数据量偏差d进行比较。如果符合条件的点数超过了d个，我们认为这是一个比之前更好的拟合，并将其保存下来。

最后，我们绘制原始数据点和拟合曲线以进行可视化。

拟合三次多项式曲线

以下是用Python实现RANSAC拟合三次多项式的代码示例：

import random
import numpy as np
import matplotlib.pyplot as plt

# 生成随机样本数据
x = np.arange(-5, 5, 0.1)
y = 5 * x ** 3 - 2 * x ** 2 + 3 * x + np.random.randn(len(x))

def fit_polynomial(x, y, degree):
    # 返回拟合多项式的系数
    return np.polyfit(x, y, degree)

def evaluate_polynomial(coef, x):
    # 计算多项式函数值
    return np.polyval(coef, x)

def ransac_polynomial(x, y, degree, n_iter, threshold):
    best_inliers = 0
    best_coef = 0
    best_err = np.inf
    for i in range(n_iter):
        # 随机选择若干个样本点
        sample_indices = random.sample(range(len(x)), degree + 1)
        sample_x = x[sample_indices]
        sample_y = y[sample_indices]
        # 拟合多项式
        coef = fit_polynomial(sample_x, sample_y, degree)
        # 计算所有样本点到多项式的距离
        all_errors = np.abs(evaluate_polynomial(coef, x) - y)
        # 选择符合阈值内的样本点
        inliers = all_errors < threshold
        num_inliers = np.sum(inliers)
        # 如果当前符合阈值的样本点数量比之前的多，则更新最佳参数
        if num_inliers > degree and num_inliers > np.sum(best_inliers):
            best_inliers = inliers
            best_coef = fit_polynomial(x[inliers], y[inliers], degree)
            best_err = np.sum(np.abs(evaluate_polynomial(best_coef, x[inliers]) - y[inliers]))
    return best_coef, best_err, best_inliers

# 进行RANSAC拟合
degree = 3
n_iter = 100
threshold = 0.5
best_coef, best_err, best_inliers = ransac_polynomial(x, y, degree, n_iter, threshold)

# 画出拟合曲线和数据点
plt.plot(x, y, 'o')
plt.plot(x[best_inliers], y[best_inliers], 'ro', alpha=0.5)
plt.plot(x, evaluate_polynomial(best_coef, x), '-r', label='RANSAC', alpha=0.5)
plt.plot(x, evaluate_polynomial(fit_polynomial(x, y, degree), x), '-g', label='Ordinary Least Squares')
plt.legend()
plt.show()

在这个代码示例中，我们使用了numpy.polyfit和numpy.polyval函数分别进行多项式拟合和多项式函数值的计算，并定义了fit_polynomial和evaluate_polynomial两个函数。实现RANSAC拟合的ransac_polynomial函数则是在随机选择若干个样本点后计算其拟合多项式，然后计算所有样本点到多项式的距离，并选择符合阈值内的样本点进行拟合，最后返回符合阈值内的最佳参数和误差以及所有点的内外状态，也就是样本点是否符合阈值。

此外，在这个代码示例中我们还使用了matplotlib库画出了拟合曲线和数据点。
以下是C++代码实现RANSAC拟合三次多项式曲线：

#include <iostream>
#include <cmath>
#include <vector>
#include <random>
#include <algorithm>

using namespace std;

// 定义点结构体
struct Point {
    double x;
    double y;
};

// 定义三次多项式函数
double cubic_poly(double a, double b, double c, double d, double x) {
    return a * pow(x, 3) + b * pow(x, 2) + c * x + d;
}

// 定义误差计算函数
double calc_error(double a, double b, double c, double d, Point& p) {
    double x = p.x;
    double y = p.y;
    double error = cubic_poly(a, b, c, d, x) - y;
    return error * error;
}

// 定义RANSAC函数
void ransac(vector<Point>& points, double& a, double& b, double& c, double& d, int iterations, double threshold, int inlier_count) {
    int best_inliers = -1;
    int num_points = points.size();
    random_device rd;
    mt19937 gen(rd());
    uniform_int_distribution<> distrib(0, num_points - 1);
    uniform_real_distribution<> rand(-1., 1.);
    vector<int> inliers(num_points);

    for (int i = 0; i < iterations; ++i) {
        int idx1 = distrib(gen), idx2 = distrib(gen), idx3 = distrib(gen);
        while (idx1 == idx2 || idx1 == idx3 || idx2 == idx3) {
            idx1 = distrib(gen);
            idx2 = distrib(gen);
            idx3 = distrib(gen);
        }

        // 从三个点中获得三次多项式系数的初始估计
        double x1 = points[idx1].x, y1 = points[idx1].y;
        double x2 = points[idx2].x, y2 = points[idx2].y;
        double x3 = points[idx3].x, y3 = points[idx3].y;
        double aa = ((y2 - y3) * (x1 - x3) + (y3 - y1) * (x2 - x3)) / ((x1 - x2) * (x1 - x3) * (x2 - x3));
        double bb = ((y2 - y3) - aa * (x2 * x2 - x3 * x3) - aa * (x2 - x3)) / (x2 - x3);
        double cc = y1 - aa * x1 * x1 - bb * x1;
        double dd = -bb / (3.0 * aa);

        int cur_inliers = 0;
        for (int k = 0; k < num_points; ++k) {
            double error = calc_error(aa, bb, cc, dd, points[k]);
            if (error < threshold) {
                inliers[cur_inliers] = k;
                ++cur_inliers;
            }
        }

        if (cur_inliers > inlier_count) {
            // 使用inliers重新估计三次多项式系数
            double sum_x = 0.0, sum_x2 = 0.0, sum_x3 = 0.0, sum_x4 = 0.0, sum_y = 0.0, sum_xy = 0.0, sum_x2y = 0.0;
            for (int j = 0; j < cur_inliers; ++j) {
                Point& p = points[inliers[j]];
                double x = p.x;
                double y = p.y;
                sum_x += x;
                sum_x2 += x * x;
                sum_x3 += x * x * x;
                sum_x4 += x * x * x * x;
                sum_y += y;
                sum_xy += x * y;
                sum_x2y += x * x * y;
            }
            double det = (cur_inliers * sum_x2 * sum_x4 + 2 * sum_x * sum_x2 * sum_x3
                - sum_x2 * sum_x2 * sum_x2 - cur_inliers * sum_x3 * sum_x3 - sum_x * sum_x4);
            a = (cur_inliers * sum_xy * sum_x4 + sum_x2 * sum_x3 * sum_y + sum_x2y * sum_x3
                - sum_y * sum_x2 * sum_x4 - sum_xy * sum_x3 * sum_x - sum_x2y * sum_x2 * cur_inliers) / det;
            b = (sum_xy * sum_x2 * sum_x2 + cur_inliers * sum_x3 * sum_y * sum_x2 + sum_x2y * sum_x2 * sum_x2
                - sum_x2 * sum_xy * sum_x - sum_x2y * sum_x3 * cur_inliers - sum_y * sum_x2 * sum_x2) / det;
            c = (sum_x2 * sum_xy * sum_x3 + sum_x2 * sum_x2y * sum_x2 + sum_y * sum_x2 * sum_x4
                - sum_x4 * sum_xy * cur_inliers - sum_x2y * sum_x3 * sum_x2 - sum_x2 * sum_y * sum_x3) / det;
            d = (-sum_x2 * sum_x2 * sum_x2y - cur_inliers * sum_x2 * sum_x3 * sum_y - sum_xy * sum_x2 * sum_x4
                + sum_x4 * sum_x2y * cur_inliers + sum_xy * sum_x3 * sum_x2 + sum_x2 * sum_y * sum_x2y) / det;
            best_inliers = cur_inliers;
        }
    }
}

int main() {
    vector<Point> points = {{0.0, 1.0}, {1.0, 4.0}, {2.0, 7.0}, {3.0, 16.0}, {4.0, 19.0},
                            {5.0, 28.0}, {6.0, 37.0}, {7.0, 46.0}, {8.0, 55.0}, {9.0, 64.0}};
    double a, b, c, d;
    ransac(points, a, b, c, d, 1000, 1.0, 6);
    cout << "a = " << a << ", b = " << b << ", c = " << c << ", d = " << d << endl;
    return 0;
}

这里我们定义了一个点结构体Point、一个三次多项式函数cubic_poly、一个误差计算函数calc_error和一个RANSAC函数ransac。在main函数中，我们定义了一个点集points，并调用ransac函数计算三次多项式系数a、b、c和d。

需要注意的是，我们在计算初始的三次多项式系数时，使用了随机三个点的方式。然后，我们计算每个点到当前估计的三次多项式曲线的误差，并根据阈值判断是否为内点。如果内点的个数大于指定的数量，我们就用这些内点重新估计三次多项式系数。

这个代码是简单的示例代码，需要根据你的具体情况修改。

以下是用C++实现的RANSAC拟合三次多项式曲线的示例代码：

#include <iostream>
#include <vector>
#include <random>
#include <cmath>

using namespace std;

// 生成[l, r]之间的随机整数
int randomInt(int l, int r) {
    random_device rd;
    mt19937 eng(rd());
    uniform_int_distribution<int> dist(l, r);
    return dist(eng);
}

// 计算一组三次多项式系数
vector<double> polyfit(vector<double> x, vector<double> y) {
    int n = x.size();
    int m = 3;  // 三次多项式

    vector<vector<double>> A(m + 1, vector<double>(m + 1, 0));
    vector<double> B(m + 1, 0);

    for (int i = 0; i < n; i++) {
        double xi = x[i], yi = y[i];
        for (int j = 0; j <= m; j++) {
            for (int k = 0; k <= m; k++) {
                if (j == 0 && k == 0) {
                    A[j][k] += 1;
                } else {
                    A[j][k] += pow(xi, j + k);
                }
            }
            B[j] += pow(xi, j) * yi;
        }
    }

    // 高斯-约旦消元
    for (int i = 0; i <= m; i++) {
        double d = A[i][i];
        for (int j = i; j <= m; j++) {
            A[i][j] /= d;
        }
        B[i] /= d;
        for (int j = i + 1; j <= m; j++) {
            double d2 = A[j][i];
            for (int k = i; k <= m; k++) {
                A[j][k] -= d2 * A[i][k];
            }
            B[j] -= d2 * B[i];
        }
    }

    vector<double> res(m + 1);
    for (int i = m; i >= 0; i--) {
        for (int j = i + 1; j <= m; j++) {
            B[i] -= A[i][j] * res[j];
        }
        res[i] = B[i];
    }

    return res;
}

// 计算多项式函数的值
double polyval(vector<double> c, double x) {
    double res = 0;
    for (int i = c.size() - 1; i >= 0; i--) {
        res = res * x + c[i];
    }
    return res;
}

// RANSAC拟合三次多项式曲线
vector<double> ransacPolyfit(vector<double> x, vector<double> y, int nIter, double inlierThreshold, int minInliers) {
    int n = x.size();
    int m = 3;  // 三次多项式
    int bestNInliers = 0;
    vector<double> bestModel;
    vector<int> bestInlierIndices;

    for (int i = 0; i < nIter; i++) {
        // 随机选择n个点
        vector<int> indices(n);
        for (int j = 0; j < n; j++) {
            indices[j] = randomInt(0, n - 1);
        }

        // 拟合三次多项式曲线
        vector<double> xSample(n), ySample(n);
        for (int j = 0; j < n; j++) {
            xSample[j] = x[indices[j]];
            ySample[j] = y[indices[j]];
        }
        vector<double> model = polyfit(xSample, ySample);

        // 计算拟合误差和内点个数
        vector<int> inlierIndices;
        int nInliers = 0;
        for (int j = 0; j < n; j++) {
            double dist = abs(polyval(model, x[j]) - y[j]);
            if (dist < inlierThreshold) {
                inlierIndices.push_back(j);
                nInliers++;
            }
        }

        // 更新最优模型和内点
        if (nInliers > bestNInliers && nInliers >= minInliers) {
            bestNInliers = nInliers;
            bestModel = model;
            bestInlierIndices = inlierIndices;
        }
    }

    // 使用所有内点重新拟合模型
    int nInliers = bestInlierIndices.size();
    vector<double> xInliers(nInliers), yInliers(nInliers);
    for (int i = 0; i < nInliers; i++) {
        xInliers[i] = x[bestInlierIndices[i]];
        yInliers[i] = y[bestInlierIndices[i]];
    }
    return polyfit(xInliers, yInliers);
}

int main() {
    // 生成随机数据
    const int n = 100;
    vector<double> x(n), y(n);
    for (int i = 0; i < n; i++) {
        x[i] = i;
        y[i] = 3 * pow(i - n / 2, 3) - 1000 * pow(i - n / 2, 2) + 50000 * (i - n / 2) + randomInt(-50000, 50000);
    }

    // RANSAC拟合
    int nIter = 1000;
    double inlierThreshold = 1000;
    int minInliers = 50;
    vector<double> c = ransacPolyfit(x, y, nIter, inlierThreshold, minInliers);

    // 输出拟合结果
    cout << "拟合结果：y = " << c[0] << " + " << c[1] << "x + " << c[2] << "x^2 + " << c[3] << "x^3" << endl;

    return 0;
}