Image classification with the OpenCV bag-of-words (BoW) algorithm

Keywords: C++ xml

Brief overview of the process

  1. feature extraction of training set pictures

  2. Cluster these features into n classes. Each of the n classes is equivalent to a "word" of a picture, and all n classes together form a "vocabulary". If the training set is large, the value of n should be increased.

  3. Construct the bag of words for each picture in the training set: assign every feature of the picture to one of the n classes and count how many features fall into each class. This is equivalent to counting the frequency of each word in a text.

  4. Train a multi-class classifier. The bag of words of each picture is used as its feature vector, and the category of the picture is used as its label.

  5. For images of unknown categories, the bag of words is calculated and classified using the trained classifier.

Detailed steps

feature extraction and clustering

This step extracts the feature descriptors of all training pictures and collects them in vocab_descriptors (a Mat to which the descriptors of each picture are appended as rows), then uses bowtrainer to cluster vocab_descriptors into vocab (also a Mat).


// Collects the SURF descriptors of every training picture (appended row-wise below).
Mat vocab_descriptors;
// Visit every (category, picture) entry of the training set, extract its SURF descriptors and append them to vocab_descriptors
multimap<string,Mat> ::iterator i=train_set.begin();
for(;i!=train_set.end();i++)
{
  vector<KeyPoint>kp;// key points detected in this picture
  Mat templ=(*i).second; // the training picture
  Mat descrip; // descriptors computed at the key points

  // featureDecter is the SURF object: detect() finds the key points, compute() fills descrip for them
  featureDecter->detect(templ,kp);
  featureDecter->compute(templ,kp,descrip);

  // Mat::push_back(Mat) appends the rows of its argument below the existing rows; the argument must have the same type and number of columns as the destination.
  vocab_descriptors.push_back(descrip);
}
// Cluster the pooled SURF descriptors of all pictures; the result of cluster() is kept as the vocabulary.
vocab=bowtrainer->cluster(vocab_descriptors);

Construct bag of words

This step counts, for each picture, how many of its feature points fall into each class (visual word); these frequencies are the bag of words of the picture. bowDescriptorExtractor is given the vocabulary obtained in the previous step through setVocabulary; it then takes the feature points of a picture as input and computes the frequency of feature points in each class.


// Traveling through each picture, extracting key points of SURF, and counting the frequency of feature points of each category
multimap<string,Mat> ::iterator i=train_set.begin();
for(;i!=train_set.end();i++)
{
  vector<KeyPoint>kp; //key point
  string cate_nam=(*i).first; //Category name, according to folder directory name
  Mat tem_image=(*i).second; //Corresponding pictures
  Mat imageDescriptor; //Statistical Frequency of Characteristic Points

  featureDecter->detect(tem_image,kp);
  bowDescriptorExtractor->compute(tem_image,kp,imageDescriptor);

  //push_back(Mat); add a few more rows after the last line of the original Mat, and when the element is Mat, its type and number of columns must be the same as the matrix container.
  //In the value Mat of allsamples_bow, each line represents a bag of words for a picture.
  allsamples_bow[cate_nam].push_back(imageDescriptor);
}

Training classifier

The classifier used is an SVM, and the classical one-vs-all method is used to realize multi-class classification: a binary classifier is trained for each category. After training, each classifier computes a confidence that a feature vector belongs to its category, and the category with the highest confidence is selected as the category of the feature vector.


stor_svms=new Ptr<SVM>[categories_size]; // one SVM slot per category
for(int i=0;i<categories_size;i++)
{
  // Training matrix for classifier i: starts empty, with the column count and type of the bag-of-words rows built in the previous step
  Mat tem_Samples( 0, allsamples_bow.at( category_name[i] ).cols, allsamples_bow.at( category_name[i] ).type() );
  // Label column, one entry per training row
  Mat responses( 0, 1, CV_32SC1 );
  // Rows of category i are the positive samples and get label +1
  tem_Samples.push_back( allsamples_bow.at( category_name[i] ) );
  Mat posResponses( allsamples_bow.at( category_name[i]).rows, 1, CV_32SC1, Scalar::all(1) ); 
  responses.push_back( posResponses );
  
  // Rows of every other category are the negative samples and get label -1
  for ( map<string,Mat>::iterator itr = allsamples_bow.begin(); itr != allsamples_bow.end(); ++itr ) 
  {
    if ( itr -> first == category_name[i] ) {
      continue;
    }
    tem_Samples.push_back( itr -> second );
    Mat response( itr -> second.rows, 1, CV_32SC1, Scalar::all( -1 ) );
    responses.push_back( response );
  }
  // Set the training parameters: C_SVC type, linear kernel, at most 100 iterations
  stor_svms[i] = SVM::create();
  stor_svms[i]->setType(SVM::C_SVC);
  stor_svms[i]->setKernel(SVM::LINEAR);
  // NOTE(review): gamma is presumably not used by the linear kernel -- confirm against the OpenCV SVM documentation
  stor_svms[i]->setGamma(3);
  stor_svms[i]->setTermCriteria(TermCriteria(CV_TERMCRIT_ITER, 100, 1e-6));

  // The key step: train classifier i, one sample per row of tem_Samples
  stor_svms[i]->train( tem_Samples, ROW_SAMPLE, responses);

}

Classification of Unknown Pictures

The bag of words of the picture to be classified is used as the input feature vector. The classifier of each category computes a confidence that the picture belongs to that category, and the category with the highest confidence is taken as the category of the picture.


// Excerpt: curConfidence, prediction_category and cate_na are not declared here; see the full source code below.
Mat input_pic=imread(train_pic_path); // read the picture to be classified

// Compute the bag-of-words descriptor of the picture
vector<KeyPoint>kp;
Mat test;
featureDecter->detect(input_pic,kp);
bowDescriptorExtractor->compute(input_pic,kp,test);
// sign stays 0 until it has been determined from the first classifier
int sign=0;
// Only a confidence above this starting value can become the prediction
float best_score = -2.0f;
for(int i=0;i<categories_size;i++)
{    
  if(sign==0)
  {
    // NOTE(review): the third argument of predict() is its flags parameter; true presumably requests the raw decision value and false the class label -- confirm against the OpenCV StatModel::predict documentation
    float scoreValue = stor_svms[i]->predict( test, noArray(), true );
    float classValue = stor_svms[i]->predict( test, noArray(), false );
    // +1 when both results have the same sign, otherwise -1
    sign = ( scoreValue < 0.0f ) == ( classValue < 0.0f )? 1 : -1;
  }
  // Confidence of classifier i: its predict(..., true) result multiplied by sign
  curConfidence = sign * stor_svms[i]->predict( test, noArray(), true );

  // Remember the category whose classifier gives the highest confidence so far
  if(curConfidence>best_score)
  {
    best_score=curConfidence;
    prediction_category=cate_na;
  }
}
cout<<"This picture belongs to:"<<prediction_category<<endl;

Full source code


#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/features2d/features2d.hpp>
#include <opencv2/xfeatures2d.hpp>
#include <opencv2/ml/ml.hpp>
#include <boost/filesystem.hpp>
#include <boost/filesystem/fstream.hpp>
#include <iostream>
#include <fstream>
#include <cstring>
#include <iterator>
#include <vector>
#include <map>
#include<fstream>

using namespace cv;
using namespace cv::xfeatures2d;
using namespace std;
using namespace cv::ml;

#define DATA_FOLDER "data/"
#define TRAIN_FOLDER "data/train_images/"
#define TEMPLATE_FOLDER "data/templates/"
#define TEST_FOLDER "data/test_image"
#define RESULT_FOLDER "data/result_image/"


class categorizer
{
private :
    // // Mapping from Category Name to Data
    // map<string,Mat> result_objects;    
    //BOW for storing all training pictures
    map<string,Mat> allsamples_bow;
    //From the mapping of class names to training atlases, keywords can be repeated
    multimap<string,Mat> train_set;
    // The trained SVM
  Ptr<SVM> *stor_svms;
    //Category name, which is the directory name set by TRAIN_FOLDER
    vector<string> category_name;
    //Number of Categories
    int categories_size;
    //Constructing the Number of Visual Lexicon Clusters with SURF Features
    int clusters;
    //Store Training Picture Dictionary
    Mat vocab;

    Ptr<SURF> featureDecter;
    Ptr<BOWKMeansTrainer> bowtrainer;
    Ptr<BFMatcher> descriptorMacher;
  Ptr<BOWImgDescriptorExtractor> bowDescriptorExtractor;

    //Constructing training set
    void make_train_set();
    // Remove the extension used to organize templates into categories
    string remove_extention(string);

public:
    //Constructor
    categorizer(int);
    // Clustering Dictionary
    void bulid_vacab();
    //Constructing BOW
    void compute_bow_image();
    //Training classifier
    void trainSvm();
    //Classify test pictures
    void category_By_svm();
};

// Remove the extension from a file name.
//
// Returns everything before the last '.' of full_name. A name that contains
// no '.' is returned unchanged.
string categorizer::remove_extention(string full_name)
{
    // Keep the position as size_type: storing it in an int truncated the value
    // and handled the "no dot" case only because npos wrapped around to -1.
    const string::size_type last_dot=full_name.find_last_of('.');
    if(last_dot==string::npos)
    {
        return full_name;
    }
    return full_name.substr(0,last_dot);
}

// Constructor: creates the SURF, k-means trainer, matcher and bag-of-words
// extractor objects for a vocabulary of _clusters words, then reads the
// training set.
categorizer::categorizer(int _clusters)
{
    cout<<"Start initialization..."<<endl;

    // Hessian threshold handed to SURF::create
    const int hessian_threshold = 400;

    clusters = _clusters;
    featureDecter = SURF::create( hessian_threshold );
    descriptorMacher = BFMatcher::create();
    bowtrainer = new BOWKMeansTrainer(clusters);
    bowDescriptorExtractor = new BOWImgDescriptorExtractor(featureDecter,descriptorMacher);

    // Disabled: loading of the template pictures from TEMPLATE_FOLDER.
    // directory_iterator(p) is the start of the iteration, the default-constructed
    // directory_iterator() is its end.
    // boost::filesystem::directory_iterator begin_iter(TEMPLATE_FOLDER);
    // boost::filesystem::directory_iterator end_iter;
    // for(;begin_iter!=end_iter;++begin_iter)
    // {
    //     // File path, e.g. data/templates/airplanes.jpg
    //     string filename=string(TEMPLATE_FOLDER)+begin_iter->path().filename().string();
    //     // Category name, e.g. airplanes
    //     string sub_category =remove_extention(begin_iter->path().filename().string());
    //     // Read the template picture
    //     if(begin_iter->path().filename().string() != ".DS_Store") {
    //         Mat image=imread(filename);
    //         Mat templ_image;
    //         // Store the original template picture
    //         result_objects[sub_category]=image;
    //     }
    // }

    cout<<"Initialization completed..."<<endl;

    // Read the training set
    make_train_set();
}

//Constructing training set
void categorizer::make_train_set()
{
    cout<<"Read training set..."<<endl;
    string categor;
    //Recursive iteration rescursive directly defines two iterators: i is the starting point of iteration (with parameters), and end_iter is the end point of iteration.
    for(boost::filesystem::recursive_directory_iterator i(TRAIN_FOLDER),end_iter;i!=end_iter;i++)
    {
        // level == 0 is the directory, because this is set in TRAIN__FOLDER
        if(i.level()==0)
        {
            // Set the category name to the directory name
            if((i->path()).filename().string() != ".DS_Store") {
                categor=(i->path()).filename().string();
                category_name.push_back(categor);
            }
        }
        else
        {
            // Read the files under the folder. level 1 indicates that this is a training graph, and a one-to-many mapping from the class name to the training graph is established through the multimap container.
            string filename=string(TRAIN_FOLDER)+categor+string("/")+(i->path()).filename().string();
            if((i->path()).filename().string() != ".DS_Store") {
                Mat temp=imread(filename,CV_LOAD_IMAGE_GRAYSCALE);
                pair<string,Mat> p(categor,temp);
                //Get the training set
                train_set.insert(p);
            }
        }    
    }
    categories_size=category_name.size();
    cout<<"find "<<categories_size<<"Category objects..."<<endl;
}

// Training picture feature clustering to get dictionary
void categorizer::bulid_vacab()
{
    FileStorage vacab_fs(DATA_FOLDER "vocab.xml",FileStorage::READ);

    //If the dictionary has been generated before, there is no need to regroup the dictionary.
    if(vacab_fs.isOpened())
    {
        cout<<"Pictures have been clustered and dictionaries already exist..."<<endl;
        vacab_fs.release();
    }else
    {
        Mat vocab_descriptors;
        // For each template, the SURF operator is extracted and stored in vocab_descriptors
        multimap<string,Mat> ::iterator i=train_set.begin();
        for(;i!=train_set.end();i++)
        {
            vector<KeyPoint>kp;
            Mat templ=(*i).second;
            Mat descrip;
            featureDecter->detect(templ,kp);

            featureDecter->compute(templ,kp,descrip);
            //push_back(Mat); add a few more rows after the last line of the original Mat, and when the element is Mat, its type and number of columns must be the same as the matrix container.
            vocab_descriptors.push_back(descrip);
        }
        // vocab_descriptors.convertTo(vocab_descriptors, CV_32F);
        cout << "Training Pictures Start Clustering..." << endl;
        //By adding ORB features of each pair of graphs to bow Training, clustering training can be carried out.
        // Clustering ORB Descriptors
        vocab=bowtrainer->cluster(vocab_descriptors);
        cout<<"After clustering, a dictionary is obtained...."<<endl;

        //Save dictionaries in file format
        FileStorage file_stor(DATA_FOLDER "vocab.xml",FileStorage::WRITE);
        file_stor<<"vocabulary"<<vocab;
        file_stor.release();
    }
}

//Construct bag of words
void categorizer::compute_bow_image()
{
    cout<<"structure bag of words..."<<endl;
    FileStorage va_fs(DATA_FOLDER "vocab.xml",FileStorage::READ);
    //If a dictionary exists, read it directly
    if(va_fs.isOpened())
    {
        Mat temp_vacab;
        va_fs["vocabulary"] >> temp_vacab;
        bowDescriptorExtractor->setVocabulary(temp_vacab);
        va_fs.release();
    }
    else
    {
        //For the feature points of each picture, the frequency of each category of the picture is counted as the bag of words of the picture.
        bowDescriptorExtractor->setVocabulary(vocab);
    }

    //If bow.txt already exists and has been trained before, there is no need to reconstruct the BOW below.
    string bow_path=string(DATA_FOLDER)+string("bow.txt");
    boost::filesystem::ifstream read_file(bow_path);
    // // If BOW already exists, no construction is required
    if(read_file.is_open())
    {
        cout<<"BOW Ready..."<<endl;
    }
    else{
        // For each template, the SURF operator is extracted and stored in vocab_descriptors
        multimap<string,Mat> ::iterator i=train_set.begin();
        for(;i!=train_set.end();i++)
        {
            vector<KeyPoint>kp;
            string cate_nam=(*i).first;
            Mat tem_image=(*i).second;
            Mat imageDescriptor;
            featureDecter->detect(tem_image,kp);
            bowDescriptorExtractor->compute(tem_image,kp,imageDescriptor);
            //push_back(Mat); add a few more rows after the last line of the original Mat, and when the element is Mat, its type and number of columns must be the same as the matrix container.
            allsamples_bow[cate_nam].push_back(imageDescriptor);
        }
        //Simply output a text to prepare for later judgments
        boost::filesystem::ofstream ous(bow_path);
        ous<<"flag";
        cout<<"bag of words Completion of construction..."<<endl;
    }
}

//Training classifier

void categorizer::trainSvm()
{
    int flag=0;
    for(int k=0;k<categories_size;k++)
    {
        string svm_file_path=string(DATA_FOLDER) + category_name[k] + string("SVM.xml");
        FileStorage svm_fil(svm_file_path,FileStorage::READ);
        //Judging whether the training results exist
        if(svm_fil.isOpened())
        {
            svm_fil.release();
            continue;
        }
        else
        {
            flag=-1;
            break;
        }
    }
    //If the training results already exist, no retraining is required.
    if(flag!=-1)
    {
        cout<<"The classifier has been trained...."<<endl;
    }else

    {
        stor_svms=new Ptr<SVM>[categories_size];

        cout<<"Training classifier..."<<endl;
        for(int i=0;i<categories_size;i++)
        {
            Mat tem_Samples( 0, allsamples_bow.at( category_name[i] ).cols, allsamples_bow.at( category_name[i] ).type() );
            Mat responses( 0, 1, CV_32SC1 );
            tem_Samples.push_back( allsamples_bow.at( category_name[i] ) );
            Mat posResponses( allsamples_bow.at( category_name[i]).rows, 1, CV_32SC1, Scalar::all(1) ); 
            responses.push_back( posResponses );
            
            for ( map<string,Mat>::iterator itr = allsamples_bow.begin(); itr != allsamples_bow.end(); ++itr ) 
            {
                if ( itr -> first == category_name[i] ) {
                    continue;
                }
                tem_Samples.push_back( itr -> second );
                Mat response( itr -> second.rows, 1, CV_32SC1, Scalar::all( -1 ) );
                responses.push_back( response );
            }
      //Setting up training parameters
            stor_svms[i] = SVM::create();
      stor_svms[i]->setType(SVM::C_SVC);
      stor_svms[i]->setKernel(SVM::LINEAR);
            stor_svms[i]->setGamma(3);
      stor_svms[i]->setTermCriteria(TermCriteria(CV_TERMCRIT_ITER, 100, 1e-6));
            stor_svms[i]->train( tem_Samples, ROW_SAMPLE, responses);
            //Storage svm
            string svm_filename=string(DATA_FOLDER) + category_name[i] + string("SVM.xml");
            cout<<svm_filename.c_str()<<endl;
            stor_svms[i]->save(svm_filename.c_str());
        }
        cout<<"Classifier training completed..."<<endl;
    }
}


//Classification of test pictures

void categorizer::category_By_svm()
{
    cout<<"Object Classification Begins..."<<endl;
    Mat gray_pic;
    Mat threshold_image;
    string prediction_category;
    float curConfidence;

    boost::filesystem::directory_iterator begin_train(TEST_FOLDER);
    boost::filesystem::directory_iterator end_train;

    for(;begin_train!=end_train;++begin_train)
    {
        
        //Get the image name in this directory
        string train_pic_name=(begin_train->path()).filename().string();
        string train_pic_path=string(TEST_FOLDER)+string("/")+(begin_train->path()).filename().string();
        
        //Read pictures
        if((begin_train->path()).filename().string() == ".DS_Store") {
            continue;
        }
        Mat input_pic=imread(train_pic_path);
        cvtColor(input_pic,gray_pic,CV_BGR2GRAY);
    
        // Extraction of BOW Descriptors
        vector<KeyPoint>kp;
        Mat test;
        featureDecter->detect(gray_pic,kp);
        bowDescriptorExtractor->compute(gray_pic,kp,test);
        int sign=0;
        float best_score = -2.0f;
        for(int i=0;i<categories_size;i++)
        {    
            string cate_na=category_name[i];
            string f_path=string(DATA_FOLDER)+cate_na + string("SVM.xml");
            FileStorage svm_fs(f_path,FileStorage::READ);
            //Read SVM.xml
            if(svm_fs.isOpened())
            {
                svm_fs.release();
                Ptr<SVM> st_svm = Algorithm::load<SVM>(f_path.c_str());
                if(sign==0)
                {
                    float score_Value = st_svm->predict( test, noArray(), true );
                    float class_Value = st_svm->predict( test, noArray(), false );
                    sign = ( score_Value < 0.0f ) == ( class_Value < 0.0f )? 1 : -1;
                }
                curConfidence = sign * st_svm->predict( test, noArray(), true );
            }
            else
            {            
                if(sign==0)
                {
                    float scoreValue = stor_svms[i]->predict( test, noArray(), true );
                    float classValue = stor_svms[i]->predict( test, noArray(), false );
                    sign = ( scoreValue < 0.0f ) == ( classValue < 0.0f )? 1 : -1;
                }
                curConfidence = sign * stor_svms[i]->predict( test, noArray(), true );
            }
            if(curConfidence>best_score)
            {
                best_score=curConfidence;
                prediction_category=cate_na;
            }
        }
        //Write the picture to the appropriate folder
        boost::filesystem::directory_iterator begin_iterater(RESULT_FOLDER);
        boost::filesystem::directory_iterator end_iterator;
        //Get the file name in this directory
        for(;begin_iterater!=end_iterator;++begin_iterater)
        {

            if(begin_iterater->path().filename().string()==prediction_category)
            {
                string filename=string(RESULT_FOLDER)+prediction_category+string("/")+train_pic_name;
                imwrite(filename,input_pic);
            }
        }
        cout<<"This picture belongs to:"<<prediction_category<<endl;
    }
}


int main(void)
{
    int clusters=1000;
    categorizer c(clusters);
    //Feature clustering
    c.bulid_vacab();
    //Constructing BOW
    c.compute_bow_image();
    //Training classifier
    c.trainSvm();
    //Classify test pictures
    c.category_By_svm();
    return 0;
}

Posted by Aleks on Fri, 28 Jun 2019 15:50:15 -0700