Code Walkthrough: Detailed Use of protobuf in caffe
[1] Proto file, take BlobShape in caffe.proto as an example
syntax = "proto2"; //Specify the protobuf version, default is v2, other versions: "proto3" package caffe; // Finally generate c + + code: namespace caffe message BlobShape { // Finally generate c++ code: class BlobShape: public:: google:: protobuf:: Message{ //The following statement finally generates c++ code: google:: protobuf:: Repeated Field <: google:: protobuf:: Int64 > dim_; repeated int64 dim = 1 [packed = true]; //repeated list field, number after equal sign denotes identifier, [packed=true] guarantees more efficient encoding }
[2] Compile command protoc-I=. --cpp_out=. caffe. proto
[3] generated caffe.pb.h
...... namespace caffe { //Corresponding to package caffe in caffe.proto; // Implementation details inside protobuf -- don't call these interfaces void protobuf_AddDesc_caffe_2eproto(); void protobuf_AssignDesc_caffe_2eproto(); void protobuf_ShutdownFile_caffe_2eproto(); class BlobShape; // =================================================================== class BlobShape : public ::google::protobuf::Message { //Corresponding to message BlobShape in caffe.proto public: BlobShape(); // Constructor virtual ~BlobShape(); //Destructive function BlobShape(const BlobShape& from); // copy constructor inline BlobShape& operator=(const BlobShape& from) { //Assignment constructor CopyFrom(from); return *this; } // ** UnknownField Set is used to track the fields seen when parsing protocol messages but their field numbers or types cannot be recognized. // This situation most often occurs when new fields are added to message types, and then messages containing these fields are read by old software compiled before adding new types. // Most users never need to use this class. For example, it's not used in caffe // Reference Web Site: https://developers.google.com/protocol-buffers/docs/reference/java/com/google/protobuf/UnknownField Set inline const ::google::protobuf::UnknownFieldSet& unknown_fields() const { return _unknown_fields_; } inline ::google::protobuf::UnknownFieldSet* mutable_unknown_fields() { return &_unknown_fields_; } // ** Describing metadata, mainly in reflection, not in caffe // Reference Web site: http://www.blogjava.net/DLevin/archive/2015/04/01/424012.html static const ::google::protobuf::Descriptor* descriptor(); static const BlobShape& default_instance(); void Swap(BlobShape* other); // exchange // Implementing Message Message Message BlobShape* New() const; // Newly build void CopyFrom(const ::google::protobuf::Message& from); // copy void MergeFrom(const ::google::protobuf::Message& from); // Merge: Single digit segments will be overwritten, Repeated (similar to linked list) fields will be linked together void CopyFrom(const BlobShape& from); void MergeFrom(const BlobShape& from); void Clear(); // Clear all fields of the message and set them to default values. bool IsInitialized() const; // Quickly check whether all required fields have values set. int ByteSize() const; // Serialization size of messages bool MergePartialFromCodedStream(::google::protobuf::io::CodedInputStream* input); //Read the protocol buffer from the stream and merge it into this message. void SerializeWithCachedSizes(::google::protobuf::io::CodedOutputStream* output) const; //Serialize the message without recalculating the size. ::google::protobuf::uint8* SerializeWithCachedSizesToArray(::google::protobuf::uint8* output) const; //Serialize to output int GetCachedSize() const { return _cached_size_; } //Returns the result of the last call to ByteSize() private: void SharedCtor(); void SharedDtor(); void SetCachedSize(int size) const; public: ::google::protobuf::Metadata GetMetadata() const; // Get buffer size // Types embedded in nested types message // If defined in proto's mesage /* enum SnapshotFormat { HDF5 = 0; BINARYPROTO = 1; } */ // Generate the following code: /* enum SolverParameter_SnapshotFormat { SolverParameter_SnapshotFormat_HDF5 = 0, SolverParameter_SnapshotFormat_BINARYPROTO = 1 }; bool SolverParameter_SnapshotFormat_IsValid(int value); const SolverParameter_SnapshotFormat SolverParameter_SnapshotFormat_SnapshotFormat_MIN = SolverParameter_SnapshotFormat_HDF5; const SolverParameter_SnapshotFormat SolverParameter_SnapshotFormat_SnapshotFormat_MAX = SolverParameter_SnapshotFormat_BINARYPROTO; const int SolverParameter_SnapshotFormat_SnapshotFormat_ARRAYSIZE = SolverParameter_SnapshotFormat_SnapshotFormat_MAX + 1; typedef SolverParameter_SnapshotFormat SnapshotFormat; static const SnapshotFormat HDF5 = SolverParameter_SnapshotFormat_HDF5; static const SnapshotFormat BINARYPROTO = SolverParameter_SnapshotFormat_BINARYPROTO; static inline bool SnapshotFormat_IsValid(int value) { return SolverParameter_SnapshotFormat_IsValid(value); } static const SnapshotFormat SnapshotFormat_MIN = SolverParameter_SnapshotFormat_SnapshotFormat_MIN; static const SnapshotFormat SnapshotFormat_MAX = SolverParameter_SnapshotFormat_SnapshotFormat_MAX; static const int SnapshotFormat_ARRAYSIZE = SolverParameter_SnapshotFormat_SnapshotFormat_ARRAYSIZE; static inline const ::google::protobuf::EnumDescriptor* SnapshotFormat_descriptor() { return SolverParameter_SnapshotFormat_descriptor(); } static inline const ::std::string& SnapshotFormat_Name(SnapshotFormat value) { return SolverParameter_SnapshotFormat_Name(value); } static inline bool SnapshotFormat_Parse(const ::std::string& name, SnapshotFormat* value) { return SolverParameter_SnapshotFormat_Parse(name, value); } */ // accessors ------------------------------------------------------- // repeated int64 dim = 1 [packed = true]; // operations on dim members inline int dim_size() const; // Returns the dim list size inline void clear_dim(); // empty static const int kDimFieldNumber = 1; // * The value of FieldNumber corresponds to dim = 1 identifier 1 inline ::google::protobuf::int64 dim(int index) const; // Gets the value of the specified index in the list dim inline void set_dim(int index, ::google::protobuf::int64 value); // Set the list dim to specify the value of the index inline void add_dim(::google::protobuf::int64 value); // Add new elements to list dim inline const ::google::protobuf::RepeatedField< ::google::protobuf::int64 >& dim() const; // Returns the const reference of dim, not modifiable inline ::google::protobuf::RepeatedField< ::google::protobuf::int64 >* mutable_dim(); // Return the dim pointer, which can be modified // @@protoc_insertion_point(class_scope:caffe.BlobShape) private: ::google::protobuf::UnknownFieldSet _unknown_fields_; ::google::protobuf::RepeatedField< ::google::protobuf::int64 > dim_; mutable int _dim_cached_byte_size_; mutable int _cached_size_; ::google::protobuf::uint32 _has_bits_[(1 + 31) / 32]; // 1 denotes that the number of members defined in message BlobShape is 1 friend void protobuf_AddDesc_caffe_2eproto(); friend void protobuf_AssignDesc_caffe_2eproto(); friend void protobuf_ShutdownFile_caffe_2eproto(); void InitAsDefaultInstance(); static BlobShape* default_instance_; }; // =================================================================== // BlobShape Corresponding Operations // repeated int64 dim = 1 [packed = true]; inline int BlobShape::dim_size() const { //dim is in the form of a list, so it provides an interface for returning sizes return dim_.size(); } inline void BlobShape::clear_dim() { // empty dim_.Clear(); } inline ::google::protobuf::int64 BlobShape::dim(int index) const { //Gets the value of the specified index return dim_.Get(index); } inline void BlobShape::set_dim(int index, ::google::protobuf::int64 value) { //Sets the value of the specified index dim_.Set(index, value); } inline void BlobShape::add_dim(::google::protobuf::int64 value) { //Incrementally add values to list dim dim_.Add(value); } inline const ::google::protobuf::RepeatedField< ::google::protobuf::int64 >& BlobShape::dim() const { //Return read-only dim return dim_; } inline ::google::protobuf::RepeatedField< ::google::protobuf::int64 >* BlobShape::mutable_dim() { //Returns readable and writable dim return &dim_; } ......
[4] Generated caffe.pb.cc
slightly
[5] Notes on the official website
Google's official website notes: https://developers.google.com/protocol-buffers/
c++API Manual: https://developers.google.com/protocol-buffers/docs/reference/cpp/#google.protobuf
[6] Use
1. Read protobuf in text format into Message
1.1 Take ReadProtoFromTextFile in caffe as an example
Call steps for ReadProtoFromTextFile:
train() --> caffe::ReadSolverParamsFromTextFileOrDie(FLAGS_solver, &solver_param); --> ReadProtoFromTextFile(param_file, param)
1.2. ReadProtoFromTextFile Source Details
bool ReadProtoFromTextFile(const char* filename, Message* proto) { int fd = open(filename, O_RDONLY); CHECK_NE(fd, -1) << "File not found: " << filename; FileInputStream* input = new FileInputStream(fd); bool success = google::protobuf::TextFormat::Parse(input, proto); delete input; close(fd); return success; }
1.2.1 FileInputStream Details
Official note:
FileInputStream inherits from ZeroCopyInputStream (read stream from file descriptor)
Usage: (Reference to Builder Function)
explicit FileInputStream(int file_descriptor, int block_size = -1);
Parameters:
file_descriptor: Unix file descriptor block_size: The size read and returned when Next() is specified, and a reasonable default value is used when not specified.
1.2.2 google::protobuf::TextFormat::Parse
TextFormat class:
This class implements protobuf text format. Printing and parsing protocol messages in text format is very useful for debugging and editing messages manually. This class is actually a namespace containing only static methods.
Static function Parse:
Parse text format protocol messages from a given input stream to a given message object. This function parses the format written by Print(). static bool Parse(io::ZeroCopyInputStream* input, Message* output);
Other similar static functions:
static bool ParseFromString(const string& input, Message* output); // Resolve directly from strings static bool Merge(io::ZeroCopyInputStream* input, Message* output); // Resolve from the stream and merge into Message, see Message::MergeFrom() static bool MergeFromString(const string& input, Message* output); // Ibid., but parsed directly from strings //Resolve a single field value into the specified field field field field of the specified message static bool ParseFieldValueFromString(const string& input, const FieldDescriptor* field, Message* message);
Corresponding Back Operation: Print Message to Stream or Text
static bool Print(const Message& message, io::ZeroCopyOutputStream* output); // Print message to the output stream static bool PrintToString(const Message& message, string* output); // Print message to a string // The field field field specified by message is output to the string. When a field is in list format, specify a reference index; when not in list format, index must be - 1 static void PrintFieldValueToString(const Message& message, const FieldDescriptor* field, int index, string* output); // Print the fields in the Unknown Field Set. They are printed only by label number. By trying to parse embedded messages, they can be recognized intuitively. static bool PrintUnknownFields(const UnknownFieldSet& unknown_fields, io::ZeroCopyOutputStream* output);
2. Serializing message into binary files
2.1 Take WriteProtoToBinaryFile function in caffe as an example
void WriteProtoToBinaryFile(const Message& proto, const char* filename) { fstream output(filename, ios::out | ios::trunc | ios::binary); CHECK(proto.SerializeToOstream(&output)); // The prototype bool Message::SerializeToOstream(ostream* output) const; to serialize the message into the c++ standard output stream, all required fields must be set. }
2.2 Summary of serialization functions:
// To serialize Message into a file descriptor, all required fields must be set. bool SerializeToFileDescriptor(int file_descriptor) const; // Ibid., but allow for the absence of required fields. bool SerializePartialToFileDescriptor(int file_descriptor) const; // To serialize Message into the c++ standard output stream, all required fields must be set. bool SerializeToOstream(ostream* output) const; // Ibid., but allow for the absence of required fields. bool SerializePartialToOstream(ostream* output) const;
3. Deserialize from binary file to message
3.1 Take ReadProtoFromBinaryFile in caffe as an example
bool ReadProtoFromBinaryFile(const char* filename, Message* proto) { int fd = open(filename, O_RDONLY); CHECK_NE(fd, -1) << "File not found: " << filename; ZeroCopyInputStream* raw_input = new FileInputStream(fd); CodedInputStream* coded_input = new CodedInputStream(raw_input); coded_input->SetTotalBytesLimit(kProtoReadBytesLimit, 536870912); bool success = proto->ParseFromCodedStream(coded_input); // Prototype bool ParseFromCodedStream(io::CodedInputStream* input); delete coded_input; delete raw_input; close(fd); return success; }
3.2 Other deserialization functions:
bool ParsePartialFromCodedStream(io::CodedInputStream* input); // Like ParseFromCodedStream, but accepts messages that lack the required fields. bool ParseFromZeroCopyStream(io::ZeroCopyInputStream* input); // Get from io::ZeroCopyInputStream stream bool ParsePartialFromZeroCopyStream(io::ZeroCopyInputStream* input); // Ibid., but accept messages that lack the required fields. bool ParseFromBoundedZeroCopyStream(io::ZeroCopyInputStream* input, int size); // Ibid., specifying message size must be size bool ParsePartialFromBoundedZeroCopyStream(io::ZeroCopyInputStream* input, , int size); // Ibid., but accept messages that lack the required fields.