Understanding the structure and working principle of a CPU through code

Keywords: Verilog Windows Linux

I. Preface

From the beginning of graduate school through my first half year at work, I have been working with MCU/SoC controllers built around a CPU core, but because my specialty lies elsewhere, I had no idea about the internal structure and working principle of a CPU. Today a blog post cleared up this blind spot. I hereby declare that the design idea and the code in this article all come from the following blog post, and are used here only as a learning record and to share experience.

Design and implementation of simple CPU
https://blog.csdn.net/weixin_36077867/article/details/82286612

II. The structure and working principle of a simple CPU

Use the structure chart in the original:

The core modules of the CPU are the controller, the program counter (PC), the memory, the decoder and the ALU. The controller is responsible for commanding and dispatching the work of the other modules: for every instruction it steps through a fixed sequence of stages, fetch -> decode -> execute, and then lets the PC advance. Fetching reads from memory the word stored at the address the PC points to; that word is then passed to the decoder, which translates it into an operation code and an operand address. Finally, according to the decoded operation, the ALU completes the arithmetic or logical operation, and the result is saved to the address specified in memory. The content of the memory is the bit stream that an IDE produces from a high-level language such as C/C++ when we program a single-chip microcomputer. It includes the code instructions, temporary variables and all the data values that need to be saved.

III. Design code and simulation analysis

The following code differs from the code in the referenced blog post only in a few minor places; there are no substantial changes.

 1 `timescale 1ns / 1ps
 2 
 3 // Description: 
 4 // program counter 
 5 
 6 module PC
 7 #(parameter ADDR_WIDTH = 5)
 8 (
 9 input clock,
10 input reset,
11 input en,
12 output reg [ADDR_WIDTH-1:0] pc 
13     );
14     
15     wire [ADDR_WIDTH-1:0] pc_next;
16     
17     always@(posedge clock or posedge reset)begin
18         if(reset)
19             pc <= 0;
20         else if(en)
21             pc <= pc_next;
22     end
23     
24     assign pc_next = pc + 1;
25     
26 endmodule
PC.v
 1 `timescale 1ns / 1ps
 2 
 3 // Description: 
 4 // memory used for storing instructions, temporary variables, and initialization data
 5 //STAļ¼Œstore A to
 6 //LDA, load A from
 7 
 8 
 9 module memory
10 #(
11 parameter ADDR_WIDTH = 5,
12 parameter DATA_WIDTH = 8
13 )
14 (
15 input clock,
16 input reset,
17 input wr_en,
18 input rd_en,
19 input [ADDR_WIDTH-1:0] addr,
20 input [DATA_WIDTH-1:0] din,
21 output reg [DATA_WIDTH-1:0] dout
22     );
23     
24     reg [DATA_WIDTH-1:0] mem [0:32-1];
25     
26     always@(posedge clock,posedge reset)begin
27         if(reset)begin
28             mem [0] <= 'b000_01011;      //LDA 01011
29             mem [1] <= 'b010_01100;      //ADD 01100
30             mem [2] <= 'b001_01101;      //STA 01101
31             mem [3] <= 'b000_01011;      //LDA 01011
32             mem [4] <= 'b100_01100;      //AND 01100
33             mem [5] <= 'b001_01110;      //STA 01110
34             mem [6] <= 'b000_01011;      //LDA 01011
35             mem [7] <= 'b011_01100;      //SUB 01100
36             mem [8] <= 'b001_01111;      //STA 01111
37             mem [9] <= 'b10100000;      //HLT
38             mem [10] <= 'b00000000;
39             mem [11] <= 'b10010101;
40             mem [12] <= 'b01100101;
41             mem [13] <= 'b00000000;
42             mem [14] <= 'b00000000;
43             mem [15] <= 'b00000000;
44             mem [16] <= 'b00000000;
45             mem [17] <= 'b00000000;
46             mem [18] <= 'b00000000;
47             mem [19] <= 'b00000000;
48             mem [20] <= 'b00000000;
49             mem [21] <= 'b00000000;
50             mem [22] <= 'b00000000;
51             mem [23] <= 'b00000000;
52             mem [24] <= 'b00000000;
53             mem [25] <= 'b00000000;
54             mem [26] <= 'b00000000;
55             mem [27] <= 'b00000000;
56             mem [28] <= 'b00000000;
57             mem [29] <= 'b00000000;
58             mem [30] <= 'b00000000;
59             mem [31] <= 'b00000000;
60         end
61         else begin
62             if(wr_en)
63                 mem[addr] <= din;
64             else if(rd_en)
65                 dout <= mem[addr];
66         end
67     end
68 endmodule
memory.v
`timescale 1ns / 1ps

// Description:
// Instruction decoder.
// On a rising clock edge with `en` high, splits the instruction word into
//   opcode = the upper DATA_WIDTH-ADDR_WIDTH bits
//   addr   = the lower ADDR_WIDTH bits
// and holds both until the next enabled edge. The asynchronous, active-high
// reset clears both outputs.

module idec
#(
    parameter DATA_WIDTH = 8,
    parameter ADDR_WIDTH = 5
)
(
    input                                  clock,
    input                                  reset,
    input                                  en,
    input      [DATA_WIDTH-1:0]            instruction, // from memory
    output reg [DATA_WIDTH-ADDR_WIDTH-1:0] opcode,
    output reg [ADDR_WIDTH-1:0]            addr
);

    always @(posedge clock or posedge reset) begin
        if (reset) begin
            opcode <= 0;
            addr   <= 0;
        end
        else if (en) begin
            // Slice by parameter so the opcode field always matches the width
            // of the opcode port (previously fixed at 3 bits).
            opcode <= instruction[DATA_WIDTH-1:ADDR_WIDTH];
            addr   <= instruction[ADDR_WIDTH-1:0];
        end
    end

endmodule
idec.v
 1 `timescale 1ns / 1ps
 2 
 3 // Description: 
 4 // arithmetic logic unit
 5 
 6 
 7 module alu 
 8 #(parameter OP_WIDTH = 8)
 9 (
10 input clock,
11 input reset,
12 
13 input en,
14 input add_en,//Add operation enable
15 input sub_en,
16 input and_en,
17 input pass_en,
18 input [OP_WIDTH-1:0] din,
19 
20 output n,//Negative sign
21 output z,//0 sign
22 output reg c,//Output carry flag
23 output v,//Output overflow flag
24 output reg [OP_WIDTH-1:0] a//Accumulator output register dout
25 
26     );
27     
28     assign n = (c == 1) ? 1: 0 ;       //Negative flag, if carry flag is 1,be n=1                                    
29     assign z = (a == 'd0) ? 1: 0 ;    //0 flag, if accumulator is 0, z=1                                        
30     assign v = ((a>2**(OP_WIDTH-1)-1) || (a<-2**(OP_WIDTH-1)) ? 1:0 );  //Value range of overflow flag complement:-2^(n-1)~~~~~2^(n-1)-1   n=8              
31                                                                   
32     always @(posedge clock or posedge reset)begin 
33         if (reset) begin
34             a <= 0;      //Reset accumulator clear 0,
35             c <= 0;    
36         end
37         else begin
38             if(en) begin
39                 if(add_en)
40                     {c,a} <= a + din;
41                 else if(sub_en)
42                     {c,a} <= a - din;
43                 else if(and_en)
44                     a <= a & din;
45                 else if(pass_en)
46                     a <= din; 
47             end
48         end
49     end    
50  
51 endmodule
alu.v
 1 `timescale 1ns / 1ps
 2 
 3 
 4 module control#(
 5 parameter DATA_WIDTH = 8,
 6 parameter ADDR_WIDTH = 5
 7 )
 8 (
 9 input clock,
10 input reset,
11 input [DATA_WIDTH-ADDR_WIDTH-1:0] opcode,//Instruction from decoder after decoding
12 
13 output reg [6-1:0] s,//Enable signal
14 output reg addr_sel,//Program or data address strobe
15 output reg [4-1:0] instrs
16 
17 );
18 
19     parameter [DATA_WIDTH-ADDR_WIDTH-1:0] LDA = 'b000,
20                                           STA = 'b001,
21                                           ADD = 'b010,
22                                           SUB = 'b011,
23                                           AND = 'b100;
24     
25     reg [8-1:0] cnt;
26     wire add_cnt,end_cnt;
27     
28     always@(posedge clock, posedge reset)begin
29         if(reset)
30             cnt <= 0;
31         else if(add_cnt)begin
32             if(end_cnt)
33                 cnt <= 0;
34             else 
35                 cnt <= cnt + 1;
36         end
37     end
38     
39     assign add_cnt = 1;
40     assign end_cnt = add_cnt && cnt == 6-1;
41     
42     always@(*)begin
43         case(cnt)
44             0:begin//Fetch instruction
45                  s = 'b100_000;
46                  addr_sel = 0; 
47                  instrs = 0;
48             end
49             1:begin//Decode
50                 s = 'b010_000;
51                 addr_sel = 0;
52             end
53             2:begin//read from the memory
54                 addr_sel = 1;
55                 if(
56                    (opcode == LDA) ||
57                    (opcode == ADD) ||
58                    (opcode == SUB) ||
59                    (opcode == AND)
60                    )
61                     s = 'b001_000;
62                 else
63                     s = 'b000_000;
64             end
65             3:begin//ALU operations
66                 s = 'b000_100;
67                 addr_sel = 1;
68                 case(opcode)
69                     LDA:instrs = 'b0001;
70                     ADD:instrs = 'b1000;
71                     SUB:instrs = 'b0100;
72                     AND:instrs = 'b0010;
73                     STA:instrs = 'b0000;
74                     default:instrs = 'b0000;
75                 endcase
76             end
77             4:begin//write to the memory
78                 addr_sel = 1;
79                 if(opcode == STA)
80                     s = 'b000_010;
81                 else
82                     s = 'b000_000;
83             end
84             5:begin// PC 
85                 s = 'b000_001;
86                 addr_sel = 1;
87             end
88             default:begin
89                 s = 'b000_000;
90                 addr_sel = 0;
91                 instrs = 0;
92             end
93         endcase
94     end
95 
96 endmodule
control.v
  1 `timescale 1ns / 1ps
  2 
  3 module cpu_top
  4 (
  5 input clock,
  6 input reset,
  7 
  8 output n,//Negative sign
  9 output z,//0 sign
 10 output c,//Output carry flag
 11 output v//Output overflow flag
 12 );
 13 
 14 parameter DATA_WIDTH = 8,
 15           ADDR_WIDTH = 5;
 16             
 17 
 18 wire [6-1:0] s;
 19 wire [ADDR_WIDTH-1:0] addr_mem,addr_idec,addr_pc;
 20 wire addr_sel;
 21 wire [DATA_WIDTH-1:0] dout_mem,din_mem;
 22 wire [DATA_WIDTH-ADDR_WIDTH-1:0] opcode;
 23 wire [4-1:0] alu_oper;
 24 
 25 assign addr_mem = addr_sel == 1 ?  addr_idec: addr_pc; 
 26 
 27 control#(
 28 .DATA_WIDTH (DATA_WIDTH),
 29 .ADDR_WIDTH (ADDR_WIDTH)
 30 )
 31 controlor
 32 (
 33     .clock        (clock),
 34     .reset        (reset),
 35     .opcode        (opcode),//Instruction from decoder after decoding
 36     .s            (s),//Enable signal
 37     .addr_sel    (addr_sel),//Program or data address strobe
 38     .instrs        (alu_oper)
 39 
 40 );
 41 
 42 PC 
 43 #(.ADDR_WIDTH (ADDR_WIDTH))
 44 pointer_counter
 45 (
 46     .clock    (clock),
 47     .reset    (reset),
 48     .en        (s[0]),
 49     .pc     (addr_pc)//code address    
 50     );
 51     
 52     
 53 memory 
 54 #(
 55 .ADDR_WIDTH(ADDR_WIDTH),
 56 .DATA_WIDTH (DATA_WIDTH)
 57 )
 58 memory
 59 (
 60     .clock    (clock),
 61     .reset    (reset),
 62     .wr_en    (s[1]),
 63     .rd_en    (s[5] | s[3]),
 64     .addr    (addr_mem),
 65     .din    (din_mem),
 66     .dout    (dout_mem)
 67     );
 68 
 69 idec 
 70 #(
 71 .DATA_WIDTH (DATA_WIDTH),
 72 .ADDR_WIDTH (ADDR_WIDTH)
 73 )
 74 instr_decoder
 75 (
 76     .clock        (clock),
 77     .reset        (reset),
 78     .en            (s[4]),
 79     .instruction(dout_mem),//from memory
 80     
 81     .opcode        (opcode),
 82     .addr        (addr_idec)//data address
 83     );
 84     
 85 alu 
 86 #(.OP_WIDTH(DATA_WIDTH))
 87 alu
 88 (
 89     .clock        (clock),    
 90     .reset        (reset),
 91     .en            (s[2]),
 92     .add_en        (alu_oper[3]),//Add operation enable
 93     .sub_en        (alu_oper[2]),
 94     .and_en        (alu_oper[1]),
 95     .pass_en    (alu_oper[0]),
 96     .din        (dout_mem),    
 97     .n            (n),//Negative sign
 98     .z            (z),//0 sign
 99     .c            (c),//Output carry flag
100     .v            (v),//Output overflow flag
101     .a            (din_mem)//Accumulator output register dout
102 
103     );
104     
105 
106 endmodule
cpu_top.v

Now we simulate the design to check whether the logic works as expected. The QuestaSim tool is used here. Its Windows and Linux versions are easy to obtain, and its support for SystemVerilog and UVM is good, which makes it a first choice for self-study of chip design. Only a simple testbench is written, which toggles the clock and the reset.

// Compiler directives take no terminating semicolon.
`timescale 1ns/1ps

// Minimal testbench for cpu_top: generates the clock, pulses the reset once,
// lets the CPU run through the sample program and then stops the simulation.
// There is no self-checking; results are inspected in the waveform viewer.
module tb_top;

    parameter T = 10;                // clock period in ns

    // The controller spends 6 clock cycles on every instruction and the
    // sample program in memory is 10 words long (addresses 0..9), so the run
    // time after reset is sized to cover the whole program. The earlier
    // fixed 20 ns stopped before the first instruction had completed.
    parameter STEPS_PER_INSTR = 6;
    parameter NUM_INSTR       = 10;

    logic clock;
    logic reset;
    logic n, z, c, v;

    initial begin : clock_toggle
        clock = 1;
        forever begin
            #(T/2.0);
            clock = ~clock;
        end
    end

    initial begin
        reset = 0;
        #1;
        reset = 1;                   // asynchronous reset pulse, one period long
        #T;
        reset = 0;
        #(T * STEPS_PER_INSTR * NUM_INSTR);
        $stop;
    end

    cpu_top DUT
    (
        .clock (clock),
        .reset (reset),
        .n     (n),   // negative flag
        .z     (z),   // zero flag
        .c     (c),   // carry flag
        .v     (v)    // overflow flag
    );

endmodule
testbench.sv

The controller's step counter (cnt) counts from 0 to 5. In each counting cycle, the bits of the enable signal s are pulled high one after another, indicating which operation step is currently being carried out. Let's take the third cycle as an example:

s5: read the data at memory address 'h1, which is 'b010_01100

s4: the word 8'h4c is decoded: the upper three bits give the operation code 3'h2 (ADD), and the lower five bits give the operand address 5'h0c

s3: read the data in the 5'h0c address

s2: call the ALU to add the previous result and the data just read from memory, and output the sum on din_mem: 'h95 + 'h65 = 'hfa

s1: because the operation code does not involve a write, nothing happens in this clock cycle

s0: the PC is incremented by 1 to prepare for the next instruction cycle

This "CPU" is so simple that it can hardly do anything, but it is very helpful for people who are coming into contact with the subject for the first time. The instruction set of a modern CPU is very large, and a modern CPU also contains registers, bus units and other special hardware logic, so there is a lot more to learn. From the perspective of applications, it is more important to master the structure and principle of an MCU at a higher level.

Posted by cottonbuds2005 on Mon, 03 Feb 2020 01:53:57 -0800