Understand the structure and working principle of CPU in the code
Keywords:
Verilog
Windows
Linux
I. Preface
From the beginning of graduate school through my first half year of work, I have been working with MCU/SoC controllers that have a CPU at their core, but because my specialty lies elsewhere, I had no idea about the internal structure and working principle of a CPU. Today a blog post cleared up this blind spot for me. I hereby declare that the design idea and the code in this article all come from the following blog post, and are used here only to record what I learned and to share the experience.
Design and implementation of simple CPU
https://blog.csdn.net/weixin_36077867/article/details/82286612
II. The structure and working principle of a simple CPU
Use the structure chart in the original:
The CPU core modules include the controller, program counter (PC), memory, decoder and ALU. The controller is responsible for commanding and scheduling the work of the other modules: for every instruction it steps through a fixed sequence of stages, fetch instruction -> decode -> execute, and then advances the PC. Fetching reads from memory the data at the address the PC points to; that data is then passed to the decoder, which splits it into an operation code and an operand address. Finally, according to the operation code, the ALU performs the arithmetic or logical operation, and the result is saved to the address specified in memory. The content of the memory is the bit stream that an IDE produces from a high-level language such as C/C++ when we work with a single-chip microcomputer. It includes code instructions, temporary variables and all data values that need to be saved.
III. Design code and simulation analysis
The following code makes only a few changes to the code from the original blog post; there are no substantial modifications.
`timescale 1ns / 1ps

// Description:
//   Program counter. Cleared asynchronously by reset; on each rising clock
//   edge with en high it advances by one, wrapping at 2**ADDR_WIDTH.

module PC
#(parameter ADDR_WIDTH = 5)
(
    input                       clock,
    input                       reset,
    input                       en,
    output reg [ADDR_WIDTH-1:0] pc
);

    // Next counter value, truncated to the counter width.
    wire [ADDR_WIDTH-1:0] incremented = pc + 1;

    always @(posedge clock or posedge reset) begin
        if (reset) begin
            pc <= 0;
        end
        else begin
            // Hold the current value unless the controller enables the step.
            if (en)
                pc <= incremented;
        end
    end

endmodule
PC.v
`timescale 1ns / 1ps

// Description:
//   Memory used for storing instructions, temporary variables and
//   initialization data.
//   STA: store A to <addr>
//   LDA: load A from <addr>
//
//   Asynchronous reset loads the demo program and its operands and clears
//   every other word as well as the read register. The program image is
//   encoded for the default 8-bit word (3-bit opcode + 5-bit address) and
//   occupies words 0..12, so ADDR_WIDTH must be at least 4.
//
//   Ports:
//     wr_en - write din to mem[addr] on the rising clock edge
//     rd_en - load dout from mem[addr] on the rising clock edge
//             (ignored while wr_en is high)

module memory
#(
    parameter ADDR_WIDTH = 5,
    parameter DATA_WIDTH = 8
)
(
    input                       clock,
    input                       reset,
    input                       wr_en,
    input                       rd_en,
    input      [ADDR_WIDTH-1:0] addr,
    input      [DATA_WIDTH-1:0] din,
    output reg [DATA_WIDTH-1:0] dout
);

    // Depth follows the address width so every addr value maps to a word.
    localparam DEPTH = 1 << ADDR_WIDTH;

    reg [DATA_WIDTH-1:0] mem [0:DEPTH-1];

    integer i;

    always @(posedge clock, posedge reset) begin
        if (reset) begin
            // Give the read register a defined value before the first read.
            dout <= 0;

            // Clear the whole array first; the program/operand words below
            // are assigned afterwards, so they take precedence.
            for (i = 0; i < DEPTH; i = i + 1)
                mem[i] <= 0;

            mem [0]  <= 'b000_01011; //LDA 01011
            mem [1]  <= 'b010_01100; //ADD 01100
            mem [2]  <= 'b001_01101; //STA 01101
            mem [3]  <= 'b000_01011; //LDA 01011
            mem [4]  <= 'b100_01100; //AND 01100
            mem [5]  <= 'b001_01110; //STA 01110
            mem [6]  <= 'b000_01011; //LDA 01011
            mem [7]  <= 'b011_01100; //SUB 01100
            mem [8]  <= 'b001_01111; //STA 01111
            mem [9]  <= 'b10100000;  //HLT
            mem [11] <= 'b10010101;  // operand at address 01011
            mem [12] <= 'b01100101;  // operand at address 01100
        end
        else begin
            // Write has priority over read.
            if (wr_en)
                mem[addr] <= din;
            else if (rd_en)
                dout <= mem[addr];
        end
    end
endmodule
memory.v
`timescale 1ns / 1ps
// Description:
//   Instruction decoder. On a rising clock edge with en high, it splits the
//   instruction word into
//     opcode - the upper DATA_WIDTH-ADDR_WIDTH bits
//     addr   - the lower ADDR_WIDTH bits
//   Both outputs are cleared asynchronously by reset and hold their value
//   while en is low.
module idec
#(
    parameter DATA_WIDTH = 8,
    parameter ADDR_WIDTH = 5
)
(
    input                                  clock,
    input                                  reset,
    input                                  en,
    input      [DATA_WIDTH-1:0]            instruction, // from memory
    output reg [DATA_WIDTH-ADDR_WIDTH-1:0] opcode,
    output reg [ADDR_WIDTH-1:0]            addr
);
    always @(posedge clock, posedge reset) begin
        if (reset) begin
            opcode <= 0;
            addr   <= 0;
        end
        else if (en) begin
            // Slice by the parameters so the opcode field always matches the
            // width of the opcode port (it is 3 bits only for the defaults).
            opcode <= instruction[DATA_WIDTH-1:ADDR_WIDTH];
            addr   <= instruction[ADDR_WIDTH-1:0];
        end
    end
endmodule
idec.v
`timescale 1ns / 1ps

// Description:
//   Arithmetic logic unit built around an accumulator register a.
//   On a rising clock edge with en high, exactly one operation is performed,
//   selected with this priority: add_en, sub_en, and_en, pass_en.
//     add  : {c,a} <= a + din   (c = carry out)
//     sub  : {c,a} <= a - din   (c = borrow)
//     and  : a     <= a & din   (c and v are held)
//     pass : a     <= din       (c and v are held)
//   Flags:
//     z - accumulator is zero
//     c - carry/borrow of the last add/sub
//     v - two's-complement overflow of the last add/sub
//     n - mirrors c
//   NOTE(review): n follows the carry/borrow flag rather than the sign bit
//   a[OP_WIDTH-1]; this is kept from the original design. Confirm that it
//   is intended.

module alu
#(parameter OP_WIDTH = 8)
(
    input                     clock,
    input                     reset,

    input                     en,
    input                     add_en,  // add operation enable
    input                     sub_en,
    input                     and_en,
    input                     pass_en,
    input      [OP_WIDTH-1:0] din,

    output                    n,       // negative flag
    output                    z,       // zero flag
    output reg                c,       // carry flag
    output                    v,       // overflow flag
    output reg [OP_WIDTH-1:0] a        // accumulator output register
);

    localparam MSB = OP_WIDTH - 1;

    // One extra bit so the carry/borrow is visible.
    wire [OP_WIDTH:0] sum  = {1'b0, a} + {1'b0, din};
    wire [OP_WIDTH:0] diff = {1'b0, a} - {1'b0, din};

    // Overflow is a property of the operation, not of the stored result, so
    // it is registered with the result. The former expression compared the
    // unsigned accumulator with a negative constant, which is evaluated as
    // an unsigned comparison and therefore was always true.
    reg v_r;

    assign v = v_r;
    assign n = (c == 1) ? 1 : 0;
    assign z = (a == 'd0) ? 1 : 0;

    always @(posedge clock or posedge reset) begin
        if (reset) begin
            a   <= 0;
            c   <= 0;
            v_r <= 0;
        end
        else if (en) begin
            if (add_en) begin
                {c, a} <= sum;
                // Same-sign operands producing a result of the other sign.
                v_r    <= (a[MSB] == din[MSB]) && (sum[MSB] != a[MSB]);
            end
            else if (sub_en) begin
                {c, a} <= diff;
                // Different-sign operands and the result's sign left a's.
                v_r    <= (a[MSB] != din[MSB]) && (diff[MSB] != a[MSB]);
            end
            else if (and_en)
                a <= a & din;
            else if (pass_en)
                a <= din;
        end
    end

endmodule
alu.v
`timescale 1ns / 1ps

// Description:
//   Controller. A free-running step counter cycles 0..5 and, for each step,
//   drives a one-hot enable in s plus the memory address select and the ALU
//   operation select:
//     step 0  s = 100_000  fetch: read memory at the program address
//     step 1  s = 010_000  decode
//     step 2  s = 001_000  read operand (LDA/ADD/SUB/AND only)
//     step 3  s = 000_100  ALU operation chosen by instrs
//     step 4  s = 000_010  write accumulator to memory (STA only)
//     step 5  s = 000_001  advance the PC (suppressed for HLT)
//   addr_sel is 0 while the program address is used (steps 0-1) and 1 while
//   the decoded operand address is used (steps 2-5).
//   instrs is one-hot {add, sub, and, pass} and is non-zero only in step 3.

module control#(
    parameter DATA_WIDTH = 8,
    parameter ADDR_WIDTH = 5
)
(
    input                              clock,
    input                              reset,
    input [DATA_WIDTH-ADDR_WIDTH-1:0]  opcode,   // opcode from the decoder

    output reg [6-1:0]                 s,        // per-step enable signals
    output reg                         addr_sel, // program or data address select
    output reg [4-1:0]                 instrs    // ALU operation select
);

    parameter [DATA_WIDTH-ADDR_WIDTH-1:0] LDA = 'b000,
                                          STA = 'b001,
                                          ADD = 'b010,
                                          SUB = 'b011,
                                          AND = 'b100;

    // The demo program ends with opcode 'b101 (HLT).
    localparam [DATA_WIDTH-ADDR_WIDTH-1:0] HLT = 'b101;

    localparam STEPS = 6;

    reg  [3-1:0] cnt;            // step counter, 0..STEPS-1
    wire         add_cnt, end_cnt;

    always @(posedge clock, posedge reset) begin
        if (reset)
            cnt <= 0;
        else if (add_cnt) begin
            if (end_cnt)
                cnt <= 0;
            else
                cnt <= cnt + 1;
        end
    end

    assign add_cnt = 1;
    assign end_cnt = add_cnt && cnt == STEPS-1;

    // Opcodes that need an operand fetched from memory in step 2.
    wire reads_operand = (opcode == LDA) || (opcode == ADD) ||
                         (opcode == SUB) || (opcode == AND);

    always @(*) begin
        // Defaults for every output: without them instrs was left
        // unassigned in steps 1, 2, 4 and 5, which infers a latch.
        s        = 6'b000_000;
        addr_sel = 1'b0;
        instrs   = 4'b0000;

        case (cnt)
            0: begin // fetch instruction
                s = 6'b100_000;
            end
            1: begin // decode
                s = 6'b010_000;
            end
            2: begin // read operand from memory
                addr_sel = 1'b1;
                if (reads_operand)
                    s = 6'b001_000;
            end
            3: begin // ALU operation
                s        = 6'b000_100;
                addr_sel = 1'b1;
                case (opcode)
                    LDA:     instrs = 4'b0001;
                    ADD:     instrs = 4'b1000;
                    SUB:     instrs = 4'b0100;
                    AND:     instrs = 4'b0010;
                    STA:     instrs = 4'b0000;
                    default: instrs = 4'b0000;
                endcase
            end
            4: begin // write accumulator to memory
                addr_sel = 1'b1;
                if (opcode == STA)
                    s = 6'b000_010;
            end
            5: begin // advance the PC
                addr_sel = 1'b1;
                // HLT keeps the PC where it is, so the halt instruction is
                // fetched again instead of running on into the data area.
                if (opcode != HLT)
                    s = 6'b000_001;
            end
            default: begin
                s        = 6'b000_000;
                addr_sel = 1'b0;
                instrs   = 4'b0000;
            end
        endcase
    end

endmodule
control.v
`timescale 1ns / 1ps

// Description:
//   Top level of the simple CPU. Wires the controller, program counter,
//   memory, instruction decoder and ALU together and exposes the ALU flags.
//   Enable bits from the controller are used as follows:
//     bit 5 - memory read (instruction fetch)
//     bit 4 - instruction decoder enable
//     bit 3 - memory read (operand)
//     bit 2 - ALU enable
//     bit 1 - memory write
//     bit 0 - program counter enable

module cpu_top
(
    input  clock,
    input  reset,

    output n, // negative flag
    output z, // zero flag
    output c, // carry flag
    output v  // overflow flag
);

    parameter DATA_WIDTH = 8,
              ADDR_WIDTH = 5;

    // Controller outputs
    wire [6-1:0]                     step_en;
    wire                             addr_sel;
    wire [4-1:0]                     alu_sel;

    // Address sources and the address presented to the memory
    wire [ADDR_WIDTH-1:0]            fetch_addr;    // from the program counter
    wire [ADDR_WIDTH-1:0]            operand_addr;  // from the decoder
    wire [ADDR_WIDTH-1:0]            mem_addr;

    // Data paths
    wire [DATA_WIDTH-1:0]            mem_rdata;     // memory read data
    wire [DATA_WIDTH-1:0]            acc;           // accumulator, also memory write data
    wire [DATA_WIDTH-ADDR_WIDTH-1:0] opcode;

    // The memory is read for the instruction fetch and for the operand fetch.
    wire mem_rd_en = step_en[5] | step_en[3];

    // Operand address when addr_sel is 1, program address otherwise.
    assign mem_addr = (addr_sel == 1'b1) ? operand_addr : fetch_addr;

    PC
    #(.ADDR_WIDTH (ADDR_WIDTH))
    pointer_counter
    (
        .clock (clock),
        .reset (reset),
        .en    (step_en[0]),
        .pc    (fetch_addr)   // code address
    );

    memory
    #(
        .ADDR_WIDTH (ADDR_WIDTH),
        .DATA_WIDTH (DATA_WIDTH)
    )
    memory
    (
        .clock (clock),
        .reset (reset),
        .wr_en (step_en[1]),
        .rd_en (mem_rd_en),
        .addr  (mem_addr),
        .din   (acc),
        .dout  (mem_rdata)
    );

    idec
    #(
        .DATA_WIDTH (DATA_WIDTH),
        .ADDR_WIDTH (ADDR_WIDTH)
    )
    instr_decoder
    (
        .clock       (clock),
        .reset       (reset),
        .en          (step_en[4]),
        .instruction (mem_rdata),     // from memory

        .opcode      (opcode),
        .addr        (operand_addr)   // data address
    );

    alu
    #(.OP_WIDTH (DATA_WIDTH))
    alu
    (
        .clock   (clock),
        .reset   (reset),
        .en      (step_en[2]),
        .add_en  (alu_sel[3]),
        .sub_en  (alu_sel[2]),
        .and_en  (alu_sel[1]),
        .pass_en (alu_sel[0]),
        .din     (mem_rdata),
        .n       (n),
        .z       (z),
        .c       (c),
        .v       (v),
        .a       (acc)                // accumulator output
    );

    control
    #(
        .DATA_WIDTH (DATA_WIDTH),
        .ADDR_WIDTH (ADDR_WIDTH)
    )
    controlor
    (
        .clock    (clock),
        .reset    (reset),
        .opcode   (opcode),    // decoded instruction opcode
        .s        (step_en),   // per-step enables
        .addr_sel (addr_sel),  // program or data address select
        .instrs   (alu_sel)
    );

endmodule
cpu_top.v
Now let's simulate to see whether the logic works as expected. The Questasim tool is used here. Both the Windows and Linux versions of the tool are easy to download, and it has good support for SystemVerilog and UVM, which makes it a first choice for self-study of chip design. Only a simple testbench is written, which toggles the clock and the reset.
`timescale 1ns/1ps

// Minimal testbench for cpu_top: generates the clock, pulses the
// asynchronous reset once, lets the program stored in the memory run, and
// then stops the simulator so the waveforms can be inspected.
module tb_top;

    parameter T = 10;               // clock period in time units

    // The demo program has ten instruction words (including HLT) and the
    // controller spends six clock cycles on each instruction.
    parameter INSTR_COUNT     = 10;
    parameter STEPS_PER_INSTR = 6;

    logic clock;
    logic reset;
    logic n, z, c, v;

    initial begin : clock_toggle
        clock = 1;
        forever begin
            #(T/2.0);
            clock = ~clock;
        end
    end

    initial begin
        reset = 0;
        #1;
        reset = 1;
        #T;
        reset = 0;
        // Run long enough for every instruction to complete; stopping 20
        // time units after reset ends before the first instruction is done.
        #(INSTR_COUNT * STEPS_PER_INSTR * T);
        $stop;
    end

    cpu_top DUT
    (
        .clock (clock),
        .reset (reset),
        .n     (n), // negative flag
        .z     (z), // zero flag
        .c     (c), // carry flag
        .v     (v)  // overflow flag
    );

endmodule
testbench.sv
The controller's step counter counts from 0 to 5. In each counting cycle, the bits of the enable signal s are pulled high one after another, indicating which operation step is currently being carried out. Let's take the third cycle as an example:
s5: read the data at memory address 'h1, which is 8'b010_01100
s4: 8'h4c is obtained; the upper three bits give the operation code 3'h2 (ADD), and the lower five bits give the operand address 5'h0c
s3: read the data in the 5'h0c address
s2: call the ALU to add the previous result (the accumulator) and the data just read from memory; the sum appears on din_mem. 'h95+'h65='hfa
s1: because the operation code does not include writing, the current clock does not operate
s0:PC plus 1 to prepare for the next instruction cycle
This "CPU" is so simple that it can hardly do anything, but it is very helpful for people who are just getting to know the subject. The instruction set of a modern CPU is very large, and a modern CPU also contains registers, bus units and other special hardware logic, so there is a lot more to learn. From the application point of view, it is more important to master the structure and principles of the MCU at a higher level.
Posted by cottonbuds2005 on Mon, 03 Feb 2020 01:53:57 -0800