Skip to content

Commit

Permalink
Optimize shift operations
Browse files Browse the repository at this point in the history
  • Loading branch information
olofk committed Jan 18, 2021
1 parent d5febe8 commit 5e4181d
Show file tree
Hide file tree
Showing 10 changed files with 40 additions and 72 deletions.
6 changes: 3 additions & 3 deletions doc/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,11 @@ serv_alu

.. image:: serv_alu.png

serv_alu handles ALU and shift operations. The first input operand (A) comes from i_rs1 and the second operand (B) comes from i_rs2 or i_imm depending on the type of operation. The data passes through one or more of the add/sub, shift control or bool logic units and finally ends up in o_rd to be written to the destination register. The output o_cmp is used for conditional branches to decide whether or not to take the branch.
serv_alu handles alu and shift operations. The first input operand (A) comes from i_rs1 and the second operand (B) comes from i_rs2 or i_imm depending on the type of operation. The data passes through the add/sub or bool logic unit and finally ends up in o_rd to be written to the destination register. The output o_cmp is used for conditional branches to decide whether or not to take the branch.

The add/sub unit can do additions A+B or subtractions A-B by converting it to A+B̅+1. Subtraction mode (i_sub = 1) is also used for the comparisons in the slt* and conditional branch instructions. Finally, it is also used to negate the B operand for left shifts by clearing the A operand (i_shift_op = 1). The +1 used in subtraction mode is done by preloading the carry input with 1. Less-than comparisons are handled by converting the expression A<B to A-B<0 and checking the MSB, which will be set when the result is less than 0. This, however, requires sign-extending the operands to 33-bit inputs. For signed operands (when i_cmp_sig is set), the extra bit is the same as the MSB. For unsigned, the extra bit is always 0. Because the ALU is only active for 32 cycles, the 33rd bit must be calculated in parallel to the ordinary addition. The result from this operation is available in result_lt. For equality checks, result_eq checks that all bits are 0 from the subtraction.
The add/sub unit can do additions A+B or subtractions A-B by converting it to A+B̅+1. Subtraction mode (i_sub = 1) is also used for the comparisons in the slt* and conditional branch instructions. The +1 used in subtraction mode is done by preloading the carry input with 1. Less-than comparisons are handled by converting the expression A<B to A-B<0 and checking the MSB, which will be set when the result is less than 0. This, however, requires sign-extending the operands to 33-bit inputs. For signed operands (when i_cmp_sig is set), the extra bit is the same as the MSB. For unsigned, the extra bit is always 0. Because the ALU is only active for 32 cycles, the 33rd bit must be calculated in parallel to the ordinary addition. The result from this operation is available in result_lt. For equality checks, result_eq checks that all bits are 0 from the subtraction.

For shift operations, the data to be shifted resides in bufreg. The shift control unit in the ALU keeps track of how many steps to shift the bufreg and sign-extends/zero-pads the shifted data depending on the type (arithmetic/logical, right/left) of shift operation.
For shift operations, the data to be shifted resides in bufreg. The shift control unit in the ALU keeps track of how many steps to shift the bufreg.

.. image:: serv_alu_int.png

Expand Down
Binary file modified doc/serv_alu_int.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified doc/serv_bufreg_int.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
37 changes: 11 additions & 26 deletions rtl/serv_alu.v
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,19 @@ module serv_alu
(
input wire clk,
//State
input wire i_init,
input wire i_en,
input wire i_cnt0,
input wire i_cnt_done,
input wire i_shamt_en,
output wire o_cmp,
output wire o_sh_done,
output wire o_sh_done_r,
//Control
input wire i_shift_op,
input wire i_op_b_rs2,
input wire i_sub,
input wire [1:0] i_bool_op,
input wire i_cmp_eq,
input wire i_cmp_sig,
input wire i_sh_right,
input wire i_sh_signed,
input wire [3:0] i_rd_sel,
//Data
input wire i_rs1,
Expand All @@ -28,41 +26,26 @@ module serv_alu

wire result_add;
wire result_eq;
wire result_sh;

reg result_lt_r;
reg eq_r;

reg [4:0] shamt;
reg shamt_msb;
reg [5:0] shamt_r;

wire add_cy;
reg add_cy_r;

wire op_b = i_op_b_rs2 ? i_rs2 : i_imm;

serv_shift shift
(
.i_clk (clk),
.i_load (i_cnt_done),
.i_shamt (shamt),
.i_shamt_msb (shamt_msb),
.i_signbit (i_sh_signed & i_rs1),
.i_right (i_sh_right),
.o_done (o_sh_done),
.i_d (i_buf),
.o_q (result_sh));

//Sign-extended operands
wire rs1_sx = i_rs1 & i_cmp_sig;
wire op_b_sx = op_b & i_cmp_sig;

wire result_lt = rs1_sx + ~op_b_sx + add_cy;

wire add_a = i_rs1 & ~i_shift_op;
wire add_b = op_b^i_sub;

assign {add_cy,result_add} = add_a+add_b+add_cy_r;
assign {add_cy,result_add} = i_rs1+add_b+add_cy_r;

assign result_eq = !result_add & eq_r;

Expand All @@ -72,11 +55,15 @@ module serv_alu
wire result_bool = BOOL_LUT[{i_bool_op, i_rs1, op_b}];

assign o_rd = (i_rd_sel[0] & result_add) |
(i_rd_sel[1] & result_sh) |
(i_rd_sel[1] & i_buf) |
(i_rd_sel[2] & result_lt_r & i_cnt0) |
(i_rd_sel[3] & result_bool);


wire [5:0] shamt = i_init ? {1'b0,op_b,shamt_r[4:1]} : shamt_r-1;
assign o_sh_done = shamt[5];
assign o_sh_done_r = shamt_r[5];

always @(posedge clk) begin
add_cy_r <= i_en ? add_cy : i_sub;

Expand All @@ -85,10 +72,8 @@ module serv_alu
end
eq_r <= result_eq | ~i_en;

if (i_shamt_en) begin
shamt_msb <= add_cy;
shamt <= {result_add,shamt[4:1]};
end
if (i_shamt_en)
shamt_r <= shamt;
end

endmodule
5 changes: 3 additions & 2 deletions rtl/serv_bufreg.v
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ module serv_bufreg
input wire i_rs1_en,
input wire i_imm_en,
input wire i_clr_lsb,
input wire i_sh_signed,
//Data
input wire i_rs1,
input wire i_imm,
Expand All @@ -31,14 +32,14 @@ module serv_bufreg
c_r <= c & i_en;

if (i_en)
data <= {i_init ? q : o_q, data[31:3]};
data <= {i_init ? q : (data[31] & i_sh_signed), data[31:3]};

if (i_init ? (i_cnt0 | i_cnt1) : i_en)
o_lsb <= {i_init ? q : data[2],o_lsb[1]};

end

assign o_q = o_lsb[0];
assign o_q = o_lsb[0] & i_en;
assign o_dbus_adr = {data, 2'b00};

endmodule
14 changes: 11 additions & 3 deletions rtl/serv_decode.v
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,18 @@ module serv_decode
assign o_e_op = opcode[4] & opcode[2] & !op21 & !(|funct3);

//opcode & funct3 & imm30
//True for sub, sll*, b*, slt*
//False for add*, sr*
assign o_alu_sub = (!funct3[2] & (funct3[0] | (opcode[3] & imm30))) | funct3[1] | opcode[4];

/*
True for sub, b*, slt*
False for add*
op opcode f3 i30
b* 11000 xxx x t
addi 00100 000 x f
slt* 0x100 01x x t
add 01100 000 0 f
sub 01100 000 1 t
*/
assign o_alu_sub = funct3[1] | funct3[0] | (opcode[3] & imm30) | opcode[4];

/*
Bits 26, 22, 21 and 20 are enough to uniquely identify the eight supported CSR regs
Expand Down
30 changes: 0 additions & 30 deletions rtl/serv_shift.v

This file was deleted.

9 changes: 6 additions & 3 deletions rtl/serv_state.v
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ module serv_state
input wire i_branch_op,
input wire i_mem_op,
input wire i_shift_op,
input wire i_sh_right,
input wire i_slt_op,
input wire i_e_op,
input wire i_rd_op,
Expand All @@ -37,6 +38,7 @@ module serv_state
input wire i_ctrl_misalign,
output wire o_alu_shamt_en,
input wire i_alu_sh_done,
input wire i_alu_sh_done_r,
output wire o_dbus_cyc,
output wire [1:0] o_mem_bytecnt,
input wire i_mem_misalign,
Expand Down Expand Up @@ -67,7 +69,8 @@ module serv_state
assign cnt4 = (o_cnt[4:2] == 3'd1) & o_cnt_r[0];
assign o_cnt7 = (o_cnt[4:2] == 3'd1) & o_cnt_r[3];

assign o_alu_shamt_en = (o_cnt0to3 | cnt4) & o_init;

assign o_alu_shamt_en = o_cnt0to3 | cnt4 | !o_init;

//Take branch for jump or branch instructions (opcode == 1x0xx) if
//a) It's an unconditional branch (opcode[0] == 1)
Expand All @@ -89,7 +92,7 @@ module serv_state
assign o_rf_rreq = i_ibus_ack | (stage_two_req & trap_pending);

//Prepare RF for writes when everything is ready to enter stage two
assign o_rf_wreq = ((i_shift_op & i_alu_sh_done & init_done) | (i_mem_op & i_dbus_ack) | (stage_two_req & (i_slt_op | i_branch_op))) & !trap_pending;
assign o_rf_wreq = ((i_shift_op & (i_alu_sh_done | !i_sh_right) & init_done) | (i_mem_op & i_dbus_ack) | (stage_two_req & (i_slt_op | i_branch_op))) & !trap_pending;

assign o_rf_rd_en = i_rd_op & o_cnt_en & !o_init;

Expand All @@ -104,7 +107,7 @@ module serv_state
shift : Shift in during phase 1. Continue shifting between phases (except
for the first cycle after init). Shift out during phase 2
*/
assign o_bufreg_en = (o_cnt_en & (o_init | o_ctrl_trap | i_branch_op)) | (!stage_two_req & i_shift_op);
assign o_bufreg_en = (o_cnt_en & (o_init | o_ctrl_trap | i_branch_op)) | (i_shift_op & !stage_two_req & (i_sh_right | i_alu_sh_done_r));

assign o_ibus_cyc = ibus_cyc & !i_rst;

Expand Down
10 changes: 6 additions & 4 deletions rtl/serv_top.v
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ module serv_top
wire alu_sh_signed;
wire alu_sh_right;
wire alu_sh_done;
wire alu_sh_done_r;
wire [3:0] alu_rd_sel;

wire rs1;
Expand Down Expand Up @@ -180,6 +181,7 @@ module serv_top
.i_ctrl_misalign(lsb[1]),
.o_alu_shamt_en (alu_shamt_en),
.i_alu_sh_done (alu_sh_done),
.i_alu_sh_done_r (alu_sh_done_r),
.o_mem_bytecnt (mem_bytecnt),
.i_mem_misalign (mem_misalign),
//Control
Expand All @@ -188,6 +190,7 @@ module serv_top
.i_branch_op (branch_op),
.i_mem_op (mem_op),
.i_shift_op (shift_op),
.i_sh_right (alu_sh_right),
.i_slt_op (slt_op),
.i_e_op (e_op),
.i_rd_op (rd_op),
Expand Down Expand Up @@ -284,6 +287,7 @@ module serv_top
.i_init (init),
.o_lsb (lsb),
//Control
.i_sh_signed (alu_sh_signed),
.i_rs1_en (bufreg_rs1_en),
.i_imm_en (bufreg_imm_en),
.i_clr_lsb (bufreg_clr_lsb),
Expand Down Expand Up @@ -328,20 +332,18 @@ module serv_top
.clk (clk),
//State
.i_en (cnt_en),
.i_init (init),
.i_cnt0 (cnt0),
.i_cnt_done (cnt_done),
.i_shamt_en (alu_shamt_en),
.o_cmp (alu_cmp),
.o_sh_done (alu_sh_done),
.o_sh_done_r (alu_sh_done_r),
//Control
.i_shift_op (shift_op),
.i_op_b_rs2 (op_b_source),
.i_sub (alu_sub),
.i_bool_op (alu_bool_op),
.i_cmp_eq (alu_cmp_eq),
.i_cmp_sig (alu_cmp_sig),
.i_sh_right (alu_sh_right),
.i_sh_signed (alu_sh_signed),
.i_rd_sel (alu_rd_sel),
//Data
.i_rs1 (rs1),
Expand Down
1 change: 0 additions & 1 deletion serv.core
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ filesets:
core:
files:
- rtl/serv_params.vh : {is_include_file : true}
- rtl/serv_shift.v
- rtl/serv_bufreg.v
- rtl/serv_alu.v
- rtl/serv_csr.v
Expand Down

0 comments on commit 5e4181d

Please sign in to comment.