verilog11、pipeline 学习笔记
文章目录pipeline 总结1、[简易流水线实现](https://zhuanlan.zhihu.com/p/56317767)不用流水线方式流水线方式2、[ valid/ready 信号结合pipeline](https://blog.csdn.net/rill_zhen/article/details/45980039)3、[流水线设计高速乘法器(移位实现)](https://www.run
文章目录
pipeline 总结
1、简易流水线实现
参考:https://zhuanlan.zhihu.com/p/56317767
不用流水线方式
// Non-pipelined version: both multiplications, the subtraction and the two
// additions are evaluated in a single combinational expression and the
// result is registered into y on the same clock edge.
always@(posedge clk or negedge rst_n) begin
    if(!rst_n)
        y <= 1'b0;  // asynchronous, active-low reset
    else
        // The terminating semicolon was missing in the original statement.
        y <= a_reg * b_reg + c_reg * d_reg + (e_reg - f_reg);
end
流水线方式
// Pipeline stage 1: register the two products and the difference separately,
// so each register is fed by a single arithmetic operation.
always@(posedge clk or negedge rst_n) begin
if(!rst_n) begin
// asynchronous, active-low reset clears all stage-1 registers
mux_pre_1 <= 'b0;
mux_pre_2 <= 'b0;
sub_pre_1 <= 'b0;
end
else begin
mux_pre_1 <= a_reg * b_reg;
mux_pre_2 <= c_reg * d_reg;
sub_pre_1 <= e_reg - f_reg;
end
end
// Pipeline stage 2: add the two registered products and delay the registered
// difference by one more cycle so that both paths stay aligned.
always@(posedge clk or negedge rst_n) begin
    if(!rst_n) begin
        add_pre_1 <= 'b0;
        sub_pre_2 <= 'b0;
    end
    else begin
        // The original read "mux_pre1", which is not the register written by
        // stage 1 (mux_pre_1).
        add_pre_1 <= mux_pre_1 + mux_pre_2;
        sub_pre_2 <= sub_pre_1;
    end
end
// Pipeline stage 3: final addition of the stage-2 sum and the delayed
// difference; add_pre_2 holds the result of the whole expression.
always@(posedge clk or negedge rst_n) begin
if(!rst_n)
add_pre_2 <= 'b0;
else
add_pre_2 <= add_pre_1 + sub_pre_2;
end
仿真结果
2、 valid/ready 信号结合pipeline
参考:https://blog.csdn.net/rill_zhen/article/details/45980039
// Three processing stages (Mpa -> Mpb -> Mpc) chained with valid/ready
// signals: valid and data travel forward, ready travels backward.
module Mpipeline(
    input        clk,
    input        rst_n,
    input        en_i,
    input  [7:0] data_i,
    output       en_o,
    output [7:0] data_o,
    output       idle);

    // Link between stage A and stage B
    wire       a2b_valid;
    wire [7:0] a2b_data;
    wire       b2a_ready;

    // Link between stage B and stage C
    wire       b2c_valid;
    wire [7:0] b2c_data;
    wire       c2b_ready;

    // ready_o of the first stage; not connected to any port of this module
    wire       a_ready;

    Mpa pa(
        .clk     (clk),
        .rst_n   (rst_n),
        // upstream side
        .valid_i (en_i),
        .data_i  (data_i),
        .ready_o (a_ready),
        // downstream side
        .valid_o (a2b_valid),
        .data_o  (a2b_data),
        .ready_i (b2a_ready)
    );

    Mpb pb(
        .clk     (clk),
        .rst_n   (rst_n),
        // upstream side
        .valid_i (a2b_valid),
        .data_i  (a2b_data),
        .ready_o (b2a_ready),
        // downstream side
        .valid_o (b2c_valid),
        .data_o  (b2c_data),
        .ready_i (c2b_ready)
    );

    Mpc pc(
        .clk     (clk),
        .rst_n   (rst_n),
        // upstream side
        .valid_i (b2c_valid),
        .data_i  (b2c_data),
        .ready_o (c2b_ready),
        // downstream side: the last stage is always told the consumer is ready
        .valid_o (en_o),
        .data_o  (data_o),
        .ready_i (1'b1)
    );

    // idle is high only when no stage is presenting valid output
    assign idle = ~(a2b_valid | b2c_valid | en_o);

endmodule
// One pipeline stage: when valid_i is high, data_i + 1 is captured and
// presented one cycle later with valid_o high. ready_i is passed straight
// through to ready_o.
module Mpa(
    input        clk,
    input        rst_n,
    input        valid_i,  // from previous stage
    input  [7:0] data_i,   // from previous stage
    input        ready_i,  // from next stage
    output       ready_o,  // to previous stage
    output       valid_o,  // to next stage
    output [7:0] data_o    // to next stage
);

    reg       valid_q;
    reg [7:0] data_q;

    // Stage-specific operation.
    // module Mpb: assign result = data_i << 1'b1;
    // module Mpc: assign result = data_i - 1'b1;
    wire [7:0] result;
    assign result = data_i + 1'b1;

    // Synchronous, active-low reset; valid and data share one process.
    always @(posedge clk) begin
        if (~rst_n) begin
            valid_q <= 1'b0;
            data_q  <= 8'b0;
        end
        else begin
            if (valid_i) begin
                valid_q <= 1'b1;
                data_q  <= result;  // data register only updates on valid input
            end
            else if (~valid_i) begin
                valid_q <= 1'b0;
            end
        end
    end

    assign data_o  = data_q;
    assign valid_o = valid_q;
    assign ready_o = ready_i;

endmodule
testbench
// Testbench for Mpipeline: applies reset, then feeds four consecutive valid
// bytes (1, 2, 3, 4) and lets the pipeline drain before finishing.
module Ttb;

    reg        clk;
    reg        rst_n;
    reg        en_i_r;
    reg  [7:0] data_i_r;
    wire       en_o;
    wire [7:0] data_o;
    wire       idle;

    integer beat;

    Mpipeline pipeline
    (
        .clk    (clk),
        .rst_n  (rst_n),
        .en_i   (en_i_r),
        .data_i (data_i_r),
        .en_o   (en_o),
        .data_o (data_o),
        .idle   (idle)
    );

    // Free-running clock: toggles every 5 time units, first rising edge at 5.
    always #5 clk = ~clk;

    initial begin
        clk      = 1'b0;
        rst_n    = 1'b0;
        en_i_r   = 1'b0;
        data_i_r = 8'b0;

        // Hold reset for ten rising edges, then idle for ten more.
        repeat(10) @(posedge clk);
        rst_n = 1'b1;
        repeat(10) @(posedge clk);

        // One valid byte per clock: 1, 2, 3, 4.
        for (beat = 1; beat <= 4; beat = beat + 1) begin
            @(posedge clk);
            en_i_r   <= 1'b1;
            data_i_r <= beat[7:0];
        end

        // Deassert valid and wait for the pipeline to empty.
        @(posedge clk);
        en_i_r   <= 1'b0;
        data_i_r <= 8'h0;

        repeat(10) @(posedge clk);
        $finish;
    end

endmodule
思路:
- 将几个结构相似的模块串联起来,前一个模块的输出作为后一个模块的输入,前一个模块的输出有效信号 valid_o 作为后一个模块的输入有效信号valid_i
3、流水线设计高速乘法器(移位实现)
参考:https://www.runoob.com/w3cnote/verilog-pipeline-design.html
直接使用 * 运算符或调用 IP 核实现乘法,综合出的电路性能未必理想;用移位累加的方式实现乘法器,更适合硬件实现,速度也更高。
设计原理
多比特数相乘,相当于被乘数按照乘数对应bit位进行移位累加
乘法器设计 - 非流水线
思路:选位数较少的操作数作为乘数,并将被乘数零扩展到与结果相同的位宽。从乘数最低位开始逐位判断:该位为 1 时,部分积为左移对齐后的被乘数(部分积的最低位与乘数当前位对齐);该位为 0 时,部分积为 0。最后把所有部分积累加起来即得到乘积。
// Sequential shift-and-add multiplier (non-pipelined).
//
// One bit of mult2 is consumed per clock. A multiplication started by
// data_rdy takes M cycles of accumulation; on the following cycle (cnt == M)
// res_rdy is high for one clock and res carries mult1 * mult2.
//
// Parameters:
//   N - width of the multiplicand mult1
//   M - width of the multiplier mult2 (also the number of iterations)
module mult_low
    #(parameter N=4,
      parameter M=4
     )(
      input                     clk,
      input                     rstn,
      input                     data_rdy ,  // input operands valid
      input [N-1:0]             mult1,      // multiplicand
      input [M-1:0]             mult2,      // multiplier
      output                    res_rdy ,   // result valid
      output [N+M-1:0]          res         // product
      );

    // Iteration counter: counts 0 .. M and wraps back to 0.
    reg [31:0]           cnt ;
    wire [31:0]          cnt_temp = (cnt == M)? 'b0 : cnt + 1'b1 ;
    always @(posedge clk or negedge rstn) begin
        if (!rstn) begin
            cnt    <= 'b0 ;
        end
        else if (data_rdy) begin    // start / keep counting while input is valid
            cnt    <= cnt_temp ;
        end
        else if (cnt != 0 ) begin   // keep going even if data_rdy was only a short pulse
            cnt    <= cnt_temp ;
        end
        else begin
            cnt    <= 'b0 ;
        end
    end

    // Multiplicand zero-extended to the product width. The upper padding is
    // M bits wide so that the vector is exactly M+N bits.
    wire [M+N-1:0]       mult1_ext = {{(M){1'b0}}, mult1} ;

    // Shift-and-add datapath
    reg [M-1:0]          mult2_shift ;  // remaining multiplier bits, LSB first
    reg [M+N-1:0]        mult1_shift ;  // multiplicand aligned to the current bit
    reg [M+N-1:0]        mult1_acc ;    // running sum of partial products
    always @(posedge clk or negedge rstn) begin
        if (!rstn) begin
            // The original cleared mult2_shift twice here and never reset
            // mult1_shift.
            mult1_shift    <= 'b0 ;
            mult2_shift    <= 'b0 ;
            mult1_acc      <= 'b0 ;
        end
        else if (data_rdy && cnt=='b0) begin  // load operands and handle bit 0
            mult1_shift    <= mult1_ext << 1 ;
            mult2_shift    <= mult2 >> 1 ;
            mult1_acc      <= mult2[0] ? mult1_ext : 'b0 ;
        end
        else if (cnt != M) begin
            mult1_shift    <= mult1_shift << 1 ;  // multiplicand * 2
            mult2_shift    <= mult2_shift >> 1 ;  // expose the next multiplier bit
            // accumulate only when the current multiplier bit is 1
            mult1_acc      <= mult2_shift[0] ? mult1_acc + mult1_shift : mult1_acc ;
        end
        else begin
            // cnt == M: the result is being captured below, clear the datapath
            mult1_shift    <= 'b0 ;
            mult2_shift    <= 'b0 ;
            mult1_acc      <= 'b0 ;
        end
    end

    // Result register: valid for exactly one cycle after cnt reaches M.
    reg [M+N-1:0]        res_r ;
    reg                  res_rdy_r ;
    always @(posedge clk or negedge rstn) begin
        if (!rstn)  begin
            res_r          <= 'b0 ;
            res_rdy_r      <= 'b0 ;
        end
        else if (cnt == M) begin
            res_r          <= mult1_acc ;
            res_rdy_r      <= 1'b1 ;
        end
        else begin
            res_r          <= 'b0 ;
            res_rdy_r      <= 'b0 ;
        end
    end

    assign res_rdy       = res_rdy_r;
    assign res           = res_r;

endmodule
testbench
`timescale 1ns/1ns
// Testbench for the sequential multiplier mult_low: issues five
// multiplications back to back, each one started only after the previous
// result has been produced.
module test ;
    parameter    N = 8 ;
    parameter    M = 4 ;
    reg          clk, rstn;

    //clock: period of 10 time units
    always begin
        clk = 0 ; #5 ;
        clk = 1 ; #5 ;
    end

    //reset: active low, released at time 8
    initial begin
        rstn      = 1'b0 ;
        #8 ;      rstn      = 1'b1 ;
    end

    //=============================================//
    //no pipeline
    reg                  data_rdy_low ;
    reg [N-1:0]          mult1_low ;
    reg [M-1:0]          mult2_low ;
    wire [M+N-1:0]       res_low ;
    wire                 res_rdy_low ;

    // Drive one multiplication: wait until the DUT is not presenting a
    // result, pulse data_rdy for one clock with the operands applied on the
    // falling edge, then block until the result-valid flag rises.
    task mult_data_in ;
        input [M+N-1:0]   mult1_task, mult2_task ;
        begin
            wait(!test.u_mult_low.res_rdy) ;  //not output state
            @(negedge clk ) ;
            data_rdy_low = 1'b1 ;
            mult1_low = mult1_task ;
            mult2_low = mult2_task ;
            @(negedge clk ) ;
            data_rdy_low = 1'b0 ;
            wait(test.u_mult_low.res_rdy) ;   //test the output state
        end
    endtask

    //driver
    initial begin
        // Give the DUT inputs known values before the first transaction.
        data_rdy_low = 1'b0 ;
        mult1_low    = 'b0 ;
        mult2_low    = 'b0 ;
        #55 ;
        mult_data_in(25, 5 ) ;
        mult_data_in(16, 10 ) ;
        mult_data_in(10, 4 ) ;
        mult_data_in(15, 7) ;
        mult_data_in(215, 9) ;
    end

    mult_low  #(.N(N), .M(M))
    u_mult_low
    (
      .clk              (clk),
      .rstn             (rstn),
      .data_rdy         (data_rdy_low),
      .mult1            (mult1_low),
      .mult2            (mult2_low),
      .res_rdy          (res_rdy_low),
      .res              (res_low));

    //simulation finish: checked every 100 time units
    initial begin
        forever begin
            #100;
            if ($time >= 10000)  $finish ;
        end
    end

endmodule // test
仿真结果
乘法器设计- 流水线
思路:把"一次移位 + 一次部分积累加"作为流水线的基本单元。每一级根据乘数当前最低位决定是否把被乘数累加到部分积上,然后将被乘数左移一位(与乘数的下一位对齐)、乘数右移一位(右移后的最低位供下一级判断),并把这些结果传给下一级;经过 M 级后得到最终结果。
// 单次累加计算
// One shift-and-add step of the pipelined multiplier.
//
// While en is high the cell looks at bit 0 of mult2, adds mult1 to the
// incoming accumulator when that bit is set, and registers the shifted
// operands for the next cell. While en is low all outputs are cleared.
module mult_cell
    #(parameter N=4,
      parameter M=4)
    (
      input                     clk,
      input                     rstn,
      input                     en,
      input [M+N-1:0]           mult1,       // multiplicand, aligned to the current bit
      input [M-1:0]             mult2,       // multiplier, current bit in position 0
      input [M+N-1:0]           mult1_acci,  // accumulated sum from the previous step

      output reg [M+N-1:0]      mult1_o,     // multiplicand shifted left by one
      output reg [M-1:0]        mult2_shift, // multiplier shifted right by one
      output reg [N+M-1:0]      mult1_acco,  // accumulated sum after this step
      output reg                rdy );

    always @(posedge clk or negedge rstn) begin
        if (!rstn) begin
            mult2_shift    <= 'b0 ;
            mult1_acco     <= 'b0 ;
            mult1_o        <= 'b0 ;
            rdy            <= 'b0 ;
        end
        else begin
            // Defaults: outputs are cleared whenever the cell is not enabled.
            mult2_shift    <= 'b0 ;
            mult1_acco     <= 'b0 ;
            mult1_o        <= 'b0 ;
            rdy            <= 'b0 ;

            if (en) begin
                rdy            <= 1'b1 ;
                mult1_o        <= mult1 << 1 ;
                mult2_shift    <= mult2 >> 1 ;
                // Pass the accumulator through, adding the multiplicand only
                // when the current multiplier bit is 1.
                mult1_acco     <= mult1_acci ;
                if (mult2[0]) begin
                    mult1_acco     <= mult1_acci + mult1 ;
                end
            end
        end
    end

endmodule
// 顶层模块
// Pipelined shift-and-add multiplier: M mult_cell stages in series, one per
// multiplier bit. The last stage's rdy and accumulator drive res_rdy and res.
module mult_man
    #(parameter N=4,
      parameter M=4)
    (
      input                     clk,
      input                     rstn,
      input                     data_rdy ,
      input [N-1:0]             mult1,
      input [M-1:0]             mult2,
      output                    res_rdy ,
      output [N+M-1:0]          res );

    // Per-stage outputs; index k holds what stage k registered.
    wire [N+M-1:0]       mcand_stage  [M-1:0] ;  // shifted multiplicand
    wire [M-1:0]         mplier_stage [M-1:0] ;  // shifted multiplier
    wire [N+M-1:0]       acc_stage    [M-1:0] ;  // accumulated partial sum
    wire [M-1:0]         rdy_stage ;             // stage output valid

    // Inputs of the first stage: zero-extended multiplicand, empty accumulator.
    wire [N+M-1:0]       mcand_init = {{(M){1'b0}}, mult1} ;
    wire [N+M-1:0]       acc_init   = {(N+M){1'b0}} ;

    // Stage 0 is fed from the module ports, so it is instantiated outside
    // the generate loop.
    mult_cell      #(.N(N), .M(M))
    u_mult_step0
    (
      .clk              (clk),
      .rstn             (rstn),
      .en               (data_rdy),
      .mult1            (mcand_init),
      .mult2            (mult2),
      .mult1_acci       (acc_init),
      //output
      .rdy              (rdy_stage[0]),
      .mult1_o          (mcand_stage[0]),
      .mult2_shift      (mplier_stage[0]),
      .mult1_acco       (acc_stage[0]) );

    // Stages 1 .. M-1: each stage is fed by the outputs of the stage before it.
    genvar               i ;
    generate
        for(i=1; i<M; i=i+1) begin: mult_stepx
            mult_cell      #(.N(N), .M(M))
            u_mult_step
            (
              .clk              (clk),
              .rstn             (rstn),
              .en               (rdy_stage[i-1]),
              .mult1            (mcand_stage[i-1]),
              .mult2            (mplier_stage[i-1]),
              .mult1_acci       (acc_stage[i-1]),
              //output
              .rdy              (rdy_stage[i]),
              .mult1_o          (mcand_stage[i]),
              .mult2_shift      (mplier_stage[i]),
              .mult1_acco       (acc_stage[i]) );
        end
    endgenerate

    assign res           = acc_stage[M-1] ;
    assign res_rdy       = rdy_stage[M-1] ;

endmodule
testbench 关键部分
// Key part of the testbench for the pipelined multiplier mult_man.
// clk, rstn, N and M are not declared in this fragment and come from the
// enclosing testbench module.
reg data_rdy ;
reg [N-1:0] mult1 ;
reg [M-1:0] mult2 ;
wire res_rdy ;
wire [N+M-1:0] res ;
//driver
// data_rdy is raised once and never lowered in this block; a new operand
// pair is applied every 10 time units.
initial begin
#55 ;
@(negedge clk ) ;
data_rdy = 1'b1 ;
mult1 = 25; mult2 = 5;
#10 ; mult1 = 16; mult2 = 10;
#10 ; mult1 = 10; mult2 = 4;
#10 ; mult1 = 15; mult2 = 7;
// For each multiplier value below, mult1 is incremented 32 times.
mult2 = 7; repeat(32) #10 mult1 = mult1 + 1 ;
mult2 = 1; repeat(32) #10 mult1 = mult1 + 1 ;
mult2 = 15; repeat(32) #10 mult1 = mult1 + 1 ;
mult2 = 3; repeat(32) #10 mult1 = mult1 + 1 ;
mult2 = 11; repeat(32) #10 mult1 = mult1 + 1 ;
mult2 = 4; repeat(32) #10 mult1 = mult1 + 1 ;
mult2 = 9; repeat(32) #10 mult1 = mult1 + 1 ;
end
// Delay the input operands through an M-deep shift register so they can be
// compared against the result later.
reg [N-1:0] mult1_ref [M-1:0];
reg [M-1:0] mult2_ref [M-1:0];
always @(posedge clk) begin
mult1_ref[0] <= mult1 ;
mult2_ref[0] <= mult2 ;
end
genvar i;
generate
for(i=1; i<=M-1; i=i+1) begin
always @(posedge clk) begin
mult1_ref[i] <= mult1_ref[i-1];
mult2_ref[i] <= mult2_ref[i-1];
end
end
endgenerate
// Self-check: error_flag is set when res_rdy is high and res differs from
// the product of the delayed reference operands.
reg error_flag ;
always @(posedge clk) begin
// NOTE(review): the 1-unit delay presumably lets this edge's register
// updates settle before the comparison - confirm.
# 1 ;
if (mult1_ref[M-1] * mult2_ref[M-1] != res && res_rdy) begin
error_flag <= 1'b1 ;
end
else begin
error_flag <= 1'b0 ;
end
end
//module instantiation
mult_man #(.N(N), .M(M))
u_mult(
.clk (clk),
.rstn (rstn),
.data_rdy (data_rdy),
.mult1 (mult1),
.mult2 (mult2),
.res_rdy (res_rdy),
.res (res));
仿真结果
开放原子开发者工作坊旨在鼓励更多人参与开源活动,与志同道合的开发者们相互交流开发经验、分享开发心得、获取前沿技术趋势。工作坊有多种形式的开发者活动,如meetup、训练营等,主打技术交流,干货满满,真诚地邀请各位开发者共同参与!
更多推荐
所有评论(0)