单个PE的代码
`timescale 1ns / 1ps
//
// Company:
// Engineer:
//
// Create Date: 2021/02/10 19:09:28
// Design Name:
// Module Name: systolic_pe
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//
// Processing element of a systolic array: one multiply-accumulate stage.
//
// Parameters:
//   DATA_WIDTH  - bit width of each operand; the accumulator is twice as wide.
//
// Ports:
//   clk         - clock; all state updates on the rising edge.
//   A_shift_in  - operand A arriving this cycle.
//   B_shift_in  - operand B arriving this cycle.
//   clear       - when calc_en is high, load the accumulator with the current
//                 product instead of adding to it (restarts the sum).
//   calc_en     - accumulator update enable; when low the accumulator holds.
//   A_out/B_out - operands delayed by one clock, for the neighbouring element.
//   Sum         - current accumulator value.
//
// There is no reset: the accumulator is only defined after a cycle with both
// calc_en and clear high.
module systolic_pe
#(parameter DATA_WIDTH = 16)
(
    input                       clk,
    input  [DATA_WIDTH-1:0]     A_shift_in,
    input  [DATA_WIDTH-1:0]     B_shift_in,
    input                       clear,      // restart accumulation from the current product
    input                       calc_en,
    output [DATA_WIDTH-1:0]     A_out,
    output [DATA_WIDTH-1:0]     B_out,
    output [2*DATA_WIDTH-1:0]   Sum
);
    logic [DATA_WIDTH-1:0]   a_reg;     // one-cycle delayed copy of A_shift_in
    logic [DATA_WIDTH-1:0]   b_reg;     // one-cycle delayed copy of B_shift_in
    logic [2*DATA_WIDTH-1:0] acc;       // running partial sum
    logic [2*DATA_WIDTH-1:0] product;   // full-width product of this cycle's operands

    // The multiplication is evaluated at the width of 'product', so the
    // upper half of the result is kept.
    assign product = A_shift_in * B_shift_in;

    always_ff @(posedge clk)
    begin
        // The operand pipeline always advances, independent of calc_en.
        a_reg <= A_shift_in;
        b_reg <= B_shift_in;

        // The accumulator only moves while calc_en is asserted.
        if (calc_en)
        begin
            if (clear)
                acc <= product;
            else
                acc <= acc + product;
        end
    end

    assign A_out = a_reg;
    assign B_out = b_reg;
    assign Sum   = acc;
endmodule
PE脉动阵列
`timescale 1ns / 1ps
//
// Company:
// Engineer:
//
// Create Date: 2021/02/10 19:22:59
// Design Name:
// Module Name: systolic_pe_array
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//
// N x N grid of systolic_pe elements with a small run controller.
//
// Parameters:
//   DATA_WIDTH - operand width, forwarded to every processing element.
//   N          - number of rows and columns of processing elements.
//
// Ports:
//   clk, rst   - clock and asynchronous active-high reset of the controller.
//   k          - run-length parameter; a run covers count values 0 .. 2*N+k-3.
//   A_in       - one operand per row, fed into column 0 of the grid.
//   B_in       - one operand per column, fed into row 0 of the grid.
//   start      - restarts the counter and raises busy on the next clock edge.
//   done       - high for the cycle right after busy falls.
//   Sum        - accumulator output of every processing element.
module systolic_pe_array
#(parameter DATA_WIDTH = 16,
  parameter N = 4)
(
    input  logic clk,
    input  logic rst,
    input  [15:0] k,
    input  logic [DATA_WIDTH-1:0] A_in [0:N-1],
    input  logic [DATA_WIDTH-1:0] B_in [0:N-1],
    input  logic start,
    output logic done,
    output logic [2*DATA_WIDTH-1:0] Sum [0:N-1][0:N-1]
);
    // Width of the k port; the counter is sized from it so that the terminal
    // value 2*N+k-3 (and the one extra increment after it) is representable
    // for every k. A fixed 10-bit counter wrapped before reaching the
    // terminal value for large k, leaving busy stuck high.
    localparam int K_WIDTH     = 16;
    localparam int COUNT_WIDTH = $clog2(2*N + (1 << K_WIDTH));

    logic [COUNT_WIDTH-1:0] count;
    logic busy;
    logic busy_ff;
    logic clear;
    logic calc_en;

    // A_shift[i][j] is the A operand entering PE(i,j); its delayed copy
    // leaves into A_shift[i][j+1]. B_shift[i][j] is the B operand entering
    // PE(i,j); its delayed copy leaves into B_shift[i+1][j].
    logic [DATA_WIDTH-1:0] A_shift [0:N-1][0:N];
    logic [DATA_WIDTH-1:0] B_shift [0:N][0:N-1];

    // count: cycle index within the current run
    always_ff @(posedge clk, posedge rst)
        if (rst)
            count <= 0;
        else if (start)
            count <= 0;
        else if (busy)
            count <= count + 1;

    // busy: set by start, cleared on the edge where count holds the last index
    always_ff @(posedge clk, posedge rst)
        if (rst)
            busy <= 0;
        else if (start)
            busy <= 1;
        else if (count == 2*N + k - 3)
            busy <= 0;

    // busy_ff: busy delayed by one clock, used for falling-edge detection
    always_ff @(posedge clk, posedge rst)
        if (rst)
            busy_ff <= 0;
        else
            busy_ff <= busy;

    // done: high while busy_ff is still set and busy has already dropped
    assign done = (busy_ff && ~busy);

    // clear: start delayed by one clock, so it lines up with the first
    // busy cycle and makes every PE load instead of accumulate.
    always_ff @(posedge clk, posedge rst)
        if (rst)
            clear <= 0;
        else if (start)
            clear <= 1;
        else
            clear <= 0;

    // calc_en: accumulators update only while a run is in progress
    assign calc_en = busy;

    genvar i, j;
    generate
        // Boundary inputs. Each element gets its own continuous assignment so
        // that every entry of A_shift / B_shift has exactly one statically
        // selected driver; a procedural loop in always_comb counts as writing
        // the whole array, which clashes with the PE outputs driving the
        // remaining entries.
        for (i = 0; i < N; i++)
        begin : a_feed
            assign A_shift[i][0] = A_in[i];
        end
        for (j = 0; j < N; j++)
        begin : b_feed
            assign B_shift[0][j] = B_in[j];
        end

        // pe_array
        for (i = 0; i < N; i++)
        begin : pe_loop_i
            for (j = 0; j < N; j++)
            begin : pe_loop_j
                // DATA_WIDTH is forwarded so the element width follows the array.
                systolic_pe #(.DATA_WIDTH(DATA_WIDTH)) pe_inst (
                    .clk(clk),
                    .A_shift_in(A_shift[i][j]),
                    .B_shift_in(B_shift[i][j]),
                    .clear(clear),              // restart accumulation
                    .calc_en(calc_en),
                    .A_out(A_shift[i][j+1]),
                    .B_out(B_shift[i+1][j]),
                    .Sum(Sum[i][j])
                );
            end
        end
    endgenerate
endmodule
测试平台
`timescale 1ns / 1ps
//
// Company:
// Engineer:
//
// Create Date: 2021/02/10 19:46:38
// Design Name:
// Module Name: test
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//
// Testbench for systolic_pe_array.
//
// Feeds a N x k matrix A and a k x N matrix B into the array in skewed order
// (row i of A and column j of B are delayed by i and j cycles respectively),
// then compares the array's Sum outputs against a directly computed matrix
// product once 'done' is seen, and ends the simulation.
module test;
    parameter N = 4;
    parameter DATA_WIDTH = 16;
    parameter k = 3;

    // Local copies of the DUT's run counter and busy flag, used to pick the
    // matrix element to present on each cycle.
    logic [9:0] count;
    logic busy;

    logic [DATA_WIDTH-1:0] MatrixA [0:N-1][0:k-1];
    logic [DATA_WIDTH-1:0] MatrixB [0:k-1][0:N-1];
    logic clk;
    logic rst;
    logic start;
    logic done;
    logic [DATA_WIDTH-1:0] A_shift_in [0:N-1];
    logic [DATA_WIDTH-1:0] B_shift_in [0:N-1];
    logic [2*DATA_WIDTH-1:0] Sum [0:N-1][0:N-1];

    // Scratch variables for the result check.
    logic [2*DATA_WIDTH-1:0] expected;
    int errors;

    // MatrixA and B: initialise and print the operands
    initial
    begin
        MatrixA = '{'{1,2,3},'{4,5,6},'{7,8,9},'{10,11,12}};
        MatrixB = '{'{1,2,3,4},'{5,6,7,8},'{9,10,11,12}};
        $display("A:");
        for (int i = 0; i < N; i++)
            for (int j = 0; j < k; j++)
            begin
                if (j == k-1)
                    $write("%d\n", MatrixA[i][j]);
                else
                    $write("%d,", MatrixA[i][j]);
            end
        $display("B:");
        for (int i = 0; i < k; i++)
            for (int j = 0; j < N; j++)
            begin
                if (j == N-1)
                    $write("%d\n", MatrixB[i][j]);
                else
                    $write("%d,", MatrixB[i][j]);
            end
    end

    // clk: 10 ns period
    initial
    begin
        clk = 0;
        forever begin
            #5 clk = ~clk;
        end
    end

    // rst: asserted for the first 10 ns
    initial
    begin
        rst = 1;
        #10
        rst = 0;
    end

    // start: single 10 ns pulse at 50 ns
    initial
    begin
        start = 0;
        #50
        start = 1;
        #10
        start = 0;
    end

    // count: same update rule as the counter inside the DUT
    always_ff @(posedge clk, posedge rst)
        if (rst)
            count <= 0;
        else if (start)
            count <= 0;
        else if (busy)
            count <= count + 1;

    // busy: same update rule as the busy flag inside the DUT
    always_ff @(posedge clk, posedge rst)
        if (rst)
            busy <= 0;
        else if (start)
            busy <= 1;
        else if (count == 2*N + k - 3)
            busy <= 0;

    // A_shift_in: row i presents MatrixA[i][count-i] while that index is in
    // range, and zero otherwise
    always_comb
    begin
        for (int i = 0; i < N; i++)
            A_shift_in[i] = (count >= i && count < k+i) ? MatrixA[i][count-i] : 0;
    end

    // B_shift_in: column j presents MatrixB[count-j][j] while that index is
    // in range, and zero otherwise
    always_comb
    begin
        for (int j = 0; j < N; j++)
            B_shift_in[j] = (count >= j && count < k+j) ? MatrixB[count-j][j] : 0;
    end

    // Result check: wait for a clock edge on which done is high, print the
    // array outputs, compare each against the reference product and stop.
    // Without this the free-running clock keeps the simulation alive forever
    // and the result is never inspected.
    initial
    begin
        errors = 0;
        @(posedge clk iff (done === 1'b1));
        $display("Sum:");
        for (int i = 0; i < N; i++)
            for (int j = 0; j < N; j++)
            begin
                expected = '0;
                for (int m = 0; m < k; m++)
                    expected = expected + MatrixA[i][m] * MatrixB[m][j];
                if (Sum[i][j] !== expected)
                begin
                    errors++;
                    $display("MISMATCH Sum[%0d][%0d] = %0d, expected %0d",
                             i, j, Sum[i][j], expected);
                end
                if (j == N-1)
                    $write("%d\n", Sum[i][j]);
                else
                    $write("%d,", Sum[i][j]);
            end
        if (errors == 0)
            $display("TEST PASSED");
        else
            $display("TEST FAILED: %0d mismatches", errors);
        $finish;
    end

    // inst: parameters are forwarded so the DUT matches this bench's
    // N and DATA_WIDTH instead of silently using its own defaults
    systolic_pe_array #(
        .DATA_WIDTH(DATA_WIDTH),
        .N(N)
    ) U (
        .clk(clk),
        .rst(rst),
        .k(k),
        .A_in(A_shift_in),
        .B_in(B_shift_in),
        .start(start),
        .done(done),
        .Sum(Sum)
    );
endmodule
版权声明:本文为qq_40268672原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。