该算法采用双调排序算法,是一种可流水的递推算法,且算法的消耗时长可算,具体细节参考视频:
module bitonic_sort(
input i_clk ,
input i_rst ,
input [16 * 16 - 1:0] i_buff ,
output [16 * 16 - 1:0] o_buff
);
localparam WIN = 16;
wire signed [15:0] w_data_buff [0:15];
genvar i;
genvar j;
generate
for(i = 0; i < 16; i = i + 1) begin
assign w_data_buff[i] = i_buff[i * 16 + 15:i * 16];
end
endgenerate
/*
双调排序:Bitonic
首先将相邻两个元素作为一个数组:
(0,1)(2,3)(4,5)(6,7)(8,9)(10,11)(12,13)(14,15)
将其按照顺序、逆序交替排序,并两个相邻数组组成一个四元素的双调数组
(0,1,2,3)(4,5,6,7)(8,9,10,11)(12,13,14,15)
通过对每个四元素数组进行2、1间隔排序,使四元素数组按照顺序、逆序交替排序,并两个相邻数组组成一个8元素数组
(0,1,2,3,4,5,6,7)(8,9,10,11,12,13,14,15)
对每个8元素数组进行4、2、1间隔排序,使其按照顺序、逆序交替排序,最终组成16元素的回调数组
(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15)
对回调数组按照8、4、2、1间隔排序,完成顺序或逆序排列
*/
reg signed [15:0] r_bitonic_pipe0 [0:WIN - 1];//间隔1
generate
for(i = 0; i < WIN / 4; i = i + 1) begin
always@(posedge i_clk or posedge i_rst) begin
if(i_rst) begin
r_bitonic_pipe0[i * 4] <= 'd0;
r_bitonic_pipe0[i * 4 + 1]<= 'd0;
end
else begin
if(w_data_buff[i * 4] > w_data_buff[i * 4 + 1]) begin//升序
r_bitonic_pipe0[i * 4] <= w_data_buff[i * 4 + 1];
r_bitonic_pipe0[i * 4 + 1] <= w_data_buff[i * 4];
end
else begin
r_bitonic_pipe0[i * 4] <= w_data_buff[i * 4];
r_bitonic_pipe0[i * 4 + 1] <= w_data_buff[i * 4 + 1];
end
end
end
always@(posedge i_clk or posedge i_rst) begin
if(i_rst) begin
r_bitonic_pipe0[i * 4 + 2]<= 'd0;
r_bitonic_pipe0[i * 4 + 3]<= 'd0;
end
else begin
if(w_data_buff[i * 4 + 2] < w_data_buff[i * 4 + 3]) begin//降序
r_bitonic_pipe0[i * 4 + 2]<= w_data_buff[i * 4 + 3];
r_bitonic_pipe0[i * 4 + 3]<= w_data_buff[i * 4 + 2];
end
else begin
r_bitonic_pipe0[i * 4 + 2]<= w_data_buff[i * 4 + 2];
r_bitonic_pipe0[i * 4 + 3]<= w_data_buff[i * 4 + 3];
end
end
end
end
endgenerate
reg signed [15:0] r_bitonic_pipe1 [0:WIN - 1];//间隔2
generate
for(i = 0; i < WIN / 8; i = i + 1) begin
for(j = 0; j < WIN / 8; j = j + 1) begin
always@(posedge i_clk or posedge i_rst) begin
if(i_rst) begin
r_bitonic_pipe1[i * 8 + j] <= 'd0;
r_bitonic_pipe1[i * 8 + j + 2]<= 'd0;
end
else begin
if(r_bitonic_pipe0[i * 8 + j] > r_bitonic_pipe0[i * 8 + j + 2]) begin//升序
r_bitonic_pipe1[i * 8 + j] <= r_bitonic_pipe0[i * 8 + j + 2];
r_bitonic_pipe1[i * 8 + j + 2] <= r_bitonic_pipe0[i * 8 + j];
end
else begin
r_bitonic_pipe1[i * 8 + j] <= r_bitonic_pipe0[i * 8 + j];
r_bitonic_pipe1[i * 8 + j + 2] <= r_bitonic_pipe0[i * 8 + j + 2];
end
end
end
always@(posedge i_clk or posedge i_rst) begin
if(i_rst) begin
r_bitonic_pipe1[i * 8 + j + 4]<= 'd0;
r_bitonic_pipe1[i * 8 + j + 6]<= 'd0;
end
else begin
if(r_bitonic_pipe0[i * 8 + j + 4] < r_bitonic_pipe0[i * 8 + j + 6]) begin//降序
r_bitonic_pipe1[i * 8 + j + 4]<= r_bitonic_pipe0[i * 8 + j + 6];
r_bitonic_pipe1[i * 8 + j + 6]<= r_bitonic_pipe0[i * 8 + j + 4];
end
else begin
r_bitonic_pipe1[i * 8 + j + 4]<= r_bitonic_pipe0[i * 8 + j + 4];
r_bitonic_pipe1[i * 8 + j + 6]<= r_bitonic_pipe0[i * 8 + j + 6];
end
end
end
end
end
endgenerate
reg signed [15:0] r_bitonic_pipe2 [0:WIN - 1];
generate
for(i = 0; i < WIN / 8; i = i + 1) begin
for(j = 0; j < WIN / 8; j = j + 1) begin
always@(posedge i_clk or posedge i_rst) begin
if(i_rst) begin
r_bitonic_pipe2[i * 8 + j * 2] <= 'd0;
r_bitonic_pipe2[i * 8 + j * 2 + 1] <= 'd0;
end
else begin
if(r_bitonic_pipe1[i * 8 + j * 2] > r_bitonic_pipe1[i * 8 + j * 2 + 1]) begin//升序
r_bitonic_pipe2[i * 8 + j * 2] <= r_bitonic_pipe1[i * 8 + j * 2+ 1];
r_bitonic_pipe2[i * 8 + j * 2 + 1] <= r_bitonic_pipe1[i * 8 + j * 2];
end
else begin
r_bitonic_pipe2[i * 8 + j * 2] <= r_bitonic_pipe1[i * 8 + j * 2];
r_bitonic_pipe2[i * 8 + j * 2 + 1] <= r_bitonic_pipe1[i * 8 + j * 2 + 1];
end
end
end
always@(posedge i_clk or posedge i_rst) begin
if(i_rst) begin
r_bitonic_pipe2[i * 8 + j * 2 + 4] <= 'd0;
r_bitonic_pipe2[i * 8 + j * 2 + 5] <= 'd0;
end
else begin
if(r_bitonic_pipe1[i * 8 + j * 2 + 4] < r_bitonic_pipe1[i * 8 + j * 2 + 5]) begin//降序
r_bitonic_pipe2[i * 8 + j * 2 + 4] <= r_bitonic_pipe1[i * 8 + j * 2 + 5];
r_bitonic_pipe2[i * 8 + j * 2 + 5] <= r_bitonic_pipe1[i * 8 + j * 2 + 4];
end
else begin
r_bitonic_pipe2[i * 8 + j * 2 + 4] <= r_bitonic_pipe1[i * 8 + j * 2 + 4];
r_bitonic_pipe2[i * 8 + j * 2 + 5] <= r_bitonic_pipe1[i * 8 + j * 2 + 5];
end
end
end
end
end
endgenerate
reg signed [15:0] r_bitonic_pipe3 [0:WIN - 1];//间隔4
generate
for(i = 0; i < WIN / 4; i = i + 1) begin
always@(posedge i_clk or posedge i_rst) begin
if(i_rst) begin
r_bitonic_pipe3[i] <= 'd0;
r_bitonic_pipe3[i + 4]<= 'd0;
end
else begin
if(r_bitonic_pipe2[i] > r_bitonic_pipe2[i + 4]) begin//升序
r_bitonic_pipe3[i] <= r_bitonic_pipe2[i + 4];
r_bitonic_pipe3[i + 4] <= r_bitonic_pipe2[i];
end
else begin
r_bitonic_pipe3[i] <= r_bitonic_pipe2[i];
r_bitonic_pipe3[i + 4] <= r_bitonic_pipe2[i + 4];
end
end
end
always@(posedge i_clk or posedge i_rst) begin
if(i_rst) begin
r_bitonic_pipe3[i + 8] <= 'd0;
r_bitonic_pipe3[i + 12]<= 'd0;
end
else begin
if(r_bitonic_pipe2[i + 8] < r_bitonic_pipe2[i + 12]) begin//降序
r_bitonic_pipe3[i + 8] <= r_bitonic_pipe2[i + 12];
r_bitonic_pipe3[i + 12]<= r_bitonic_pipe2[i + 8];
end
else begin
r_bitonic_pipe3[i + 8] <= r_bitonic_pipe2[i + 8];
r_bitonic_pipe3[i + 12]<= r_bitonic_pipe2[i + 12];
end
end
end
end
endgenerate
reg signed [15:0] r_bitonic_pipe4 [0:WIN - 1];//间隔2
generate
for(i = 0; i < WIN / 8; i = i + 1) begin
for(j = 0; j < WIN / 8; j = j + 1) begin
always@(posedge i_clk or posedge i_rst) begin
if(i_rst) begin
r_bitonic_pipe4[i * 4 + j] <= 'd0;
r_bitonic_pipe4[i * 4 + j + 2] <= 'd0;
end
else begin
if(r_bitonic_pipe3[i * 4 + j] > r_bitonic_pipe3[i * 4 + j + 2]) begin
r_bitonic_pipe4[i * 4 + j] <= r_bitonic_pipe3[i * 4 + j + 2];
r_bitonic_pipe4[i * 4 + j + 2] <= r_bitonic_pipe3[i * 4 + j];
end
else begin
r_bitonic_pipe4[i * 4 + j] <= r_bitonic_pipe3[i * 4 + j];
r_bitonic_pipe4[i * 4 + j + 2] <= r_bitonic_pipe3[i * 4 + j + 2];
end
end
end
always@(posedge i_clk or posedge i_rst) begin
if(i_rst) begin
r_bitonic_pipe4[i * 4 + j + 8] <= 'd0;
r_bitonic_pipe4[i * 4 + j + 10] <= 'd0;
end
else begin
if(r_bitonic_pipe3[i * 4 + j + 8] < r_bitonic_pipe3[i * 4 + j + 10]) begin
r_bitonic_pipe4[i * 4 + j + 8] <= r_bitonic_pipe3[i * 4 + j + 10];
r_bitonic_pipe4[i * 4 + j + 10] <= r_bitonic_pipe3[i * 4 + j + 8];
end
else begin
r_bitonic_pipe4[i * 4 + j + 8] <= r_bitonic_pipe3[i * 4 + j + 8];
r_bitonic_pipe4[i * 4 + j + 10] <= r_bitonic_pipe3[i * 4 + j + 10];
end
end
end
end
end
endgenerate
reg signed [15:0] r_bitonic_pipe5 [0:WIN - 1];//间隔1
generate
for(i = 0; i < WIN / 4; i = i + 1) begin
always@(posedge i_clk or posedge i_rst) begin
if(i_rst) begin
r_bitonic_pipe5[i * 2] <= 'd0;
r_bitonic_pipe5[i * 2 + 1] <= 'd0;
end
else begin
if(r_bitonic_pipe4[i * 2] > r_bitonic_pipe4[i * 2 + 1]) begin
r_bitonic_pipe5[i * 2] <= r_bitonic_pipe4[i * 2 + 1];
r_bitonic_pipe5[i * 2 + 1] <= r_bitonic_pipe4[i * 2];
end
else begin
r_bitonic_pipe5[i * 2] <= r_bitonic_pipe4[i * 2];
r_bitonic_pipe5[i * 2 + 1] <= r_bitonic_pipe4[i * 2 + 1];
end
end
end
always@(posedge i_clk or posedge i_rst) begin
if(i_rst) begin
r_bitonic_pipe5[i * 2 + 8] <= 'd0;
r_bitonic_pipe5[i * 2 + 9] <= 'd0;
end
else begin
if(r_bitonic_pipe4[i * 2 + 8] < r_bitonic_pipe4[i * 2 + 9]) begin
r_bitonic_pipe5[i * 2 + 8] <= r_bitonic_pipe4[i * 2 + 9];
r_bitonic_pipe5[i * 2 + 9] <= r_bitonic_pipe4[i * 2 + 8];
end
else begin
r_bitonic_pipe5[i * 2 + 8] <= r_bitonic_pipe4[i * 2 + 8];
r_bitonic_pipe5[i * 2 + 9] <= r_bitonic_pipe4[i * 2 + 9];
end
end
end
end
endgenerate
reg signed [15:0] r_bitonic_pipe6 [0:WIN - 1];//间隔8
generate
for(i = 0; i < WIN / 2; i = i + 1) begin
always@(posedge i_clk or posedge i_rst) begin
if(i_rst) begin
r_bitonic_pipe6[i] <= 'd0;
r_bitonic_pipe6[i + 8] <= 'd0;
end
else begin
if(r_bitonic_pipe5[i] > r_bitonic_pipe5[i + 8]) begin
r_bitonic_pipe6[i] <= r_bitonic_pipe5[i + 8];
r_bitonic_pipe6[i + 8] <= r_bitonic_pipe5[i];
end
else begin
r_bitonic_pipe6[i] <= r_bitonic_pipe5[i];
r_bitonic_pipe6[i + 8] <= r_bitonic_pipe5[i + 8];
end
end
end
end
endgenerate
reg signed [15:0] r_bitonic_pipe7 [0:WIN - 1];//间隔4
generate
for(i = 0; i < WIN / 8; i = i + 1) begin
for(j = 0; j < WIN / 4; j = j + 1) begin
always@(posedge i_clk or posedge i_rst) begin
if(i_rst) begin
r_bitonic_pipe7[i * 8 + j] <= 'd0;
r_bitonic_pipe7[i * 8 + j + 4] <= 'd0;
end
else begin
if(r_bitonic_pipe6[i * 8 + j] > r_bitonic_pipe6[i * 8 + j + 4]) begin
r_bitonic_pipe7[i * 8 + j] <= r_bitonic_pipe6[i * 8 + j + 4];
r_bitonic_pipe7[i * 8 + j + 4] <= r_bitonic_pipe6[i * 8 + j];
end
else begin
r_bitonic_pipe7[i * 8 + j] <= r_bitonic_pipe6[i * 8 + j] ;
r_bitonic_pipe7[i * 8 + j + 4] <= r_bitonic_pipe6[i * 8 + j + 4];
end
end
end
end
end
endgenerate
reg signed [15:0] r_bitonic_pipe8 [0:WIN - 1];//间隔2
generate
for(i = 0; i < WIN / 4; i = i + 1) begin
for(j = 0; j < WIN / 8; j = j + 1) begin
always@(posedge i_clk or posedge i_rst) begin
if(i_rst) begin
r_bitonic_pipe8[i * 4 + j] <= 'd0;
r_bitonic_pipe8[i * 4 + j + 2] <= 'd0;
end
else begin
if(r_bitonic_pipe7[i * 4 + j] > r_bitonic_pipe7[i * 4 + j + 2]) begin
r_bitonic_pipe8[i * 4 + j] <= r_bitonic_pipe7[i * 4 + j + 2];
r_bitonic_pipe8[i * 4 + j + 2] <= r_bitonic_pipe7[i * 4 + j];
end
else begin
r_bitonic_pipe8[i * 4 + j] <= r_bitonic_pipe7[i * 4 + j] ;
r_bitonic_pipe8[i * 4 + j + 2] <= r_bitonic_pipe7[i * 4 + j + 2];
end
end
end
end
end
endgenerate
reg signed [15:0] r_bitonic_pipe9 [0:WIN - 1];//间隔1
generate
for(i = 0; i < WIN / 2; i = i + 1) begin
always@(posedge i_clk or posedge i_rst) begin
if(i_rst) begin
r_bitonic_pipe9[i * 2] <= 'd0;
r_bitonic_pipe9[i * 2 + 1] <= 'd0;
end
else begin
if(r_bitonic_pipe8[i * 2] > r_bitonic_pipe8[i * 2 + 1]) begin
r_bitonic_pipe9[i * 2] <= r_bitonic_pipe8[i * 2 + 1];
r_bitonic_pipe9[i * 2 + 1] <= r_bitonic_pipe8[i * 2];
end
else begin
r_bitonic_pipe9[i * 2] <= r_bitonic_pipe8[i * 2] ;
r_bitonic_pipe9[i * 2 + 1] <= r_bitonic_pipe8[i * 2 + 1];
end
end
end
end
endgenerate
generate
for(i = 0; i < 16; i = i + 1) begin
assign o_buff[i * 16 + 15:i * 16] = r_bitonic_pipe9[i];
end
endgenerate
endmodule