Author | SHA1 | Message | Date |
---|---|---|---|
|
801dcad9b7 | add | 1 year ago |
|
ad1d7765d5 | add | 1 year ago |
|
9d05d40d9c | add | 1 year ago |
|
041d017053 | add | 1 year ago |
|
a51acf7951 | add | 1 year ago |
|
e55a45ed6c | add | 1 year ago |
|
737640e4ab | add | 1 year ago |
|
573285de96 | add | 1 year ago |
|
6b1dca986c | add | 1 year ago |
|
9f0b889029 | add | 1 year ago |
|
c7100af9dd | add | 1 year ago |
|
7bb2153d48 | add | 1 year ago |
|
31a6f4b805 | add | 1 year ago |
|
7c71c856ca | add | 1 year ago |
|
4d5e4ceb60 | add | 1 year ago |
|
32704b549e | add | 1 year ago |
@@ -0,0 +1,63 @@ | |||
// Copyright 2018 ETH Zurich and University of Bologna. | |||
// Copyright and related rights are licensed under the Solderpad Hardware | |||
// License, Version 0.51 (the "License"); you may not use this file except in | |||
// compliance with the License. You may obtain a copy of the License at | |||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law | |||
// or agreed to in writing, software, hardware and materials distributed under | |||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | |||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the | |||
// specific language governing permissions and limitations under the License. | |||
// | |||
// Author: Florian Zaruba, ETH Zurich | |||
// Date: 15.09.2018 | |||
// Description: Combinatorial AMO unit | |||
// Purely combinational ALU computing the result of an atomic memory operation
// from the two 64-bit operands, selected by amo_op_i.
//
// - AMO_SC / AMO_SWAP     : result is operand B
// - AMO_ADD               : lower 64 bits of A + B
// - AMO_AND / OR / XOR    : bitwise combination of A and B
// - AMO_MIN / AMO_MAX     : signed minimum / maximum of A and B
// - AMO_MINU / AMO_MAXU   : unsigned minimum / maximum of A and B
// - any other opcode      : all zeros
//
// NOTE(review): which operand carries the memory value and which the register
// value is decided by the instantiating module - confirm against the caller.
module amo_alu (
    // AMO interface
    input  ariane_pkg::amo_t amo_op_i,
    input  logic [63:0]      amo_operand_a_i,
    input  logic [63:0]      amo_operand_b_i,
    output logic [63:0]      amo_result_o // result of atomic memory operation
);

    // One shared 65-bit adder. The additional MSB (bit 64) holds the sign of
    // the difference A - B and is used as the "A is smaller than B" flag by the
    // min/max operations.
    logic [64:0] adder_sum;
    logic [64:0] adder_operand_a, adder_operand_b;

    assign adder_sum = adder_operand_a + adder_operand_b;

    always_comb begin
        // default: both operands are sign-extended to 65 bits
        adder_operand_a = $signed(amo_operand_a_i);
        adder_operand_b = $signed(amo_operand_b_i);

        // default result is operand B (used by SC and SWAP)
        amo_result_o = amo_operand_b_i;

        unique case (amo_op_i)
            // the default is to output operand_b
            ariane_pkg::AMO_SC:;
            ariane_pkg::AMO_SWAP:;
            ariane_pkg::AMO_ADD: amo_result_o = adder_sum[63:0];
            ariane_pkg::AMO_AND: amo_result_o = amo_operand_a_i & amo_operand_b_i;
            ariane_pkg::AMO_OR:  amo_result_o = amo_operand_a_i | amo_operand_b_i;
            ariane_pkg::AMO_XOR: amo_result_o = amo_operand_a_i ^ amo_operand_b_i;
            // signed compare: adder computes A - B on sign-extended operands,
            // adder_sum[64] is set when A < B
            ariane_pkg::AMO_MAX: begin
                adder_operand_b = -$signed(amo_operand_b_i);
                amo_result_o = adder_sum[64] ? amo_operand_b_i : amo_operand_a_i;
            end
            ariane_pkg::AMO_MIN: begin
                adder_operand_b = -$signed(amo_operand_b_i);
                amo_result_o = adder_sum[64] ? amo_operand_a_i : amo_operand_b_i;
            end
            // unsigned compare: operands are zero-extended instead, so
            // adder_sum[64] acts as the borrow of A - B (set when A < B)
            ariane_pkg::AMO_MAXU: begin
                adder_operand_a = $unsigned(amo_operand_a_i);
                adder_operand_b = -$unsigned(amo_operand_b_i);
                amo_result_o = adder_sum[64] ? amo_operand_b_i : amo_operand_a_i;
            end
            ariane_pkg::AMO_MINU: begin
                adder_operand_a = $unsigned(amo_operand_a_i);
                adder_operand_b = -$unsigned(amo_operand_b_i);
                amo_result_o = adder_sum[64] ? amo_operand_a_i : amo_operand_b_i;
            end
            // opcodes not handled above produce zero
            default: amo_result_o = '0;
        endcase
    end
endmodule
@@ -0,0 +1,462 @@ | |||
// Copyright 2018 ETH Zurich and University of Bologna. | |||
// Copyright and related rights are licensed under the Solderpad Hardware | |||
// License, Version 0.51 (the "License"); you may not use this file except in | |||
// compliance with the License. You may obtain a copy of the License at | |||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law | |||
// or agreed to in writing, software, hardware and materials distributed under | |||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | |||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the | |||
// specific language governing permissions and limitations under the License. | |||
// | |||
// File: cache_ctrl.svh | |||
// Author: Florian Zaruba <zarubaf@ethz.ch> | |||
// Date: 14.10.2017 | |||
// | |||
// Copyright (C) 2017 ETH Zurich, University of Bologna | |||
// All rights reserved. | |||
// | |||
// Description: Cache controller | |||
// Cache controller for one core request port.
//
// A single FSM accepts requests on req_port_i, looks them up in the cache
// arrays (req_o/addr_o/tag_o, hit reported on hit_way_i), returns load data on
// req_port_o, performs the second array access needed for store hits, and
// forwards misses and bypassed accesses through miss_req_o. busy_o is high
// whenever the FSM is not in IDLE.
module cache_ctrl import ariane_pkg::*; import std_cache_pkg::*; #(
    parameter ariane_cfg_t ArianeCfg = ArianeDefaultConfig // contains cacheable regions
) (
    input  logic                                 clk_i,     // Clock
    input  logic                                 rst_ni,    // Asynchronous reset active low
    input  logic                                 flush_i,   // while set, no new request is accepted
    input  logic                                 bypass_i,  // when set, new requests bypass the cache
    output logic                                 busy_o,    // FSM is not in IDLE
    // Core request ports
    input  dcache_req_i_t                        req_port_i,
    output dcache_req_o_t                        req_port_o,
    // SRAM interface
    output logic [DCACHE_SET_ASSOC-1:0]          req_o,  // req is valid
    output logic [DCACHE_INDEX_WIDTH-1:0]        addr_o, // address into cache array
    input  logic                                 gnt_i,
    output cache_line_t                          data_o,
    output cl_be_t                               be_o,
    output logic [DCACHE_TAG_WIDTH-1:0]          tag_o, // valid one cycle later
    input  cache_line_t [DCACHE_SET_ASSOC-1:0]   data_i,
    output logic                                 we_o,
    input  logic [DCACHE_SET_ASSOC-1:0]          hit_way_i,
    // Miss handling
    output miss_req_t                            miss_req_o,
    // return
    input  logic                                 miss_gnt_i,
    input  logic                                 active_serving_i, // the miss unit is currently active for this unit, serving the miss
    input  logic [63:0]                          critical_word_i,
    input  logic                                 critical_word_valid_i,
    // bypass ports
    input  logic                                 bypass_gnt_i,
    input  logic                                 bypass_valid_i,
    input  logic [63:0]                          bypass_data_i,
    // check MSHR for aliasing
    output logic [55:0]                          mshr_addr_o,
    input  logic                                 mshr_addr_matches_i,
    input  logic                                 mshr_index_matches_i
);

    enum logic [3:0] {
        IDLE,               // 0
        WAIT_TAG,           // 1
        WAIT_TAG_BYPASSED,  // 2
        WAIT_GNT,           // 3
        WAIT_GNT_SAVED,     // 4
        STORE_REQ,          // 5
        WAIT_REFILL_VALID,  // 6
        WAIT_REFILL_GNT,    // 7
        WAIT_TAG_SAVED,     // 8
        WAIT_MSHR,          // 9
        WAIT_CRITICAL_WORD  // 10
    } state_d, state_q;

    // request currently being processed, captured from req_port_i
    typedef struct packed {
        logic [DCACHE_INDEX_WIDTH-1:0] index;
        logic [DCACHE_TAG_WIDTH-1:0]   tag;
        logic [7:0]                    be;
        logic [1:0]                    size;
        logic                          we;
        logic [63:0]                   wdata;
        logic                          bypass;
        logic                          killed;
    } mem_req_t;

    // way(s) that hit during the tag lookup, kept for the store write access
    logic [DCACHE_SET_ASSOC-1:0] hit_way_d, hit_way_q;

    mem_req_t mem_req_d, mem_req_q;

    assign busy_o = (state_q != IDLE);
    // the tag is taken from the combinational next-state value so that a tag
    // arriving on req_port_i in this cycle is forwarded immediately
    assign tag_o  = mem_req_d.tag;

    logic [DCACHE_LINE_WIDTH-1:0] cl_i;

    // select the cache line of the hitting way; all zeros if no way hits,
    // and the highest-indexed way wins if several bits of hit_way_i are set
    always_comb begin : way_select
        cl_i = '0;
        for (int unsigned i = 0; i < DCACHE_SET_ASSOC; i++)
            if (hit_way_i[i])
                cl_i = data_i[i].data;

        // cl_i = data_i[one_hot_to_bin(hit_way_i)].data;
    end

    // --------------
    // Cache FSM
    // --------------
    always_comb begin : cache_ctrl_fsm
        automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset;
        // incoming cache-line -> this is needed as synthesis is not supporting +: indexing in a multi-dimensional array
        // cache-line offset -> multiple of 64
        cl_offset = mem_req_q.index[DCACHE_BYTE_OFFSET-1:3] << 6; // shift by 6 to the left
        // default assignments
        state_d   = state_q;
        mem_req_d = mem_req_q;
        hit_way_d = hit_way_q;
        // output assignments
        req_port_o.data_gnt    = 1'b0;
        req_port_o.data_rvalid = 1'b0;
        req_port_o.data_rdata  = '0;
        miss_req_o    = '0;
        mshr_addr_o   = '0;
        // Memory array communication
        req_o  = '0;
        addr_o = req_port_i.address_index;
        data_o = '0;
        be_o   = '0;
        we_o   = '0;

        // a kill is sticky: remember it until a new request is captured
        mem_req_d.killed |= req_port_i.kill_req;

        case (state_q)

            IDLE: begin
                // a new request arrived
                if (req_port_i.data_req && !flush_i) begin
                    // request the cache line - we can do this speculatively
                    req_o = '1;

                    // save index, be and we
                    mem_req_d.index = req_port_i.address_index;
                    mem_req_d.be    = req_port_i.data_be;
                    mem_req_d.size  = req_port_i.data_size;
                    mem_req_d.we    = req_port_i.data_we;
                    mem_req_d.wdata = req_port_i.data_wdata;
                    mem_req_d.killed = req_port_i.kill_req;

                    // Bypass mode, check for uncacheable address here as well
                    if (bypass_i) begin
                        state_d = WAIT_TAG_BYPASSED;
                        // grant this access only if it was a load
                        req_port_o.data_gnt = (req_port_i.data_we) ? 1'b0 : 1'b1;
                        mem_req_d.bypass = 1'b1;
                    // ------------------
                    // Cache is enabled
                    // ------------------
                    end else begin
                        // Wait that we have access on the memory array
                        if (gnt_i) begin
                            state_d = WAIT_TAG;
                            mem_req_d.bypass = 1'b0;
                            // only for a read
                            if (!req_port_i.data_we)
                                req_port_o.data_gnt = 1'b1;
                        end
                    end
                end
            end

            // cache enabled and waiting for tag
            WAIT_TAG, WAIT_TAG_SAVED: begin
                // check that the client really wants to do the request and that we have a valid tag
                if (!req_port_i.kill_req && (req_port_i.tag_valid || state_q == WAIT_TAG_SAVED || mem_req_q.we)) begin
                    // save tag if we didn't already save it
                    if (state_q != WAIT_TAG_SAVED) begin
                        mem_req_d.tag = req_port_i.address_tag;
                    end
                    // we speculatively request another transfer
                    if (req_port_i.data_req && !flush_i) begin
                        req_o = '1;
                    end

                    // ------------
                    // HIT CASE
                    // ------------
                    if (|hit_way_i) begin
                        // we can request another cache-line if this was a load
                        if (req_port_i.data_req && !mem_req_q.we && !flush_i) begin
                            state_d          = WAIT_TAG; // switch back to WAIT_TAG
                            mem_req_d.index  = req_port_i.address_index;
                            mem_req_d.be     = req_port_i.data_be;
                            mem_req_d.size   = req_port_i.data_size;
                            mem_req_d.we     = req_port_i.data_we;
                            mem_req_d.wdata  = req_port_i.data_wdata;
                            mem_req_d.killed = req_port_i.kill_req;
                            mem_req_d.bypass = 1'b0;

                            req_port_o.data_gnt = gnt_i;

                            // no array grant: the new request is not accepted
                            if (!gnt_i) begin
                                state_d = IDLE;
                            end
                        end else begin
                            state_d = IDLE;
                        end

                        // this is timing critical
                        // req_port_o.data_rdata = cl_i[cl_offset +: 64];
                        // (hard-coded for a 128 bit cache line, see assertion below)
                        case (mem_req_q.index[3])
                            1'b0: req_port_o.data_rdata = cl_i[63:0];
                            1'b1: req_port_o.data_rdata = cl_i[127:64];
                        endcase

                        // report data for a read
                        if (!mem_req_q.we) begin
                            req_port_o.data_rvalid = ~mem_req_q.killed;
                        // else this was a store so we need an extra step to handle it
                        end else begin
                            state_d = STORE_REQ;
                            hit_way_d = hit_way_i;
                        end

                    // ------------
                    // MISS CASE
                    // ------------
                    end else begin
                        // make a miss request
                        state_d = WAIT_REFILL_GNT;
                    end
                    // ----------------------------------------------
                    // Check MSHR - Miss Status Handling Register
                    // ----------------------------------------------
                    mshr_addr_o = {tag_o, mem_req_q.index};
                    // 1. We've got a match on the MSHR while we are going down
                    //    the store path. This means that the miss controller is
                    //    currently evicting our cache-line. As the store is
                    //    non-atomic we need to constantly check whether we are
                    //    matching the address the miss handler is serving.
                    //    Furthermore we need to check for the whole index
                    //    because a completely different memory line could alias
                    //    with the cache-line we are evicting.
                    // 2. The second case is where we are currently loading and
                    //    the address matches the exact CL the miss controller
                    //    is currently serving. That means we need to wait for
                    //    the miss controller to finish its request before we
                    //    can continue to serve this CL. Otherwise we would fetch
                    //    the cache-line again, potentially losing any
                    //    content we've written so far. As a consequence
                    //    we can't have a hit on the CL, which means
                    //    req_port_o.data_rvalid will be de-asserted.
                    if ((mshr_index_matches_i && mem_req_q.we) || mshr_addr_matches_i) begin
                        state_d = WAIT_MSHR;
                    end

                    // -------------------------
                    // Check for cache-ability
                    // -------------------------
                    // non-cacheable addresses are sent to the miss unit as bypass requests
                    if (!is_inside_cacheable_regions(ArianeCfg, {{{64-riscv::PLEN}{1'b0}}, tag_o, {DCACHE_INDEX_WIDTH{1'b0}}})) begin
                        mem_req_d.bypass = 1'b1;
                        state_d = WAIT_REFILL_GNT;
                    end

                // we are still waiting for a valid tag
                end else begin
                    // request cache line for saved index
                    addr_o = mem_req_q.index;
                    req_o  = '1;

                    // check that we still have a memory grant
                    if (!gnt_i) begin
                        state_d = WAIT_GNT;
                    end
                end
            end

            // ~> we already granted the request but lost the memory grant while waiting for the tag
            WAIT_GNT, WAIT_GNT_SAVED: begin
                // request cache line for saved index
                addr_o = mem_req_q.index;
                req_o  = '1;

                // if we get a valid tag while waiting for the memory grant, save it
                if (req_port_i.tag_valid) begin
                    mem_req_d.tag = req_port_i.address_tag;
                    state_d = WAIT_GNT_SAVED;
                end

                // we have a memory grant again ~> go back to WAIT_TAG
                // (or to WAIT_TAG_SAVED if the tag has been captured in the meantime)
                if (gnt_i) begin
                    state_d = (state_d == WAIT_GNT) ? WAIT_TAG : WAIT_TAG_SAVED;
                end
            end

            // ~> we are here as we need a second round of memory access for a store
            STORE_REQ: begin
                // check if the MSHR still doesn't match
                mshr_addr_o = {mem_req_q.tag, mem_req_q.index};

                // We need to re-check for MSHR aliasing here as the store requires at least
                // two memory look-ups on a single-ported SRAM and therefore is non-atomic
                if (!mshr_index_matches_i) begin
                    // store data, write dirty bit
                    req_o      = hit_way_q;
                    addr_o     = mem_req_q.index;
                    we_o       = 1'b1;

                    be_o.vldrty = hit_way_q;

                    // set the correct byte enable
                    be_o.data[cl_offset>>3 +: 8]  = mem_req_q.be;
                    data_o.data[cl_offset  +: 64] = mem_req_q.wdata;
                    // ~> change the state
                    data_o.dirty = 1'b1;
                    data_o.valid = 1'b1;

                    // got a grant ~> this is finished now
                    if (gnt_i) begin
                        req_port_o.data_gnt = 1'b1;
                        state_d = IDLE;
                    end
                end else begin
                    state_d = WAIT_MSHR;
                end
            end // case: STORE_REQ

            // we've got a match on MSHR ~> miss unit is currently serving a request
            WAIT_MSHR: begin
                mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
                // we can start a new request
                if (!mshr_index_matches_i) begin
                    req_o = '1;

                    addr_o = mem_req_q.index;

                    // the tag is already saved, so redo the lookup with it
                    if (gnt_i) state_d = WAIT_TAG_SAVED;
                end
            end

            // its for sure a miss
            WAIT_TAG_BYPASSED: begin
                // check that the client really wants to do the request and that we have a valid tag
                if (!req_port_i.kill_req && (req_port_i.tag_valid || mem_req_q.we)) begin
                    // save tag
                    mem_req_d.tag = req_port_i.address_tag;
                    state_d = WAIT_REFILL_GNT;
                end
            end

            // ~> wait for grant from miss unit
            WAIT_REFILL_GNT: begin

                mshr_addr_o = {mem_req_q.tag, mem_req_q.index};

                miss_req_o.valid  = 1'b1;
                miss_req_o.bypass = mem_req_q.bypass;
                miss_req_o.addr   = {mem_req_q.tag, mem_req_q.index};
                miss_req_o.be     = mem_req_q.be;
                miss_req_o.size   = mem_req_q.size;
                miss_req_o.we     = mem_req_q.we;
                miss_req_o.wdata  = mem_req_q.wdata;

                // got a grant so go to valid
                if (bypass_gnt_i) begin
                    state_d = WAIT_REFILL_VALID;
                    // if this was a write we still need to give a grant to the store unit
                    if (mem_req_q.we)
                        req_port_o.data_gnt = 1'b1;
                end

                // miss granted: loads wait for the critical word, stores are done
                if (miss_gnt_i && !mem_req_q.we)
                    state_d = WAIT_CRITICAL_WORD;
                else if (miss_gnt_i) begin
                    state_d = IDLE;
                    req_port_o.data_gnt = 1'b1;
                end

                // it can be the case that the miss unit is currently serving a
                // request which matches ours
                // so we need to check the MSHR for matching continuously
                // if the MSHR matches we need to go to a different state -> we should never get a matching MSHR and a high miss_gnt_i
                if (mshr_addr_matches_i && !active_serving_i) begin
                    state_d = WAIT_MSHR;
                end
            end

            // ~> wait for critical word to arrive
            WAIT_CRITICAL_WORD: begin
                // speculatively request another word
                if (req_port_i.data_req) begin
                    // request the cache line
                    req_o = '1;
                end

                if (critical_word_valid_i) begin
                    req_port_o.data_rvalid = ~mem_req_q.killed;
                    req_port_o.data_rdata = critical_word_i;
                    // we can make another request
                    if (req_port_i.data_req) begin
                        // save index, be and we
                        mem_req_d.index = req_port_i.address_index;
                        mem_req_d.be    = req_port_i.data_be;
                        mem_req_d.size  = req_port_i.data_size;
                        mem_req_d.we    = req_port_i.data_we;
                        mem_req_d.wdata = req_port_i.data_wdata;
                        mem_req_d.killed = req_port_i.kill_req;

                        state_d = IDLE;

                        // Wait until we have access on the memory array
                        if (gnt_i) begin
                            state_d = WAIT_TAG;
                            mem_req_d.bypass = 1'b0;
                            req_port_o.data_gnt = 1'b1;
                        end
                    end else begin
                        state_d = IDLE;
                    end
                end
            end

            // ~> wait until the bypass request is valid
            WAIT_REFILL_VALID: begin
                // got a valid answer
                if (bypass_valid_i) begin
                    req_port_o.data_rdata = bypass_data_i;
                    req_port_o.data_rvalid = ~mem_req_q.killed;
                    state_d = IDLE;
                end
            end
        endcase

        // a kill request is acknowledged with rvalid right away; the FSM returns
        // to IDLE unless a miss request is pending or in flight
        // (WAIT_REFILL_GNT / WAIT_CRITICAL_WORD)
        if (req_port_i.kill_req) begin
            req_port_o.data_rvalid = 1'b1;
            if (!(state_q inside {
                    WAIT_REFILL_GNT,
                    WAIT_CRITICAL_WORD})) begin
                state_d = IDLE;
            end
        end
    end

    // --------------
    // Registers
    // --------------
    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (~rst_ni) begin
            state_q   <= IDLE;
            mem_req_q <= '0;
            hit_way_q <= '0;
        end else begin
            state_q   <= state_d;
            mem_req_q <= mem_req_d;
            hit_way_q <= hit_way_d;
        end
    end

    //pragma translate_off
    `ifndef VERILATOR
    initial begin
        assert (DCACHE_LINE_WIDTH == 128) else $error ("Cacheline width has to be 128 for the moment. But only small changes required in data select logic");
    end
    // if the full MSHR address matches so should also match the partial one
    partial_full_mshr_match: assert property(@(posedge clk_i) disable iff (~rst_ni) mshr_addr_matches_i -> mshr_index_matches_i)   else $fatal (1, "partial mshr index doesn't match");
    // there should never be a valid answer when the MSHR matches and we are not being served
    no_valid_on_mshr_match: assert property(@(posedge clk_i) disable iff (~rst_ni) (mshr_addr_matches_i && !active_serving_i)-> !req_port_o.data_rvalid || req_port_i.kill_req) else $fatal (1, "rvalid_o should not be set on MSHR match");
    `endif
    //pragma translate_on
endmodule
@@ -0,0 +1,552 @@ | |||
// Copyright 2018 ETH Zurich and University of Bologna. | |||
// Copyright and related rights are licensed under the Solderpad Hardware | |||
// License, Version 0.51 (the "License"); you may not use this file except in | |||
// compliance with the License. You may obtain a copy of the License at | |||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law | |||
// or agreed to in writing, software, hardware and materials distributed under | |||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | |||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the | |||
// specific language governing permissions and limitations under the License. | |||
// | |||
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich | |||
// Date: 15.08.2018 | |||
// Description: Instruction cache that is compatible with openpiton. | |||
// | |||
// Some notes: | |||
// | |||
// 1) refills always have the size of one cache line, except for accesses to the I/O region, which is mapped | |||
// to the top half of the physical address space (bit 39 = 1). the data width of the interface has the width | |||
// of one cache line, and hence the ifills can be transferred in a single cycle. note that the ifills must be | |||
// consumed unconditionally. | |||
// | |||
// 2) instruction fetches are always assumed to be aligned to 32bit (lower 2 bits are ignored) | |||
// | |||
// 3) NC accesses to I/O space are expected to return 32bit from memory. | |||
// | |||
module cva6_icache import ariane_pkg::*; import wt_cache_pkg::*; #( | |||
parameter logic [CACHE_ID_WIDTH-1:0] RdTxId = 0, // ID to be used for read transactions | |||
parameter ariane_pkg::ariane_cfg_t ArianeCfg = ariane_pkg::ArianeDefaultConfig // contains cacheable regions | |||
) ( | |||
input logic clk_i, | |||
input logic rst_ni, | |||
input logic flush_i, // flush the icache, flush and kill have to be asserted together | |||
input logic en_i, // enable icache | |||
output logic miss_o, // to performance counter | |||
// address translation requests | |||
input icache_areq_i_t areq_i, | |||
output icache_areq_o_t areq_o, | |||
// data requests | |||
input icache_dreq_i_t dreq_i, | |||
output icache_dreq_o_t dreq_o, | |||
// refill port | |||
input logic mem_rtrn_vld_i, | |||
input icache_rtrn_t mem_rtrn_i, | |||
output logic mem_data_req_o, | |||
input logic mem_data_ack_i, | |||
output icache_req_t mem_data_o | |||
); | |||
// signals | |||
logic cache_en_d, cache_en_q; // cache is enabled | |||
logic [riscv::VLEN-1:0] vaddr_d, vaddr_q; | |||
logic paddr_is_nc; // asserted if physical address is non-cacheable | |||
logic [ICACHE_SET_ASSOC-1:0] cl_hit; // hit from tag compare | |||
logic cache_rden; // triggers cache lookup | |||
logic cache_wren; // triggers write to cacheline | |||
logic cmp_en_d, cmp_en_q; // enable tag comparison in next cycle. used to cut long path due to NC signal. | |||
logic flush_d, flush_q; // used to register and signal pending flushes | |||
// replacement strategy | |||
logic update_lfsr; // shift the LFSR | |||
logic [$clog2(ICACHE_SET_ASSOC)-1:0] inv_way; // first non-valid encountered | |||
logic [$clog2(ICACHE_SET_ASSOC)-1:0] rnd_way; // random index for replacement | |||
logic [$clog2(ICACHE_SET_ASSOC)-1:0] repl_way; // way to replace | |||
logic [ICACHE_SET_ASSOC-1:0] repl_way_oh_d, repl_way_oh_q; // way to replace (onehot) | |||
logic all_ways_valid; // we need to switch repl strategy since all are valid | |||
// invalidations / flushing | |||
logic inv_en; // incoming invalidations | |||
logic inv_d, inv_q; // invalidation in progress | |||
logic flush_en, flush_done; // used to flush cache entries | |||
logic [ICACHE_CL_IDX_WIDTH-1:0] flush_cnt_d, flush_cnt_q; // used to flush cache entries | |||
// mem arrays | |||
logic cl_we; // write enable to memory array | |||
logic [ICACHE_SET_ASSOC-1:0] cl_req; // request to memory array | |||
logic [ICACHE_CL_IDX_WIDTH-1:0] cl_index; // this is a cache-line index, to memory array | |||
logic [ICACHE_OFFSET_WIDTH-1:0] cl_offset_d, cl_offset_q; // offset in cache line | |||
logic [ICACHE_TAG_WIDTH-1:0] cl_tag_d, cl_tag_q; // this is the cache tag | |||
logic [ICACHE_TAG_WIDTH-1:0] cl_tag_rdata [ICACHE_SET_ASSOC-1:0]; // these are the tags coming from the tagmem | |||
logic [ICACHE_LINE_WIDTH-1:0] cl_rdata [ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the cache | |||
logic [ICACHE_USER_LINE_WIDTH-1:0] cl_ruser[ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the user cache | |||
logic [ICACHE_SET_ASSOC-1:0][FETCH_WIDTH-1:0]cl_sel; // selected word from each cacheline | |||
logic [ICACHE_SET_ASSOC-1:0][FETCH_USER_WIDTH-1:0] cl_user; // selected word from each cacheline | |||
logic [ICACHE_SET_ASSOC-1:0] vld_req; // bit enable for valid regs | |||
logic vld_we; // valid bits write enable | |||
logic [ICACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write | |||
logic [ICACHE_SET_ASSOC-1:0] vld_rdata; // valid bits coming from valid regs | |||
logic [ICACHE_CL_IDX_WIDTH-1:0] vld_addr; // valid bit | |||
// controller FSM | |||
typedef enum logic[2:0] {FLUSH, IDLE, READ, MISS, KILL_ATRANS, KILL_MISS} state_e; | |||
state_e state_d, state_q; | |||
/////////////////////////////////////////////////////// | |||
// address -> cl_index mapping, interface plumbing | |||
/////////////////////////////////////////////////////// | |||
// extract tag from physical address, check if NC | |||
assign cl_tag_d = (areq_i.fetch_valid) ? areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH] : cl_tag_q; | |||
// noncacheable if request goes to I/O space, or if cache is disabled | |||
assign paddr_is_nc = (~cache_en_q) | (~ariane_pkg::is_inside_cacheable_regions(ArianeCfg, {{{64-riscv::PLEN}{1'b0}}, cl_tag_d, {ICACHE_INDEX_WIDTH{1'b0}}})); | |||
// pass exception through | |||
assign dreq_o.ex = areq_i.fetch_exception; | |||
// latch this in case we have to stall later on | |||
// make sure this is 32bit aligned | |||
assign vaddr_d = (dreq_o.ready & dreq_i.req) ? dreq_i.vaddr : vaddr_q; | |||
assign areq_o.fetch_vaddr = {vaddr_q>>2, 2'b0}; | |||
// split virtual address into index and offset to address cache arrays | |||
assign cl_index = vaddr_d[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH]; | |||
if (ArianeCfg.Axi64BitCompliant) begin : gen_axi_offset | |||
// if we generate a noncacheable access, the word will be at offset 0 or 4 in the cl coming from memory | |||
assign cl_offset_d = ( dreq_o.ready & dreq_i.req) ? {dreq_i.vaddr>>2, 2'b0} : | |||
( paddr_is_nc & mem_data_req_o ) ? cl_offset_q[2]<<2 : // needed since we transfer 32bit over a 64bit AXI bus in this case | |||
cl_offset_q; | |||
// request word address instead of cl address in case of NC access | |||
assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:3], 3'b0} : // align to 64bit | |||
{cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl | |||
end else begin : gen_piton_offset | |||
// icache fills are either cachelines or 4byte fills, depending on whether they go to the Piton I/O space or not. | |||
// since the piton cache system replicates the data, we can always index the full CL | |||
assign cl_offset_d = ( dreq_o.ready & dreq_i.req) ? {dreq_i.vaddr>>2, 2'b0} : | |||
cl_offset_q; | |||
// request word address instead of cl address in case of NC access | |||
assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:2], 2'b0} : // align to 32bit | |||
{cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl | |||
end | |||
assign mem_data_o.tid = RdTxId; | |||
assign mem_data_o.nc = paddr_is_nc; | |||
// way that is being replaced | |||
assign mem_data_o.way = repl_way; | |||
assign dreq_o.vaddr = vaddr_q; | |||
// invalidations take two cycles | |||
assign inv_d = inv_en; | |||
/////////////////////////////////////////////////////// | |||
// main control logic | |||
/////////////////////////////////////////////////////// | |||
logic addr_ni; | |||
assign addr_ni = is_inside_nonidempotent_regions(ArianeCfg, areq_i.fetch_paddr); | |||
always_comb begin : p_fsm | |||
// default assignment | |||
state_d = state_q; | |||
cache_en_d = cache_en_q & en_i;// disabling the cache is always possible, enable needs to go via flush | |||
flush_en = 1'b0; | |||
cmp_en_d = 1'b0; | |||
cache_rden = 1'b0; | |||
cache_wren = 1'b0; | |||
inv_en = 1'b0; | |||
flush_d = flush_q | flush_i; // register incoming flush | |||
// interfaces | |||
dreq_o.ready = 1'b0; | |||
areq_o.fetch_req = 1'b0; | |||
dreq_o.valid = 1'b0; | |||
mem_data_req_o = 1'b0; | |||
// performance counter | |||
miss_o = 1'b0; | |||
// handle invalidations unconditionally | |||
// note: invalidations are mutually exclusive with | |||
// ifills, since both arrive over the same IF | |||
// however, we need to make sure below that we | |||
// do not trigger a cache readout at the same time... | |||
if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_INV_REQ) begin | |||
inv_en = 1'b1; | |||
end | |||
unique case (state_q) | |||
////////////////////////////////// | |||
// this clears all valid bits | |||
FLUSH: begin | |||
flush_en = 1'b1; | |||
if (flush_done) begin | |||
state_d = IDLE; | |||
flush_d = 1'b0; | |||
// if the cache was not enabled set this | |||
cache_en_d = en_i; | |||
end | |||
end | |||
////////////////////////////////// | |||
// wait for an incoming request | |||
IDLE: begin | |||
// only enable tag comparison if cache is enabled | |||
cmp_en_d = cache_en_q; | |||
// handle pending flushes, or perform cache clear upon enable | |||
if (flush_d || (en_i && !cache_en_q)) begin | |||
state_d = FLUSH; | |||
// wait for incoming requests | |||
end else begin | |||
// mem requests are for sure invals here | |||
if (!mem_rtrn_vld_i) begin | |||
dreq_o.ready = 1'b1; | |||
// we have a new request | |||
if (dreq_i.req) begin | |||
cache_rden = 1'b1; | |||
state_d = READ; | |||
end | |||
end | |||
if (dreq_i.kill_s1) begin | |||
state_d = IDLE; | |||
end | |||
end | |||
end | |||
////////////////////////////////// | |||
// check whether we have a hit | |||
// in case the cache is disabled, | |||
// or in case the address is NC, we | |||
// reuse the miss mechanism to handle | |||
// the request | |||
READ: begin | |||
areq_o.fetch_req = '1; | |||
// only enable tag comparison if cache is enabled | |||
cmp_en_d = cache_en_q; | |||
// readout speculatively | |||
cache_rden = cache_en_q; | |||
if (areq_i.fetch_valid && (!dreq_i.spec || !addr_ni) ) begin | |||
// check if we have to flush | |||
if (flush_d) begin | |||
state_d = IDLE; | |||
// we have a hit or an exception output valid result | |||
end else if (((|cl_hit && cache_en_q) || areq_i.fetch_exception.valid) && !inv_q) begin | |||
dreq_o.valid = ~dreq_i.kill_s2;// just don't output in this case | |||
state_d = IDLE; | |||
// we can accept another request | |||
// and stay here, but only if no inval is coming in | |||
// note: we are not expecting ifill return packets here... | |||
if (!mem_rtrn_vld_i) begin | |||
dreq_o.ready = 1'b1; | |||
if (dreq_i.req) begin | |||
state_d = READ; | |||
end | |||
end | |||
// if a request is being killed at this stage, | |||
// we have to bail out and wait for the address translation to complete | |||
if (dreq_i.kill_s1) begin | |||
state_d = IDLE; | |||
end | |||
// we have a miss / NC transaction | |||
end else if (dreq_i.kill_s2) begin | |||
state_d = IDLE; | |||
end else if (!inv_q) begin | |||
cmp_en_d = 1'b0; | |||
// only count this as a miss if the cache is enabled, and | |||
// the address is cacheable | |||
// send out ifill request | |||
mem_data_req_o = 1'b1; | |||
if (mem_data_ack_i) begin | |||
miss_o = ~paddr_is_nc; | |||
state_d = MISS; | |||
end | |||
end | |||
// bail out if this request is being killed (and we missed on the TLB) | |||
end else if (dreq_i.kill_s2 || flush_d) begin | |||
state_d = KILL_ATRANS; | |||
end | |||
end | |||
////////////////////////////////// | |||
// wait until the memory transaction | |||
// returns. do not write to memory | |||
// if the nc bit is set. | |||
MISS: begin | |||
// note: this is mutually exclusive with ICACHE_INV_REQ, | |||
// so we do not have to check for invals here | |||
if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_IFILL_ACK) begin | |||
state_d = IDLE; | |||
// only return data if request is not being killed | |||
if (!(dreq_i.kill_s2 || flush_d)) begin | |||
dreq_o.valid = 1'b1; | |||
// only write to cache if this address is cacheable | |||
cache_wren = ~paddr_is_nc; | |||
end | |||
// bail out if this request is being killed | |||
end else if (dreq_i.kill_s2 || flush_d) begin | |||
state_d = KILL_MISS; | |||
end | |||
end | |||
////////////////////////////////// | |||
// killed address translation, | |||
// wait until paddr is valid, and go | |||
// back to idle | |||
KILL_ATRANS: begin | |||
areq_o.fetch_req = '1; | |||
if (areq_i.fetch_valid) begin | |||
state_d = IDLE; | |||
end | |||
end | |||
////////////////////////////////// | |||
// killed miss, | |||
// wait until memory responds and | |||
// go back to idle | |||
KILL_MISS: begin | |||
if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_IFILL_ACK) begin | |||
state_d = IDLE; | |||
end | |||
end | |||
default: begin | |||
// we should never get here | |||
state_d = FLUSH; | |||
end | |||
endcase // state_q | |||
end | |||
/////////////////////////////////////////////////////// | |||
// valid bit invalidation and replacement strategy | |||
/////////////////////////////////////////////////////// | |||
// note: it cannot happen that we get an invalidation + a cl replacement | |||
// in the same cycle as these requests arrive via the same interface | |||
// flushes take precedence over invalidations (it is ok if we ignore | |||
// the inval since the cache is cleared anyway) | |||
// Flush counter: cleared once flush_done is set, incremented while flush_en
// is asserted, held otherwise.
assign flush_cnt_d = (flush_done) ? '0 : | |||
(flush_en) ? flush_cnt_q + 1 : | |||
flush_cnt_q; | |||
// flush_done is set while the counter sits on the last index (ICACHE_NUM_WORDS-1)
assign flush_done = (flush_cnt_q==(ICACHE_NUM_WORDS-1)); | |||
// invalidation/clearing address
// flushing takes precedence over invals: the flush counter is used while
// flush_en is set, otherwise the index bits of the incoming invalidation
// (when inv_en), otherwise the regular cache line index cl_index
assign vld_addr = (flush_en) ? flush_cnt_q : | |||
(inv_en) ? mem_rtrn_i.inv.idx[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH] : | |||
cl_index; | |||
// Per-way request to the tag/valid RAMs (vld_req[i] drives req_i of way i):
//  - all ways while flushing or reading
//  - all ways for an "invalidate all" (inv.all) while inv_en
//  - the way returned by icache_way_bin2oh(inv.way) for a single-way
//    invalidation (inv.vld) while inv_en (presumably binary -> one-hot; confirm in package)
//  - otherwise the registered replacement way repl_way_oh_q
assign vld_req = (flush_en || cache_rden) ? '1 : | |||
(mem_rtrn_i.inv.all && inv_en) ? '1 : | |||
(mem_rtrn_i.inv.vld && inv_en) ? icache_way_bin2oh(mem_rtrn_i.inv.way) : | |||
repl_way_oh_q; | |||
// valid bits are written as all ones on a cache line write, all zeros otherwise
// (i.e. for flushes and invalidations)
assign vld_wdata = (cache_wren) ? '1 : '0; | |||
// the tag/valid RAMs are written on a line write, an invalidation or a flush
assign vld_we = (cache_wren | inv_en | flush_en); | |||
// (disabled alternative kept from the original source)
// assign vld_req = (vld_we | cache_rden);
// choose random replacement if all are valid: the LFSR only advances when a
// line is written while all_ways_valid is set
assign update_lfsr = cache_wren & all_ways_valid; | |||
// replacement way: rnd_way when all ways are valid, inv_way otherwise
assign repl_way = (all_ways_valid) ? rnd_way : inv_way; | |||
// the replacement way is only re-evaluated while cmp_en_q is set; it is held otherwise
assign repl_way_oh_d = (cmp_en_q) ? icache_way_bin2oh(repl_way) : repl_way_oh_q; | |||
// enable signals for memory arrays: all ways on a read, only the registered
// replacement way on a write, none otherwise
assign cl_req = (cache_rden) ? '1 : | |||
(cache_wren) ? repl_way_oh_q : | |||
'0; | |||
// data RAM write enable follows the cache line write strobe
assign cl_we = cache_wren; | |||
// find invalid cache line
// The lzc instance is fed with the inverted valid bits read from the tag RAMs,
// so a set input bit marks a way that is currently invalid. Its count output
// drives inv_way and its empty output drives all_ways_valid.
// NOTE(review): exact cnt_o/empty_o semantics are defined by the lzc module,
// which is not visible here -- presumably "index of first set bit" and
// "no bit set"; confirm against the lzc implementation.
lzc #( | |||
.WIDTH ( ICACHE_SET_ASSOC ) | |||
) i_lzc ( | |||
.in_i ( ~vld_rdata ), | |||
.cnt_o ( inv_way ), | |||
.empty_o ( all_ways_valid ) | |||
); | |||
// generate random cacheline index
// The LFSR is only enabled by update_lfsr; its output rnd_way is
// $clog2(ICACHE_SET_ASSOC) bits wide and is used as replacement way when all
// ways are valid.
lfsr #( | |||
.LfsrWidth ( ariane_pkg::ICACHE_SET_ASSOC ), | |||
.OutWidth ( $clog2(ariane_pkg::ICACHE_SET_ASSOC)) | |||
) i_lfsr ( | |||
.clk_i ( clk_i ), | |||
.rst_ni ( rst_ni ), | |||
.en_i ( update_lfsr ), | |||
.out_o ( rnd_way ) | |||
); | |||
/////////////////////////////////////////////////////// | |||
// tag comparison, hit generation | |||
/////////////////////////////////////////////////////// | |||
// binary index of the hitting way, produced by i_lzc_hit from cl_hit
logic [$clog2(ICACHE_SET_ASSOC)-1:0] hit_idx; | |||
// Per-way tag comparison and word selection.
for (genvar i=0;i<ICACHE_SET_ASSOC;i++) begin : gen_tag_cmpsel | |||
// a way hits when its stored tag equals cl_tag_d and its valid bit is set
assign cl_hit[i] = (cl_tag_rdata[i] == cl_tag_d) & vld_rdata[i]; | |||
// select FETCH_WIDTH bits out of the line, starting at bit {cl_offset_q,3'b0}
// (cl_offset_q shifted left by three)
assign cl_sel[i] = cl_rdata[i][{cl_offset_q,3'b0} +: FETCH_WIDTH]; | |||
// same selection for the user bits, FETCH_USER_WIDTH wide
// NOTE(review): the user slice uses the same bit offset as the data slice;
// confirm this is intended if the user line width differs from the data line width.
assign cl_user[i] = cl_ruser[i][{cl_offset_q,3'b0} +: FETCH_USER_WIDTH]; | |||
end | |||
// Convert the per-way hit vector into the index hit_idx; the empty output is
// left unconnected.
lzc #( | |||
.WIDTH ( ICACHE_SET_ASSOC ) | |||
) i_lzc_hit ( | |||
.in_i ( cl_hit ), | |||
.cnt_o ( hit_idx ), | |||
.empty_o ( ) | |||
); | |||
// Source selection for the data/user bits returned to the requester:
//  - while cmp_en_q is set, the word selected from the way indicated by hit_idx
//  - otherwise, the word sliced directly out of the memory return packet
//    (mem_rtrn_i) at the registered line offset
always_comb begin | |||
if (cmp_en_q) begin | |||
dreq_o.data = cl_sel[hit_idx]; | |||
dreq_o.user = cl_user[hit_idx]; | |||
end else begin | |||
dreq_o.data = mem_rtrn_i.data[{cl_offset_q,3'b0} +: FETCH_WIDTH]; | |||
dreq_o.user = mem_rtrn_i.user[{cl_offset_q,3'b0} +: FETCH_USER_WIDTH]; | |||
end | |||
end | |||
/////////////////////////////////////////////////////// | |||
// memory arrays and regs | |||
/////////////////////////////////////////////////////// | |||
// raw read data of the tag RAMs, one entry per way: {valid bit, tag}
logic [ICACHE_TAG_WIDTH:0] cl_tag_valid_rdata [ICACHE_SET_ASSOC-1:0]; | |||
// One tag RAM and one data RAM per way.
for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : gen_sram | |||
// Tag RAM
// Requested per way via vld_req[i]; write enable and address are shared
// between all ways (vld_we / vld_addr).
sram #( | |||
// tag + valid bit
.DATA_WIDTH ( ICACHE_TAG_WIDTH+1 ), | |||
.NUM_WORDS ( ICACHE_NUM_WORDS ) | |||
) tag_sram ( | |||
.clk_i ( clk_i ), | |||
.rst_ni ( rst_ni ), | |||
.req_i ( vld_req[i] ), | |||
.we_i ( vld_we ), | |||
.addr_i ( vld_addr ), | |||
// we can always use the saved tag here since it takes a
// couple of cycle until we write to the cache upon a miss
.wuser_i ( '0 ), | |||
.wdata_i ( {vld_wdata[i], cl_tag_q} ), | |||
.be_i ( '1 ), | |||
.ruser_o ( ), | |||
.rdata_o ( cl_tag_valid_rdata[i] ) | |||
); | |||
// split the tag RAM word: lower ICACHE_TAG_WIDTH bits are the tag,
// the top bit is the valid bit (matches the {valid, tag} write data above)
assign cl_tag_rdata[i] = cl_tag_valid_rdata[i][ICACHE_TAG_WIDTH-1:0]; | |||
assign vld_rdata[i] = cl_tag_valid_rdata[i][ICACHE_TAG_WIDTH]; | |||
// Data RAM
// Holds one full cache line (plus user bits) per entry. It is written with
// the data/user fields of the memory return packet and always addressed by
// cl_index; all byte enables are set.
sram #( | |||
.USER_WIDTH ( ICACHE_USER_LINE_WIDTH ), | |||
.DATA_WIDTH ( ICACHE_LINE_WIDTH ), | |||
.USER_EN ( ariane_pkg::FETCH_USER_EN ), | |||
.NUM_WORDS ( ICACHE_NUM_WORDS ) | |||
) data_sram ( | |||
.clk_i ( clk_i ), | |||
.rst_ni ( rst_ni ), | |||
.req_i ( cl_req[i] ), | |||
.we_i ( cl_we ), | |||
.addr_i ( cl_index ), | |||
.wuser_i ( mem_rtrn_i.user ), | |||
.wdata_i ( mem_rtrn_i.data ), | |||
.be_i ( '1 ), | |||
.ruser_o ( cl_ruser[i] ), | |||
.rdata_o ( cl_rdata[i] ) | |||
); | |||
end | |||
// State registers of the icache.
// Asynchronous, active-low reset: the FSM comes up in FLUSH and every other
// register is cleared. Outside of reset each *_q register simply takes over
// its *_d counterpart on the rising clock edge.
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
  if (~rst_ni) begin
    // control state
    state_q       <= FLUSH;
    flush_q       <= '0;
    flush_cnt_q   <= '0;
    inv_q         <= '0;
    cmp_en_q      <= '0;
    cache_en_q    <= '0;
    // request bookkeeping
    vaddr_q       <= '0;
    cl_tag_q      <= '0;
    cl_offset_q   <= '0;
    repl_way_oh_q <= '0;
  end else begin
    // control state
    state_q       <= state_d;
    flush_q       <= flush_d;
    flush_cnt_q   <= flush_cnt_d;
    inv_q         <= inv_d;
    cmp_en_q      <= cmp_en_d;
    cache_en_q    <= cache_en_d;
    // request bookkeeping
    vaddr_q       <= vaddr_d;
    cl_tag_q      <= cl_tag_d;
    cl_offset_q   <= cl_offset_d;
    repl_way_oh_q <= repl_way_oh_d;
  end
end
/////////////////////////////////////////////////////// | |||
// assertions | |||
/////////////////////////////////////////////////////// | |||
//pragma translate_off | |||
`ifndef VERILATOR | |||
// A cache line write must never coincide with an invalidation flagged in the
// memory return packet (inv.all or inv.vld) ...
repl_inval0: assert property ( | |||
@(posedge clk_i) disable iff (!rst_ni) cache_wren |-> !(mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld)) | |||
else $fatal(1,"[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously"); | |||
// ... and vice versa: an invalidation must never coincide with a line write.
repl_inval1: assert property ( | |||
@(posedge clk_i) disable iff (!rst_ni) (mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld) |-> !cache_wren) | |||
else $fatal(1,"[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously"); | |||
// The FSM state register must always hold one of the listed states.
invalid_state: assert property ( | |||
@(posedge clk_i) disable iff (!rst_ni) (state_q inside {FLUSH, IDLE, READ, MISS, KILL_ATRANS, KILL_MISS})) | |||
else $fatal(1,"[l1 icache] fsm reached an invalid state"); | |||
// When no invalidation is in progress, a cache read that is followed by an
// enabled tag comparison in the next cycle must hit in at most one way
// ($onehot0 also accepts "no hit").
hot1: assert property ( | |||
@(posedge clk_i) disable iff (!rst_ni) (!inv_en) |-> cache_rden |=> cmp_en_q |-> $onehot0(cl_hit)) | |||
else $fatal(1,"[l1 icache] cl_hit signal must be hot1"); | |||
// this is only used for verification!
// Shadow copies of the valid bits and tags, indexed by [set][way].
logic vld_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0]; | |||
logic [ariane_pkg::ICACHE_TAG_WIDTH-1:0] tag_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0]; | |||
// per way: set when a write would duplicate a tag that is already valid
logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] tag_write_duplicate_test; | |||
// Track every write to the tag/valid RAMs: a way is updated when it is
// requested (vld_req[i]) while vld_we is set, using the same address and
// write data as the RAMs.
always_ff @(posedge clk_i or negedge rst_ni) begin : p_mirror | |||
if(!rst_ni) begin | |||
vld_mirror <= '{default:'0}; | |||
tag_mirror <= '{default:'0}; | |||
end else begin | |||
for (int i = 0; i < ICACHE_SET_ASSOC; i++) begin | |||
if(vld_req[i] & vld_we) begin | |||
vld_mirror[vld_addr][i] <= vld_wdata[i]; | |||
tag_mirror[vld_addr][i] <= cl_tag_q; | |||
end | |||
end | |||
end | |||
end | |||
// A duplicate is flagged for way i when the mirrored entry at vld_addr is
// valid, carries the tag that is about to be written (cl_tag_q), and at least
// one valid bit is being set (|vld_wdata).
for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : gen_tag_dupl | |||
assign tag_write_duplicate_test[i] = (tag_mirror[vld_addr][i] == cl_tag_q) & vld_mirror[vld_addr][i] & (|vld_wdata); | |||
end | |||
// Whenever any way is requested with write enable set, no duplicate may be flagged.
tag_write_duplicate: assert property ( | |||
@(posedge clk_i) disable iff (!rst_ni) |vld_req |-> vld_we |-> !(|tag_write_duplicate_test)) | |||
else $fatal(1,"[l1 icache] cannot allocate a CL that is already present in the cache"); | |||
// One-time parameter check at the start of simulation.
initial begin | |||
// assert wrong parameterizations
assert (ICACHE_INDEX_WIDTH<=12) | |||
else $fatal(1,"[l1 icache] cache index width can be maximum 12bit since VM uses 4kB pages"); | |||
end | |||
`endif | |||
//pragma translate_on | |||
endmodule // cva6_icache |
@@ -0,0 +1,191 @@ | |||
// Copyright 2018 ETH Zurich and University of Bologna. | |||
// Copyright and related rights are licensed under the Solderpad Hardware | |||
// License, Version 0.51 (the "License"); you may not use this file except in | |||
// compliance with the License. You may obtain a copy of the License at | |||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law | |||
// or agreed to in writing, software, hardware and materials distributed under | |||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | |||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the | |||
// specific language governing permissions and limitations under the License. | |||
// | |||
// Author: Nils Wistoff <nwistoff@iis.ee.ethz.ch>, ETH Zurich | |||
// Date: 07.09.2020 | |||
// Description: wrapper module to connect the L1I$ to a 64bit AXI bus. | |||
// | |||
// Wrapper that connects the L1 instruction cache (cva6_icache) to a 64 bit AXI
// bus through axi_shim.
//
// Behaviour implemented here:
//  - refill requests of the icache are acknowledged immediately and kept
//    pending (req_valid_q / req_data_q) until the AXI shim grants the read
//  - read beats are collected in a shift register; the assembled line is
//    handed to the icache in the cycle in which the last beat arrives
//  - the return packet always carries rtype ICACHE_IFILL_ACK, no invalidation
//    and all-zero user bits
//  - the write channel of the AXI shim is tied off
//
// Parameters:
//  ArianeCfg  - forwarded unchanged to cva6_icache
//
// Ports:
//  clk_i, rst_ni     - clock and asynchronous active-low reset
//  priv_lvl_i        - not referenced inside this wrapper
//  flush_i, en_i     - forwarded to cva6_icache
//  miss_o            - forwarded from cva6_icache
//  areq_i/areq_o     - address translation interface of cva6_icache
//  dreq_i/dreq_o     - data request interface of cva6_icache
//  axi_req_o/axi_resp_i - AXI master port driven by axi_shim
module cva6_icache_axi_wrapper import ariane_pkg::*; import wt_cache_pkg::*; #(
  parameter ariane_cfg_t ArianeCfg = ArianeDefaultConfig // contains cacheable regions
) (
  input  logic              clk_i,
  input  logic              rst_ni,
  input  riscv::priv_lvl_t  priv_lvl_i,

  input  logic              flush_i,     // flush the icache, flush and kill have to be asserted together
  input  logic              en_i,        // enable icache
  output logic              miss_o,      // to performance counter
  // address translation requests
  input  icache_areq_i_t    areq_i,
  output icache_areq_o_t    areq_o,
  // data requests
  input  icache_dreq_i_t    dreq_i,
  output icache_dreq_o_t    dreq_o,
  // AXI refill port
  output ariane_axi::req_t  axi_req_o,
  input  ariane_axi::resp_t axi_resp_i
);

  // number of 64 bit words in the wider of the two cache lines (I$ vs. D$)
  localparam AxiNumWords = (ICACHE_LINE_WIDTH/64) * (ICACHE_LINE_WIDTH  > DCACHE_LINE_WIDTH) +
                           (DCACHE_LINE_WIDTH/64) * (ICACHE_LINE_WIDTH <= DCACHE_LINE_WIDTH) ;

  // icache <-> wrapper memory interface
  logic                                  icache_mem_rtrn_vld;
  icache_rtrn_t                          icache_mem_rtrn;
  logic                                  icache_mem_data_req;
  logic                                  icache_mem_data_ack;
  icache_req_t                           icache_mem_data;

  // wrapper <-> AXI shim read channel
  logic                                  axi_rd_req;
  logic                                  axi_rd_gnt;
  logic [63:0]                           axi_rd_addr;
  logic [$clog2(AxiNumWords)-1:0]        axi_rd_blen;
  logic [1:0]                            axi_rd_size;
  logic [$size(axi_resp_i.r.id)-1:0]     axi_rd_id_in;
  logic                                  axi_rd_rdy;
  logic                                  axi_rd_lock;
  logic                                  axi_rd_last;
  logic                                  axi_rd_valid;
  logic [63:0]                           axi_rd_data;
  logic [$size(axi_resp_i.r.id)-1:0]     axi_rd_id_out;
  logic                                  axi_rd_exokay;

  // pending request bookkeeping and burst assembly
  logic                                  req_valid_d, req_valid_q;
  icache_req_t                           req_data_d,  req_data_q;
  logic                                  first_d,     first_q;
  logic [ICACHE_LINE_WIDTH/64-1:0][63:0] rd_shift_d,  rd_shift_q;

  // Keep read request asserted until we have an AXI grant. This is not guaranteed by icache (but
  // required by AXI).
  assign req_valid_d           = ~axi_rd_gnt & (icache_mem_data_req | req_valid_q);

  // Update read request information on a new request
  assign req_data_d            = (icache_mem_data_req) ? icache_mem_data : req_data_q;

  // We have a new or pending read request
  assign axi_rd_req            = icache_mem_data_req | req_valid_q;
  // physical address, zero-extended to 64 bit
  assign axi_rd_addr           = {{64-riscv::PLEN{1'b0}}, req_data_d.paddr};

  // Fetch a full cache line on a cache miss, or a single word on a bypassed access
  assign axi_rd_blen           = (req_data_d.nc) ? '0 : ariane_pkg::ICACHE_LINE_WIDTH/64-1;
  assign axi_rd_size           = 2'b11;
  assign axi_rd_id_in          = req_data_d.tid;
  // read data is always accepted, locked accesses are never issued
  assign axi_rd_rdy            = 1'b1;
  assign axi_rd_lock           = 1'b0;

  // Immediately acknowledge read request. This is an implicit requirement for the icache.
  assign icache_mem_data_ack   = icache_mem_data_req;

  // Return data as soon as last word arrives
  assign icache_mem_rtrn_vld   = axi_rd_valid & axi_rd_last;
  assign icache_mem_rtrn.data  = rd_shift_d;
  // The user output of the AXI shim is left unconnected below, so no user bits
  // are collected for the refill. Tie the user field of the return packet to
  // zero instead of leaving a member of the struct undriven.
  assign icache_mem_rtrn.user  = '0;
  assign icache_mem_rtrn.tid   = req_data_q.tid;
  assign icache_mem_rtrn.rtype = wt_cache_pkg::ICACHE_IFILL_ACK;
  // this wrapper never generates invalidations
  assign icache_mem_rtrn.inv   = '0;

  // -------
  // I-Cache
  // -------
  cva6_icache #(
    // use ID 0 for icache reads
    .RdTxId             ( 0             ),
    .ArianeCfg          ( ArianeCfg     )
  ) i_cva6_icache (
    .clk_i              ( clk_i               ),
    .rst_ni             ( rst_ni              ),
    .flush_i            ( flush_i             ),
    .en_i               ( en_i                ),
    .miss_o             ( miss_o              ),
    .areq_i             ( areq_i              ),
    .areq_o             ( areq_o              ),
    .dreq_i             ( dreq_i              ),
    .dreq_o             ( dreq_o              ),
    .mem_rtrn_vld_i     ( icache_mem_rtrn_vld ),
    .mem_rtrn_i         ( icache_mem_rtrn     ),
    .mem_data_req_o     ( icache_mem_data_req ),
    .mem_data_ack_i     ( icache_mem_data_ack ),
    .mem_data_o         ( icache_mem_data     )
  );

  // --------
  // AXI shim
  // --------
  // Only the read channel is used; all write channel inputs are tied to zero
  // and the write channel outputs are left open.
  axi_shim #(
    .AxiUserWidth    ( AXI_USER_WIDTH         ),
    .AxiNumWords     ( AxiNumWords            ),
    .AxiIdWidth      ( $size(axi_resp_i.r.id) )
  ) i_axi_shim (
    .clk_i           ( clk_i             ),
    .rst_ni          ( rst_ni            ),
    .rd_req_i        ( axi_rd_req        ),
    .rd_gnt_o        ( axi_rd_gnt        ),
    .rd_addr_i       ( axi_rd_addr       ),
    .rd_blen_i       ( axi_rd_blen       ),
    .rd_size_i       ( axi_rd_size       ),
    .rd_id_i         ( axi_rd_id_in      ),
    .rd_rdy_i        ( axi_rd_rdy        ),
    .rd_lock_i       ( axi_rd_lock       ),
    .rd_last_o       ( axi_rd_last       ),
    .rd_valid_o      ( axi_rd_valid      ),
    .rd_data_o       ( axi_rd_data       ),
    .rd_user_o       (                   ),
    .rd_id_o         ( axi_rd_id_out     ),
    .rd_exokay_o     ( axi_rd_exokay     ),
    .wr_req_i        ( '0                ),
    .wr_gnt_o        (                   ),
    .wr_addr_i       ( '0                ),
    .wr_data_i       ( '0                ),
    .wr_user_i       ( '0                ),
    .wr_be_i         ( '0                ),
    .wr_blen_i       ( '0                ),
    .wr_size_i       ( '0                ),
    .wr_id_i         ( '0                ),
    .wr_lock_i       ( '0                ),
    .wr_atop_i       ( '0                ),
    .wr_rdy_i        ( '0                ),
    .wr_valid_o      (                   ),
    .wr_id_o         (                   ),
    .wr_exokay_o     (                   ),
    .axi_req_o       ( axi_req_o         ),
    .axi_resp_i      ( axi_resp_i        )
  );

  // Buffer burst data in shift register
  // Every valid beat is shifted in at the top word, pushing earlier beats
  // towards word 0. first_q marks the first beat of a transfer: it is set out
  // of reset and after every last beat (first_d = axi_rd_last).
  always_comb begin : p_axi_rtrn_shift
    first_d    = first_q;
    rd_shift_d = rd_shift_q;

    if (axi_rd_valid) begin
      first_d    = axi_rd_last;
      rd_shift_d = {axi_rd_data, rd_shift_q[ICACHE_LINE_WIDTH/64-1:1]};

      // If this is a single word transaction, we need to make sure that word is placed at offset 0
      if (first_q) begin
        rd_shift_d[0] = axi_rd_data;
      end
    end
  end

  // Registers
  always_ff @(posedge clk_i or negedge rst_ni) begin : p_rd_buf
    if (!rst_ni) begin
      req_valid_q <= 1'b0;
      req_data_q  <= '0;
      first_q     <= 1'b1;
      rd_shift_q  <= '0;
    end else begin
      req_valid_q <= req_valid_d;
      req_data_q  <= req_data_d;
      first_q     <= first_d;
      rd_shift_q  <= rd_shift_d;
    end
  end

endmodule // cva6_icache_axi_wrapper
@@ -0,0 +1,741 @@ | |||
// Copyright 2018 ETH Zurich and University of Bologna. | |||
// Copyright and related rights are licensed under the Solderpad Hardware | |||
// License, Version 0.51 (the "License"); you may not use this file except in | |||
// compliance with the License. You may obtain a copy of the License at | |||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law | |||
// or agreed to in writing, software, hardware and materials distributed under | |||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | |||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the | |||
// specific language governing permissions and limitations under the License. | |||
// | |||
// Author: Florian Zaruba, ETH Zurich | |||
// Date: 12.11.2017 | |||
// Description: Handles cache misses. | |||
// -------------- | |||
// MISS Handler | |||
// -------------- | |||
module miss_handler import ariane_pkg::*; import std_cache_pkg::*; #( | |||
parameter int unsigned NR_PORTS = 3 | |||
)( | |||
input logic clk_i, | |||
input logic rst_ni, | |||
input logic flush_i, // flush request | |||
output logic flush_ack_o, // acknowledge successful flush | |||
output logic miss_o, | |||
input logic busy_i, // dcache is busy with something | |||
// Bypass or miss | |||
input logic [NR_PORTS-1:0][$bits(miss_req_t)-1:0] miss_req_i, | |||
// Bypass handling | |||
output logic [NR_PORTS-1:0] bypass_gnt_o, | |||
output logic [NR_PORTS-1:0] bypass_valid_o, | |||
output logic [NR_PORTS-1:0][63:0] bypass_data_o, | |||
// AXI port | |||
output ariane_axi::req_t axi_bypass_o, | |||
input ariane_axi::resp_t axi_bypass_i, | |||
// Miss handling (~> cacheline refill) | |||
output logic [NR_PORTS-1:0] miss_gnt_o, | |||
output logic [NR_PORTS-1:0] active_serving_o, | |||
output logic [63:0] critical_word_o, | |||
output logic critical_word_valid_o, | |||
output ariane_axi::req_t axi_data_o, | |||
input ariane_axi::resp_t axi_data_i, | |||
input logic [NR_PORTS-1:0][55:0] mshr_addr_i, | |||
output logic [NR_PORTS-1:0] mshr_addr_matches_o, | |||
output logic [NR_PORTS-1:0] mshr_index_matches_o, | |||
// AMO | |||
input amo_req_t amo_req_i, | |||
output amo_resp_t amo_resp_o, | |||
// Port to SRAMs, for refill and eviction | |||
output logic [DCACHE_SET_ASSOC-1:0] req_o, | |||
output logic [DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array | |||
output cache_line_t data_o, | |||
output cl_be_t be_o, | |||
input cache_line_t [DCACHE_SET_ASSOC-1:0] data_i, | |||
output logic we_o | |||
); | |||
// Three MSHR ports + AMO port | |||
parameter NR_BYPASS_PORTS = NR_PORTS + 1; | |||
// FSM states | |||
enum logic [3:0] { | |||
IDLE, // 0 | |||
FLUSHING, // 1 | |||
FLUSH, // 2 | |||
WB_CACHELINE_FLUSH, // 3 | |||
FLUSH_REQ_STATUS, // 4 | |||
WB_CACHELINE_MISS, // 5 | |||
WAIT_GNT_SRAM, // 6 | |||
MISS, // 7 | |||
REQ_CACHELINE, // 8 | |||
MISS_REPL, // 9 | |||
SAVE_CACHELINE, // A | |||
INIT, // B | |||
AMO_REQ, // C | |||
AMO_WAIT_RESP // D | |||
} state_d, state_q; | |||
// Registers | |||
mshr_t mshr_d, mshr_q; | |||
logic [DCACHE_INDEX_WIDTH-1:0] cnt_d, cnt_q; | |||
logic [DCACHE_SET_ASSOC-1:0] evict_way_d, evict_way_q; | |||
// cache line to evict | |||
cache_line_t evict_cl_d, evict_cl_q; | |||
logic serve_amo_d, serve_amo_q; | |||
// Request from one FSM | |||
logic [NR_PORTS-1:0] miss_req_valid; | |||
logic [NR_PORTS-1:0] miss_req_bypass; | |||
logic [NR_PORTS-1:0][63:0] miss_req_addr; | |||
logic [NR_PORTS-1:0][63:0] miss_req_wdata; | |||
logic [NR_PORTS-1:0] miss_req_we; | |||
logic [NR_PORTS-1:0][7:0] miss_req_be; | |||
logic [NR_PORTS-1:0][1:0] miss_req_size; | |||
// Bypass AMO port | |||
bypass_req_t amo_bypass_req; | |||
bypass_rsp_t amo_bypass_rsp; | |||
// Bypass ports <-> Arbiter | |||
bypass_req_t [NR_BYPASS_PORTS-1:0] bypass_ports_req; | |||
bypass_rsp_t [NR_BYPASS_PORTS-1:0] bypass_ports_rsp; | |||
// Arbiter <-> Bypass AXI adapter | |||
bypass_req_t bypass_adapter_req; | |||
bypass_rsp_t bypass_adapter_rsp; | |||
// Cache Line Refill <-> AXI | |||
logic req_fsm_miss_valid; | |||
logic [63:0] req_fsm_miss_addr; | |||
logic [DCACHE_LINE_WIDTH-1:0] req_fsm_miss_wdata; | |||
logic req_fsm_miss_we; | |||
logic [(DCACHE_LINE_WIDTH/8)-1:0] req_fsm_miss_be; | |||
ariane_axi::ad_req_t req_fsm_miss_req; | |||
logic [1:0] req_fsm_miss_size; | |||
logic gnt_miss_fsm; | |||
logic valid_miss_fsm; | |||
logic [(DCACHE_LINE_WIDTH/64)-1:0][63:0] data_miss_fsm; | |||
// Cache Management <-> LFSR | |||
logic lfsr_enable; | |||
logic [DCACHE_SET_ASSOC-1:0] lfsr_oh; | |||
logic [$clog2(DCACHE_SET_ASSOC-1)-1:0] lfsr_bin; | |||
// AMOs | |||
ariane_pkg::amo_t amo_op; | |||
logic [63:0] amo_operand_b; | |||
// ------------------------------ | |||
// Cache Management | |||
// ------------------------------ | |||
always_comb begin : cache_management | |||
automatic logic [DCACHE_SET_ASSOC-1:0] evict_way, valid_way; | |||
for (int unsigned i = 0; i < DCACHE_SET_ASSOC; i++) begin | |||
evict_way[i] = data_i[i].valid & data_i[i].dirty; | |||
valid_way[i] = data_i[i].valid; | |||
end | |||
// ---------------------- | |||
// Default Assignments | |||
// ---------------------- | |||
// memory array | |||
req_o = '0; | |||
addr_o = '0; | |||
data_o = '0; | |||
be_o = '0; | |||
we_o = '0; | |||
// Cache controller | |||
miss_gnt_o = '0; | |||
active_serving_o = '0; | |||
// LFSR replacement unit | |||
lfsr_enable = 1'b0; | |||
// to AXI refill | |||
req_fsm_miss_valid = 1'b0; | |||
req_fsm_miss_addr = '0; | |||
req_fsm_miss_wdata = '0; | |||
req_fsm_miss_we = 1'b0; | |||
req_fsm_miss_be = '0; | |||
req_fsm_miss_req = ariane_axi::CACHE_LINE_REQ; | |||
req_fsm_miss_size = 2'b11; | |||
// to AXI bypass | |||
amo_bypass_req.req = 1'b0; | |||
amo_bypass_req.reqtype = ariane_axi::SINGLE_REQ; | |||
amo_bypass_req.amo = ariane_pkg::AMO_NONE; | |||
amo_bypass_req.addr = '0; | |||
amo_bypass_req.we = 1'b0; | |||
amo_bypass_req.wdata = '0; | |||
amo_bypass_req.be = '0; | |||
amo_bypass_req.size = 2'b11; | |||
amo_bypass_req.id = 4'b1011; | |||
// core | |||
flush_ack_o = 1'b0; | |||
miss_o = 1'b0; // to performance counter | |||
serve_amo_d = serve_amo_q; | |||
// -------------------------------- | |||
// Flush and Miss operation | |||
// -------------------------------- | |||
state_d = state_q; | |||
cnt_d = cnt_q; | |||
evict_way_d = evict_way_q; | |||
evict_cl_d = evict_cl_q; | |||
mshr_d = mshr_q; | |||
// communicate to the requester which unit we are currently serving | |||
active_serving_o[mshr_q.id] = mshr_q.valid; | |||
// AMOs | |||
amo_resp_o.ack = 1'b0; | |||
amo_resp_o.result = '0; | |||
amo_operand_b = '0; | |||
case (state_q) | |||
IDLE: begin | |||
// lowest priority are AMOs, wait until everything else is served before going for the AMOs | |||
if (amo_req_i.req && !busy_i) begin | |||
// 1. Flush the cache | |||
if (!serve_amo_q) begin | |||
state_d = FLUSH_REQ_STATUS; | |||
serve_amo_d = 1'b1; | |||
cnt_d = '0; | |||
// 2. Do the AMO | |||
end else begin | |||
state_d = AMO_REQ; | |||
serve_amo_d = 1'b0; | |||
end | |||
end | |||
// check if we want to flush and can flush e.g.: we are not busy anymore | |||
// TODO: Check that the busy flag is indeed needed | |||
if (flush_i && !busy_i) begin | |||
state_d = FLUSH_REQ_STATUS; | |||
cnt_d = '0; | |||
end | |||
// check if one of the state machines missed | |||
for (int unsigned i = 0; i < NR_PORTS; i++) begin | |||
// here comes the refill portion of code | |||
if (miss_req_valid[i] && !miss_req_bypass[i]) begin | |||
state_d = MISS; | |||
// we are taking another request so don't take the AMO | |||
serve_amo_d = 1'b0; | |||
// save to MSHR | |||
mshr_d.valid = 1'b1; | |||
mshr_d.we = miss_req_we[i]; | |||
mshr_d.id = i; | |||
mshr_d.addr = miss_req_addr[i][DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:0]; | |||
mshr_d.wdata = miss_req_wdata[i]; | |||
mshr_d.be = miss_req_be[i]; | |||
break; | |||
end | |||
end | |||
end | |||
// ~> we missed on the cache | |||
MISS: begin | |||
// 1. Check if there is an empty cache-line | |||
// 2. If not -> evict one | |||
req_o = '1; | |||
addr_o = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0]; | |||
state_d = MISS_REPL; | |||
miss_o = 1'b1; | |||
end | |||
// ~> second miss cycle | |||
MISS_REPL: begin | |||
// if all are valid we need to evict one, pseudo random from LFSR | |||
if (&valid_way) begin | |||
lfsr_enable = 1'b1; | |||
evict_way_d = lfsr_oh; | |||
// do we need to write back the cache line? | |||
if (data_i[lfsr_bin].dirty) begin | |||
state_d = WB_CACHELINE_MISS; | |||
evict_cl_d.tag = data_i[lfsr_bin].tag; | |||
evict_cl_d.data = data_i[lfsr_bin].data; | |||
cnt_d = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0]; | |||
// no - we can request a cache line now | |||
end else | |||
state_d = REQ_CACHELINE; | |||
// we have at least one free way | |||
end else begin | |||
// get victim cache-line by looking for the first non-valid bit | |||
evict_way_d = get_victim_cl(~valid_way); | |||
state_d = REQ_CACHELINE; | |||
end | |||
end | |||
// ~> we can just load the cache-line, the way is stored in evict_way_q | |||
REQ_CACHELINE: begin | |||
req_fsm_miss_valid = 1'b1; | |||
req_fsm_miss_addr = mshr_q.addr; | |||
if (gnt_miss_fsm) begin | |||
state_d = SAVE_CACHELINE; | |||
miss_gnt_o[mshr_q.id] = 1'b1; | |||
end | |||
end | |||
// ~> replace the cacheline | |||
SAVE_CACHELINE: begin | |||
// calculate cacheline offset | |||
automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset; | |||
cl_offset = mshr_q.addr[DCACHE_BYTE_OFFSET-1:3] << 6; | |||
// we've got a valid response from refill unit | |||
if (valid_miss_fsm) begin | |||
addr_o = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0]; | |||
req_o = evict_way_q; | |||
we_o = 1'b1; | |||
be_o = '1; | |||
be_o.vldrty = evict_way_q; | |||
data_o.tag = mshr_q.addr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH]; | |||
data_o.data = data_miss_fsm; | |||
data_o.valid = 1'b1; | |||
data_o.dirty = 1'b0; | |||
// is this a write? | |||
if (mshr_q.we) begin | |||
// Yes, so save the updated data now | |||
for (int i = 0; i < 8; i++) begin | |||
// check if we really want to write the corresponding byte | |||
if (mshr_q.be[i]) | |||
data_o.data[(cl_offset + i*8) +: 8] = mshr_q.wdata[i]; | |||
end | |||
// its immediately dirty if we write | |||
data_o.dirty = 1'b1; | |||
end | |||
// reset MSHR | |||
mshr_d.valid = 1'b0; | |||
// go back to idle | |||
state_d = IDLE; | |||
end | |||
end | |||
// ------------------------------ | |||
// Write Back Operation | |||
// ------------------------------ | |||
// ~> evict a cache line from way saved in evict_way_q | |||
WB_CACHELINE_FLUSH, WB_CACHELINE_MISS: begin | |||
req_fsm_miss_valid = 1'b1; | |||
req_fsm_miss_addr = {evict_cl_q.tag, cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET], {{DCACHE_BYTE_OFFSET}{1'b0}}}; | |||
req_fsm_miss_be = '1; | |||
req_fsm_miss_we = 1'b1; | |||
req_fsm_miss_wdata = evict_cl_q.data; | |||
// we've got a grant --> this is timing critical, think about it | |||
if (gnt_miss_fsm) begin | |||
// write status array | |||
addr_o = cnt_q; | |||
req_o = 1'b1; | |||
we_o = 1'b1; | |||
data_o.valid = INVALIDATE_ON_FLUSH ? 1'b0 : 1'b1; | |||
// invalidate | |||
be_o.vldrty = evict_way_q; | |||
// go back to handling the miss or flushing, depending on where we came from | |||
state_d = (state_q == WB_CACHELINE_MISS) ? MISS : FLUSH_REQ_STATUS; | |||
end | |||
end | |||
// ------------------------------ | |||
// Flushing & Initialization | |||
// ------------------------------ | |||
// ~> make another request to check the same cache-line if there are still some valid entries | |||
FLUSH_REQ_STATUS: begin | |||
req_o = '1; | |||
addr_o = cnt_q; | |||
state_d = FLUSHING; | |||
end | |||
FLUSHING: begin | |||
// this has priority | |||
// at least one of the cache lines is dirty | |||
if (|evict_way) begin | |||
// evict cache line, look for the first cache-line which is dirty | |||
evict_way_d = get_victim_cl(evict_way); | |||
evict_cl_d = data_i[one_hot_to_bin(evict_way)]; | |||
state_d = WB_CACHELINE_FLUSH; | |||
// not dirty ~> increment and continue | |||
end else begin | |||
// increment and re-request | |||
cnt_d = cnt_q + (1'b1 << DCACHE_BYTE_OFFSET); | |||
state_d = FLUSH_REQ_STATUS; | |||
addr_o = cnt_q; | |||
req_o = 1'b1; | |||
be_o.vldrty = INVALIDATE_ON_FLUSH ? '1 : '0; | |||
we_o = 1'b1; | |||
// finished with flushing operation, go back to idle | |||
if (cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == DCACHE_NUM_WORDS-1) begin | |||
// only acknowledge if the flush wasn't triggered by an atomic | |||
flush_ack_o = ~serve_amo_q; | |||
state_d = IDLE; | |||
end | |||
end | |||
end | |||
// ~> only called after reset | |||
INIT: begin | |||
// initialize status array | |||
addr_o = cnt_q; | |||
req_o = 1'b1; | |||
we_o = 1'b1; | |||
// only write the dirty array | |||
be_o.vldrty = '1; | |||
cnt_d = cnt_q + (1'b1 << DCACHE_BYTE_OFFSET); | |||
// finished initialization | |||
if (cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == DCACHE_NUM_WORDS-1) | |||
state_d = IDLE; | |||
end | |||
// ---------------------- | |||
// AMOs | |||
// ---------------------- | |||
// ~> we are here because we need to do the AMO, the cache is clean at this point | |||
AMO_REQ: begin | |||
amo_bypass_req.req = 1'b1; | |||
amo_bypass_req.reqtype = ariane_axi::SINGLE_REQ; | |||
amo_bypass_req.amo = amo_req_i.amo_op; | |||
// address is in operand a | |||
amo_bypass_req.addr = amo_req_i.operand_a; | |||
if (amo_req_i.amo_op != AMO_LR) begin | |||
amo_bypass_req.we = 1'b1; | |||
end | |||
amo_bypass_req.size = amo_req_i.size; | |||
// AXI implements CLR op instead of AND, negate operand | |||
if (amo_req_i.amo_op == AMO_AND) begin | |||
amo_operand_b = ~amo_req_i.operand_b; | |||
end else begin | |||
amo_operand_b = amo_req_i.operand_b; | |||
end | |||
// align data and byte-enable to correct byte lanes | |||
amo_bypass_req.wdata = amo_operand_b; | |||
if (amo_req_i.size == 2'b11) begin | |||
// 64b transfer | |||
amo_bypass_req.be = 8'b11111111; | |||
end else begin | |||
// 32b transfer | |||
if (amo_req_i.operand_a[2:0] == '0) begin | |||
// 64b aligned -> activate lower 4 byte lanes | |||
amo_bypass_req.be = 8'b00001111; | |||
end else begin | |||
// 64b unaligned -> activate upper 4 byte lanes | |||
amo_bypass_req.be = 8'b11110000; | |||
amo_bypass_req.wdata = amo_operand_b[31:0] << 32; | |||
end | |||
end | |||
// when request is accepted, wait for response | |||
if (amo_bypass_rsp.gnt) begin | |||
if (amo_bypass_rsp.valid) begin | |||
state_d = IDLE; | |||
amo_resp_o.ack = 1'b1; | |||
amo_resp_o.result = amo_bypass_rsp.rdata; | |||
end else begin | |||
state_d = AMO_WAIT_RESP; | |||
end | |||
end | |||
end | |||
AMO_WAIT_RESP: begin | |||
if (amo_bypass_rsp.valid) begin | |||
state_d = IDLE; | |||
amo_resp_o.ack = 1'b1; | |||
// Request is assumed to be still valid (ack not granted yet) | |||
if (amo_req_i.size == 2'b10) begin | |||
// 32b request | |||
logic [31:0] halfword; | |||
if (amo_req_i.operand_a[2:0] == '0) begin | |||
// 64b aligned -> activate lower 4 byte lanes | |||
halfword = amo_bypass_rsp.rdata[31:0]; | |||
end else begin | |||
// 64b unaligned -> activate upper 4 byte lanes | |||
halfword = amo_bypass_rsp.rdata[63:32]; | |||
end | |||
// Sign-extend 32b requests as per RISC-V spec | |||
amo_resp_o.result = {{32{halfword[31]}}, halfword}; | |||
end else begin | |||
// 64b request | |||
amo_resp_o.result = amo_bypass_rsp.rdata; | |||
end | |||
end | |||
end | |||
endcase | |||
end | |||
// check MSHR for aliasing | |||
always_comb begin | |||
mshr_addr_matches_o = 'b0; | |||
mshr_index_matches_o = 'b0; | |||
for (int i = 0; i < NR_PORTS; i++) begin | |||
// check mshr for potential matching of other units, exclude the unit currently being served | |||
if (mshr_q.valid && mshr_addr_i[i][55:DCACHE_BYTE_OFFSET] == mshr_q.addr[55:DCACHE_BYTE_OFFSET]) begin | |||
mshr_addr_matches_o[i] = 1'b1; | |||
end | |||
// same as previous, but checking only the index | |||
if (mshr_q.valid && mshr_addr_i[i][DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == mshr_q.addr[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]) begin | |||
mshr_index_matches_o[i] = 1'b1; | |||
end | |||
end | |||
end | |||
// -------------------- | |||
// Sequential Process | |||
// -------------------- | |||
always_ff @(posedge clk_i or negedge rst_ni) begin | |||
if (~rst_ni) begin | |||
mshr_q <= '0; | |||
state_q <= INIT; | |||
cnt_q <= '0; | |||
evict_way_q <= '0; | |||
evict_cl_q <= '0; | |||
serve_amo_q <= 1'b0; | |||
end else begin | |||
mshr_q <= mshr_d; | |||
state_q <= state_d; | |||
cnt_q <= cnt_d; | |||
evict_way_q <= evict_way_d; | |||
evict_cl_q <= evict_cl_d; | |||
serve_amo_q <= serve_amo_d; | |||
end | |||
end | |||
//pragma translate_off
`ifndef VERILATOR
// Simulation-only sanity check (excluded from synthesis and from Verilator):
// on every rising clock edge at most one bit of evict_way_q may be set.
// A violation only raises a warning, it does not stop the simulation.
property p_evict_way_onehot0;
  @(posedge clk_i) $onehot0(evict_way_q);
endproperty

assert property (p_evict_way_onehot0)
else $warning("Evict-way should be one-hot encoded");
`endif
//pragma translate_on
// ---------------------- | |||
// Pack bypass ports | |||
// ---------------------- | |||
// Bundle the per-port bypass (non-cached) requests and the AMO request into
// the bypass_ports_req array, and fan the matching responses back out.
always_comb begin
  // index sized to address every bypass port; it is reused after the loop,
  // where it points at the slot following the miss ports
  logic [$clog2(NR_BYPASS_PORTS)-1:0] port_id;

  // Pack MSHR ports first
  for (port_id = 0; port_id < NR_PORTS; port_id++) begin
    // request side: a port issues a bypass request only when its miss request
    // is valid and flagged as bypass
    bypass_ports_req[port_id].req     = miss_req_valid[port_id] & miss_req_bypass[port_id];
    bypass_ports_req[port_id].we      = miss_req_we[port_id];
    bypass_ports_req[port_id].addr    = miss_req_addr[port_id];
    bypass_ports_req[port_id].wdata   = miss_req_wdata[port_id];
    bypass_ports_req[port_id].be      = miss_req_be[port_id];
    bypass_ports_req[port_id].size    = miss_req_size[port_id];
    // fixed attributes: single transfer, no atomic operation, and an ID made
    // of the 2'b10 prefix followed by the port number
    bypass_ports_req[port_id].reqtype = ariane_axi::SINGLE_REQ;
    bypass_ports_req[port_id].amo     = AMO_NONE;
    bypass_ports_req[port_id].id      = {2'b10, port_id};

    // response side: hand grant, valid and read data back to the port
    bypass_gnt_o[port_id]   = bypass_ports_rsp[port_id].gnt;
    bypass_valid_o[port_id] = bypass_ports_rsp[port_id].valid;
    bypass_data_o[port_id]  = bypass_ports_rsp[port_id].rdata;
  end

  // The AMO request takes the slot after the miss ports (original note: lowest
  // priority - the arbiter's ordering is not visible in this block)
  bypass_ports_req[port_id] = amo_bypass_req;
  amo_bypass_rsp            = bypass_ports_rsp[port_id];
end
// ---------------------- | |||
// Arbitrate bypass ports | |||
// ---------------------- | |||
// Arbiter that funnels the NR_BYPASS_PORTS packed bypass requests into the
// single request/response pair consumed by the bypass AXI adapter.
axi_adapter_arbiter #(
  .req_t    ( bypass_req_t    ),
  .rsp_t    ( bypass_rsp_t    ),
  .NR_PORTS ( NR_BYPASS_PORTS )
) i_bypass_arbiter (
  .clk_i  ( clk_i              ),
  .rst_ni ( rst_ni             ),
  // requests: all ports in, selected request out
  .req_i  ( bypass_ports_req   ),
  .req_o  ( bypass_adapter_req ),
  // responses: adapter response in, per-port responses out
  .rsp_i  ( bypass_adapter_rsp ),
  .rsp_o  ( bypass_ports_rsp   )
);
// ---------------------- | |||
// Bypass AXI Interface | |||
// ---------------------- | |||
// AXI adapter for the bypass path: 64-bit data, driven by the request chosen
// by the bypass arbiter and connected to the axi_bypass_o/axi_bypass_i port.
axi_adapter #(
  .DATA_WIDTH            ( 64                 ),
  .AXI_ID_WIDTH          ( 4                  ),
  .CACHELINE_BYTE_OFFSET ( DCACHE_BYTE_OFFSET )
) i_bypass_axi_adapter (
  .clk_i                 ( clk_i                      ),
  .rst_ni                ( rst_ni                     ),
  // request handshake and attributes
  .req_i                 ( bypass_adapter_req.req     ),
  .gnt_o                 ( bypass_adapter_rsp.gnt     ),
  .type_i                ( bypass_adapter_req.reqtype ),
  .amo_i                 ( bypass_adapter_req.amo     ),
  .id_i                  ( bypass_adapter_req.id      ),
  .addr_i                ( bypass_adapter_req.addr    ),
  .size_i                ( bypass_adapter_req.size    ),
  // write channel
  .we_i                  ( bypass_adapter_req.we      ),
  .wdata_i               ( bypass_adapter_req.wdata   ),
  .be_i                  ( bypass_adapter_req.be      ),
  // read response
  .valid_o               ( bypass_adapter_rsp.valid   ),
  .rdata_o               ( bypass_adapter_rsp.rdata   ),
  // deliberately left open
  .id_o                  (                            ), // not used, single outstanding request in arbiter
  .critical_word_o       (                            ), // not used for single requests
  .critical_word_valid_o (                            ), // not used for single requests
  // AXI port
  .axi_req_o             ( axi_bypass_o               ),
  .axi_resp_i            ( axi_bypass_i               )
);
// ---------------------- | |||
// Cache Line AXI Refill | |||
// ---------------------- | |||
// AXI adapter for the cache-line path: data width equals one full cache line,
// driven by the miss FSM and connected to the axi_data_o/axi_data_i port.
axi_adapter #(
  .DATA_WIDTH            ( DCACHE_LINE_WIDTH  ),
  .AXI_ID_WIDTH          ( 4                  ),
  .CACHELINE_BYTE_OFFSET ( DCACHE_BYTE_OFFSET )
) i_miss_axi_adapter (
  .clk_i                 ( clk_i                 ),
  .rst_ni                ( rst_ni                ),
  // request handshake and attributes
  .req_i                 ( req_fsm_miss_valid    ),
  .gnt_o                 ( gnt_miss_fsm          ),
  .type_i                ( req_fsm_miss_req      ),
  .amo_i                 ( AMO_NONE              ), // this path never issues atomics
  .id_i                  ( 4'b1100               ), // constant ID for every request on this path
  .addr_i                ( req_fsm_miss_addr     ),
  .size_i                ( req_fsm_miss_size     ),
  // write channel
  .we_i                  ( req_fsm_miss_we       ),
  .wdata_i               ( req_fsm_miss_wdata    ),
  .be_i                  ( req_fsm_miss_be       ),
  // read response
  .valid_o               ( valid_miss_fsm        ),
  .rdata_o               ( data_miss_fsm         ),
  .id_o                  (                       ), // returned ID is not consumed
  // critical word is forwarded straight to the module outputs
  .critical_word_o       ( critical_word_o       ),
  .critical_word_valid_o ( critical_word_valid_o ),
  // AXI port
  .axi_req_o             ( axi_data_o            ),
  .axi_resp_i            ( axi_data_i            )
);
// ----------------- | |||
// Replacement LFSR | |||
// ----------------- | |||
// LFSR used for way replacement: advances while lfsr_enable is set and
// provides the chosen way both one-hot (lfsr_oh) and binary (lfsr_bin).
lfsr_8bit #(
  .WIDTH ( DCACHE_SET_ASSOC )
) i_lfsr (
  .en_i           ( lfsr_enable ),
  .refill_way_oh  ( lfsr_oh     ),
  .refill_way_bin ( lfsr_bin    ),
  // remaining ports are bound by name to identically named signals
  .*
);
// ----------------- | |||
// Struct Split | |||
// ----------------- | |||
// Hack as system verilog support in modelsim seems to be buggy here | |||
// Unpack each port's miss request into separate per-field arrays.
// The detour through a local variable is a tool workaround (the original
// author reports buggy SystemVerilog support in ModelSim here) - keep it.
always_comb begin
  automatic miss_req_t unpacked_req;

  for (int unsigned port = 0; port < NR_PORTS; port++) begin
    // reinterpret the port's request as the miss_req_t struct
    unpacked_req = miss_req_t'(miss_req_i[port]);

    // control fields
    miss_req_valid [port] = unpacked_req.valid;
    miss_req_bypass[port] = unpacked_req.bypass;
    miss_req_we    [port] = unpacked_req.we;
    // address and payload fields
    miss_req_addr  [port] = unpacked_req.addr;
    miss_req_wdata [port] = unpacked_req.wdata;
    miss_req_be    [port] = unpacked_req.be;
    miss_req_size  [port] = unpacked_req.size;
  end
end
endmodule | |||
// -------------- | |||
// AXI Arbiter | |||
// -------------- | |||
// | |||
// Description: Arbitrates access to AXI refill/bypass | |||
// | |||
module axi_adapter_arbiter #( | |||
parameter NR_PORTS = 4, | |||
parameter type req_t = std_cache_pkg::bypass_req_t, | |||
parameter type rsp_t = std_cache_pkg::bypass_rsp_t | |||
)( | |||
input logic clk_i, // Clock | |||
input logic rst_ni, // Asynchronous reset active low | |||
// Master ports | |||
input req_t [NR_PORTS-1:0] req_i, | |||
output rsp_t [NR_PORTS-1:0] rsp_o, | |||
// Slave port | |||
output req_t req_o, | |||
input rsp_t rsp_i | |||
); | |||
enum logic { IDLE, SERVING } state_d, state_q; | |||
req_t req_d, req_q; | |||
logic [NR_PORTS-1:0] sel_d, sel_q; | |||
always_comb begin | |||
sel_d = sel_q; | |||
state_d = state_q; | |||
req_d = req_q; | |||
req_o = req_q; | |||
rsp_o = '0; | |||
rsp_o[sel_q].rdata = rsp_i.rdata; | |||
case (state_q) | |||