16 Commits

Author SHA1 Message Date
  nancy1 801dcad9b7 add 1 year ago
  nancy1 ad1d7765d5 add 1 year ago
  nancy1 9d05d40d9c add 1 year ago
  nancy1 041d017053 add 1 year ago
  nancy1 a51acf7951 add 1 year ago
  nancy1 e55a45ed6c add 1 year ago
  nancy1 737640e4ab add 1 year ago
  nancy1 573285de96 add 1 year ago
  nancy1 6b1dca986c add 1 year ago
  nancy1 9f0b889029 add 1 year ago
  nancy1 c7100af9dd add 1 year ago
  nancy1 7bb2153d48 add 1 year ago
  nancy1 31a6f4b805 add 1 year ago
  nancy1 7c71c856ca add 1 year ago
  nancy1 4d5e4ceb60 add 1 year ago
  nancy1 32704b549e add 1 year ago
100 changed files with 307421 additions and 0 deletions
Split View
  1. +63
    -0
      app/src/core/amo_alu.sv
  2. +462
    -0
      app/src/core/cache_ctrl.sv
  3. +552
    -0
      app/src/core/cva6_icache.sv
  4. +191
    -0
      app/src/core/cva6_icache_axi_wrapper.sv
  5. +741
    -0
      app/src/core/miss_handler.sv
  6. +274
    -0
      app/src/core/std_cache_subsystem.sv
  7. +268
    -0
      app/src/core/std_nbdcache.sv
  8. +328
    -0
      app/src/core/std_no_dcache.sv
  9. +106
    -0
      app/src/core/tag_cmp.sv
  10. +623
    -0
      app/src/core/wt_axi_adapter.sv
  11. +195
    -0
      app/src/core/wt_cache_subsystem.sv
  12. +339
    -0
      app/src/core/wt_dcache.sv
  13. +276
    -0
      app/src/core/wt_dcache_ctrl.sv
  14. +383
    -0
      app/src/core/wt_dcache_mem.sv
  15. +577
    -0
      app/src/core/wt_dcache_missunit.sv
  16. +576
    -0
      app/src/core/wt_dcache_wbuffer.sv
  17. +387
    -0
      app/src/core/wt_l15_adapter.sv
  18. +153
    -0
      app/src/cvxif_example/cvxif_example_coprocessor.sv
  19. +47
    -0
      app/src/cvxif_example/include/cvxif_instr_pkg.sv
  20. +48
    -0
      app/src/cvxif_example/instr_decoder.sv
  21. +450
    -0
      app/src/mmu_sv32/cva6_mmu_sv32.sv
  22. +407
    -0
      app/src/mmu_sv32/cva6_ptw_sv32.sv
  23. +261
    -0
      app/src/mmu_sv32/cva6_tlb_sv32.sv
  24. +458
    -0
      app/src/mmu_sv39/mmu.sv
  25. +405
    -0
      app/src/mmu_sv39/ptw.sv
  26. +273
    -0
      app/src/mmu_sv39/tlb.sv
  27. BIN
      app/src/verilator_results/Vcva6_core_only_tb
  28. BIN
      app/src/verilator_work/Vcva6_core_only_tb
  29. +7664
    -0
      app/src/verilator_work/Vcva6_core_only_tb.cpp
  30. +6
    -0
      app/src/verilator_work/Vcva6_core_only_tb.d
  31. +3227
    -0
      app/src/verilator_work/Vcva6_core_only_tb.h
  32. +66
    -0
      app/src/verilator_work/Vcva6_core_only_tb.mk
  33. +8
    -0
      app/src/verilator_work/Vcva6_core_only_tb_AXI_BUS__A40_AB40_AC5_AD1.cpp
  34. +8
    -0
      app/src/verilator_work/Vcva6_core_only_tb_AXI_BUS__A40_AB40_AC5_AD1.d
  35. +51
    -0
      app/src/verilator_work/Vcva6_core_only_tb_AXI_BUS__A40_AB40_AC5_AD1.h
  36. +37
    -0
      app/src/verilator_work/Vcva6_core_only_tb_AXI_BUS__A40_AB40_AC5_AD1__Slow.cpp
  37. +8
    -0
      app/src/verilator_work/Vcva6_core_only_tb_AXI_BUS__A40_AB40_AC5_AD1__Slow.d
  38. +21631
    -0
      app/src/verilator_work/Vcva6_core_only_tb__1.cpp
  39. +6
    -0
      app/src/verilator_work/Vcva6_core_only_tb__1.d
  40. +12483
    -0
      app/src/verilator_work/Vcva6_core_only_tb__10.cpp
  41. +6
    -0
      app/src/verilator_work/Vcva6_core_only_tb__10.d
  42. +11670
    -0
      app/src/verilator_work/Vcva6_core_only_tb__10__Slow.cpp
  43. +7
    -0
      app/src/verilator_work/Vcva6_core_only_tb__10__Slow.d
  44. +9605
    -0
      app/src/verilator_work/Vcva6_core_only_tb__11.cpp
  45. +6
    -0
      app/src/verilator_work/Vcva6_core_only_tb__11.d
  46. +9538
    -0
      app/src/verilator_work/Vcva6_core_only_tb__11__Slow.cpp
  47. +7
    -0
      app/src/verilator_work/Vcva6_core_only_tb__11__Slow.d
  48. +2708
    -0
      app/src/verilator_work/Vcva6_core_only_tb__12.cpp
  49. +6
    -0
      app/src/verilator_work/Vcva6_core_only_tb__12.d
  50. +3240
    -0
      app/src/verilator_work/Vcva6_core_only_tb__12__Slow.cpp
  51. +7
    -0
      app/src/verilator_work/Vcva6_core_only_tb__12__Slow.d
  52. +17141
    -0
      app/src/verilator_work/Vcva6_core_only_tb__1__Slow.cpp
  53. +7
    -0
      app/src/verilator_work/Vcva6_core_only_tb__1__Slow.d
  54. +16855
    -0
      app/src/verilator_work/Vcva6_core_only_tb__2.cpp
  55. +6
    -0
      app/src/verilator_work/Vcva6_core_only_tb__2.d
  56. +13697
    -0
      app/src/verilator_work/Vcva6_core_only_tb__2__Slow.cpp
  57. +7
    -0
      app/src/verilator_work/Vcva6_core_only_tb__2__Slow.d
  58. +10616
    -0
      app/src/verilator_work/Vcva6_core_only_tb__3.cpp
  59. +6
    -0
      app/src/verilator_work/Vcva6_core_only_tb__3.d
  60. +9211
    -0
      app/src/verilator_work/Vcva6_core_only_tb__3__Slow.cpp
  61. +7
    -0
      app/src/verilator_work/Vcva6_core_only_tb__3__Slow.d
  62. +9746
    -0
      app/src/verilator_work/Vcva6_core_only_tb__4.cpp
  63. +6
    -0
      app/src/verilator_work/Vcva6_core_only_tb__4.d
  64. +11764
    -0
      app/src/verilator_work/Vcva6_core_only_tb__4__Slow.cpp
  65. +7
    -0
      app/src/verilator_work/Vcva6_core_only_tb__4__Slow.d
  66. +13209
    -0
      app/src/verilator_work/Vcva6_core_only_tb__5.cpp
  67. +6
    -0
      app/src/verilator_work/Vcva6_core_only_tb__5.d
  68. +10865
    -0
      app/src/verilator_work/Vcva6_core_only_tb__5__Slow.cpp
  69. +7
    -0
      app/src/verilator_work/Vcva6_core_only_tb__5__Slow.d
  70. +13370
    -0
      app/src/verilator_work/Vcva6_core_only_tb__6.cpp
  71. +6
    -0
      app/src/verilator_work/Vcva6_core_only_tb__6.d
  72. +8647
    -0
      app/src/verilator_work/Vcva6_core_only_tb__6__Slow.cpp
  73. +7
    -0
      app/src/verilator_work/Vcva6_core_only_tb__6__Slow.d
  74. +9566
    -0
      app/src/verilator_work/Vcva6_core_only_tb__7.cpp
  75. +6
    -0
      app/src/verilator_work/Vcva6_core_only_tb__7.d
  76. +13405
    -0
      app/src/verilator_work/Vcva6_core_only_tb__7__Slow.cpp
  77. +7
    -0
      app/src/verilator_work/Vcva6_core_only_tb__7__Slow.d
  78. +9839
    -0
      app/src/verilator_work/Vcva6_core_only_tb__8.cpp
  79. +6
    -0
      app/src/verilator_work/Vcva6_core_only_tb__8.d
  80. +12376
    -0
      app/src/verilator_work/Vcva6_core_only_tb__8__Slow.cpp
  81. +7
    -0
      app/src/verilator_work/Vcva6_core_only_tb__8__Slow.d
  82. +12175
    -0
      app/src/verilator_work/Vcva6_core_only_tb__9.cpp
  83. +6
    -0
      app/src/verilator_work/Vcva6_core_only_tb__9.d
  84. +12336
    -0
      app/src/verilator_work/Vcva6_core_only_tb__9__Slow.cpp
  85. +7
    -0
      app/src/verilator_work/Vcva6_core_only_tb__9__Slow.d
  86. BIN
      app/src/verilator_work/Vcva6_core_only_tb__ALL.a
  87. +11140
    -0
      app/src/verilator_work/Vcva6_core_only_tb__Slow.cpp
  88. +7
    -0
      app/src/verilator_work/Vcva6_core_only_tb__Slow.d
  89. +38
    -0
      app/src/verilator_work/Vcva6_core_only_tb__Syms.cpp
  90. +7
    -0
      app/src/verilator_work/Vcva6_core_only_tb__Syms.d
  91. +44
    -0
      app/src/verilator_work/Vcva6_core_only_tb__Syms.h
  92. +8
    -0
      app/src/verilator_work/Vcva6_core_only_tb___024unit.cpp
  93. +7
    -0
      app/src/verilator_work/Vcva6_core_only_tb___024unit.d
  94. +40
    -0
      app/src/verilator_work/Vcva6_core_only_tb___024unit.h
  95. +27
    -0
      app/src/verilator_work/Vcva6_core_only_tb___024unit__Slow.cpp
  96. +7
    -0
      app/src/verilator_work/Vcva6_core_only_tb___024unit__Slow.d
  97. +1
    -0
      app/src/verilator_work/Vcva6_core_only_tb__ver.d
  98. +138
    -0
      app/src/verilator_work/Vcva6_core_only_tb__verFiles.dat
  99. +78
    -0
      app/src/verilator_work/Vcva6_core_only_tb_classes.mk
  100. +112
    -0
      app/src/verilator_work/ariane_axi_pkg.sv

+ 63
- 0
app/src/core/amo_alu.sv View File

@@ -0,0 +1,63 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 15.09.2018
// Description: Combinatorial AMO unit
module amo_alu (
// AMO interface
input ariane_pkg::amo_t amo_op_i, // which atomic operation to perform
input logic [63:0] amo_operand_a_i, // operand A (old value loaded from memory)
input logic [63:0] amo_operand_b_i, // operand B (register/store-data operand)
output logic [63:0] amo_result_o // result of atomic memory operation
);

// Single shared 65-bit adder: implements AMO_ADD directly and all four
// MIN/MAX variants as a subtraction (A - B) whose MSB (bit 64) tells
// whether A < B and thus selects the result.
logic [64:0] adder_sum;
logic [64:0] adder_operand_a, adder_operand_b;

assign adder_sum = adder_operand_a + adder_operand_b;

always_comb begin

// default: sign-extend both operands to 65 bit (signed comparison);
// the unsigned cases below override this with zero-extension
adder_operand_a = $signed(amo_operand_a_i);
adder_operand_b = $signed(amo_operand_b_i);

// default result is operand B — this is the final result for
// AMO_SC and AMO_SWAP, whose case branches are intentionally empty
amo_result_o = amo_operand_b_i;

unique case (amo_op_i)
// the default is to output operand_b
ariane_pkg::AMO_SC:;
ariane_pkg::AMO_SWAP:;
ariane_pkg::AMO_ADD: amo_result_o = adder_sum[63:0];
ariane_pkg::AMO_AND: amo_result_o = amo_operand_a_i & amo_operand_b_i;
ariane_pkg::AMO_OR: amo_result_o = amo_operand_a_i | amo_operand_b_i;
ariane_pkg::AMO_XOR: amo_result_o = amo_operand_a_i ^ amo_operand_b_i;
ariane_pkg::AMO_MAX: begin
// adder computes A - B; sum[64] set means A < B -> max is B
adder_operand_b = -$signed(amo_operand_b_i);
amo_result_o = adder_sum[64] ? amo_operand_b_i : amo_operand_a_i;
end
ariane_pkg::AMO_MIN: begin
// adder computes A - B; sum[64] set means A < B -> min is A
adder_operand_b = -$signed(amo_operand_b_i);
amo_result_o = adder_sum[64] ? amo_operand_a_i : amo_operand_b_i;
end
ariane_pkg::AMO_MAXU: begin
// zero-extend operands for the unsigned comparison
adder_operand_a = $unsigned(amo_operand_a_i);
adder_operand_b = -$unsigned(amo_operand_b_i);
amo_result_o = adder_sum[64] ? amo_operand_b_i : amo_operand_a_i;
end
ariane_pkg::AMO_MINU: begin
// zero-extend operands for the unsigned comparison
adder_operand_a = $unsigned(amo_operand_a_i);
adder_operand_b = -$unsigned(amo_operand_b_i);
amo_result_o = adder_sum[64] ? amo_operand_a_i : amo_operand_b_i;
end
default: amo_result_o = '0;
endcase
end
endmodule

+ 462
- 0
app/src/core/cache_ctrl.sv View File

@@ -0,0 +1,462 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// File: cache_ctrl.svh
// Author: Florian Zaruba <zarubaf@ethz.ch>
// Date: 14.10.2017
//
// Copyright (C) 2017 ETH Zurich, University of Bologna
// All rights reserved.
//
// Description: Cache controller


// Per-port controller of the standard (write-back) data cache: serves one
// core request port, performs speculative tag lookups against the shared
// SRAM arrays, returns hits directly and forwards misses / bypass
// (uncacheable) accesses to the shared miss handler. MSHR aliasing is
// checked continuously because stores are non-atomic on the single-ported
// SRAM (see comments in the FSM below).
module cache_ctrl import ariane_pkg::*; import std_cache_pkg::*; #(
parameter ariane_cfg_t ArianeCfg = ArianeDefaultConfig // contains cacheable regions
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // flush request — new requests are not accepted while asserted
input logic bypass_i, // send the request down the bypass path (WAIT_TAG_BYPASSED) instead of using the cache
output logic busy_o, // controller is not in IDLE
// Core request ports
input dcache_req_i_t req_port_i,
output dcache_req_o_t req_port_o,
// SRAM interface
output logic [DCACHE_SET_ASSOC-1:0] req_o, // req is valid (per-way request to memory array)
output logic [DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array
input logic gnt_i, // grant from the SRAM arbiter
output cache_line_t data_o,
output cl_be_t be_o,
output logic [DCACHE_TAG_WIDTH-1:0] tag_o, //valid one cycle later
input cache_line_t [DCACHE_SET_ASSOC-1:0] data_i,
output logic we_o,
input logic [DCACHE_SET_ASSOC-1:0] hit_way_i, // one-hot hit vector from tag comparison
// Miss handling
output miss_req_t miss_req_o,
// return
input logic miss_gnt_i,
input logic active_serving_i, // the miss unit is currently active for this unit, serving the miss
input logic [63:0] critical_word_i,
input logic critical_word_valid_i,
// bypass ports
input logic bypass_gnt_i,
input logic bypass_valid_i,
input logic [63:0] bypass_data_i,
// check MSHR for aliasing
output logic [55:0] mshr_addr_o,
input logic mshr_addr_matches_i,
input logic mshr_index_matches_i
);

// FSM state encoding (4 bits, 11 states)
enum logic [3:0] {
IDLE, // 0
WAIT_TAG, // 1
WAIT_TAG_BYPASSED, // 2
WAIT_GNT, // 3
WAIT_GNT_SAVED, // 4
STORE_REQ, // 5
WAIT_REFILL_VALID, // 6
WAIT_REFILL_GNT, // 7
WAIT_TAG_SAVED, // 8
WAIT_MSHR, // 9
WAIT_CRITICAL_WORD // 10
} state_d, state_q;

// latched copy of the in-flight core request
typedef struct packed {
logic [DCACHE_INDEX_WIDTH-1:0] index;
logic [DCACHE_TAG_WIDTH-1:0] tag;
logic [7:0] be;
logic [1:0] size;
logic we;
logic [63:0] wdata;
logic bypass; // request goes down the bypass (uncacheable) path
logic killed; // request was killed — suppress data_rvalid for reads
} mem_req_t;

logic [DCACHE_SET_ASSOC-1:0] hit_way_d, hit_way_q; // latched hit way for the store second pass

mem_req_t mem_req_d, mem_req_q;

assign busy_o = (state_q != IDLE);
// note: tag_o is driven from the *next-state* tag so it lines up with the SRAM read
assign tag_o = mem_req_d.tag;

logic [DCACHE_LINE_WIDTH-1:0] cl_i; // cache line selected by the hit way

// mux the hit way's data out of the per-way read data
always_comb begin : way_select
cl_i = '0;
for (int unsigned i = 0; i < DCACHE_SET_ASSOC; i++)
if (hit_way_i[i])
cl_i = data_i[i].data;

// cl_i = data_i[one_hot_to_bin(hit_way_i)].data;
end

// --------------
// Cache FSM
// --------------
always_comb begin : cache_ctrl_fsm
automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset;
// incoming cache-line -> this is needed as synthesis is not supporting +: indexing in a multi-dimensional array
// cache-line offset -> multiple of 64
cl_offset = mem_req_q.index[DCACHE_BYTE_OFFSET-1:3] << 6; // shift by 6 to the left
// default assignments
state_d = state_q;
mem_req_d = mem_req_q;
hit_way_d = hit_way_q;
// output assignments
req_port_o.data_gnt = 1'b0;
req_port_o.data_rvalid = 1'b0;
req_port_o.data_rdata = '0;
miss_req_o = '0;
mshr_addr_o = '0;
// Memory array communication
req_o = '0;
addr_o = req_port_i.address_index;
data_o = '0;
be_o = '0;
we_o = '0;

// a kill can arrive in any state — remember it until the request retires
mem_req_d.killed |= req_port_i.kill_req;

case (state_q)

IDLE: begin
// a new request arrived
if (req_port_i.data_req && !flush_i) begin
// request the cache line - we can do this speculatively
req_o = '1;

// save index, be and we
mem_req_d.index = req_port_i.address_index;
mem_req_d.be = req_port_i.data_be;
mem_req_d.size = req_port_i.data_size;
mem_req_d.we = req_port_i.data_we;
mem_req_d.wdata = req_port_i.data_wdata;
mem_req_d.killed = req_port_i.kill_req;

// Bypass mode, check for uncacheable address here as well
if (bypass_i) begin
state_d = WAIT_TAG_BYPASSED;
// grant this access only if it was a load
req_port_o.data_gnt = (req_port_i.data_we) ? 1'b0 : 1'b1;
mem_req_d.bypass = 1'b1;
// ------------------
// Cache is enabled
// ------------------
end else begin
// Wait that we have access on the memory array
if (gnt_i) begin
state_d = WAIT_TAG;
mem_req_d.bypass = 1'b0;
// only for a read
if (!req_port_i.data_we)
req_port_o.data_gnt = 1'b1;
end
end
end
end

// cache enabled and waiting for tag
WAIT_TAG, WAIT_TAG_SAVED: begin
// check that the client really wants to do the request and that we have a valid tag
if (!req_port_i.kill_req && (req_port_i.tag_valid || state_q == WAIT_TAG_SAVED || mem_req_q.we)) begin
// save tag if we didn't already save it
if (state_q != WAIT_TAG_SAVED) begin
mem_req_d.tag = req_port_i.address_tag;
end
// we speculatively request another transfer
if (req_port_i.data_req && !flush_i) begin
req_o = '1;
end
// ------------
// HIT CASE
// ------------
if (|hit_way_i) begin
// we can request another cache-line if this was a load
if (req_port_i.data_req && !mem_req_q.we && !flush_i) begin
state_d = WAIT_TAG; // switch back to WAIT_TAG
mem_req_d.index = req_port_i.address_index;
mem_req_d.be = req_port_i.data_be;
mem_req_d.size = req_port_i.data_size;
mem_req_d.we = req_port_i.data_we;
mem_req_d.wdata = req_port_i.data_wdata;
mem_req_d.killed = req_port_i.kill_req;
mem_req_d.bypass = 1'b0;

req_port_o.data_gnt = gnt_i;

if (!gnt_i) begin
state_d = IDLE;
end
end else begin
state_d = IDLE;
end

// this is timing critical
// req_port_o.data_rdata = cl_i[cl_offset +: 64];
// NOTE: hard-coded 64-bit word select for a 128-bit cache line
// (see the DCACHE_LINE_WIDTH == 128 assertion at the bottom)
case (mem_req_q.index[3])
1'b0: req_port_o.data_rdata = cl_i[63:0];
1'b1: req_port_o.data_rdata = cl_i[127:64];
endcase

// report data for a read
if (!mem_req_q.we) begin
req_port_o.data_rvalid = ~mem_req_q.killed;
// else this was a store so we need an extra step to handle it
end else begin
state_d = STORE_REQ;
hit_way_d = hit_way_i;
end
// ------------
// MISS CASE
// ------------
end else begin
// make a miss request
state_d = WAIT_REFILL_GNT;
end
// ----------------------------------------------
// Check MSHR - Miss Status Handling Register
// ----------------------------------------------
mshr_addr_o = {tag_o, mem_req_q.index};
// 1. We've got a match on MSHR and while are going down the
// store path. This means that the miss controller is
// currently evicting our cache-line. As the store is
// non-atomic we need to constantly check whether we are
// matching the address the miss handler is serving.
// Furthermore we need to check for the whole index
// because a completely different memory line could alias
// with the cache-line we are evicting.
// 2. The second case is where we are currently loading and
// the address matches the exact CL the miss controller
// is currently serving. That means we need to wait for
// the miss controller to finish its request before we
// can continue to serve this CL. Otherwise we will fetch
// the cache-line again and potentially losing any
// content we've written so far. This as a consequence
// means we can't have hit on the CL which means the
// req_port_o.data_rvalid will be de-asserted.
if ((mshr_index_matches_i && mem_req_q.we) || mshr_addr_matches_i) begin
state_d = WAIT_MSHR;
end

// -------------------------
// Check for cache-ability
// -------------------------
if (!is_inside_cacheable_regions(ArianeCfg, {{{64-riscv::PLEN}{1'b0}}, tag_o, {DCACHE_INDEX_WIDTH{1'b0}}})) begin
mem_req_d.bypass = 1'b1;
state_d = WAIT_REFILL_GNT;
end

// we are still waiting for a valid tag
end else begin
// request cache line for saved index
addr_o = mem_req_q.index;
req_o = '1;

// check that we still have a memory grant
if (!gnt_i) begin
state_d = WAIT_GNT;
end
end
end

// ~> we already granted the request but lost the memory grant while waiting for the tag
WAIT_GNT, WAIT_GNT_SAVED: begin
// request cache line for saved index
addr_o = mem_req_q.index;
req_o = '1;

// if we get a valid tag while waiting for the memory grant, save it
if (req_port_i.tag_valid) begin
mem_req_d.tag = req_port_i.address_tag;
state_d = WAIT_GNT_SAVED;
end

// we have a memory grant again ~> go back to WAIT_TAG
// note: state_d (not state_q) is checked so a tag arriving in the
// same cycle as the grant is not lost
if (gnt_i) begin
state_d = (state_d == WAIT_GNT) ? WAIT_TAG : WAIT_TAG_SAVED;
end
end

// ~> we are here as we need a second round of memory access for a store
STORE_REQ: begin
// check if the MSHR still doesn't match
mshr_addr_o = {mem_req_q.tag, mem_req_q.index};

// We need to re-check for MSHR aliasing here as the store requires at least
// two memory look-ups on a single-ported SRAM and therefore is non-atomic
if (!mshr_index_matches_i) begin
// store data, write dirty bit
req_o = hit_way_q;
addr_o = mem_req_q.index;
we_o = 1'b1;

be_o.vldrty = hit_way_q;

// set the correct byte enable
be_o.data[cl_offset>>3 +: 8] = mem_req_q.be;
data_o.data[cl_offset +: 64] = mem_req_q.wdata;
// ~> change the state
data_o.dirty = 1'b1;
data_o.valid = 1'b1;

// got a grant ~> this is finished now
if (gnt_i) begin
req_port_o.data_gnt = 1'b1;
state_d = IDLE;
end
end else begin
state_d = WAIT_MSHR;
end
end // case: STORE_REQ

// we've got a match on MSHR ~> miss unit is currently serving a request
WAIT_MSHR: begin
mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
// we can start a new request
if (!mshr_index_matches_i) begin
req_o = '1;

addr_o = mem_req_q.index;

// tag is already latched ~> resume at WAIT_TAG_SAVED
if (gnt_i) state_d = WAIT_TAG_SAVED;
end
end

// it's for sure a miss
WAIT_TAG_BYPASSED: begin
// check that the client really wants to do the request and that we have a valid tag
if (!req_port_i.kill_req && (req_port_i.tag_valid || mem_req_q.we)) begin
// save tag
mem_req_d.tag = req_port_i.address_tag;
state_d = WAIT_REFILL_GNT;
end
end

// ~> wait for grant from miss unit
WAIT_REFILL_GNT: begin

mshr_addr_o = {mem_req_q.tag, mem_req_q.index};

miss_req_o.valid = 1'b1;
miss_req_o.bypass = mem_req_q.bypass;
miss_req_o.addr = {mem_req_q.tag, mem_req_q.index};
miss_req_o.be = mem_req_q.be;
miss_req_o.size = mem_req_q.size;
miss_req_o.we = mem_req_q.we;
miss_req_o.wdata = mem_req_q.wdata;

// got a grant so go to valid
if (bypass_gnt_i) begin
state_d = WAIT_REFILL_VALID;
// if this was a write we still need to give a grant to the store unit
if (mem_req_q.we)
req_port_o.data_gnt = 1'b1;
end

if (miss_gnt_i && !mem_req_q.we)
state_d = WAIT_CRITICAL_WORD;
else if (miss_gnt_i) begin
state_d = IDLE;
req_port_o.data_gnt = 1'b1;
end

// it can be the case that the miss unit is currently serving a
// request which matches ours
// so we need to check the MSHR for matching continuously
// if the MSHR matches we need to go to a different state -> we should never get a matching MSHR and a high miss_gnt_i
if (mshr_addr_matches_i && !active_serving_i) begin
state_d = WAIT_MSHR;
end
end

// ~> wait for critical word to arrive
WAIT_CRITICAL_WORD: begin
// speculatively request another word
if (req_port_i.data_req) begin
// request the cache line
req_o = '1;
end

if (critical_word_valid_i) begin
req_port_o.data_rvalid = ~mem_req_q.killed;
req_port_o.data_rdata = critical_word_i;
// we can make another request
if (req_port_i.data_req) begin
// save index, be and we
mem_req_d.index = req_port_i.address_index;
mem_req_d.be = req_port_i.data_be;
mem_req_d.size = req_port_i.data_size;
mem_req_d.we = req_port_i.data_we;
mem_req_d.wdata = req_port_i.data_wdata;
mem_req_d.killed = req_port_i.kill_req;

state_d = IDLE;

// Wait until we have access on the memory array
if (gnt_i) begin
state_d = WAIT_TAG;
mem_req_d.bypass = 1'b0;
req_port_o.data_gnt = 1'b1;
end
end else begin
state_d = IDLE;
end
end
end
// ~> wait until the bypass request is valid
WAIT_REFILL_VALID: begin
// got a valid answer
if (bypass_valid_i) begin
req_port_o.data_rdata = bypass_data_i;
req_port_o.data_rvalid = ~mem_req_q.killed;
state_d = IDLE;
end
end
endcase

// kill handling: acknowledge the kill with rvalid and abort, unless we
// are already committed to an outstanding memory transaction (those
// two states must drain the response first)
if (req_port_i.kill_req) begin
req_port_o.data_rvalid = 1'b1;
if (!(state_q inside {
WAIT_REFILL_GNT,
WAIT_CRITICAL_WORD})) begin
state_d = IDLE;
end
end
end

// --------------
// Registers
// --------------
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
state_q <= IDLE;
mem_req_q <= '0;
hit_way_q <= '0;
end else begin
state_q <= state_d;
mem_req_q <= mem_req_d;
hit_way_q <= hit_way_d;
end
end

//pragma translate_off
`ifndef VERILATOR
initial begin
assert (DCACHE_LINE_WIDTH == 128) else $error ("Cacheline width has to be 128 for the moment. But only small changes required in data select logic");
end
// if the full MSHR address matches so should also match the partial one
partial_full_mshr_match: assert property(@(posedge clk_i) disable iff (~rst_ni) mshr_addr_matches_i -> mshr_index_matches_i) else $fatal (1, "partial mshr index doesn't match");
// there should never be a valid answer when the MSHR matches and we are not being served
no_valid_on_mshr_match: assert property(@(posedge clk_i) disable iff (~rst_ni) (mshr_addr_matches_i && !active_serving_i)-> !req_port_o.data_rvalid || req_port_i.kill_req) else $fatal (1, "rvalid_o should not be set on MSHR match");
`endif
//pragma translate_on
endmodule

+ 552
- 0
app/src/core/cva6_icache.sv View File

@@ -0,0 +1,552 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: Instruction cache that is compatible with openpiton.
//
// Some notes:
//
// 1) refills always have the size of one cache line, except for accesses to the I/O region, which is mapped
// to the top half of the physical address space (bit 39 = 1). the data width of the interface has the width
// of one cache line, and hence the ifills can be transferred in a single cycle. note that the ifills must be
// consumed unconditionally.
//
// 2) instruction fetches are always assumed to be aligned to 32bit (lower 2 bits are ignored)
//
// 3) NC accesses to I/O space are expected to return 32bit from memory.
//


module cva6_icache import ariane_pkg::*; import wt_cache_pkg::*; #(
parameter logic [CACHE_ID_WIDTH-1:0] RdTxId = 0, // ID to be used for read transactions
parameter ariane_pkg::ariane_cfg_t ArianeCfg = ariane_pkg::ArianeDefaultConfig // contains cacheable regions
) (
input logic clk_i,
input logic rst_ni,

input logic flush_i, // flush the icache, flush and kill have to be asserted together
input logic en_i, // enable icache
output logic miss_o, // to performance counter
// address translation requests
input icache_areq_i_t areq_i,
output icache_areq_o_t areq_o,
// data requests
input icache_dreq_i_t dreq_i,
output icache_dreq_o_t dreq_o,
// refill port
input logic mem_rtrn_vld_i,
input icache_rtrn_t mem_rtrn_i,
output logic mem_data_req_o,
input logic mem_data_ack_i,
output icache_req_t mem_data_o
);

// signals
logic cache_en_d, cache_en_q; // cache is enabled
logic [riscv::VLEN-1:0] vaddr_d, vaddr_q;
logic paddr_is_nc; // asserted if physical address is non-cacheable
logic [ICACHE_SET_ASSOC-1:0] cl_hit; // hit from tag compare
logic cache_rden; // triggers cache lookup
logic cache_wren; // triggers write to cacheline
logic cmp_en_d, cmp_en_q; // enable tag comparison in next cycle. used to cut long path due to NC signal.
logic flush_d, flush_q; // used to register and signal pending flushes

// replacement strategy
logic update_lfsr; // shift the LFSR
logic [$clog2(ICACHE_SET_ASSOC)-1:0] inv_way; // first non-valid encountered
logic [$clog2(ICACHE_SET_ASSOC)-1:0] rnd_way; // random index for replacement
logic [$clog2(ICACHE_SET_ASSOC)-1:0] repl_way; // way to replace
logic [ICACHE_SET_ASSOC-1:0] repl_way_oh_d, repl_way_oh_q; // way to replace (onehot)
logic all_ways_valid; // we need to switch repl strategy since all are valid

// invalidations / flushing
logic inv_en; // incoming invalidations
logic inv_d, inv_q; // invalidation in progress
logic flush_en, flush_done; // used to flush cache entries
logic [ICACHE_CL_IDX_WIDTH-1:0] flush_cnt_d, flush_cnt_q; // used to flush cache entries

// mem arrays
logic cl_we; // write enable to memory array
logic [ICACHE_SET_ASSOC-1:0] cl_req; // request to memory array
logic [ICACHE_CL_IDX_WIDTH-1:0] cl_index; // this is a cache-line index, to memory array
logic [ICACHE_OFFSET_WIDTH-1:0] cl_offset_d, cl_offset_q; // offset in cache line
logic [ICACHE_TAG_WIDTH-1:0] cl_tag_d, cl_tag_q; // this is the cache tag
logic [ICACHE_TAG_WIDTH-1:0] cl_tag_rdata [ICACHE_SET_ASSOC-1:0]; // these are the tags coming from the tagmem
logic [ICACHE_LINE_WIDTH-1:0] cl_rdata [ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the cache
logic [ICACHE_USER_LINE_WIDTH-1:0] cl_ruser[ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the user cache
logic [ICACHE_SET_ASSOC-1:0][FETCH_WIDTH-1:0]cl_sel; // selected word from each cacheline
logic [ICACHE_SET_ASSOC-1:0][FETCH_USER_WIDTH-1:0] cl_user; // selected word from each cacheline
logic [ICACHE_SET_ASSOC-1:0] vld_req; // bit enable for valid regs
logic vld_we; // valid bits write enable
logic [ICACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write
logic [ICACHE_SET_ASSOC-1:0] vld_rdata; // valid bits coming from valid regs
logic [ICACHE_CL_IDX_WIDTH-1:0] vld_addr; // valid bit

// cpmtroller FSM
typedef enum logic[2:0] {FLUSH, IDLE, READ, MISS, KILL_ATRANS, KILL_MISS} state_e;
state_e state_d, state_q;

///////////////////////////////////////////////////////
// address -> cl_index mapping, interface plumbing
///////////////////////////////////////////////////////

// extract tag from physical address, check if NC
assign cl_tag_d = (areq_i.fetch_valid) ? areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH] : cl_tag_q;

// noncacheable if request goes to I/O space, or if cache is disabled
assign paddr_is_nc = (~cache_en_q) | (~ariane_pkg::is_inside_cacheable_regions(ArianeCfg, {{{64-riscv::PLEN}{1'b0}}, cl_tag_d, {ICACHE_INDEX_WIDTH{1'b0}}}));

// pass exception through
assign dreq_o.ex = areq_i.fetch_exception;

// latch this in case we have to stall later on
// make sure this is 32bit aligned
assign vaddr_d = (dreq_o.ready & dreq_i.req) ? dreq_i.vaddr : vaddr_q;
assign areq_o.fetch_vaddr = {vaddr_q>>2, 2'b0};

// split virtual address into index and offset to address cache arrays
assign cl_index = vaddr_d[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH];


if (ArianeCfg.Axi64BitCompliant) begin : gen_axi_offset
// if we generate a noncacheable access, the word will be at offset 0 or 4 in the cl coming from memory
assign cl_offset_d = ( dreq_o.ready & dreq_i.req) ? {dreq_i.vaddr>>2, 2'b0} :
( paddr_is_nc & mem_data_req_o ) ? cl_offset_q[2]<<2 : // needed since we transfer 32bit over a 64bit AXI bus in this case
cl_offset_q;
// request word address instead of cl address in case of NC access
assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:3], 3'b0} : // align to 64bit
{cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl
end else begin : gen_piton_offset
// icache fills are either cachelines or 4byte fills, depending on whether they go to the Piton I/O space or not.
// since the piton cache system replicates the data, we can always index the full CL
assign cl_offset_d = ( dreq_o.ready & dreq_i.req) ? {dreq_i.vaddr>>2, 2'b0} :
cl_offset_q;

// request word address instead of cl address in case of NC access
assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:2], 2'b0} : // align to 32bit
{cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl
end


assign mem_data_o.tid = RdTxId;

assign mem_data_o.nc = paddr_is_nc;
// way that is being replaced
assign mem_data_o.way = repl_way;
assign dreq_o.vaddr = vaddr_q;

// invalidations take two cycles
assign inv_d = inv_en;

  ///////////////////////////////////////////////////////
  // main control logic
  ///////////////////////////////////////////////////////
  // high if the fetch paddr falls into a non-idempotent (e.g. I/O) region;
  // speculative fetches must not be issued to such regions
  logic addr_ni;
  assign addr_ni = is_inside_nonidempotent_regions(ArianeCfg, areq_i.fetch_paddr);

  // icache main FSM: FLUSH -> IDLE <-> READ -> MISS, with KILL_ATRANS /
  // KILL_MISS absorbing requests that are killed mid-flight
  always_comb begin : p_fsm
    // default assignment
    state_d    = state_q;
    cache_en_d = cache_en_q & en_i; // disabling the cache is always possible, enable needs to go via flush
    flush_en   = 1'b0;
    cmp_en_d   = 1'b0;
    cache_rden = 1'b0;
    cache_wren = 1'b0;
    inv_en     = 1'b0;
    flush_d    = flush_q | flush_i; // register incoming flush

    // interfaces
    dreq_o.ready     = 1'b0;
    areq_o.fetch_req = 1'b0;
    dreq_o.valid     = 1'b0;
    mem_data_req_o   = 1'b0;
    // performance counter
    miss_o           = 1'b0;

    // handle invalidations unconditionally
    // note: invald are mutually exclusive with
    // ifills, since both arrive over the same IF
    // however, we need to make sure below that we
    // do not trigger a cache readout at the same time...
    if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_INV_REQ) begin
      inv_en = 1'b1;
    end

    unique case (state_q)
      //////////////////////////////////
      // this clears all valid bits
      FLUSH: begin
        flush_en = 1'b1;
        if (flush_done) begin
          state_d = IDLE;
          flush_d = 1'b0;
          // if the cache was not enabled set this
          cache_en_d = en_i;
        end
      end
      //////////////////////////////////
      // wait for an incoming request
      IDLE: begin
        // only enable tag comparison if cache is enabled
        cmp_en_d = cache_en_q;

        // handle pending flushes, or perform cache clear upon enable
        if (flush_d || (en_i && !cache_en_q)) begin
          state_d = FLUSH;
        // wait for incoming requests
        end else begin
          // mem requests are for sure invals here
          // (do not accept a request in the same cycle, since the inval
          // occupies the valid/tag arrays)
          if (!mem_rtrn_vld_i) begin
            dreq_o.ready = 1'b1;
            // we have a new request
            if (dreq_i.req) begin
              cache_rden = 1'b1;
              state_d    = READ;
            end
          end
          if (dreq_i.kill_s1) begin
            state_d = IDLE;
          end
        end
      end
      //////////////////////////////////
      // check whether we have a hit
      // in case the cache is disabled,
      // or in case the address is NC, we
      // reuse the miss mechanism to handle
      // the request
      READ: begin
        areq_o.fetch_req = '1;
        // only enable tag comparison if cache is enabled
        cmp_en_d = cache_en_q;
        // readout speculatively
        cache_rden = cache_en_q;

        // wait until the translation is valid; a speculative fetch to a
        // non-idempotent region must stall here until it is non-speculative
        if (areq_i.fetch_valid && (!dreq_i.spec || !addr_ni) ) begin
          // check if we have to flush
          if (flush_d) begin
            state_d = IDLE;
          // we have a hit or an exception output valid result
          end else if (((|cl_hit && cache_en_q) || areq_i.fetch_exception.valid) && !inv_q) begin
            dreq_o.valid = ~dreq_i.kill_s2; // just don't output in this case
            state_d = IDLE;

            // we can accept another request
            // and stay here, but only if no inval is coming in
            // note: we are not expecting ifill return packets here...
            if (!mem_rtrn_vld_i) begin
              dreq_o.ready = 1'b1;
              if (dreq_i.req) begin
                state_d = READ;
              end
            end
            // if a request is being killed at this stage,
            // we have to bail out and wait for the address translation to complete
            if (dreq_i.kill_s1) begin
              state_d = IDLE;
            end
          // we have a miss / NC transaction
          end else if (dreq_i.kill_s2) begin
            state_d = IDLE;
          end else if (!inv_q) begin
            cmp_en_d = 1'b0;
            // only count this as a miss if the cache is enabled, and
            // the address is cacheable
            // send out ifill request
            mem_data_req_o = 1'b1;
            if (mem_data_ack_i) begin
              miss_o  = ~paddr_is_nc;
              state_d = MISS;
            end
          end
        // bail out if this request is being killed (and we missed on the TLB)
        end else if (dreq_i.kill_s2 || flush_d) begin
          state_d = KILL_ATRANS;
        end
      end
      //////////////////////////////////
      // wait until the memory transaction
      // returns. do not write to memory
      // if the nc bit is set.
      MISS: begin
        // note: this is mutually exclusive with ICACHE_INV_REQ,
        // so we do not have to check for invals here
        if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_IFILL_ACK) begin
          state_d = IDLE;
          // only return data if request is not being killed
          if (!(dreq_i.kill_s2 || flush_d)) begin
            dreq_o.valid = 1'b1;
            // only write to cache if this address is cacheable
            cache_wren   = ~paddr_is_nc;
          end
        // bail out if this request is being killed
        end else if (dreq_i.kill_s2 || flush_d) begin
          state_d = KILL_MISS;
        end
      end
      //////////////////////////////////
      // killed address translation,
      // wait until paddr is valid, and go
      // back to idle
      KILL_ATRANS: begin
        areq_o.fetch_req = '1;
        if (areq_i.fetch_valid) begin
          state_d = IDLE;
        end
      end
      //////////////////////////////////
      // killed miss,
      // wait until memory responds and
      // go back to idle
      KILL_MISS: begin
        if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_IFILL_ACK) begin
          state_d = IDLE;
        end
      end
      default: begin
        // we should never get here
        state_d = FLUSH;
      end
    endcase // state_q
  end

  ///////////////////////////////////////////////////////
  // valid bit invalidation and replacement strategy
  ///////////////////////////////////////////////////////

  // note: it cannot happen that we get an invalidation + a cl replacement
  // in the same cycle as these requests arrive via the same interface
  // flushes take precedence over invalidations (it is ok if we ignore
  // the inval since the cache is cleared anyway)

  // flush counter walks over all sets while flush_en is asserted
  assign flush_cnt_d = (flush_done) ? '0                :
                       (flush_en)   ? flush_cnt_q + 1   :
                                      flush_cnt_q;

  assign flush_done  = (flush_cnt_q==(ICACHE_NUM_WORDS-1));

  // invalidation/clearing address
  // flushing takes precedence over invals
  assign vld_addr = (flush_en) ? flush_cnt_q :
                    (inv_en)   ? mem_rtrn_i.inv.idx[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH] :
                                 cl_index;

  // which ways of the valid/tag array participate:
  // flush/readout touch all ways, single-way inval is one-hot,
  // refill writes only the replacement way chosen at lookup time
  assign vld_req  = (flush_en || cache_rden)        ? '1                                    :
                    (mem_rtrn_i.inv.all && inv_en)  ? '1                                    :
                    (mem_rtrn_i.inv.vld && inv_en)  ? icache_way_bin2oh(mem_rtrn_i.inv.way) :
                                                      repl_way_oh_q;

  // refills set the valid bit; flush/inval clear it
  assign vld_wdata = (cache_wren) ? '1 : '0;

  assign vld_we    = (cache_wren | inv_en | flush_en);
  // assign vld_req   = (vld_we | cache_rden);


  // chose random replacement if all are valid
  assign update_lfsr   = cache_wren & all_ways_valid;
  assign repl_way      = (all_ways_valid) ? rnd_way : inv_way;
  // latch the replacement way while the lookup (tag compare) is active
  assign repl_way_oh_d = (cmp_en_q) ? icache_way_bin2oh(repl_way) : repl_way_oh_q;

  // enable signals for memory arrays
  assign cl_req = (cache_rden) ? '1            :
                  (cache_wren) ? repl_way_oh_q :
                                 '0;
  assign cl_we  = cache_wren;


  // find invalid cache line
  lzc #(
    .WIDTH ( ICACHE_SET_ASSOC )
  ) i_lzc (
    .in_i    ( ~vld_rdata     ),
    .cnt_o   ( inv_way        ),
    .empty_o ( all_ways_valid )
  );

  // generate random cacheline index
  lfsr #(
    .LfsrWidth ( ariane_pkg::ICACHE_SET_ASSOC ),
    .OutWidth  ( $clog2(ariane_pkg::ICACHE_SET_ASSOC))
  ) i_lfsr (
    .clk_i  ( clk_i       ),
    .rst_ni ( rst_ni      ),
    .en_i   ( update_lfsr ),
    .out_o  ( rnd_way     )
  );


  ///////////////////////////////////////////////////////
  // tag comparison, hit generation
  ///////////////////////////////////////////////////////

  // binary index of the hit way (valid only if |cl_hit)
  logic [$clog2(ICACHE_SET_ASSOC)-1:0] hit_idx;

  for (genvar i=0;i<ICACHE_SET_ASSOC;i++) begin : gen_tag_cmpsel
    // per-way hit: tag match AND valid bit set
    assign cl_hit[i]  = (cl_tag_rdata[i] == cl_tag_d) & vld_rdata[i];
    // extract the fetch word at the registered cacheline offset
    // ({cl_offset_q,3'b0} converts the byte offset to a bit offset)
    assign cl_sel[i]  = cl_rdata[i][{cl_offset_q,3'b0} +: FETCH_WIDTH];
    assign cl_user[i] = cl_ruser[i][{cl_offset_q,3'b0} +: FETCH_USER_WIDTH];
  end


  // one-hot to binary conversion of the hit vector
  lzc #(
    .WIDTH ( ICACHE_SET_ASSOC )
  ) i_lzc_hit (
    .in_i    ( cl_hit  ),
    .cnt_o   ( hit_idx ),
    .empty_o (         )
  );

  // output mux: serve data from the hit way when tag comparison is active,
  // otherwise bypass directly from the ifill return (miss / NC / disabled)
  always_comb begin
    if (cmp_en_q) begin
      dreq_o.data = cl_sel[hit_idx];
      dreq_o.user = cl_user[hit_idx];
    end else begin
      dreq_o.data = mem_rtrn_i.data[{cl_offset_q,3'b0} +: FETCH_WIDTH];
      dreq_o.user = mem_rtrn_i.user[{cl_offset_q,3'b0} +: FETCH_USER_WIDTH];
    end
  end

  ///////////////////////////////////////////////////////
  // memory arrays and regs
  ///////////////////////////////////////////////////////

  // packed {valid, tag} readout per way
  logic [ICACHE_TAG_WIDTH:0] cl_tag_valid_rdata [ICACHE_SET_ASSOC-1:0];

  for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : gen_sram
    // Tag RAM
    sram #(
      // tag + valid bit
      .DATA_WIDTH ( ICACHE_TAG_WIDTH+1 ),
      .NUM_WORDS  ( ICACHE_NUM_WORDS   )
    ) tag_sram (
      .clk_i     ( clk_i                    ),
      .rst_ni    ( rst_ni                   ),
      .req_i     ( vld_req[i]               ),
      .we_i      ( vld_we                   ),
      .addr_i    ( vld_addr                 ),
      // we can always use the saved tag here since it takes a
      // couple of cycle until we write to the cache upon a miss
      .wuser_i   ( '0                       ),
      .wdata_i   ( {vld_wdata[i], cl_tag_q} ),
      .be_i      ( '1                       ),
      .ruser_o   (                          ),
      .rdata_o   ( cl_tag_valid_rdata[i]    )
    );

    // unpack tag and valid bit
    assign cl_tag_rdata[i] = cl_tag_valid_rdata[i][ICACHE_TAG_WIDTH-1:0];
    assign vld_rdata[i]    = cl_tag_valid_rdata[i][ICACHE_TAG_WIDTH];

    // Data RAM
    sram #(
      .USER_WIDTH ( ICACHE_USER_LINE_WIDTH   ),
      .DATA_WIDTH ( ICACHE_LINE_WIDTH        ),
      .USER_EN    ( ariane_pkg::FETCH_USER_EN ),
      .NUM_WORDS  ( ICACHE_NUM_WORDS         )
    ) data_sram (
      .clk_i     ( clk_i               ),
      .rst_ni    ( rst_ni              ),
      .req_i     ( cl_req[i]           ),
      .we_i      ( cl_we               ),
      .addr_i    ( cl_index            ),
      .wuser_i   ( mem_rtrn_i.user     ),
      .wdata_i   ( mem_rtrn_i.data     ),
      .be_i      ( '1                  ),
      .ruser_o   ( cl_ruser[i]         ),
      .rdata_o   ( cl_rdata[i]         )
    );
  end


  // state / pipeline registers, asynchronous active-low reset;
  // reset into FLUSH so the valid array is cleared after reset
  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
    if(!rst_ni) begin
      cl_tag_q      <= '0;
      flush_cnt_q   <= '0;
      vaddr_q       <= '0;
      cmp_en_q      <= '0;
      cache_en_q    <= '0;
      flush_q       <= '0;
      state_q       <= FLUSH;
      cl_offset_q   <= '0;
      repl_way_oh_q <= '0;
      inv_q         <= '0;
    end else begin
      cl_tag_q      <= cl_tag_d;
      flush_cnt_q   <= flush_cnt_d;
      vaddr_q       <= vaddr_d;
      cmp_en_q      <= cmp_en_d;
      cache_en_q    <= cache_en_d;
      flush_q       <= flush_d;
      state_q       <= state_d;
      cl_offset_q   <= cl_offset_d;
      repl_way_oh_q <= repl_way_oh_d;
      inv_q         <= inv_d;
    end
  end

  ///////////////////////////////////////////////////////
  // assertions
  ///////////////////////////////////////////////////////

  //pragma translate_off
  `ifndef VERILATOR
  // a cacheline refill and an invalidation must never coincide
  // (both arrive over the same memory return interface)
  repl_inval0: assert property (
    @(posedge clk_i) disable iff (!rst_ni) cache_wren |-> !(mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld))
      else $fatal(1,"[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously");

  repl_inval1: assert property (
    @(posedge clk_i) disable iff (!rst_ni) (mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld) |-> !cache_wren)
      else $fatal(1,"[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously");

  invalid_state: assert property (
    @(posedge clk_i) disable iff (!rst_ni) (state_q inside {FLUSH, IDLE, READ, MISS, KILL_ATRANS, KILL_MISS}))
      else $fatal(1,"[l1 icache] fsm reached an invalid state");

  // at most one way may hit after a readout (unless an inval interfered)
  hot1: assert property (
    @(posedge clk_i) disable iff (!rst_ni) (!inv_en) |-> cache_rden |=> cmp_en_q |-> $onehot0(cl_hit))
      else $fatal(1,"[l1 icache] cl_hit signal must be hot1");

  // this is only used for verification!
  // shadow copies of the valid bits / tags to detect duplicate allocation
  logic vld_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0];
  logic [ariane_pkg::ICACHE_TAG_WIDTH-1:0] tag_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0];
  logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] tag_write_duplicate_test;

  always_ff @(posedge clk_i or negedge rst_ni) begin : p_mirror
    if(!rst_ni) begin
      vld_mirror <= '{default:'0};
      tag_mirror <= '{default:'0};
    end else begin
      for (int i = 0; i < ICACHE_SET_ASSOC; i++) begin
        if(vld_req[i] & vld_we) begin
          vld_mirror[vld_addr][i] <= vld_wdata[i];
          tag_mirror[vld_addr][i] <= cl_tag_q;
        end
      end
    end
  end

  for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : gen_tag_dupl
    // a write would duplicate a tag that is already valid in another way
    assign tag_write_duplicate_test[i] = (tag_mirror[vld_addr][i] == cl_tag_q) & vld_mirror[vld_addr][i] & (|vld_wdata);
  end

  tag_write_duplicate: assert property (
    @(posedge clk_i) disable iff (!rst_ni) |vld_req |-> vld_we |-> !(|tag_write_duplicate_test))
      else $fatal(1,"[l1 icache] cannot allocate a CL that is already present in the cache");


  initial begin
    // assert wrong parameterizations
    assert (ICACHE_INDEX_WIDTH<=12)
      else $fatal(1,"[l1 icache] cache index width can be maximum 12bit since VM uses 4kB pages");
  end
  `endif
  //pragma translate_on

endmodule // cva6_icache

+ 191
- 0
app/src/core/cva6_icache_axi_wrapper.sv View File

@@ -0,0 +1,191 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Nils Wistoff <nwistoff@iis.ee.ethz.ch>, ETH Zurich
// Date: 07.09.2020
// Description: wrapper module to connect the L1I$ to a 64bit AXI bus.
//

// Wrapper connecting the L1 instruction cache to a 64-bit AXI bus:
// instantiates cva6_icache and an axi_shim, converts the icache's
// single-beat request/return protocol into AXI read bursts, and
// reassembles the returned beats into a full cacheline.
module cva6_icache_axi_wrapper import ariane_pkg::*; import wt_cache_pkg::*; #(
  parameter ariane_cfg_t ArianeCfg = ArianeDefaultConfig // contains cacheable regions
) (
  input  logic              clk_i,
  input  logic              rst_ni,
  input  riscv::priv_lvl_t  priv_lvl_i,

  input  logic              flush_i,     // flush the icache, flush and kill have to be asserted together
  input  logic              en_i,        // enable icache
  output logic              miss_o,      // to performance counter
  // address translation requests
  input  icache_areq_i_t    areq_i,
  output icache_areq_o_t    areq_o,
  // data requests
  input  icache_dreq_i_t    dreq_i,
  output icache_dreq_o_t    dreq_o,
  // AXI refill port
  output ariane_axi::req_t  axi_req_o,
  input  ariane_axi::resp_t axi_resp_i
);

  // number of 64-bit beats needed for the larger of the two cachelines
  // (the shim's burst counter must fit either cache's refill)
  localparam AxiNumWords = (ICACHE_LINE_WIDTH/64) * (ICACHE_LINE_WIDTH  > DCACHE_LINE_WIDTH) +
                           (DCACHE_LINE_WIDTH/64) * (ICACHE_LINE_WIDTH <= DCACHE_LINE_WIDTH) ;

  // icache <-> shim glue signals
  logic                                icache_mem_rtrn_vld;
  icache_rtrn_t                        icache_mem_rtrn;
  logic                                icache_mem_data_req;
  logic                                icache_mem_data_ack;
  icache_req_t                         icache_mem_data;

  logic                                axi_rd_req;
  logic                                axi_rd_gnt;
  logic [63:0]                         axi_rd_addr;
  logic [$clog2(AxiNumWords)-1:0]      axi_rd_blen;
  logic [1:0]                          axi_rd_size;
  logic [$size(axi_resp_i.r.id)-1:0]   axi_rd_id_in;
  logic                                axi_rd_rdy;
  logic                                axi_rd_lock;
  logic                                axi_rd_last;
  logic                                axi_rd_valid;
  logic [63:0]                         axi_rd_data;
  logic [$size(axi_resp_i.r.id)-1:0]   axi_rd_id_out;
  logic                                axi_rd_exokay;

  logic                                req_valid_d, req_valid_q;
  icache_req_t                         req_data_d,  req_data_q;
  logic                                first_d,     first_q;
  logic [ICACHE_LINE_WIDTH/64-1:0][63:0] rd_shift_d, rd_shift_q;

  // Keep read request asserted until we have an AXI grant. This is not guaranteed by icache (but
  // required by AXI).
  assign req_valid_d = ~axi_rd_gnt & (icache_mem_data_req | req_valid_q);

  // Update read request information on a new request
  assign req_data_d  = (icache_mem_data_req) ? icache_mem_data : req_data_q;

  // We have a new or pending read request
  assign axi_rd_req  = icache_mem_data_req | req_valid_q;
  assign axi_rd_addr = {{64-riscv::PLEN{1'b0}}, req_data_d.paddr};

  // Fetch a full cache line on a cache miss, or a single word on a bypassed access
  assign axi_rd_blen = (req_data_d.nc) ? '0 : ariane_pkg::ICACHE_LINE_WIDTH/64-1;
  assign axi_rd_size = 2'b11;          // always 8-byte beats
  assign axi_rd_id_in = req_data_d.tid;
  assign axi_rd_rdy  = 1'b1;           // always ready to accept R beats
  assign axi_rd_lock = 1'b0;           // no exclusive accesses on the ifetch path

  // Immediately acknowledge read request. This is an implicit requirement for the icache.
  assign icache_mem_data_ack = icache_mem_data_req;

  // Return data as soon as last word arrives
  assign icache_mem_rtrn_vld   = axi_rd_valid & axi_rd_last;
  assign icache_mem_rtrn.data  = rd_shift_d;
  assign icache_mem_rtrn.tid   = req_data_q.tid;
  assign icache_mem_rtrn.rtype = wt_cache_pkg::ICACHE_IFILL_ACK;
  // this adapter never generates invalidations
  assign icache_mem_rtrn.inv   = '0;

  // -------
  // I-Cache
  // -------
  cva6_icache #(
    // use ID 0 for icache reads
    .RdTxId    ( 0         ),
    .ArianeCfg ( ArianeCfg )
  ) i_cva6_icache (
    .clk_i          ( clk_i               ),
    .rst_ni         ( rst_ni              ),
    .flush_i        ( flush_i             ),
    .en_i           ( en_i                ),
    .miss_o         ( miss_o              ),
    .areq_i         ( areq_i              ),
    .areq_o         ( areq_o              ),
    .dreq_i         ( dreq_i              ),
    .dreq_o         ( dreq_o              ),
    .mem_rtrn_vld_i ( icache_mem_rtrn_vld ),
    .mem_rtrn_i     ( icache_mem_rtrn     ),
    .mem_data_req_o ( icache_mem_data_req ),
    .mem_data_ack_i ( icache_mem_data_ack ),
    .mem_data_o     ( icache_mem_data     )
  );

  // --------
  // AXI shim
  // --------
  // read-only use of the shim: the whole write channel is tied off
  axi_shim #(
    .AxiUserWidth ( AXI_USER_WIDTH          ),
    .AxiNumWords  ( AxiNumWords             ),
    .AxiIdWidth   ( $size(axi_resp_i.r.id)  )
  ) i_axi_shim (
    .clk_i       ( clk_i         ),
    .rst_ni      ( rst_ni        ),
    .rd_req_i    ( axi_rd_req    ),
    .rd_gnt_o    ( axi_rd_gnt    ),
    .rd_addr_i   ( axi_rd_addr   ),
    .rd_blen_i   ( axi_rd_blen   ),
    .rd_size_i   ( axi_rd_size   ),
    .rd_id_i     ( axi_rd_id_in  ),
    .rd_rdy_i    ( axi_rd_rdy    ),
    .rd_lock_i   ( axi_rd_lock   ),
    .rd_last_o   ( axi_rd_last   ),
    .rd_valid_o  ( axi_rd_valid  ),
    .rd_data_o   ( axi_rd_data   ),
    .rd_user_o   (               ),
    .rd_id_o     ( axi_rd_id_out ),
    .rd_exokay_o ( axi_rd_exokay ),
    .wr_req_i    ( '0            ),
    .wr_gnt_o    (               ),
    .wr_addr_i   ( '0            ),
    .wr_data_i   ( '0            ),
    .wr_user_i   ( '0            ),
    .wr_be_i     ( '0            ),
    .wr_blen_i   ( '0            ),
    .wr_size_i   ( '0            ),
    .wr_id_i     ( '0            ),
    .wr_lock_i   ( '0            ),
    .wr_atop_i   ( '0            ),
    .wr_rdy_i    ( '0            ),
    .wr_valid_o  (               ),
    .wr_id_o     (               ),
    .wr_exokay_o (               ),
    .axi_req_o   ( axi_req_o     ),
    .axi_resp_i  ( axi_resp_i    )
  );

  // Buffer burst data in shift register
  always_comb begin : p_axi_rtrn_shift
    first_d    = first_q;
    rd_shift_d = rd_shift_q;

    if (axi_rd_valid) begin
      // first_q is 1 on the first beat of a burst (re-armed by rd_last)
      first_d    = axi_rd_last;
      rd_shift_d = {axi_rd_data, rd_shift_q[ICACHE_LINE_WIDTH/64-1:1]};

      // If this is a single word transaction, we need to make sure that word is placed at offset 0
      if (first_q) begin
        rd_shift_d[0] = axi_rd_data;
      end
    end
  end

  // Registers
  always_ff @(posedge clk_i or negedge rst_ni) begin : p_rd_buf
    if (!rst_ni) begin
      req_valid_q <= 1'b0;
      req_data_q  <= '0;
      first_q     <= 1'b1;
      rd_shift_q  <= '0;
    end else begin
      req_valid_q <= req_valid_d;
      req_data_q  <= req_data_d;
      first_q     <= first_d;
      rd_shift_q  <= rd_shift_d;
    end
  end

endmodule // cva6_icache_axi_wrapper

+ 741
- 0
app/src/core/miss_handler.sv View File

@@ -0,0 +1,741 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 12.11.2017
// Description: Handles cache misses.

// --------------
// MISS Handler
// --------------

module miss_handler import ariane_pkg::*; import std_cache_pkg::*; #(
parameter int unsigned NR_PORTS = 3
)(
input logic clk_i,
input logic rst_ni,
input logic flush_i, // flush request
output logic flush_ack_o, // acknowledge successful flush
output logic miss_o,
input logic busy_i, // dcache is busy with something
// Bypass or miss
input logic [NR_PORTS-1:0][$bits(miss_req_t)-1:0] miss_req_i,
// Bypass handling
output logic [NR_PORTS-1:0] bypass_gnt_o,
output logic [NR_PORTS-1:0] bypass_valid_o,
output logic [NR_PORTS-1:0][63:0] bypass_data_o,

// AXI port
output ariane_axi::req_t axi_bypass_o,
input ariane_axi::resp_t axi_bypass_i,

// Miss handling (~> cacheline refill)
output logic [NR_PORTS-1:0] miss_gnt_o,
output logic [NR_PORTS-1:0] active_serving_o,

output logic [63:0] critical_word_o,
output logic critical_word_valid_o,
output ariane_axi::req_t axi_data_o,
input ariane_axi::resp_t axi_data_i,

input logic [NR_PORTS-1:0][55:0] mshr_addr_i,
output logic [NR_PORTS-1:0] mshr_addr_matches_o,
output logic [NR_PORTS-1:0] mshr_index_matches_o,
// AMO
input amo_req_t amo_req_i,
output amo_resp_t amo_resp_o,
// Port to SRAMs, for refill and eviction
output logic [DCACHE_SET_ASSOC-1:0] req_o,
output logic [DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array
output cache_line_t data_o,
output cl_be_t be_o,
input cache_line_t [DCACHE_SET_ASSOC-1:0] data_i,
output logic we_o
);

// Three MSHR ports + AMO port
parameter NR_BYPASS_PORTS = NR_PORTS + 1;

// FSM states
enum logic [3:0] {
IDLE, // 0
FLUSHING, // 1
FLUSH, // 2
WB_CACHELINE_FLUSH, // 3
FLUSH_REQ_STATUS, // 4
WB_CACHELINE_MISS, // 5
WAIT_GNT_SRAM, // 6
MISS, // 7
REQ_CACHELINE, // 8
MISS_REPL, // 9
SAVE_CACHELINE, // A
INIT, // B
AMO_REQ, // C
AMO_WAIT_RESP // D
} state_d, state_q;

// Registers
mshr_t mshr_d, mshr_q;
logic [DCACHE_INDEX_WIDTH-1:0] cnt_d, cnt_q;
logic [DCACHE_SET_ASSOC-1:0] evict_way_d, evict_way_q;
// cache line to evict
cache_line_t evict_cl_d, evict_cl_q;

logic serve_amo_d, serve_amo_q;
// Request from one FSM
logic [NR_PORTS-1:0] miss_req_valid;
logic [NR_PORTS-1:0] miss_req_bypass;
logic [NR_PORTS-1:0][63:0] miss_req_addr;
logic [NR_PORTS-1:0][63:0] miss_req_wdata;
logic [NR_PORTS-1:0] miss_req_we;
logic [NR_PORTS-1:0][7:0] miss_req_be;
logic [NR_PORTS-1:0][1:0] miss_req_size;

// Bypass AMO port
bypass_req_t amo_bypass_req;
bypass_rsp_t amo_bypass_rsp;

// Bypass ports <-> Arbiter
bypass_req_t [NR_BYPASS_PORTS-1:0] bypass_ports_req;
bypass_rsp_t [NR_BYPASS_PORTS-1:0] bypass_ports_rsp;

// Arbiter <-> Bypass AXI adapter
bypass_req_t bypass_adapter_req;
bypass_rsp_t bypass_adapter_rsp;

// Cache Line Refill <-> AXI
logic req_fsm_miss_valid;
logic [63:0] req_fsm_miss_addr;
logic [DCACHE_LINE_WIDTH-1:0] req_fsm_miss_wdata;
logic req_fsm_miss_we;
logic [(DCACHE_LINE_WIDTH/8)-1:0] req_fsm_miss_be;
ariane_axi::ad_req_t req_fsm_miss_req;
logic [1:0] req_fsm_miss_size;

logic gnt_miss_fsm;
logic valid_miss_fsm;
logic [(DCACHE_LINE_WIDTH/64)-1:0][63:0] data_miss_fsm;

// Cache Management <-> LFSR
logic lfsr_enable;
logic [DCACHE_SET_ASSOC-1:0] lfsr_oh;
logic [$clog2(DCACHE_SET_ASSOC-1)-1:0] lfsr_bin;
// AMOs
ariane_pkg::amo_t amo_op;
logic [63:0] amo_operand_b;

// ------------------------------
// Cache Management
// ------------------------------
always_comb begin : cache_management
automatic logic [DCACHE_SET_ASSOC-1:0] evict_way, valid_way;

for (int unsigned i = 0; i < DCACHE_SET_ASSOC; i++) begin
evict_way[i] = data_i[i].valid & data_i[i].dirty;
valid_way[i] = data_i[i].valid;
end
// ----------------------
// Default Assignments
// ----------------------
// memory array
req_o = '0;
addr_o = '0;
data_o = '0;
be_o = '0;
we_o = '0;
// Cache controller
miss_gnt_o = '0;
active_serving_o = '0;
// LFSR replacement unit
lfsr_enable = 1'b0;
// to AXI refill
req_fsm_miss_valid = 1'b0;
req_fsm_miss_addr = '0;
req_fsm_miss_wdata = '0;
req_fsm_miss_we = 1'b0;
req_fsm_miss_be = '0;
req_fsm_miss_req = ariane_axi::CACHE_LINE_REQ;
req_fsm_miss_size = 2'b11;
// to AXI bypass
amo_bypass_req.req = 1'b0;
amo_bypass_req.reqtype = ariane_axi::SINGLE_REQ;
amo_bypass_req.amo = ariane_pkg::AMO_NONE;
amo_bypass_req.addr = '0;
amo_bypass_req.we = 1'b0;
amo_bypass_req.wdata = '0;
amo_bypass_req.be = '0;
amo_bypass_req.size = 2'b11;
amo_bypass_req.id = 4'b1011;
// core
flush_ack_o = 1'b0;
miss_o = 1'b0; // to performance counter
serve_amo_d = serve_amo_q;
// --------------------------------
// Flush and Miss operation
// --------------------------------
state_d = state_q;
cnt_d = cnt_q;
evict_way_d = evict_way_q;
evict_cl_d = evict_cl_q;
mshr_d = mshr_q;
// communicate to the requester which unit we are currently serving
active_serving_o[mshr_q.id] = mshr_q.valid;
// AMOs
amo_resp_o.ack = 1'b0;
amo_resp_o.result = '0;
amo_operand_b = '0;

case (state_q)

IDLE: begin
// lowest priority are AMOs, wait until everything else is served before going for the AMOs
if (amo_req_i.req && !busy_i) begin
// 1. Flush the cache
if (!serve_amo_q) begin
state_d = FLUSH_REQ_STATUS;
serve_amo_d = 1'b1;
cnt_d = '0;
// 2. Do the AMO
end else begin
state_d = AMO_REQ;
serve_amo_d = 1'b0;
end
end
// check if we want to flush and can flush e.g.: we are not busy anymore
// TODO: Check that the busy flag is indeed needed
if (flush_i && !busy_i) begin
state_d = FLUSH_REQ_STATUS;
cnt_d = '0;
end

// check if one of the state machines missed
for (int unsigned i = 0; i < NR_PORTS; i++) begin
// here comes the refill portion of code
if (miss_req_valid[i] && !miss_req_bypass[i]) begin
state_d = MISS;
// we are taking another request so don't take the AMO
serve_amo_d = 1'b0;
// save to MSHR
mshr_d.valid = 1'b1;
mshr_d.we = miss_req_we[i];
mshr_d.id = i;
mshr_d.addr = miss_req_addr[i][DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:0];
mshr_d.wdata = miss_req_wdata[i];
mshr_d.be = miss_req_be[i];
break;
end
end
end

// ~> we missed on the cache
MISS: begin
// 1. Check if there is an empty cache-line
// 2. If not -> evict one
req_o = '1;
addr_o = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0];
state_d = MISS_REPL;
miss_o = 1'b1;
end

// ~> second miss cycle
MISS_REPL: begin
// if all are valid we need to evict one, pseudo random from LFSR
if (&valid_way) begin
lfsr_enable = 1'b1;
evict_way_d = lfsr_oh;
// do we need to write back the cache line?
if (data_i[lfsr_bin].dirty) begin
state_d = WB_CACHELINE_MISS;
evict_cl_d.tag = data_i[lfsr_bin].tag;
evict_cl_d.data = data_i[lfsr_bin].data;
cnt_d = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0];
// no - we can request a cache line now
end else
state_d = REQ_CACHELINE;
// we have at least one free way
end else begin
// get victim cache-line by looking for the first non-valid bit
evict_way_d = get_victim_cl(~valid_way);
state_d = REQ_CACHELINE;
end
end

// ~> we can just load the cache-line, the way is store in evict_way_q
REQ_CACHELINE: begin
req_fsm_miss_valid = 1'b1;
req_fsm_miss_addr = mshr_q.addr;

if (gnt_miss_fsm) begin
state_d = SAVE_CACHELINE;
miss_gnt_o[mshr_q.id] = 1'b1;
end
end

// ~> replace the cacheline
SAVE_CACHELINE: begin
// calculate cacheline offset
automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset;
cl_offset = mshr_q.addr[DCACHE_BYTE_OFFSET-1:3] << 6;
// we've got a valid response from refill unit
if (valid_miss_fsm) begin

addr_o = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0];
req_o = evict_way_q;
we_o = 1'b1;
be_o = '1;
be_o.vldrty = evict_way_q;
data_o.tag = mshr_q.addr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH];
data_o.data = data_miss_fsm;
data_o.valid = 1'b1;
data_o.dirty = 1'b0;

// is this a write?
if (mshr_q.we) begin
// Yes, so safe the updated data now
for (int i = 0; i < 8; i++) begin
// check if we really want to write the corresponding byte
if (mshr_q.be[i])
data_o.data[(cl_offset + i*8) +: 8] = mshr_q.wdata[i];
end
// its immediately dirty if we write
data_o.dirty = 1'b1;
end
// reset MSHR
mshr_d.valid = 1'b0;
// go back to idle
state_d = IDLE;
end
end

// ------------------------------
// Write Back Operation
// ------------------------------
// ~> evict a cache line from way saved in evict_way_q
WB_CACHELINE_FLUSH, WB_CACHELINE_MISS: begin

req_fsm_miss_valid = 1'b1;
req_fsm_miss_addr = {evict_cl_q.tag, cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET], {{DCACHE_BYTE_OFFSET}{1'b0}}};
req_fsm_miss_be = '1;
req_fsm_miss_we = 1'b1;
req_fsm_miss_wdata = evict_cl_q.data;

// we've got a grant --> this is timing critical, think about it
if (gnt_miss_fsm) begin
// write status array
addr_o = cnt_q;
req_o = 1'b1;
we_o = 1'b1;
data_o.valid = INVALIDATE_ON_FLUSH ? 1'b0 : 1'b1;
// invalidate
be_o.vldrty = evict_way_q;
// go back to handling the miss or flushing, depending on where we came from
state_d = (state_q == WB_CACHELINE_MISS) ? MISS : FLUSH_REQ_STATUS;
end
end

// ------------------------------
// Flushing & Initialization
// ------------------------------
// ~> make another request to check the same cache-line if there are still some valid entries
FLUSH_REQ_STATUS: begin
req_o = '1;
addr_o = cnt_q;
state_d = FLUSHING;
end

FLUSHING: begin
// this has priority
// at least one of the cache lines is dirty
if (|evict_way) begin
// evict cache line, look for the first cache-line which is dirty
evict_way_d = get_victim_cl(evict_way);
evict_cl_d = data_i[one_hot_to_bin(evict_way)];
state_d = WB_CACHELINE_FLUSH;
// not dirty ~> increment and continue
end else begin
// increment and re-request
cnt_d = cnt_q + (1'b1 << DCACHE_BYTE_OFFSET);
state_d = FLUSH_REQ_STATUS;
addr_o = cnt_q;
req_o = 1'b1;
be_o.vldrty = INVALIDATE_ON_FLUSH ? '1 : '0;
we_o = 1'b1;
// finished with flushing operation, go back to idle
if (cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == DCACHE_NUM_WORDS-1) begin
// only acknowledge if the flush wasn't triggered by an atomic
flush_ack_o = ~serve_amo_q;
state_d = IDLE;
end
end
end

// ~> only called after reset
INIT: begin
// initialize status array
addr_o = cnt_q;
req_o = 1'b1;
we_o = 1'b1;
// only write the dirty array
be_o.vldrty = '1;
cnt_d = cnt_q + (1'b1 << DCACHE_BYTE_OFFSET);
// finished initialization
if (cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == DCACHE_NUM_WORDS-1)
state_d = IDLE;
end
// ----------------------
// AMOs
// ----------------------
// ~> we are here because we need to do the AMO, the cache is clean at this point
AMO_REQ: begin
amo_bypass_req.req = 1'b1;
amo_bypass_req.reqtype = ariane_axi::SINGLE_REQ;
amo_bypass_req.amo = amo_req_i.amo_op;
// address is in operand a
amo_bypass_req.addr = amo_req_i.operand_a;
if (amo_req_i.amo_op != AMO_LR) begin
amo_bypass_req.we = 1'b1;
end
amo_bypass_req.size = amo_req_i.size;
// AXI implements CLR op instead of AND, negate operand
if (amo_req_i.amo_op == AMO_AND) begin
amo_operand_b = ~amo_req_i.operand_b;
end else begin
amo_operand_b = amo_req_i.operand_b;
end
// align data and byte-enable to correct byte lanes
amo_bypass_req.wdata = amo_operand_b;
if (amo_req_i.size == 2'b11) begin
// 64b transfer
amo_bypass_req.be = 8'b11111111;
end else begin
// 32b transfer
if (amo_req_i.operand_a[2:0] == '0) begin
// 64b aligned -> activate lower 4 byte lanes
amo_bypass_req.be = 8'b00001111;
end else begin
// 64b unaligned -> activate upper 4 byte lanes
amo_bypass_req.be = 8'b11110000;
amo_bypass_req.wdata = amo_operand_b[31:0] << 32;
end
end

// when request is accepted, wait for response
if (amo_bypass_rsp.gnt) begin
if (amo_bypass_rsp.valid) begin
state_d = IDLE;
amo_resp_o.ack = 1'b1;
amo_resp_o.result = amo_bypass_rsp.rdata;
end else begin
state_d = AMO_WAIT_RESP;
end
end
end
AMO_WAIT_RESP: begin
if (amo_bypass_rsp.valid) begin
state_d = IDLE;
amo_resp_o.ack = 1'b1;
// Request is assumed to be still valid (ack not granted yet)
if (amo_req_i.size == 2'b10) begin
// 32b request
logic [31:0] halfword;
if (amo_req_i.operand_a[2:0] == '0) begin
// 64b aligned -> activate lower 4 byte lanes
halfword = amo_bypass_rsp.rdata[31:0];
end else begin
// 64b unaligned -> activate upper 4 byte lanes
halfword = amo_bypass_rsp.rdata[63:32];
end
// Sign-extend 32b requests as per RISC-V spec
amo_resp_o.result = {{32{halfword[31]}}, halfword};
end else begin
// 64b request
amo_resp_o.result = amo_bypass_rsp.rdata;
end
end
end
endcase
end

// check MSHR for aliasing
always_comb begin

mshr_addr_matches_o = 'b0;
mshr_index_matches_o = 'b0;

for (int i = 0; i < NR_PORTS; i++) begin
// check mshr for potential matching of other units, exclude the unit currently being served
if (mshr_q.valid && mshr_addr_i[i][55:DCACHE_BYTE_OFFSET] == mshr_q.addr[55:DCACHE_BYTE_OFFSET]) begin
mshr_addr_matches_o[i] = 1'b1;
end

// same as previous, but checking only the index
if (mshr_q.valid && mshr_addr_i[i][DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == mshr_q.addr[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]) begin
mshr_index_matches_o[i] = 1'b1;
end
end
end
// --------------------
// Sequential Process
// --------------------
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
mshr_q <= '0;
state_q <= INIT;
cnt_q <= '0;
evict_way_q <= '0;
evict_cl_q <= '0;
serve_amo_q <= 1'b0;
end else begin
mshr_q <= mshr_d;
state_q <= state_d;
cnt_q <= cnt_d;
evict_way_q <= evict_way_d;
evict_cl_q <= evict_cl_d;
serve_amo_q <= serve_amo_d;
end
end

//pragma translate_off
`ifndef VERILATOR
// assert that cache only hits on one way
assert property (
@(posedge clk_i) $onehot0(evict_way_q)) else $warning("Evict-way should be one-hot encoded");
`endif
//pragma translate_on

// ----------------------
// Pack bypass ports
// ----------------------
always_comb begin
logic [$clog2(NR_BYPASS_PORTS)-1:0] id;

// Pack MHSR ports first
for (id = 0; id < NR_PORTS; id++) begin
bypass_ports_req[id].req = miss_req_valid[id] & miss_req_bypass[id];
bypass_ports_req[id].reqtype = ariane_axi::SINGLE_REQ;
bypass_ports_req[id].amo = AMO_NONE;
bypass_ports_req[id].id = {2'b10, id};
bypass_ports_req[id].addr = miss_req_addr[id];
bypass_ports_req[id].wdata = miss_req_wdata[id];
bypass_ports_req[id].we = miss_req_we[id];
bypass_ports_req[id].be = miss_req_be[id];
bypass_ports_req[id].size = miss_req_size[id];

bypass_gnt_o[id] = bypass_ports_rsp[id].gnt;
bypass_valid_o[id] = bypass_ports_rsp[id].valid;
bypass_data_o[id] = bypass_ports_rsp[id].rdata;
end

// AMO port has lowest priority
bypass_ports_req[id] = amo_bypass_req;
amo_bypass_rsp = bypass_ports_rsp[id];
end

// ----------------------
// Arbitrate bypass ports
// ----------------------
axi_adapter_arbiter #(
.NR_PORTS(NR_BYPASS_PORTS),
.req_t (bypass_req_t),
.rsp_t (bypass_rsp_t)
) i_bypass_arbiter (
.clk_i (clk_i),
.rst_ni(rst_ni),
// Master Side
.req_i (bypass_ports_req),
.rsp_o (bypass_ports_rsp),
// Slave Side
.req_o (bypass_adapter_req),
.rsp_i (bypass_adapter_rsp)
);

// ----------------------
// Bypass AXI Interface
// ----------------------
axi_adapter #(
.DATA_WIDTH (64),
.AXI_ID_WIDTH (4),
.CACHELINE_BYTE_OFFSET(DCACHE_BYTE_OFFSET)
) i_bypass_axi_adapter (
.clk_i (clk_i),
.rst_ni (rst_ni),
.req_i (bypass_adapter_req.req),
.type_i (bypass_adapter_req.reqtype),
.amo_i (bypass_adapter_req.amo),
.id_i (bypass_adapter_req.id),
.addr_i (bypass_adapter_req.addr),
.wdata_i (bypass_adapter_req.wdata),
.we_i (bypass_adapter_req.we),
.be_i (bypass_adapter_req.be),
.size_i (bypass_adapter_req.size),
.gnt_o (bypass_adapter_rsp.gnt),
.valid_o (bypass_adapter_rsp.valid),
.rdata_o (bypass_adapter_rsp.rdata),
.id_o (), // not used, single outstanding request in arbiter
.critical_word_o (), // not used for single requests
.critical_word_valid_o(), // not used for single requests
.axi_req_o (axi_bypass_o),
.axi_resp_i (axi_bypass_i)
);

// ----------------------
// Cache Line AXI Refill
// ----------------------
  // Cacheline-wide AXI adapter used by the miss FSM for refills (and
  // write-backs, since we_i/wdata_i are driven). DATA_WIDTH is a full
  // cache line, so reads arrive as bursts; the critical word is forwarded
  // early to the core via critical_word_o / critical_word_valid_o.
  // The AXI transaction ID is fixed to 4'b1100 — the miss FSM has at most
  // one refill in flight, so no ID tracking is required (id_o unconnected).
  axi_adapter #(
    .DATA_WIDTH            ( DCACHE_LINE_WIDTH  ),
    .AXI_ID_WIDTH          ( 4                  ),
    .CACHELINE_BYTE_OFFSET ( DCACHE_BYTE_OFFSET )
  ) i_miss_axi_adapter (
    .clk_i,
    .rst_ni,
    .req_i               ( req_fsm_miss_valid ),
    .type_i              ( req_fsm_miss_req   ),
    .amo_i               ( AMO_NONE           ), // refills never carry AMOs
    .gnt_o               ( gnt_miss_fsm       ),
    .addr_i              ( req_fsm_miss_addr  ),
    .we_i                ( req_fsm_miss_we    ),
    .wdata_i             ( req_fsm_miss_wdata ),
    .be_i                ( req_fsm_miss_be    ),
    .size_i              ( req_fsm_miss_size  ),
    .id_i                ( 4'b1100            ),
    .valid_o             ( valid_miss_fsm     ),
    .rdata_o             ( data_miss_fsm      ),
    .id_o                (                    ),
    .critical_word_o     ( critical_word_o    ),
    .critical_word_valid_o (critical_word_valid_o),
    .axi_req_o           ( axi_data_o         ),
    .axi_resp_i          ( axi_data_i         )
  );

// -----------------
// Replacement LFSR
// -----------------
  // Pseudo-random replacement-way generator: advances only when lfsr_enable
  // is asserted and yields the victim way both one-hot (lfsr_oh) and binary
  // (lfsr_bin) encoded, one slot per way (WIDTH = DCACHE_SET_ASSOC).
  // Remaining ports (clk_i, rst_ni, ...) are wired implicitly via .* —
  // NOTE(review): assumes matching signal names exist in this scope.
  lfsr_8bit #(.WIDTH (DCACHE_SET_ASSOC)) i_lfsr (
    .en_i           ( lfsr_enable ),
    .refill_way_oh  ( lfsr_oh     ),
    .refill_way_bin ( lfsr_bin    ),
    .*
  );

// -----------------
// Struct Split
// -----------------
// Hack as system verilog support in modelsim seems to be buggy here
always_comb begin
automatic miss_req_t miss_req;

for (int unsigned i = 0; i < NR_PORTS; i++) begin
miss_req = miss_req_t'(miss_req_i[i]);
miss_req_valid [i] = miss_req.valid;
miss_req_bypass [i] = miss_req.bypass;
miss_req_addr [i] = miss_req.addr;
miss_req_wdata [i] = miss_req.wdata;
miss_req_we [i] = miss_req.we;
miss_req_be [i] = miss_req.be;
miss_req_size [i] = miss_req.size;
end
end
endmodule

// --------------
// AXI Arbiter
// --------------
//
// Description: Arbitrates access to AXI refill/bypass
//
module axi_adapter_arbiter #(
parameter NR_PORTS = 4,
parameter type req_t = std_cache_pkg::bypass_req_t,
parameter type rsp_t = std_cache_pkg::bypass_rsp_t
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
// Master ports
input req_t [NR_PORTS-1:0] req_i,
output rsp_t [NR_PORTS-1:0] rsp_o,
// Slave port
output req_t req_o,
input rsp_t rsp_i
);

enum logic { IDLE, SERVING } state_d, state_q;

req_t req_d, req_q;
logic [NR_PORTS-1:0] sel_d, sel_q;

always_comb begin
sel_d = sel_q;

state_d = state_q;
req_d = req_q;

req_o = req_q;

rsp_o = '0;
rsp_o[sel_q].rdata = rsp_i.rdata;

case (state_q)