# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.


from __future__ import absolute_import, print_function

import os
import tvm
import topi
import varec
import numpy as np

from tvm import rpc
from tvm.contrib import util
from varec.testing import simulator
from varec.top import varec_compute as cp
from varec.top import varec_debug as db
from varec.top import varec_schedule as sche

# Load varec parameters from the varec/config/varec_config.json file
env = varec.get_env()

# Open an RPC session; a LocalSession runs everything in the local process
remote = rpc.LocalSession()

######################################################################
# Computation Declaration
# -----------------------
# A large LeNet-5-like network (16 input channels)

##################################################
# Stage 0

stage0_batch_size = 1
stage0_height = 32
stage0_width = 32
stage0_in_channels = 16
stage0_out_channels = 16 * 6
stage0_kernel_h = 5
stage0_kernel_w = 5
stage0_pad_h = 0
stage0_pad_w = 0
stage0_stride_h = 1
stage0_stride_w = 1
assert stage0_batch_size % env.BATCH == 0
assert stage0_in_channels % env.BLOCK_IN == 0
assert stage0_out_channels % env.BLOCK_OUT == 0

# Input feature map: (N, IC, H, W, n, ic)
stage0_data_shape = (stage0_batch_size // env.BATCH,
                     stage0_in_channels // env.BLOCK_IN,
                     stage0_height,
                     stage0_width,
                     env.BATCH,
                     env.BLOCK_IN)
# Kernel: (OC, IC, H, W, oc, ic)
stage0_kernel_shape = (stage0_out_channels // env.BLOCK_OUT,
                       stage0_in_channels // env.BLOCK_IN,
                       stage0_kernel_h,
                       stage0_kernel_w,
                       env.BLOCK_OUT,
                       env.BLOCK_IN)
# Derive output feature map dimensions
stage0_fout_height = (stage0_height + 2 * stage0_pad_h - stage0_kernel_h) // stage0_stride_h + 1
stage0_fout_width = (stage0_width + 2 * stage0_pad_w - stage0_kernel_w) // stage0_stride_w + 1
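
# Sanity check: (32 + 2*0 - 5) // 1 + 1 = 28, and the 2x2 max-pool below
# halves that to the 14x14 input expected by stage 1
assert stage0_fout_height == 28 and stage0_fout_width == 28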

# Conv feature map: (N, OC, H, W, n, oc)
stage0_conv_shape = (stage0_batch_size // env.BATCH,
                     stage0_out_channels // env.BLOCK_OUT,
                     stage0_fout_height,
                     stage0_fout_width,
                     env.BATCH,
                     env.BLOCK_OUT)

# Output feature map after 2x2 pooling: (N, OC, H, W, n, oc)
stage0_output_shape = (stage0_batch_size // env.BATCH,
                       stage0_out_channels // env.BLOCK_OUT,
                       stage0_fout_height // 2,
                       stage0_fout_width // 2,
                       env.BATCH,
                       env.BLOCK_OUT)

# Convolution reduction axes
stage0_dy = tvm.reduce_axis((0, stage0_kernel_h), name='dy')
stage0_dx = tvm.reduce_axis((0, stage0_kernel_w), name='dx')
stage0_ic = tvm.reduce_axis((0, stage0_in_channels // env.BLOCK_IN), name='ic')
stage0_ic_tns = tvm.reduce_axis((0, env.BLOCK_IN), name='ic_tns')

inp_min = -(1 << (env.INP_WIDTH - 1))
inp_max = (1 << (env.INP_WIDTH - 1)) - 1
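# e.g. env.INP_WIDTH == 8 gives the int8 range inp_min = -128, inp_max = 127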

pool_x, pool_y = 2, 2
pool_type = "max"

# Parameter tuples consumed by the varec compute helpers
stage0_conv_paras = stage0_dy, stage0_dx, stage0_ic, stage0_ic_tns, stage0_stride_h, stage0_stride_w
stage0_data_paras = stage0_pad_h, stage0_pad_w
stage0_pool_paras = pool_type, pool_x, pool_y

# Generate the compute graph with the varec libraries

# Conv
stage0_data_buf, stage0_data, stage0_kernel_buf, stage0_kernel, stage0_conv = \
    cp.compute_conv(stage0_data_shape, stage0_kernel_shape, stage0_conv_shape,
                    stage0_conv_paras, stage0_data_paras, env)

# Shift and Clip (requantize the wide accumulator back to the input range)
stage0_sc = cp.compute_shr_clip(stage0_conv_shape, stage0_conv, inp_min, inp_max)
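
# For intuition: the shift-and-clip step is an arithmetic right shift followed
# by saturation, exactly as in the numpy reference used for verification below.
# A self-contained 8-bit example (the real shift amount is env.INP_WIDTH):
_acc = np.array([70000, -300, 12], dtype=np.int32)
assert list(np.clip(_acc >> 8, -128, 127)) == [127, -2, 0]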

# ReLU
stage0_relu = cp.relu(stage0_sc[-1])

# Pool
stage0_pools, stage0_output_buf, stage0_output_ph = \
    cp.compute_pool(stage0_output_shape, stage0_relu, stage0_pool_paras, env)

# Result tensor, cast back to the input dtype
stage0 = tvm.compute(stage0_output_shape,
                     lambda *i: stage0_pools[-1](*i).astype(env.inp_dtype),
                     name="stage0")


##################################################
# Stage 1

stage1_batch_size = 1
stage1_height = 14
stage1_width = 14
stage1_in_channels = 16 * 6
stage1_out_channels = 16 * 16
stage1_kernel_h = 5
stage1_kernel_w = 5
stage1_pad_h = 0
stage1_pad_w = 0
stage1_stride_h = 1
stage1_stride_w = 1
assert stage1_batch_size % env.BATCH == 0
assert stage1_in_channels % env.BLOCK_IN == 0
assert stage1_out_channels % env.BLOCK_OUT == 0

# Input feature map: (N, IC, H, W, n, ic)
stage1_data_shape = (stage1_batch_size // env.BATCH,
                     stage1_in_channels // env.BLOCK_IN,
                     stage1_height,
                     stage1_width,
                     env.BATCH,
                     env.BLOCK_IN)
# Kernel: (OC, IC, H, W, oc, ic)
stage1_kernel_shape = (stage1_out_channels // env.BLOCK_OUT,
                       stage1_in_channels // env.BLOCK_IN,
                       stage1_kernel_h,
                       stage1_kernel_w,
                       env.BLOCK_OUT,
                       env.BLOCK_IN)
# Derive output feature map dimensions
stage1_fout_height = (stage1_height + 2 * stage1_pad_h - stage1_kernel_h) // stage1_stride_h + 1
stage1_fout_width = (stage1_width + 2 * stage1_pad_w - stage1_kernel_w) // stage1_stride_w + 1
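
# Sanity check: (14 + 2*0 - 5) // 1 + 1 = 10, pooled down to the 5x5 input
# expected by stage 2
assert stage1_fout_height == 10 and stage1_fout_width == 10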

# Conv feature map: (N, OC, H, W, n, oc)
stage1_conv_shape = (stage1_batch_size // env.BATCH,
                     stage1_out_channels // env.BLOCK_OUT,
                     stage1_fout_height,
                     stage1_fout_width,
                     env.BATCH,
                     env.BLOCK_OUT)

# Output feature map after 2x2 pooling: (N, OC, H, W, n, oc)
stage1_output_shape = (stage1_batch_size // env.BATCH,
                       stage1_out_channels // env.BLOCK_OUT,
                       stage1_fout_height // 2,
                       stage1_fout_width // 2,
                       env.BATCH,
                       env.BLOCK_OUT)

# Convolution reduction axes
stage1_dy = tvm.reduce_axis((0, stage1_kernel_h), name='dy')
stage1_dx = tvm.reduce_axis((0, stage1_kernel_w), name='dx')
stage1_ic = tvm.reduce_axis((0, stage1_in_channels // env.BLOCK_IN), name='ic')
stage1_ic_tns = tvm.reduce_axis((0, env.BLOCK_IN), name='ic_tns')

# Parameter tuples consumed by the varec compute helpers
stage1_conv_paras = stage1_dy, stage1_dx, stage1_ic, stage1_ic_tns, stage1_stride_h, stage1_stride_w
stage1_data_paras = stage1_pad_h, stage1_pad_w
stage1_pool_paras = pool_type, pool_x, pool_y


# Generate the compute graph with the varec libraries

# Conv, chained onto stage0 so the stages form a single graph
stage1_data_buf, stage1_data, stage1_kernel_buf, stage1_kernel, stage1_conv = \
    cp.compute_conv(stage1_data_shape, stage1_kernel_shape, stage1_conv_shape,
                    stage1_conv_paras, stage1_data_paras, env, stage0)

# Shift and Clip
stage1_sc = cp.compute_shr_clip(stage1_conv_shape, stage1_conv, inp_min, inp_max)

# ReLU
stage1_relu = cp.relu(stage1_sc[-1])

# Pool
stage1_pools, stage1_output_buf, stage1_output_ph = \
    cp.compute_pool(stage1_output_shape, stage1_relu, stage1_pool_paras, env)

# Result tensor
stage1 = tvm.compute(stage1_output_shape,
                     lambda *i: stage1_pools[-1](*i).astype(env.inp_dtype),
                     name="stage1")


##################################################
# Stage 2

stage2_batch_size = 1
stage2_height = 5
stage2_width = 5
stage2_in_channels = 16 * 16
stage2_out_channels = 16 * 16
stage2_kernel_h = 5
stage2_kernel_w = 5
stage2_pad_h = 0
stage2_pad_w = 0
stage2_stride_h = 1
stage2_stride_w = 1
assert stage2_batch_size % env.BATCH == 0
assert stage2_in_channels % env.BLOCK_IN == 0
assert stage2_out_channels % env.BLOCK_OUT == 0

# Input feature map: (N, IC, H, W, n, ic)
stage2_data_shape = (stage2_batch_size // env.BATCH,
                     stage2_in_channels // env.BLOCK_IN,
                     stage2_height,
                     stage2_width,
                     env.BATCH,
                     env.BLOCK_IN)
# Kernel: (OC, IC, H, W, oc, ic)
stage2_kernel_shape = (stage2_out_channels // env.BLOCK_OUT,
                       stage2_in_channels // env.BLOCK_IN,
                       stage2_kernel_h,
                       stage2_kernel_w,
                       env.BLOCK_OUT,
                       env.BLOCK_IN)
# Derive output feature map dimensions
stage2_fout_height = (stage2_height + 2 * stage2_pad_h - stage2_kernel_h) // stage2_stride_h + 1
stage2_fout_width = (stage2_width + 2 * stage2_pad_w - stage2_kernel_w) // stage2_stride_w + 1
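
# Sanity check: (5 + 2*0 - 5) // 1 + 1 = 1, i.e. a 5x5 kernel over the 5x5
# input collapses to a 1x1 output, so no pooling stage follows
assert stage2_fout_height == 1 and stage2_fout_width == 1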

# Conv feature map: (N, OC, H, W, n, oc)
stage2_conv_shape = (stage2_batch_size // env.BATCH,
                     stage2_out_channels // env.BLOCK_OUT,
                     stage2_fout_height,
                     stage2_fout_width,
                     env.BATCH,
                     env.BLOCK_OUT)

# Output feature map: (N, OC, H, W, n, oc); identical to the conv shape
# because stage 2 has no pooling
stage2_output_shape = (stage2_batch_size // env.BATCH,
                       stage2_out_channels // env.BLOCK_OUT,
                       stage2_fout_height,
                       stage2_fout_width,
                       env.BATCH,
                       env.BLOCK_OUT)

# Convolution reduction axes
stage2_dy = tvm.reduce_axis((0, stage2_kernel_h), name='dy')
stage2_dx = tvm.reduce_axis((0, stage2_kernel_w), name='dx')
stage2_ic = tvm.reduce_axis((0, stage2_in_channels // env.BLOCK_IN), name='ic')
stage2_ic_tns = tvm.reduce_axis((0, env.BLOCK_IN), name='ic_tns')

# Parameter tuples; no pool parameters, since stage 2 has no pooling
stage2_conv_paras = stage2_dy, stage2_dx, stage2_ic, stage2_ic_tns, stage2_stride_h, stage2_stride_w
stage2_data_paras = stage2_pad_h, stage2_pad_w


# Generate the compute graph with the varec libraries

# Conv, chained onto stage1
stage2_data_buf, stage2_data, stage2_kernel_buf, stage2_kernel, stage2_conv = \
    cp.compute_conv(stage2_data_shape, stage2_kernel_shape, stage2_conv_shape,
                    stage2_conv_paras, stage2_data_paras, env, stage1)

# Shift and Clip
stage2_sc = cp.compute_shr_clip(stage2_conv_shape, stage2_conv, inp_min, inp_max)

# ReLU
stage2_relu = cp.relu(stage2_sc[-1])

# Result tensor
stage2 = tvm.compute(stage2_output_shape,
                     lambda *i: stage2_relu(*i).astype(env.inp_dtype),
                     name="stage2")


######################################################################
# Scheduling the Computation
# --------------------------

# Create the TVM schedule
s = tvm.create_schedule(stage2.op)
# Let's look at the default TVM schedule
print(tvm.lower(s, [stage0_data, stage0_kernel, stage0_output_ph,
                    stage1_kernel, stage1_output_ph, stage2_kernel, stage2], simple_mode=True))
print("+++++++++++++++++")
######################################################################
# Blocking the Computation: Stage 0

# A feasible set of tiling sizes, kept for reference:
# b_block = 1 // env.BATCH
# oc_block = 16 * 2 // env.BLOCK_OUT
# ic_block = 16 // env.BLOCK_IN
# h_block = 7
# w_block = 14

# Derive the tiling sizes automatically
wkld = sche.get_workload(stage0_data_shape, stage0_kernel_shape, stage0_output_shape)
schedules = sche.find_schedules(wkld, env, stage0_output_shape, pool_combined=True, best_only=True)
b_block, oc_block, ic_block, h_block, w_block = schedules[0].get_sche()

# Target stages and tiling parameters
stages = stage0, stage0_conv, stage0_sc, stage0_pools, \
    stage0_relu, stage0_output_buf, stage0_data_buf, stage0_kernel_buf
params = b_block, oc_block, h_block, w_block, stage0_ic, \
    ic_block, stage0_dy, stage0_dx, stage0_ic_tns
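
# Conceptually, sche.tile splits and reorders each stage's axes much like a
# hand-written TVM schedule would; a rough sketch of the idea only (the axis
# names below are illustrative, not the helper's actual implementation):
#     b, oc, h, w, b_tns, oc_tns = s[stage0].op.axis
#     ho, hi = s[stage0].split(h, factor=h_block)
#     wo, wi = s[stage0].split(w, factor=w_block)
#     s[stage0].reorder(b, oc, ho, wo, hi, wi, b_tns, oc_tns)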

# Tile stages with params
axis = sche.tile(s, stages, params)
print(tvm.lower(s, [stage0_data, stage0_kernel, stage0_output_ph,
                    stage1_kernel, stage1_output_ph, stage2_kernel, stage2], simple_mode=True))
print("+++++++++++++++++")

# Lower the stages, using the returned axes (ic_out, b_tns)
sche.lowering(s, stages, axis, stage_ID=0, env=env)

######################################################################
# Blocking the Computation: Stage 1

# A feasible set of tiling sizes, kept for reference:
# b_block = 1 // env.BATCH
# oc_block = 16 * 16 // env.BLOCK_OUT // 8
# ic_block = 16 * 6 // env.BLOCK_IN // 2
# h_block = 5
# w_block = 1

# Derive the tiling sizes automatically
wkld = sche.get_workload(stage1_data_shape, stage1_kernel_shape, stage1_output_shape)
schedules = sche.find_schedules(wkld, env, stage1_output_shape, pool_combined=True, best_only=True)
b_block, oc_block, ic_block, h_block, w_block = schedules[0].get_sche()

# Target stages and tiling parameters
stages = stage1, stage1_conv, stage1_sc, stage1_pools, \
    stage1_relu, stage1_output_buf, stage1_data_buf, stage1_kernel_buf
params = b_block, oc_block, h_block, w_block, stage1_ic, \
    ic_block, stage1_dy, stage1_dx, stage1_ic_tns

# Tile stages with params
axis = sche.tile(s, stages, params)
print(tvm.lower(s, [stage0_data, stage0_kernel, stage0_output_ph,
                    stage1_kernel, stage1_output_ph, stage2_kernel, stage2], simple_mode=True))
print("+++++++++++++++++")

# Lower the stages, using the returned axes (ic_out, b_tns)
sche.lowering(s, stages, axis, stage_ID=1, env=env)

######################################################################
# Blocking the Computation: Stage 2

# Define the tiling sizes manually here, because the output height and width are 1
b_block = 1 // env.BATCH
oc_block = 16 * 16 // env.BLOCK_OUT // 8
ic_block = 16 * 16 // env.BLOCK_IN // 4
h_block = 1
w_block = 1
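
# Under the common configuration env.BATCH == 1 and env.BLOCK_OUT ==
# env.BLOCK_IN == 16 (an assumption; check varec/config/varec_config.json),
# these evaluate to b_block = 1, oc_block = 2, ic_block = 4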

# Target stages and tiling parameters; the empty tuples stand in for the
# pooling stages, which stage 2 does not have
stages = stage2, stage2_conv, stage2_sc, (), \
    stage2_relu, (), stage2_data_buf, stage2_kernel_buf
params = b_block, oc_block, h_block, w_block, stage2_ic, \
    ic_block, stage2_dy, stage2_dx, stage2_ic_tns

# Tile stages with params
axis = sche.tile(s, stages, params)
print(tvm.lower(s, [stage0_data, stage0_kernel, stage0_output_ph,
                    stage1_kernel, stage1_output_ph, stage2_kernel, stage2], simple_mode=True))
print("+++++++++++++++++")

# Lower the stages, using the returned axes (ic_out, b_tns)
sche.lowering(s, stages, axis, stage_ID=2, env=env)

# Let's look at the final lowered TVM schedule after lowering memory
# loads/stores down to DMA copy intrinsics, and the computation down to
# varec compute intrinsics.
print(varec.lower(s, [stage0_data, stage0_kernel, stage0_output_ph,
                      stage1_kernel, stage1_output_ph, stage2_kernel, stage2], simple_mode=True))

######################################################################
# TVM Compilation and Verification
# --------------------------------

# This helper provides a reference 2D convolution for verification
from topi.testing import conv2d_nchw_python

# Compile the TVM module
my_conv = varec.build(s, [stage0_data, stage0_kernel, stage0_output_ph,
                          stage1_kernel, stage1_output_ph, stage2_kernel, stage2],
                      "ext_dev", env.target_host, name="my_conv")
temp = util.tempdir()
my_conv.save(temp.relpath("Lenet-5.o"))
remote.upload(temp.relpath("Lenet-5.o"))
f = remote.load_module("Lenet-5.o")

# Get the remote device context
ctx = remote.ext_dev(0)

# Initialize the data and kernel arrays with random integers drawn from
# [-128, 128) in NCHW layout
stage0_data_np = np.random.randint(
    -128, 128,
    size=(stage0_batch_size, stage0_in_channels, stage0_height, stage0_width)).astype(stage0_data.dtype)
stage0_kernel_np = np.random.randint(
    -128, 128,
    size=(stage0_out_channels, stage0_in_channels, stage0_kernel_h, stage0_kernel_w)).astype(stage0_kernel.dtype)
stage1_kernel_np = np.random.randint(
    -128, 128,
    size=(stage1_out_channels, stage1_in_channels, stage1_kernel_h, stage1_kernel_w)).astype(stage1_kernel.dtype)
stage2_kernel_np = np.random.randint(
    -128, 128,
    size=(stage2_out_channels, stage2_in_channels, stage2_kernel_h, stage2_kernel_w)).astype(stage2_kernel.dtype)
# Intermediate output buffers, initialized to inp_min
stage0_output_np = np.full(stage0_output_shape, inp_min).astype(stage0_output_ph.dtype)
stage1_output_np = np.full(stage1_output_shape, inp_min).astype(stage1_output_ph.dtype)

# Pack the data and kernel arrays from the 4D NCHW layout
# into the 6D NCHWnc packed layout
stage0_data_packed = stage0_data_np.reshape(stage0_batch_size // env.BATCH,
                                            env.BATCH,
                                            stage0_in_channels // env.BLOCK_IN,
                                            env.BLOCK_IN,
                                            stage0_height,
                                            stage0_width).transpose((0, 2, 4, 5, 1, 3))
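
# The packing is a pure relayout: inverting the transpose and reshape
# recovers the original NCHW array (the same inverse is applied to the
# reference outputs during verification below)
assert np.array_equal(
    stage0_data_packed.transpose((0, 4, 1, 5, 2, 3)).reshape(
        stage0_batch_size, stage0_in_channels, stage0_height, stage0_width),
    stage0_data_np)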

stage0_kernel_packed = stage0_kernel_np.reshape(stage0_out_channels // env.BLOCK_OUT,
                                                env.BLOCK_OUT,
                                                stage0_in_channels // env.BLOCK_IN,
                                                env.BLOCK_IN,
                                                stage0_kernel_h,
                                                stage0_kernel_w).transpose((0, 2, 4, 5, 1, 3))

stage1_kernel_packed = stage1_kernel_np.reshape(stage1_out_channels // env.BLOCK_OUT,
                                                env.BLOCK_OUT,
                                                stage1_in_channels // env.BLOCK_IN,
                                                env.BLOCK_IN,
                                                stage1_kernel_h,
                                                stage1_kernel_w).transpose((0, 2, 4, 5, 1, 3))

stage2_kernel_packed = stage2_kernel_np.reshape(stage2_out_channels // env.BLOCK_OUT,
                                                env.BLOCK_OUT,
                                                stage2_in_channels // env.BLOCK_IN,
                                                env.BLOCK_IN,
                                                stage2_kernel_h,
                                                stage2_kernel_w).transpose((0, 2, 4, 5, 1, 3))

# Format the input/output arrays with tvm.nd.array to the DLPack standard
stage0_data_nd = tvm.nd.array(stage0_data_packed, ctx)
stage0_kernel_nd = tvm.nd.array(stage0_kernel_packed, ctx)
stage1_kernel_nd = tvm.nd.array(stage1_kernel_packed, ctx)
stage2_kernel_nd = tvm.nd.array(stage2_kernel_packed, ctx)
stage0_output_nd = tvm.nd.array(stage0_output_np, ctx)
stage1_output_nd = tvm.nd.array(stage1_output_np, ctx)
stage2_nd = tvm.nd.array(np.zeros(stage2_output_shape).astype(stage2.dtype), ctx)

# Invoke the module to perform the computation
f(stage0_data_nd, stage0_kernel_nd, stage0_output_nd,
  stage1_kernel_nd, stage1_output_nd, stage2_kernel_nd, stage2_nd)
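
# Optionally, time the end-to-end run with TVM's time_evaluator. This is a
# sketch: timings from a local (simulated) RPC session are a smoke test, not
# a hardware measurement.
evaluator = f.time_evaluator(f.entry_name, ctx, number=1)
t = evaluator(stage0_data_nd, stage0_kernel_nd, stage0_output_nd,
              stage1_kernel_nd, stage1_output_nd, stage2_kernel_nd, stage2_nd)
print("Execution time: %g ms" % (t.mean * 1e3))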


# Verify against a numpy reference implementation
stage0_C1S2 = conv2d_nchw_python(stage0_data_np.astype(env.acc_dtype),
                                 stage0_kernel_np.astype(env.acc_dtype),
                                 (stage0_stride_h, stage0_stride_w),
                                 (stage0_pad_h, stage0_pad_w)).astype(env.acc_dtype)
# Shift and clip, mirroring compute_shr_clip
stage0_C1S2 = stage0_C1S2 >> env.INP_WIDTH
stage0_C1S2 = np.clip(stage0_C1S2, inp_min, inp_max)
stage0_C1S2 = stage0_C1S2.astype(stage0.dtype)
# Pack into the NCHWnc layout
stage0_C1S2 = stage0_C1S2.reshape((stage0_batch_size // env.BATCH,
                                   env.BATCH,
                                   stage0_out_channels // env.BLOCK_OUT,
                                   env.BLOCK_OUT,
                                   stage0_fout_height,
                                   stage0_fout_width)).transpose((0, 2, 4, 5, 1, 3))

params = stage0_output_shape, stage0_batch_size, env, stage0_out_channels, \
    stage0_fout_width, stage0_fout_height, stage0.dtype

# Reference pooling
stage0_ref = db.pool(stage0_C1S2, params)

# Unpack the stage 0 reference back to NCHW as the stage 1 input
stage1_data_np = stage0_ref.transpose((0, 4, 1, 5, 2, 3)) \
    .reshape(stage1_batch_size, stage1_in_channels, stage1_height, stage1_width)

stage1_C3S4 = conv2d_nchw_python(stage1_data_np.astype(env.acc_dtype),
                                 stage1_kernel_np.astype(env.acc_dtype),
                                 (stage1_stride_h, stage1_stride_w),
                                 (stage1_pad_h, stage1_pad_w)).astype(env.acc_dtype)
stage1_C3S4 = stage1_C3S4 >> env.INP_WIDTH
stage1_C3S4 = np.clip(stage1_C3S4, inp_min, inp_max)
stage1_C3S4 = stage1_C3S4.astype(stage1.dtype)
stage1_C3S4 = stage1_C3S4.reshape((stage1_batch_size // env.BATCH,
                                   env.BATCH,
                                   stage1_out_channels // env.BLOCK_OUT,
                                   env.BLOCK_OUT,
                                   stage1_fout_height,
                                   stage1_fout_width)).transpose((0, 2, 4, 5, 1, 3))

params = stage1_output_shape, stage1_batch_size, env, stage1_out_channels, \
    stage1_fout_width, stage1_fout_height, stage1.dtype

stage1_ref = db.pool(stage1_C3S4, params)

stage2_data_np = stage1_ref.transpose((0, 4, 1, 5, 2, 3)) \
    .reshape(stage2_batch_size, stage2_in_channels, stage2_height, stage2_width)

stage2_C5 = conv2d_nchw_python(stage2_data_np.astype(env.acc_dtype),
                               stage2_kernel_np.astype(env.acc_dtype),
                               (stage2_stride_h, stage2_stride_w),
                               (stage2_pad_h, stage2_pad_w)).astype(env.acc_dtype)
stage2_C5 = stage2_C5 >> env.INP_WIDTH
stage2_C5 = np.clip(stage2_C5, inp_min, inp_max)
stage2_C5 = stage2_C5.astype(stage2.dtype)
stage2_C5 = stage2_C5.reshape((stage2_batch_size // env.BATCH,
                               env.BATCH,
                               stage2_out_channels // env.BLOCK_OUT,
                               env.BLOCK_OUT,
                               stage2_fout_height,
                               stage2_fout_width)).transpose((0, 2, 4, 5, 1, 3))
# Apply ReLU (stage 2 has no pooling, so it comes last in the reference)
stage2_C5 = np.clip(stage2_C5, 0, inp_max)

tvm.testing.assert_allclose(stage2_C5, stage2_nd.asnumpy())

print("Successful LeNet-5 test!")