|
30 | 30 | GEMMBenchmark(state, |
31 | 31 | xnn_pqs8_qc8w_gemm_minmax_ukernel_1x32c4__neonsme2, |
32 | 32 | xnn_init_qs8_qc8w_conv_minmax_fp32_scalar_params, |
33 | | - xnn_pack_kai_qs8_qc8w_weights_and_biases_sme2, |
34 | | - xnn_packed_stride_kai_qs8_qc8w_weights_and_biases_sme2, |
| 33 | + xnn_pack_kai_qs8_qc8w_weights_and_biases_sme, |
| 34 | + xnn_packed_stride_kai_qs8_qc8w_weights_and_biases_sme, |
35 | 35 | /*mr=*/[]() -> size_t { |
36 | 36 | const struct xnn_hardware_config* hardware_config = |
37 | 37 | xnn_init_hardware_config(); |
|
70 | 70 | GEMMBenchmark(state, |
71 | 71 | xnn_pqs8_qc8w_gemm_minmax_ukernel_32x32c4__neonsme2, |
72 | 72 | xnn_init_qs8_qc8w_conv_minmax_fp32_scalar_params, |
73 | | - xnn_pack_kai_qs8_qc8w_weights_and_biases_sme2, |
74 | | - xnn_packed_stride_kai_qs8_qc8w_weights_and_biases_sme2, |
| 73 | + xnn_pack_kai_qs8_qc8w_weights_and_biases_sme, |
| 74 | + xnn_packed_stride_kai_qs8_qc8w_weights_and_biases_sme, |
75 | 75 | /*mr=*/[]() -> size_t { |
76 | 76 | const struct xnn_hardware_config* hardware_config = |
77 | 77 | xnn_init_hardware_config(); |
|
109 | 109 | #endif // XNN_ENABLE_ARM_SME2 && XNN_ARCH_ARM64 |
110 | 110 |
|
111 | 111 |
|
#if XNN_ENABLE_ARM_SME && XNN_ARCH_ARM64
  #if XNN_ENABLE_KLEIDIAI
  // Benchmark wrapper for the 32x32c4 Arm SME pqs8/qc8w GEMM minmax
  // microkernel.
  //
  // Forwards `state` to GEMMBenchmark along with the microkernel, its params
  // initializer, and the `xnn_pack_kai_*_sme` / `xnn_packed_stride_kai_*_sme`
  // weight-packing routines. The mr, nr and mr_packed values are supplied as
  // callbacks: each one queries the microkernel when the hardware config is
  // available and has the xnn_arch_arm_sme flag set, and yields 0 otherwise.
  // kr is fixed at 4 and sr at 1. `net` is not referenced in this body.
  static void pqs8_qc8w_gemm_minmax_ukernel_32x32c4__neonsme(benchmark::State& state, const char* net) {
    GEMMBenchmark(
        state,
        xnn_pqs8_qc8w_gemm_minmax_ukernel_32x32c4__neonsme,
        xnn_init_qs8_qc8w_conv_minmax_fp32_scalar_params,
        xnn_pack_kai_qs8_qc8w_weights_and_biases_sme,
        xnn_packed_stride_kai_qs8_qc8w_weights_and_biases_sme,
        /*mr=*/[]() -> size_t {
          const struct xnn_hardware_config* config = xnn_init_hardware_config();
          const bool has_sme =
              config != nullptr &&
              (config->arch_flags & xnn_arch_arm_sme) == xnn_arch_arm_sme;
          return has_sme ? xnn_pqs8_qc8w_gemm_minmax_ukernel_32x32c4__neonsme_get_mr() : 0;
        },
        /*nr=*/[]() -> size_t {
          const struct xnn_hardware_config* config = xnn_init_hardware_config();
          const bool has_sme =
              config != nullptr &&
              (config->arch_flags & xnn_arch_arm_sme) == xnn_arch_arm_sme;
          return has_sme ? xnn_pqs8_qc8w_gemm_minmax_ukernel_32x32c4__neonsme_get_nr() : 0;
        },
        /*kr=*/4, /*sr=*/1,
        // The packed-mr callback reports the same value as the mr callback.
        /*mr_packed=*/[]() -> size_t {
          const struct xnn_hardware_config* config = xnn_init_hardware_config();
          const bool has_sme =
              config != nullptr &&
              (config->arch_flags & xnn_arch_arm_sme) == xnn_arch_arm_sme;
          return has_sme ? xnn_pqs8_qc8w_gemm_minmax_ukernel_32x32c4__neonsme_get_mr() : 0;
        },
        /*arch_flags=*/xnn_arch_arm_sme);
  }

  BENCHMARK_GEMM(pqs8_qc8w_gemm_minmax_ukernel_32x32c4__neonsme)
  #endif  // XNN_ENABLE_KLEIDIAI
#endif  // XNN_ENABLE_ARM_SME && XNN_ARCH_ARM64
| 155 | + |
| 156 | + |
112 | 157 | #ifndef XNNPACK_BENCHMARK_NO_MAIN |
113 | 158 | XNN_BENCHMARK_MAIN(); |
114 | 159 | #endif |
0 commit comments