riscvarchive · sunshaoce · Jun 15, 2023
diff --git a/v-spec.adoc b/v-spec.adoc
@@ -1331,21 +1331,22 @@ throughput on mixed-width operations in a single loop.
 #  a0 holds the total number of elements to process
 #  a1 holds the address of the source array
 #  a2 holds the address of the destination array
+#  a3 holds the scalar multiplier for the widening multiplication
 
 loop:
-    vsetvli a3, a0, e16, m4, ta, ma  # vtype = 16-bit integer vectors;
-                                     # also update a3 with vl (# of elements this iteration)
+    vsetvli t0, a0, e16, m4, ta, ma  # vtype = 16-bit integer vectors, LMUL=4;
+                                     # also sets t0 = vl (# of elements this iteration)
     vle16.v v4, (a1)        # Get 16b vector
-    slli t1, a3, 1          # Multiply # elements this iteration by 2 bytes/source element
+    slli t1, t0, 1          # t1 = vl * 2: bytes consumed (2 bytes/source element)
     add a1, a1, t1          # Bump pointer
-    vwmul.vx v8, v4, x10    # Widening multiply into 32b in <v8--v15>
+    vwmul.vx v8, v4, a3     # Widening multiply by scalar a3 into 32b in <v8--v15>
 
     vsetvli x0, x0, e32, m8, ta, ma  # Operate on 32b values
     vsrl.vi v8, v8, 3
     vse32.v v8, (a2)        # Store vector of 32b elements
-    slli t1, a3, 2          # Multiply # elements this iteration by 4 bytes/destination element
+    slli t1, t0, 2          # t1 = vl * 4: bytes written (4 bytes/destination element)
     add a2, a2, t1          # Bump pointer
-    sub a0, a0, a3          # Decrement count by vl
+    sub a0, a0, t0          # Decrement remaining count by vl (t0)
     bnez a0, loop           # Any more?
 ----