Skip to content
This repository has been archived by the owner on Mar 20, 2024. It is now read-only.

Use a3 for widening multiply in ch6.4 #891

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions v-spec.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -1331,21 +1331,22 @@ throughput on mixed-width operations in a single loop.
# Strip-mined loop: load 16-bit elements, widening-multiply each by the
# scalar in a3 to a 32-bit product, shift the product right (logical) by 3,
# and store the 32-bit results.
#
# On entry:
#   a0 holds the total number of elements to process
#   a1 holds the address of the source array
#   a2 holds the address of the destination array
#   a3 holds the multiplier for the widening multiplication
#
# Registers written: a0 (counts down to 0), a1 and a2 (advanced past the
# processed elements), t0, t1, v4 (source group), v8--v15 (result group).
# a3 is only read, so the multiplier stays valid across iterations.

loop:
    vsetvli t0, a0, e16, m4, ta, ma   # vtype = 16-bit integer vectors;
                                      # also update t0 with vl (# of elements this iteration)
    vle16.v v4, (a1)                  # Get 16b vector
    slli t1, t0, 1                    # Multiply # elements this iteration by 2 bytes/source element
    add a1, a1, t1                    # Bump pointer
    vwmul.vx v8, v4, a3               # Widening multiply into 32b in <v8--v15>
                                      # (scalar operand is a3, not x10: x10 is a0, the element count)

    vsetvli x0, x0, e32, m8, ta, ma   # Operate on 32b values; e32/m8 has the same
                                      # SEW/LMUL ratio as e16/m4, so vl is kept as-is
    vsrl.vi v8, v8, 3                 # Logical right shift of each 32b product by 3
    vse32.v v8, (a2)                  # Store vector of 32b elements
    slli t1, t0, 2                    # Multiply # elements this iteration by 4 bytes/destination element
    add a2, a2, t1                    # Bump pointer
    sub a0, a0, t0                    # Decrement count by vl
    bnez a0, loop                     # Any more?
----

Expand Down