-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwssplitter-CLUTTERED.asm
5754 lines (5333 loc) · 233 KB
/
wssplitter-CLUTTERED.asm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
title "WS281X-Splitter - WS281X segment splitter/breakout/debug for Microchip PIC"
;================================================================================
; File: wssplitter.asm
; Date: 8/11/2021
; Version: 0.21.10
; Author: [email protected], (c)2021 [email protected]
; Device: PIC16F15313 (midrange Microchip 8-pin PIC) or equivalent running @8 MIPS
; Peripherals used: Timer0, Timer1 (gated), Timer2, no-MSSP, EUSART, no-PWM, CLC
; Compiler: mpasmx(v5.35), NOT pic-as; NOTE: custom build line is used for source code fixups
; IDE: MPLABX v5.35 (last one to include mpasm)
; Description:
; WS281X-Splitter can be used for the following purposes:
; 1. split a single WS281X data stream into <= 4 separate segments;
; creates a virtual daisy chain of LED strings instead of using null pixels between
; 2. debugger or signal integrity checker; show 24-bit WS pixel data at end of string
; 3. timing checker; display frame rate (FPS received); alternating color is used as heartbeat
; Build instructions:
;no ?Add this line in the project properties box, pic-as Global Options -> Additional options:
;no -Wa,-a -Wl,-pPor_Vec=0h,-pIsr_Vec=4h
; - use PICKit2 or 3 or equivalent programmer (PICKit2 requires PICKitPlus for newer PICs)
; Wiring:
; RA0 = debug output (32 px WS281X):
; - first 24 px shows segment 1/2/3 quad px length (0 = 1K)
; - next 8 px = FPS (255 max), msb first
; RA1 = output segment 1
; RA2 = output segment 2
; RA3 = WS281X input stream
; - first/second/third byte = segment 1/2/3 quad pixel length
; - first segment data follows immediately
; RA4 = output segment 4; receives anything after segment 1/2/3
; RA5 = output segment 3
; TODO:
; - use PPS to set RA3 as segment 3 out and RA5 as WS input?
; - uart bootloader; ground segment 0 out to enable? auto-baud detect; verify
; - custom pixel dup/skip, enforce max brightness limit?
;================================================================================
NOLIST; reduce clutter in .LST file
;NOTE: ./Makefile += AWK, GREP
;test controller: SP108E_3E6F0D
;check nested #if/#else/#endif: grep -vn ";#" this-file | grep -e "#if" -e "#else" -e "#endif"
;or: sed 's/;.*//' < ~/MP*/ws*/wssplitter.asm | grep -n -e " if " -e " else" -e " end" -e " macro" -e " while "
;grep -viE '^ +((M|[0-9]+) +)?(EXPAND|EXITM|LIST)([ ;_]|$$)' ./build/${ConfName}/${IMAGE_TYPE}/wssplitter.o.lst > wssplitter.LST
EXPAND; show macro expansions
;Application configuration section: assembled only on the first pass, while HOIST is still undefined.
;The file defines HOIST and then #includes itself, so the "#else" side of this #ifndef is what the
;nested include sees (the sections there are selected by "#if HOIST == n").
#ifndef HOIST
#define HOIST 0
#include __FILE__; self
messg no hoist, app config/defs @47
LIST_PUSH TRUE
EXPAND_PUSH FALSE
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;//compile-time options:
;#define BITBANG; //dev/test only
;;#define SPI_TEST
#define WANT_DEBUG; //DEV/TEST ONLY!
;#define WANT_ISR; //ISR not used; uncomment to reserve space for ISR (or jump to)
#define WSBIT_FREQ (800 KHz); //WS281X "high" speed
#define WSLATCH (50 -20 usec); //end-of-frame latch time; "cheat" by using shorter interval and use the extra time for processing overhead
;#define MAX_THREADS 2; //anim xmit or frame rcv, breakout xmit
#define FOSC_FREQ (32 MHz); //max speed; NOTE: SPI 3x requires max speed, otherwise lower speed might work
;//pin assignments:
#define WSDI RA3; //RA3 = WS input stream (from controller or previous WS281X pixels)
#define BREAKOUT RA0; //RA0 = WS breakout pixels, or simple LED for dev/debug
;NOTE(review): the comment on the next line says RA5, but SEG4OUT is defined as RA4 a few lines below - confirm which pin is intended
#define LEDOUT IIFDEBUG(SEG4OUT, -1); //RA5 = simple LED output; ONLY FOR DEV/DEBUG
;#define WSCLK 4-2; //RA4 = WS input clock (recovered from WS input data signal); EUSART sync rcv clock needs a real I/O pin?
#define SEG1OUT RA1; //RA1 = WS output segment 1
#define SEG2OUT RA2; //RA2 = WS output segment 2
#define SEG3OUT RA#v(3+2); //RA5 = WS output segment 3; RA3 is input-only, use alternate pin for segment 3
#define SEG4OUT RA4; //RA4 = WS output segment 4
;color order: each hex digit of RGSWAP selects a byte (3 = R, 2 = G, 1 = B); only the last uncommented #define is in effect
;#define RGSWAP 0x321; //3 = R, 2 = G, 1 = B; default = 0x321 = RGB
#define RGSWAP 0x231; //3 = R, 2 = G, 1 = B; default = 0x321 = RGB
;// default test strip
;//results observed for each order (input sequence => colors displayed); 0x231 is the one that matches:
;//order 0x123: RGBYMCW => BRGMCYW
;//order 0x132: RGBYMCW => RBGMYCW
;//order 0x213: RGBYMCW => BGRCMYW
;//order 0x231: RGBYMCW => RGBYMCW ==
;//order 0x312: RGBYMCW => GBRCYMW
;//order 0x321: RGBYMCW => GRBYCMW
messg [TODO] R is sending blue(3rd byte), G is sending red(first byte), B is sending green(second byte)
;test strip is GRB order
EXPAND_POP
LIST_POP
messg end of !hoist @85
;HOIST is undefined again here; per the original note this keeps state for plumbing at end of file (not visible in this chunk)
#undefine HOIST; //preserve state for plumbing @eof
#else
#if HOIST == 4; //TODO hack: simplified 8-bit parallel wsplayer
messg hoist 4: HACK: 8-bit parallel wsplayer @89
LIST_PUSH TRUE
EXPAND_PUSH FALSE
;; 8-bit parallel wsplayer ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;compile-time settings for the 8-bit parallel WS player section:
#define PXOUT RA0
;universe length (#pixels per frame); parenthesized because #define is plain text substitution:
;without the parens an expression such as "x / UNIV_LEN" would expand to "x / 1600/100" and group incorrectly
#define UNIV_LEN (1600/100); //33; //10; //<= 2x banked GPRAM (for in-memory bitmaps), else prog space
;#define RGB_ORDER 213; //0x213; //GRB (normal is 0x123 = RGB)
;WS bit envelope in instruction cycles: 2 high / 3 data / 5 low (ws8_sendbit only accepts 235)
#define WS_ENV 235; // 2/3/5 @ 8MIPS completely meets WS2811 + WS2812 specs
;//#define WS_ENV 334; //make start pulse longer
;override any earlier LEDOUT definition for this section:
#ifdef LEDOUT
#undefine LEDOUT
#endif
#define LEDOUT RA4
;send 1 WS data bit to each IO pin:
;bits are assumed to be in WREG
;2/3/5 is the ideal instr env @8 MIPS- conforms strictly to WS2811 *and* WS2812 timing specs
;2/3/5 env uses 30% CPU time (3 instr), leaves 70% for caller (7 instr)
;chainable- last 4 instr of env time (idle) are typically used for call/return, loop control, or other glue logic
;earlier 1-3 instr of idle env time are typically used for data prep
;heavy rendering typically must be done outside the WS env (while waiting on timer for next frame) - there's not enough time during the WS env except for the most trivial rendering
; doing_init TRUE
;not needed: IO pin init does this
; mov8 LATA, LITERAL(0); //start with WS data lines low; NOTE: this is required for correct send startup
; doing_init FALSE
;ws8_sendbit_wreg macro glue_reserved
; ws8_sendbit ORG$, ORG$, NOP #v(4 - ABS(glue_reserved))
; endm
VARIABLE IDLER = -1; which idle timeslot is being expanded (1, 2 or 4); -1 = outside ws8_sendbit
;Send 1 WS data bit on every LATA pin at once (data bits for the pins are expected in WREG).
;Emits: COMF LATA (bit start), idler1 slot, MOVWF LATA (bit data), idler2 slot, CLRF LATA (bit end), idler4 slot.
;  idler1, idler2, idler4 = caller statements to expand inside the 1-, 2- and 4-instr idle slots.
;IDLER is set before each slot is expanded so the idler macros can tell which slot they are in.
;After each slot, nopif is invoked with "slot emitted no code" as its condition and the slot size as its
;count - presumably it pads an empty slot with that many NOPs (nopif is defined elsewhere; confirm).
ws8_sendbit macro idler1, idler2, idler4
    ERRIF((WS_ENV != 235) || (FOSC_FREQ != 8 MIPS), [ERROR] WS envelope WS_ENV !implemented @ fosc FOSC_FREQ - use 235 @8MIPS @__LINE__)
;--- bit start: all pins change state; CAUTION: LATA must be 0 beforehand (which it should be)
    COMF LATA, F
;--- idle slot #1
    IDLER = 1
    LOCAL slot1_org = $
    idler1
    nopif $ == slot1_org, 1
;--- bit data: pins take the values held in WREG
    MOVWF LATA
;--- idle slot #2
    IDLER = 2
    LOCAL slot2_org = $
    idler2
    nopif $ == slot2_org, 2
;--- bit end: all pins low
    CLRF LATA
;--- idle slot #4
    IDLER = 4
    LOCAL slot4_org = $
    idler4
    nopif $ == slot4_org, 4
;--- no slot active any more
    IDLER = -1
    endm
;wrappers for use with ws_sendbit:
;use with NOP 2 timeslot
;rewindpx macro fsr
; addfsr fsr, -24+1; first bit was already sent, only need 23 more
;; NOP 1
; setbit BITVARS, log2(HAS_WSDATA), FALSE; will be eof @loop exit; set it now during idle time
; endm
;get_rgbbyte(rgb24, byteinx): copy one byte of a 24-bit rgb value into the matching byte of rgb_next.
;The #define below rewrites "get_rgbbyte(x, n) ..." as "get_rgbbyte_#v(n) x ...". It is therefore used
;two ways: at a call site it selects one of the three macros, and in the definitions that follow
;"get_rgbbyte(macro, n) rgbval" becomes "get_rgbbyte_#v(n) macro rgbval", i.e. the macro header itself.
;Each variant records how many program words it emitted in GETRGBBYTE_INSTR so the caller can choose
;an idle timeslot that fits.
#define get_rgbbyte(rgb24, byteinx) get_rgbbyte_#v(byteinx) rgb24
VARIABLE GETRGBBYTE_INSTR; 2 or 4 instr; tell caller
;variant for byte 0:
get_rgbbyte(macro, 0) rgbval
LOCAL here = $
mov8 BYTEOF(rgb_next, 0), BYTEOF(rgbval, 0);
;check/restore bank selection for LATA after the copy (BANKCHK is defined elsewhere)
BANKCHK LATA
;size of the code emitted above, in program words:
GETRGBBYTE_INSTR = $ - here
; NOP #v(here + 4 - $); rgbval might require banksel
endm
;variant for byte 1 (same shape as byte 0):
get_rgbbyte(macro, 1) rgbval
LOCAL here = $
mov8 BYTEOF(rgb_next, 1), BYTEOF(rgbval, 1);
BANKCHK LATA
GETRGBBYTE_INSTR = $ - here
endm
;variant for byte 2 (same shape as byte 0):
get_rgbbyte(macro, 2) rgbval
LOCAL here = $
mov8 BYTEOF(rgb_next, 2), BYTEOF(rgbval, 2);
BANKCHK LATA
GETRGBBYTE_INSTR = $ - here
endm
;remove_1channel chbuf: clear the chmask bits in one channel byte.
;Loads the complement of chmask into WREG, then ANDs it into chbuf (2 instr).
;WREG is left holding ~chmask, which remove_4channels relies on.
remove_1channel macro chbuf
COMF chmask, W; need to load (inverted) mask for first channel
ANDWF chbuf, F
endm
;remove_4channels chbuf: clear the same bits in 4 consecutive channel bytes (4 instr).
;CAUTION: WREG must already hold the inverted mask (use right after remove_1channel).
remove_4channels macro chbuf
ANDWF chbuf+0, F ;mask is already loaded, just remove bits
ANDWF chbuf+1, F
ANDWF chbuf+2, F
ANDWF chbuf+3, F
endm
;add_0channels: load chmask (not inverted) into WREG; no channel byte is changed here.
;The trailing NOP pads the macro to 2 words; the mask left in WREG is what the IORWF
;instructions in add_2channels merge into the channel bytes.
add_0channels macro
MOVF chmask, W; need to load mask for first channel
NOP 1; not enough time to apply
endm
;add_2channels chbuf: merge the new rgb value into 2 adjacent channel bytes of pxbuf.
;  chbuf = address of the first of the 2 channel bytes (callers pass pxbuf+0, pxbuf+2, ... pxbuf+22)
;For each of the 2 bytes, if the matching bit of rgb_next is set then WREG is ORed into the byte.
;CAUTION: WREG must already hold the channel mask (add_0channels loads it).
;Channel number = offset of chbuf from pxbuf: channel 0 is bit 7 of rgb_next+0, channel 23 is bit 0 of rgb_next+2.
;Fixes vs. previous version:
; - the "if" had no matching "endif"
; - chnum was the placeholder "?TODO"; it is now derived from the chbuf offset
; - the range check used pxbuf_odd, which is only declared in commented-out code; it now uses pxbuf
; - the 2nd channel's byte/bit are computed from chnum+1, so an odd chnum cannot yield a negative bit number
;Nothing is emitted if chbuf or chbuf+1 falls outside pxbuf[0..23].
;NOTE(review): confirm pxbuf (rather than a separate odd/even buffer) is the intended base.
add_2channels macro chbuf
    if ((chbuf) >= pxbuf) && ((chbuf) < pxbuf + 24 - 1)
    LOCAL chnum = (chbuf) - pxbuf
    ifbit rgb_next + chnum / 8, 7 - chnum % 8, TRUE, dest_arg(F) IORWF chbuf+0
    ifbit rgb_next + (chnum + 1) / 8, 7 - (chnum + 1) % 8, TRUE, dest_arg(F) IORWF chbuf+1
    endif
    endm
;prefetch_lastbit: load WREG from offset +23 of the FSR_send pointer (the 24th byte it points at),
;then pad with a NOP so the macro occupies 2 words.
;The engine uses it while sending its last bit, so WREG is preloaded for the bit the caller inlines.
prefetch_lastbit macro
MOVIW +23[FSR_send];
NOP 1;
endm
;rendering "engine":
;uses control block to send WS data to 8 IO pins in parallel
;control block is a struct supplied by caller:
;'0-23 = 24-bit rgb values for 8 WS channels (3 bytes per IO pin)
;'24-25 = 16-bit #pixels to send
;caller can have multiple control blocks, tells engine which to use
;rendering engine updates send count and (optionally) 1 or more rgb values (8-bit mask)
;this allows display commands to be "chained" into a larger frame buffer display
;4 instr are left between calls for "glue" logic
;rendering engine also maintains global state:
;FSR0 = ptr to active control block
;global bit var WS_SENDING = current IO status
;----
;//send (up to) 24 ws pixel bits:
;chainable
;//FSR0 or 1 points to parallel pixel data (double-buffered)
;//NOTE: FSR changes after each pixel (auto-inc); caller is responsible for rewind
;this is a generic callable function to reduce code space
;leading bits can be inlined in caller to use custom logic (and 1 trailer bit for loop control)
;while sending each ws pixel, a single rgb update command will be processed:
;upd_rgb_cmd macro chmask, rgb24; ~60 instr total = 10 ws bits
; mov24 rgb_next_nb, rgb24; 3-8 instr
;^^^custom addressing done in caller; xfr to generic place
; comf-reg chmask, W
; comf-const = movlw ~chmask; 1-2 instr
; pxbuf[0..23] &= wreg; 25 instr
; comf w; 1 instr
; if bit [0..23] pxbuf[0..23] |= w; 24 instr
;nope-rgb_channel_update macro fsr, pxbuf
;rendering engine:
;caller must set FSR0 to control block before calling
;rendering engine loops until all pixels sent (returns 1 WS node early for custom flow control)
;NOTE: caller can inline bits from first node to reclaim busy-wait time for custom setup logic
;rendering engine applies state changes while sending last node (double-buffering !needed)
;state changes are: dec send count, update (masked) rgb channel values
;rendering engine state; the save_mask/save_rgb/save_count macros write the three engine_* variables.
;BITDCL/b0DCL* are declaration macros defined elsewhere - presumably b0DCL* allocate in bank 0 (confirm).
BITDCL HAS_WSDATA; flag telling if xmit is in progress (also used for eof flag)
b0DCL engine_mask,;
b0DCL24 engine_rgb;
b0DCL16 engine_count;
;save_mask mask: copy mask into engine_mask.
;Emits code only when expanded inside the 2-instr idle slot of ws8_sendbit (IDLER == 2);
;in any other slot it expands to nothing.
save_mask macro mask
if IDLER == 2
mov8 engine_mask, mask
endif
endm
;save_rgb newrgb: copy a 24-bit rgb value into engine_rgb, split across two idle timeslots.
;Intended to be passed as BOTH idler2 and idler4 of the same ws8_sendbit:
;  IDLER == 2: byte 2 of newrgb is copied to REGHI(engine_rgb)
;  IDLER == 4: mov16 copies newrgb into engine_rgb
;  any other IDLER value: expands to nothing
save_rgb macro newrgb
    if IDLER == 2
;2-instr slot: single byte only
    mov8 REGHI(engine_rgb), BYTEOF(newrgb, 2); little endian
    else
    if IDLER == 4
;4-instr slot: 16-bit move
    mov16 engine_rgb, newrgb
    endif
    endif
    endm
;save_count count: copy the 16-bit count into engine_count.
;Emits code only when expanded inside the 4-instr idle slot of ws8_sendbit (IDLER == 4);
;in any other slot it expands to nothing.
save_count macro count
if IDLER == 4
mov16 engine_count, count;
endif
endm
;FSR_send must be #defined before its first use; it was previously defined after the first ws8_sendbit line
#define FSR_send FSR0; FSR0 dedicated to WS send; points to control block
BANKCHK LATA; caller must set BSR; makes timing uniform in here
;NOTE(review): glue4/fsr/lit are not defined in the visible part of the file - confirm this line assembles or remove it
glue4(mov16 fsr, lit(pxbuf))
;removed: a second definition of the ws8_send#v(24)bits entry point used to be here.
;The same label (and the same first bit) is emitted again where the engine body starts, so the label was
;defined twice and an extra bit would have been sent ahead of the 24-bit entry point:
;ws8_send#v(24)bits: ws8_sendbit MOVIW +0[FSR_send], ORG$, ORG$;
;wrappers for use with ws_sendbit:
;upd_count want_update: decrement the 16-bit pxcount in two steps, one per idle timeslot.
;  want_update FALSE (idler2 slot): decrement the low byte; unless it reached 0, pre-increment the
;                                   high byte so that the decrement done in the idler4 slot cancels out
;  want_update TRUE  (idler4 slot): set HAS_WSDATA, decrement the high byte, and clear HAS_WSDATA
;                                   again unless the high byte reached 0; the trailing NOP pads the slot
upd_count macro want_update; used as idler2 or 4
    if want_update
;---- idler4 timeslot: high byte + eof flag
    setbit BITPARENT(HAS_WSDATA), TRUE; assume eof
    DECFSZ REGHI(pxcount), F; skips the next statement when the high byte hits 0
    setbit BITPARENT(HAS_WSDATA), FALSE; not eof
    NOP 1
    else
;---- idler2 timeslot: low byte
    DECFSZ REGLO(pxcount), F; skips the INCF when the low byte hits 0 (borrow)
    INCF REGHI(pxcount), F; kludge: cancels out the upper byte decrement done in the idler4 slot
    endif
    endm
;Rendering engine body: a straight-line run of ws8_sendbit expansions covering offsets +0..+22 of the
;FSR_send pointer (23 bits); the final bit (offset +23) is left for the caller to inline.
;Each line: idler1 = MOVIW to fetch this bit's byte into WREG, idler2/idler4 = work done during idle time.
;Entry labels allow a caller that has inlined some leading bits to join part-way through.
;first 5 bits are generic and can be custom inlined in caller:
ws8_send#v(24)bits: ws8_sendbit MOVIW +0[FSR_send], ORG$, ORG$;
ws8_send#v(23)bits: ws8_sendbit MOVIW +1[FSR_send], ORG$, ORG$;
ws8_send#v(22)bits: ws8_sendbit MOVIW +2[FSR_send], ORG$, ORG$;
ws8_send#v(21)bits: ws8_sendbit MOVIW +3[FSR_send], ORG$, ORG$;
ws8_send#v(20)bits: ws8_sendbit MOVIW +4[FSR_send], ORG$, ORG$;
;update send count + set eof flag (controls buf update):
ws8_sendbit MOVIW +5[FSR_send], upd_count FALSE, upd_count TRUE;
;next 4 bits remove old rgb value (double buffered); each bit clears 1 + 4 = 5 channel bytes:
;NOTE(review): pxbuf+0..3 are not cleared by any of the remove steps below - confirm whether that is intended
ws8_send#v(19)bits_using:
ws8_sendbit MOVIW +6[FSR_send], remove_1channel pxbuf+4, remove_4channels pxbuf+5; pxbuf+4..8
ws8_sendbit MOVIW +7[FSR_send], remove_1channel pxbuf+9, remove_4channels pxbuf+10; pxbuf+9..13
ws8_sendbit MOVIW +8[FSR_send], remove_1channel pxbuf+14, remove_4channels pxbuf+15; pxbuf+14..18
ws8_sendbit MOVIW +9[FSR_send], remove_1channel pxbuf+19, remove_4channels pxbuf+20; pxbuf+19..23
;next 12 bits merge in new rgb value: (double buffered):
;idler2 reloads the mask into WREG, idler4 conditionally ORs it into 2 channel bytes
ws8_sendbit MOVIW +10[FSR_send], add_0channels, add_2channels pxbuf+0; 0..1
ws8_sendbit MOVIW +11[FSR_send], add_0channels, add_2channels pxbuf+2; 2..3
ws8_sendbit MOVIW +12[FSR_send], add_0channels, add_2channels pxbuf+4; 4..5
ws8_sendbit MOVIW +13[FSR_send], add_0channels, add_2channels pxbuf+6; 6..7
ws8_sendbit MOVIW +14[FSR_send], add_0channels, add_2channels pxbuf+8; 8..9
ws8_sendbit MOVIW +15[FSR_send], add_0channels, add_2channels pxbuf+10; 10..11
ws8_sendbit MOVIW +16[FSR_send], add_0channels, add_2channels pxbuf+12; 12..13
ws8_sendbit MOVIW +17[FSR_send], add_0channels, add_2channels pxbuf+14; 14..15
ws8_sendbit MOVIW +18[FSR_send], add_0channels, add_2channels pxbuf+16; 16..17
ws8_sendbit MOVIW +19[FSR_send], add_0channels, add_2channels pxbuf+18; 18..19
ws8_sendbit MOVIW +20[FSR_send], add_0channels, add_2channels pxbuf+20; 20..21
ws8_sendbit MOVIW +21[FSR_send], add_0channels, add_2channels pxbuf+22; 22..23
;second-to-last bit xfr back to caller:
;prefetch_lastbit leaves the last byte (+23) in WREG for the caller; "return" is expanded in the idler4 slot
ws8_sendbit MOVIW +22[FSR_send], prefetch_lastbit, return; //leaves 2 instr for next call/goto
;last bit must be inlined in caller for loop control:
; ws8_sendbit (WREG preloaded), predec_count, (loop ctl); //inlined by caller
; endm
;nope-generate 2 copies from template above (double buffered):
; rgb_channel_update FSR_even, pxbuf_odd
; rgb_channel_update FSR_odd, pxbuf_even
;send 0 or more WS pixels:
;sets up control vars and then calls rendering engine
;2 instr from previous call are reserved for glue so calls can be chained without WS data interruption
;NOTE: 0-len send only valid at start (useful for scrolling)
;once xmit started, len must be > 0 else WS data stream will stall and pixels will latch
;ws8_sendpx pxbuf, portmask, newrgb, count
;Start sending WS pixels from a control block and hand the engine parameters for the next update.
;  pxbuf    = address of the control block / parallel pixel buffer; loaded into FSR_send
;  portmask = channel mask, saved to engine_mask
;  newrgb   = 24-bit rgb value, saved to engine_rgb
;  count    = 16-bit pixel count, saved to engine_count
;portmask, newrgb and count must each be a literal or reachable without a banksel away from LATA.
;The setup glue is limited to 4 instr (padded with nopif if shorter) so calls can be chained.
;Original outline:
; if !count ret; CAUTION: WS will latch if stream runs out; safe only @start or if previous count > 1
; if !sending then prep
; else save rgb24_next + next count16 while sending
; while currentlen--;
; swap bufs;
;Fixes vs. previous version:
; - "ERROF" (not used anywhere else in this file) corrected to ERRIF for the glue-size check
; - the 3 banksel ERRIF messages had an unbalanced "#v(BANKOF(LATA)"; closing paren added
ws8_sendpx macro pxbuf, portmask, newrgb, count
; LOCAL sendpx_loop, prep_only, noprep
; CONTEXT_SAVE help_mpasm#v(NUM_SENDPX);
; ifbit BITPARENT(HAS_WSDATA), FALSE, GOTO prep_only; _#v(NUM_SENDPX); CAUTION: true case (fall-thru) must be == 2 instr
; ERRIF(NEEDS_BANKSEL(LATA, BANK_TRACKER), [ERROR] banksel LATA before calling @__LINE__)
    ERRIF(!ISLIT(portmask) && NEEDS_BANKSEL(portmask, LATA), [ERROR] portmask needs to be !banked or in LATA bank #v(BANKOF(LATA)) @__LINE__)
    ERRIF(!ISLIT(newrgb) && NEEDS_BANKSEL(newrgb, LATA), [ERROR] newrgb needs to be !banked or in LATA bank #v(BANKOF(LATA)) @__LINE__)
    ERRIF(!ISLIT(count) && NEEDS_BANKSEL(count, LATA), [ERROR] count needs to be !banked or in LATA bank #v(BANKOF(LATA)) @__LINE__)
;custom glue (4 instr): point FSR to control block and check/set BSR to LATA:
    LOCAL here1 = $
    mov16 FSR_send, LITERAL(pxbuf); 3-4 instr
    BANKCHK LATA; paranoid; check for safety
    ERRIF($ > here1+4, [ERROR] either banksel LATA before calling or put pxbuf in bank 0 @__LINE__); too much glue
    nopif $ < here1+4, here1+4 - $
;first 2.5 bits inlined to allow params to be saved:
;bit 0: rgb value is saved in two parts (idler2 + idler4 slots)
    ws8_sendbit MOVIW +0[FSR_send], save_rgb newrgb, save_rgb newrgb
;bit 1: mask in the idler2 slot, count in the idler4 slot
    ws8_sendbit MOVIW +1[FSR_send], save_mask portmask, save_count count;
;bit 2: the call is expanded in the idler2 slot; no idler4 argument is given
;NOTE(review): no label matching ws8_send#v(21)half_bits is defined in the visible engine code - confirm the intended target
    ws8_sendbit MOVIW +2[FSR_send], call ws8_send#v(21)half_bits;
    endm
#if 0
=====
;//send 24 WS data bits (1 WS pixel) to each IO pin:
;count is #pixels to send: >0 unique values, <0 repeating values
;"double buffered" using FSR0/FSR2 -next pixel is prepped while sending current pixel
;chainable
;below must be non-banked or in same bank as LATA to avoid extra banksel:
; b0DCL16 send_count; current send length
b0DCL16 count_next; next send length
;put in bank0 so BSR will match LATA:
; b0DCL16 pxbuf_next; //pxbuf to swap in on next pixel group
b0DCL24 rgb_next; //hold next rgb val for prep while sending current ws data (to avoid extra banksel and allow generic code)
b0DCL chmask,; which channels up update with new rgb value
; b0DCL pxbuf_even, :24; //8 parallel 24-bit values (1 for each IO pin)
; b0DCL pxbuf_odd, :24; //double-buffered
;#define bufnum(pxbuf) (((pxbuf) - pxbuf0) / 24)
;add as many as dsired (>= 2 needed for double-buffering):
; b0DCL pxbuf1, :24; //8 parallel 24-bit values (1 for each IO pin)
; b0DCL pxbuf2, :24; //8 parallel 24-bit values (1 for each IO pin)
; b0DCL PXBUFE, :0; end of pxbufs
; CONSTANT NUM_PXBUFS = bufnum(PXBUFE);
;#define pxbuf(i) ((i) % NUM_PXBUFS)
;#define PREP_NONE no_prep, 0; , ORG$, ORG$, ORG$, ORG$, ORG$, ORG$, ORG$, ORG$, ORG$, ORG$, ORG$, ORG$, ORG$, ORG$, ORG$, ORG$, ORG$, ORG$, ORG$, ORG$, ORG$, ORG$, ORG$
;#define PREP_RGB(rgb) prep_rgb_bits, rgb; PREP_BIT(rgb, 23), PREP_BIT(rgb, 23), PREP_BIT(rgb, 23), PREP_BIT(rgb, 23), PREP_BIT(rgb, 23), PREP_BIT(rgb, 23), PREP_BIT(rgb, 23), PREP_BIT(rgb, 23), PREP_BIT(rgb, 23), PREP_BIT(rgb, 23), PREP_BIT(rgb, 23), PREP_BIT(rgb, 23), PREP_BIT(rgb, 23), PREP_BIT(rgb, 23), PREP_BIT(rgb, 23), PREP_BIT(rgb, 23), PREP_BIT(rgb, 23), PREP_BIT(rgb, 23), PREP_BIT(rgb, 23), PREP_BIT(rgb, 23), PREP_BIT(rgb, 23), PREP_BIT(rgb, 23), PREP_BIT(rgb, 23), PREP_BIT(rgb, 23)
;#define PREP_BIT(rgb, bit) rgbbit_#v(bit)(rgb)
; VARIABLE sendpx_init = FALSE;
; CONSTANT UNIV_SCALE = divup(UNIV_LEN, 256); //8; //octal nodes to scale UNIV_LEN down to 8 bits
;#define ws8_preppx ws_handlepx FALSE,
;#define ws8_sendpx ws_handlepx TRUE,
;ws8_px macro want_send, pxbuf, nextpx, count; 0, prep1, prep2, prep3, prep4, prep5, prep6, prep7, prep8, prep9, prep10, prep11, prep12, prep13, prep14, prep15, prep16, prep17, prep18, prep19, prep20, prep21, prep22, prep23
ERRIF(NEEDS_BANKSEL(send_count, LATA), [ERROR] send_count is in bank #v(BANKOF(send_count)) but LATA is in #v(BANKOF(LATA)) (needs to be same or unbanked) @__LINE__)
ERRIF(NEEDS_BANKSEL(count_next, LATA), [ERROR] count_next is in bank #v(BANKOF(count_next)) but LATA is in #v(BANKOF(LATA)) (needs to be same or unbanked) @__LINE__)
ERRIF(NEEDS_BANKSEL(pxbuf_next, LATA), [ERROR] pxbuf_next is in bank #v(BANKOF(pxbuf_next)) but LATA is in #v(BANKOF(LATA)) (needs to be same or unbanked) @__LINE__)
ERRIF(NEEDS_BANKSEL(rgb_next, LATA), [ERROR] rgb_next is in bank #v(BANKOF(rgb_next)) but LATA is in #v(BANKOF(LATA)) (needs to be same or unbanked) @__LINE__)
ERRIF(NEEDS_BANKSEL(chmask, LATA), [ERROR] chmask is in bank #v(BANKOF(chmask)) but LATA is in #v(BANKOF(LATA)) (needs to be same or unbanked) @__LINE__)
; ERRIF(NEEDS_BANKSEL(pxbuf_even, LATA), [ERROR] pxbuf_even is in bank #v(BANKOF(pxbuf_even)) but LATA is in #v(BANKOF(LATA)) (needs to be same or unbanked) @__LINE__)
; ERRIF(NEEDS_BANKSEL(pxbuf_odd, LATA), [ERROR] pxbuf_odd is in bank #v(BANKOF(pxbuf_odd)) but LATA is in #v(BANKOF(LATA)) (needs to be same or unbanked) @__LINE__)
;#define FSR_prep FSR1; don't disturb send ptr
;#define FSR_even FSR0
;#define FSR_odd FSR1
;#define INDF_even INDF0
;#define INDF_odd INDF1
; VARIABLE NUM_SENDPX = 0;
#define FSR_send FSR0; FSR0 dedicated to WS send; points to 8x3 pxbuf
ws8_sendpx macro pxbuf, portmask, rgb24, count; 0, prep1, prep2, prep3, prep4, prep5, prep6, prep7, prep8, prep9, prep10, prep11, prep12, prep13, prep14, prep15, prep16, prep17, prep18, prep19, prep20, prep21, prep22, prep23
; LOCAL want_prep = (rgb24 != 0); all lit and reg exc INDF are !0, and INDF can't be used because FSR is used
; LOCAL bufinx = IIF(pxbuf < pxbuf0, pxbuf, bufnum(pxbuf)); //allow inx or addr
; LOCAL pxprep = pxbuf#v(bufinx % NUM_PXBUFS), pxsend = pxbuf#v((bufinx +NUM_PXBUFS - 1) % NUM_PXBUFS); circular fifo (wrap)
;#define FSR_prep FSR#v(bufinx & 1)
;#define FSR_send FSR#v(!(bufinx & 1)1; //reg addr
;#define INDF_prep INDF#v(bufinx & 1)
;#define INDF_send INDF#v(!(bufinx & 1)1; //reg addr
; LOCAL sendwhich = BOOL2INT(FSR_send == FSR1);
; if count == LITERAL(0); prep only (before first px sent)
; while bitnum
; nextpx_prep pxprep, prep_arg, bitnum
;bitnum -= 1;
; endw
; exitm
; endif
; if !sendpx_init
; nbDCL pxcount,;
; constant UNIV_SCALE = divup(UNIV_LEN, 256); //8; //octal nodes to scale UNIV_LEN down to 8 bits
; constant SEND_COUNT = divup(UNIV_LEN, UNIV_SCALE);
;; messg [INFO] univ len #v(UNIV_LEN), sends #v(SEND_COUNT * UNIV_SCALE) nodes with granularity #v(UNIV_SCALE) nodes @__LINE__
; WARNIF(SEND_COUNT * UNIV_SCALE != UNIV_LEN, [WARNING/TODO] univ len #v(UNIV_LEN) rounds to #v(SEND_COUNT * UNIV_SCALE) during send @__LINE__)
;;TODO: fix ^^^ by adding 1x send after loop
;sendpx_init = TRUE;
; endif
; local want_prep = 1; TODO
; ERRIF(!ISLIT(count), [TODO] var count @__LINE__)
;NOTE: reg count 0 => UNIV_LEN (no time avail to check != 0 < send)
; ERRIF(want_send && (NUM_PXBUFS < 2), [ERROR] need >= 2 px bufs for double buffering: #v(NUM_PXBUFS) @__LINE__);
; LOCAL bufinx = (buf_parity == FSR0, color0, color1); //addr !data
; LOCAL prepbuf = sendbuf ^ color0 ^ color; //addr !data
; mov8 pxcount, count; LITERAL(SEND_COUNT); //divup(UNIV_LEN / UNIV_SCALE)); //scale to fit in 8-bit counter
; addfsr FSR#v(BG), 24; //compensate for first rewind
; mov16 FSR_send, LITERAL(pxsend + 24); //point to END of palette entry (compensate for resend)
; BANKCHK LATA; //pre-select BSR to simplify timing
;already prepped (for chaining): BSR on LATA, pxcount and FSR_send
==========
LOCAL sendpx_loop, prep_only, noprep
; CONTEXT_SAVE help_mpasm#v(NUM_SENDPX);
ifbit BITVARS, log2(HAS_WSDATA), FALSE, GOTO prep_only; _#v(NUM_SENDPX); CAUTION: true case (fall-thru) must be == 2 instr
;NOTE: sendlen can only be 0 at start (delays send); once send has started, need steady data stream
; ORG+2
;no- "ifbit" above used up 2 leader instr, none left for "call"; need to inline-send first bit:
NOP 2; //replaces "call" (next bit is inlined)
sendpx_loop: ;CAUTION: do not yield within this loop - will interfere with timing
if rgb24 == rgb_next; just send, no prep needed
ws8_sendbit MOVIW -24[FSR_send], rewindpx FSR_send, NOP 2
call ws8_send#v(24-2)bits_using#v(FSR_send);
else; && want_prep; full data prep takes 73 instr; xfr to dedicated addr to allow generic code to prep during send
; ERRIF(!ISLIT(rgb24) && ISBANKED(rgb24) && (BANKOF(rgb24) != BANKOF(LATA)), [ERROR] rgb24 must be !banked or in LATA bank #v(BANKOF(LATA)) @__LINE__)
ws8_sendbit MOVIW -24[FSR_send], rewindpx FSR_send, get_rgbbyte(rgb24, 0); call ws8_send#v(24-2)bits_using#v(FSR_send);
;inline a few bits while setting up data prep:
; if ISLIT(rgb24) || !NEEDS_BANKSEL(rgb24, LATA))
if GETRGBBYTE_INSTR == 2; fits into fewer timeslots
get_rgbbyte(rgb24, 1); pad out remainder of previous bit time
ws8_sendbit MOVIW INDF_send++, get_rgbbyte(rgb24, 2), NOP 2
call ws8_send#v(24-3)bits_using#v(FSR_send);
else; takes more timeslots
ws8_sendbit MOVIW INDF_send++, ORG$, get_rgbbyte(rgb24, 1);
ws8_sendbit MOVIW INDF_send++, ORG$, get_rgbbyte(rgb24, 2);
ws8_sendbit MOVIW INDF_send++, ORG$, NOP 2
call ws8_send#v(24-5)bits_using#v(FSR_send);
endif
; setbit BITVARS, log2(WSDATA_WAITING), FALSE; eof
; call ws8_send#v(8)bits_using#v(FSR_send);
; call ws8_send#v(8-1)bits_using#v(FSR_send);
endif
NOP 2; //replaces "call" (next bit is inlined)
;//call+call = 4 instr
; if UNIV_SCALE > 1
; REPEAT LITERAL(UNIV_SCALE - 1), call ws8_resendpx_using#v(FSR_send);
; NOP 2; //replaces "call" (next bit is inlined)
; endif
;sendrewindpx with custom last bit:
; ws8_sendbit MOVIW -24[FSR_send], rewindpx BG, call ws8_sendBGbit_#v(8-1); //call+call = 4 instr
; call ws8_sendBGbit_#v(8);
; call ws8_sendBGbit_#v(8-1); //custom bit below
ws8_sendbit MOVIW INDF_send++, upd_count_low, NOP 1; //reserve 3 instr for loop ctl
DECFSZ REGHI(pxcount), F; //NOTE: upd_count_low cancels this out if low !wrap
goto sendpx_loop
NOP 1
endm
#endif
; GOTO noprep; prep was already done during send above
;prep_only: DROP_CONTEXT;
;#undefine FSR_prep
;#undefine FSR_send
;#undefine INDF_prep
;#undefine INDF_send
; LOCAL noprep
#if 0
ws8_preppx macro rgb24, count; 0, prep1, prep2, prep3, prep4, prep5, prep6, prep7, prep8, prep9, prep10, prep11, prep12, prep13, prep14, prep15, prep16, prep17, prep18, prep19, prep20, prep21, prep22, prep23
if count == LITERAL(0); && want_prep; prep only, no send
exitm
endm
mov16 pxcount, count;
;no- don't want to start send for len 0: this is almost as much work as just doing the mov24, so skip it :P
LOCAL noprep
if !ISLIT(count)
mov8 WREG, BYTEOF(count, 0); WREG might already contain this (if count is < 256 in mov16 above), so do it first
IORWF BYTEOF(count, 1), W;
ifbit EQUALS0 TRUE, GOTO noprep
endif
if rgb24 != rgb_next
mov24 rgb_next, rgb24; put into uniform place so it can be handled by generic code (callable func)
setbit BITVARS, log2(NEED_WSUNPACK), TRUE; need to unpack
;not really needed, since unpack is generic anyway- CALL rgb_unpack;
endif
; if !ISLIT(rgb24)
; memset(pxbuf0, LITERAL(0), LITERAL(24));
; nbDCL memlen,;
;memset macro dest, val, len
; if val != WREG
; mov8 memlen, len;
; mov16(FSR1, LITERAL(dest));
; mov8 WREG, val
;memset_loop:
; MOVIW INDF1++
; DECFSZ memlen, F
; GOTO memset_loop
; else
; mov8 dest, val; kludge: use dest as temp (since it will get that value anyway)
; mov16(FSR1, LITERAL(dest));
; else
; endif
; endif
mov16 FSR_send, LITERAL(pxbuf0 + 24); NOTE: points to end of pxbuf so rewind will work
setbit BITVARS, log2(HAS_WSDATA), TRUE; mark ready to send, but don't send until caller gives next rgb value (to prep *during* send)
BANKCHK LATA
; endif
;noprep:
;NUM_SENDPX += 1; flag for code expansion; no-kludge: need unique value to help out mpasm :(
endm
;recallable data prep helper:
; VARIABLE bitnum = 23
;rgb_unpack: DROP_CONTEXT;
; while bitnum; && (rgb24 != pxbuf0); NOTE: caller can custom-prep directly into pxbuf0 if desired
;; if ISLIT(rgb24)
;; mov8 pxbuf0 + 23 - bitnum, LITERAL(BOOL2INT((rgb24) & BIT(bitnum)) * 0xFF)
;; else
; CLRF pxbuf0 + 23 - bitnum;
; ifbit rgb_next + 2 - bitnum / 8, bitnum % 8, TRUE, dest_arg(F) DECF pxbuf0 + 23 - bitnum; big endian
;; endif
;bitnum -= 1
; endw
;;more setup for send:
; mov16 FSR_send, LITERAL(pxbuf0 + 24); NOTE: points to end of pxbuf so rewind will work
; setbit BITVARS, log2(HAS_WSDATA), TRUE; mark ready to send, but don't send until caller gives next rgb value (to prep *during* send)
; return;
#endif
;data used by the ws_player thread:
;devpanel_mask is a compile-time constant (0x80); tree_mask is a variable set to 0x18 at player startup
;b1DCL is defined elsewhere - presumably it allocates in bank 1 (confirm)
constant devpanel_mask = 0x80;
b1DCL tree_mask;
b1DCL pxbuf, :24; //8 parallel 24-bit values (1 for each IO pin)
b1DCL altbuf, :24; //alternate pxbuf
#if 0
const pxbuf =
[
0x111111,
0x222222,
0x333333,
0x444444,
0x555555,
0x666666,
0x777777,
0x888888,
];
console.log(pivot32x8(pxbuf).map(row => hex(row)).join(", "), srcline());
function pivot32x8(buf32x8)
{
const retval = [];
for (let bit = u32(0x80000000), count = 0; bit; bit >>>= 1, ++count)
retval.push(buf32x8.reduce((colval, rowval, y) => colval | ((rowval & bit)? 1 << (8-1 - y): 0), 0));
return retval;
}
#endif
;pxbuf_init:
; DW 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0;
;altbuf_init:
; DW 0x01, 0x1e, 0x66, 0xaa, 0x01, 0x1e, 0x66, 0xaa;
; DW 0x01, 0x1e, 0x66, 0xaa, 0x01, 0x1e, 0x66, 0xaa;
; DW 0x01, 0x1e, 0x66, 0xaa, 0x01, 0x1e, 0x66, 0xaa;
;pxbuf load-immediate:
;PBLI pxbuf ("pxbuf load-immediate"): point FSR0 at the destination buffer and call pbli.
;In ws_player every use is immediately followed by a DW line holding 24 values, so presumably pbli copies
;the words that follow the call site into the buffer - pbli itself is not visible in this chunk; confirm.
PBLI macro pxbuf
mov16 FSR0, LITERAL(pxbuf); destination
CALL pbli;
endm
;ws_player thread: test/demo player. After a 1 sec startup delay it initializes the two pixel buffers,
;then loops forever sending 4 test patterns, each followed by a flush and a 1 sec wait.
THREAD_DEF ws_player, 4
ws_player: DROP_CONTEXT;
WAIT 1 sec; give power time to settle, and set up timer0 outside player loop
mov8 tree_mask, LITERAL(0x18);
; mov24 altbuf+0*3, LITERAL(0x11111111);
; mov24 altbuf+1*3, LITERAL(0x22222222);
; mov24 altbuf+2*3, LITERAL(0x33333333);
; mov24 altbuf+3*3, LITERAL(0x44444444);
; mov24 altbuf+4*3, LITERAL(0x55555555);
; mov24 altbuf+5*3, LITERAL(0x66666666);
; mov24 altbuf+6*3, LITERAL(0x77777777);
; mov24 altbuf+7*3, LITERAL(0x88888888);
; memset(pxbuf, 0, LITERAL(24));
; memcpy(pxbuf, 0x8000 | pxbuf_init, LITERAL(24));
; memcpy(altuf, 0x8000 | altbuf_init, LITERAL(24));
;initial buffer contents: each PBLI is followed by its 24 data words (CAUTION: the DW line must stay directly after its PBLI)
;pxbuf starts out all 0:
PBLI pxbuf
DW 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0;
;altbuf starts out as the 4-value pattern 0x01, 0x1e, 0x66, 0xaa repeated 6 times:
PBLI altbuf
DW 0x01, 0x1e, 0x66, 0xaa, 0x01, 0x1e, 0x66, 0xaa, 0x01, 0x1e, 0x66, 0xaa, 0x01, 0x1e, 0x66, 0xaa, 0x01, 0x1e, 0x66, 0xaa, 0x01, 0x1e, 0x66, 0xaa;
play_loop: ;DROP_CONTEXT
;pattern 1: three chained sends from pxbuf using devpanel_mask (1 + 1 + UNIV_LEN-2 pixels)
; MOVLW RED_PALINX;
ws8_sendpx pxbuf, devpanel_mask, LITERAL(0x020000), LITERAL(1)
ws8_sendpx pxbuf, devpanel_mask, LITERAL(0x010100), LITERAL(1)
ws8_sendpx pxbuf, devpanel_mask, LITERAL(0x010001), LITERAL(UNIV_LEN-2)
CALL ws8_flushpx
WAIT 1 sec
;pattern 2: mixes masks (all channels, tree_mask, devpanel_mask, none) and both buffers
;NOTE(review): tree_mask is declared with b1DCL; if that places it outside the LATA bank, the portmask check in ws8_sendpx may reject it - confirm
;NOTE(review): the counts below add up to more than UNIV_LEN (1 + 1 + 2 * (UNIV_LEN-2)) - confirm this is intended for the test
; MOVLW GREEN_PALINX;
ws8_sendpx pxbuf, LITERAL(-1), LITERAL(0x000200), LITERAL(1)
ws8_sendpx pxbuf, tree_mask, LITERAL(0x000101), LITERAL(1)
ws8_sendpx altbuf, devpanel_mask, LITERAL(0x010100), LITERAL(UNIV_LEN-2)
ws8_sendpx altbuf, LITERAL(0), LITERAL(0x999999), LITERAL(UNIV_LEN-2)
CALL ws8_flushpx
WAIT 1 sec
;pattern 3: three chained sends from pxbuf with mask -1 (all channels)
; MOVLW BLUE_PALINX;
ws8_sendpx pxbuf, LITERAL(-1), LITERAL(0x000002), LITERAL(1);
ws8_sendpx pxbuf, LITERAL(-1), LITERAL(0x010001), LITERAL(1);
ws8_sendpx pxbuf, LITERAL(-1), LITERAL(0x010001), LITERAL(UNIV_LEN-2);
CALL ws8_flushpx
WAIT 1 sec
;pattern 4: rgb value 0 on all channels (off)
; MOVLW OFF_PALINX;
ws8_sendpx pxbuf, LITERAL(-1), LITERAL(0), LITERAL(UNIV_LEN-1); //-1 for test
CALL ws8_flushpx; ws8_sendpx -1, 0, LITERAL(0); flush
WAIT 1 sec
GOTO play_loop
THREAD_END;
;end of the preceding hoist-4 variant: restore the expand/list settings and close its conditional
;(the matching push directives and #if are above this part of the file)
EXPAND_POP
LIST_POP
messg end of hoist 4 @216
;#else; too deep :(
#endif
;next variant: only assembled when HOIST == 4444+1
#if HOIST == 4444+1; //GOOD hack: 8-bit parallel wsplayer
messg hoist 4: HACK: 8-bit parallel wsplayer @89
LIST_PUSH TRUE
EXPAND_PUSH FALSE
;; 8-bit parallel wsplayer ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;configuration for this variant:
; UNIV_LEN = number of nodes sent per fill (see ws_fillbg)
; WS_ENV = bit envelope in instruction cycles (2 high / 3 data / 5 low); ws8_sendbit rejects anything else
#define PXOUT RA0
#define UNIV_LEN 1600; //33; //10; //<= 2x banked GPRAM (for in-memory bitmaps), else prog space
;#define RGB_ORDER 213; //0x213; //GRB (normal is 0x123 = RGB)
#define WS_ENV 235; // 2/3/5 @ 8MIPS completely meets WS2811 + WS2812 specs
;//#define WS_ENV 334; //make start pulse longer
;force LEDOUT to RA4 for this variant, discarding any earlier definition
#ifdef LEDOUT
#undefine LEDOUT
#endif
#define LEDOUT RA4
;send 1 WS data bit to each IO pin:
;bits are assumed to be in WREG
;2/3/5 env @8 MIPS uses 30% CPU time (3 instr), leaves 70% for caller (7 instr)
;last 4 instr of env time (idle) are typically used for call/return or loop control
;earlier 1-3 instr of idle env time are typically used for data prep
;rendering typically must be done outside the WS env (while waiting on timer for next frame) - there's not enough time during WS env except for the most trivial rendering
; doing_init TRUE
;not needed: IO pin init does this
; mov8 LATA, LITERAL(0); //start with WS data lines low; NOTE: this is required for correct send startup
; doing_init FALSE
;ws8_sendbit_wreg macro glue_reserved
; ws8_sendbit ORG$, ORG$, NOP #v(4 - ABS(glue_reserved))
; endm
;ws8_sendbit: emit one 10-instruction bit envelope on all of LATA at once
;params are statements dropped into the three idle slots of the envelope:
; idler1 = 1-instr slot between bit start and bit data (runs BEFORE MOVWF LATA, so it can still load WREG)
; idler2 = 2-instr slot between bit data and bit end
; idler4 = 4-instr slot after bit end (callers use it for call/return/loop control)
;a slot whose statement emitted no code ($ unchanged) is handed to nopif with the slot size;
;presumably nopif pads with that many NOPs and ORG$ is the "emit nothing" placeholder - both defined elsewhere, TODO confirm
;a slot that emitted anything at all is NOT padded, so the caller is responsible for filling it exactly
ws8_sendbit macro idler1, idler2, idler4
ERRIF((WS_ENV != 235) || (FOSC_FREQ != 8 MIPS), [ERROR] WS envelope WS_ENV !implemented @ fosc FOSC_FREQ - use 235 @8MIPS @__LINE__)
COMF LATA, F; //bit start; CAUTION: LATA must be 0 prior (which it should be)
; ORG $+1; placeholder
LOCAL here1 = $
idler1
nopif $ == here1, 1
MOVWF LATA; //bit data
; ORG $+2; placeholder
LOCAL here2 = $
idler2
nopif $ == here2, 2
CLRF LATA; //bit end
; ORG $+4; placeholder
LOCAL here3 = $
idler4
nopif $ == here3, 4
endm
;//send var bit, byte, or pixel:
;//FSR0 or 1 points to parallel pixel data
;//by convention, FSR0 is bg color, FSR1 is fg color
;//NOTE: FSR changes after each call (auto-inc)
;selector values for #v(): FSR#v(BG) / INDF#v(BG) name FSR0 / INDF0, FSR#v(FG) / INDF#v(FG) name FSR1 / INDF1
CONSTANT BG = 0, FG = 1
BANKCHK LATA; caller must set BSR; makes timing uniform in here
;ws8_sendFGbyte: 8 unrolled bit envelopes, each loading its data byte from [FSR1] with post-increment
;the labels fall through, so entering at ws8_sendFGbit_N sends N envelopes and advances FSR1 by N
;only the last envelope supplies its own idle-slot statement (the return)
ws8_sendFGbyte:
ws8_sendFGbit_#v(8): ws8_sendbit MOVIW INDF1++, ORG$, ORG$;
ws8_sendFGbit_#v(7): ws8_sendbit MOVIW INDF1++, ORG$, ORG$;
ws8_sendFGbit_#v(6): ws8_sendbit MOVIW INDF1++, ORG$, ORG$;
ws8_sendFGbit_#v(5): ws8_sendbit MOVIW INDF1++, ORG$, ORG$;
ws8_sendFGbit_#v(4): ws8_sendbit MOVIW INDF1++, ORG$, ORG$;
ws8_sendFGbit_#v(3): ws8_sendbit MOVIW INDF1++, ORG$, ORG$;
ws8_sendFGbit_#v(2): ws8_sendbit MOVIW INDF1++, ORG$, ORG$;
ws8_sendFGbit_#v(1): ws8_sendbit MOVIW INDF1++, ORG$, return; //return + next call takes 4 instr
;ws8_sendBGbyte: same chain as above but reading through FSR0
ws8_sendBGbyte:
ws8_sendBGbit_#v(8): ws8_sendbit MOVIW INDF0++, ORG$, ORG$;
ws8_sendBGbit_#v(7): ws8_sendbit MOVIW INDF0++, ORG$, ORG$;
ws8_sendBGbit_#v(6): ws8_sendbit MOVIW INDF0++, ORG$, ORG$;
ws8_sendBGbit_#v(5): ws8_sendbit MOVIW INDF0++, ORG$, ORG$;
ws8_sendBGbit_#v(4): ws8_sendbit MOVIW INDF0++, ORG$, ORG$;
ws8_sendBGbit_#v(3): ws8_sendbit MOVIW INDF0++, ORG$, ORG$;
ws8_sendBGbit_#v(2): ws8_sendbit MOVIW INDF0++, ORG$, ORG$;
ws8_sendBGbit_#v(1): ws8_sendbit MOVIW INDF0++, ORG$, return; //return + next call takes 4 instr
;lambda wrapper for ws8_sendbit:
;uses NOP 2 timeslot
;rewindpx: step an FSR back by 23 bytes and burn 1 instr, i.e. exactly fill the 2-instr idler2 slot
;param fgbg: FSR number (BG = FSR0, FG = FSR1)
;intended to follow a "MOVIW -24[FSRn]" that fetched the first of 24 bytes without moving the FSR
rewindpx macro fgbg
addfsr FSR#v(fgbg), -24+1; first bit was already sent, only need 23 more
NOP 1
endm
;whole-pixel senders: 24 bit envelopes per call, 1 byte of [FSRn] per envelope
;ws8_resendXXpx: re-send the 24 bytes that END at the current FSR (first byte fetched at offset -24, then
;  the FSR is rewound by 23); the FSR is back at its entry value on return
;ws8_sendXXpx: send the 24 bytes that START at the current FSR; the FSR is 24 higher on return
;both paths share the XX_23bits tail: 1 (inlined/custom first bit) + 7 + 8 + 7 (called) + 1 (inlined last bit) = 24
;idle-slot accounting per the comments below: call, goto and return each count as 2 instr
ws8_resendFGpx:
ws8_sendbit MOVIW -24[FSR#v(FG)], rewindpx FG, goto ws8_sendFG_23bits; //goto+call = 4 instr
ws8_sendFGpx:
ws8_sendbit MOVIW INDF#v(FG)++, ORG$, NOP 2; //reserve 2 instr for next call
ws8_sendFG_23bits:
call ws8_sendFGbit_#v(8-1); //custom bit above
call ws8_sendFGbit_#v(8);
call ws8_sendFGbit_#v(8-1); //custom bit below
NOP 2; //replaces "call" (next bit is inlined)
ws8_sendbit MOVIW INDF#v(FG)++, ORG$, return; //return + next call takes 4 instr
;background (FSR0) versions of the routines above
ws8_resendBGpx:
ws8_sendbit MOVIW -24[FSR#v(BG)], rewindpx BG, goto ws8_sendBG_23bits; //goto+call = 4 instr
ws8_sendBGpx:
ws8_sendbit MOVIW INDF#v(BG)++, ORG$, NOP 2; //reserve 2 instr for next call
ws8_sendBG_23bits:
call ws8_sendBGbit_#v(8-1); //custom bit above
call ws8_sendBGbit_#v(8);
call ws8_sendBGbit_#v(8-1); //custom bit below
NOP 2; //replaces "call" (next bit is inlined)
ws8_sendbit MOVIW INDF#v(BG)++, ORG$, return; //return + next call takes 4 instr
;loop counter for ws_fillbg (nbDCL is defined elsewhere)
nbDCL count,;
;UNIV_SCALE = nodes sent per loop pass, SEND_COUNT = loop passes; chosen so SEND_COUNT fits the 8-bit counter
;assuming divup is round-up division: UNIV_LEN 1600 gives UNIV_SCALE 7, SEND_COUNT 229, i.e. 1603 nodes sent,
;which is the rounding the WARNIF below reports
constant UNIV_SCALE = divup(UNIV_LEN, 256); //8; //octal nodes to scale UNIV_LEN down to 8 bits
constant SEND_COUNT = divup(UNIV_LEN, UNIV_SCALE);
; messg [INFO] univ len #v(UNIV_LEN), sends #v(SEND_COUNT * UNIV_SCALE) nodes with granularity #v(UNIV_SCALE) nodes @__LINE__
WARNIF(SEND_COUNT * UNIV_SCALE != UNIV_LEN, [WARNING] univ len #v(UNIV_LEN) rounds to #v(SEND_COUNT * UNIV_SCALE) during send @__LINE__)
;TODO: fix ^^^ by adding 1x send after loop
;ws_fillbg: send the 24-byte bgcolor entry SEND_COUNT * UNIV_SCALE times
;in: bgcolor[0..23] holds the parallel pixel data
;uses: count, FSR0, WREG; LATA is driven directly
;each loop pass sends UNIV_SCALE pixels: UNIV_SCALE-1 via calls to ws8_resendBGpx, plus one inlined copy whose
;last bit leaves idle-slot room for the DECFSZ/goto loop control
ws_fillbg: DROP_CONTEXT;
mov8 count, LITERAL(SEND_COUNT); //divup(UNIV_LEN / UNIV_SCALE)); //scale to fit in 8-bit counter
; addfsr FSR#v(BG), 24; //compensate for first rewind
mov16 FSR#v(BG), LITERAL(bgcolor + 24); //point to END of palette entry (compensate for resend)
BANKCHK LATA; //pre-select BSR to simplify timing
fill_loop: ;CAUTION: do not yield within this loop - will interfere with timing
if UNIV_SCALE > 0
REPEAT LITERAL(UNIV_SCALE - 1), call ws8_resendBGpx
NOP 2; //replaces "call" (next bit is inlined)
endif
;rewindpx with custom last bit:
;NOTE(review): the idle slot below contains only one call (2 instr by this file's own accounting), whereas
;ws8_resendBGpx uses goto+call for the same position - the "4 instr" remark may not hold here; confirm timing
ws8_sendbit MOVIW -24[FSR#v(BG)], rewindpx BG, call ws8_sendBGbit_#v(8-1); //call+call = 4 instr
call ws8_sendBGbit_#v(8);
call ws8_sendBGbit_#v(8-1); //custom bit below
NOP 2; //replaces "call" (next bit is inlined)
;last bit of the pass: NOP 1 + DECFSZ + goto fill the 4-instr idle slot
ws8_sendbit MOVIW INDF#v(BG)++, ORG$, NOP 1; //reserve 3 instr for loop ctl
DECFSZ count, F; //REGLO(count), F; //WREG, F
goto fill_loop
; REPEAT LITERAL(UNIV_LEN * 3 - 1), call wsoff_#v(8); //NOTE: 1-2 extra bytes here @end
return;
;color palette:
;each entry is 24 bytes: colors are 24 bits, and 1 bit from each byte goes to a separate IO pin
;PICs with 256 bytes RAM can only hold 10 palette entries in RAM; palette indexes use <= 4 bits
;RAM working copies (b0DCL is defined elsewhere; presumably it allocates in bank 0 - TODO confirm)
;byte N of an entry is written to LATA as the Nth bit envelope of a pixel (see ws8_sendbit users)
b0DCL fgcolor, :24; //8 parallel 24-bit values (1 for each IO pin)
b0DCL bgcolor, :24; //8 parallel 24-bit values (1 for each IO pin)
;single-letter shorthands so the palette tables line up in columns
constant I = 255; //all 8 bits on (readability, src code alignment)
constant O = 0; //BIT(4); //all 8 bits off (or tampered/excluded)
#if 0
;ROM palette (this whole region, through setbg_frompalette, is compiled out by the #if 0 above)
;each entry is 24 DW words, one per bit envelope; xxx_PALINX is the entry's index (offset / 24)
;the entry order matters: the ERRIF checks after the table require R/G/B to be separate index bits
palette8_rom:
;TODO? could compress this but would break PALINX arithmetic
CONSTANT OFF_PALINX = ($ - palette8_rom) / 24;
DW O,O,O,O,O,O,O,O, O,O,O,O,O,O,O,O, O,O,O,O,O,O,O,O;
CONSTANT BLUE_PALINX = ($ - palette8_rom) / 24;
DW O,O,O,O,O,O,O,O, O,O,O,O,O,O,O,O, O,O,O,O,O,O,I,O; //dim
;// DW O,O,O,O,O,O,O,O, O,O,O,O,O,O,O,O, I,I,I,I,I,I,I,I; //bright
CONSTANT GREEN_PALINX = ($ - palette8_rom) / 24;
DW O,O,O,O,O,O,O,O, O,O,O,O,O,O,I,O, O,O,O,O,O,O,O,O; //dim
;// DW X,X,X,X,X,X,X,X, I,I,I,I,I,I,I,I, O,O,O,O,O,O,O,O; //bright
CONSTANT CYAN_PALINX = ($ - palette8_rom) / 24;
DW O,O,O,O,O,O,O,O, O,O,O,O,O,O,I,O, O,O,O,O,O,O,I,O; //dim
;// DW O,O,O,O,O,O,O,O, I,I,I,I,I,I,I,I, I,I,I,I,I,I,I,I; //bright
CONSTANT RED_PALINX = ($ - palette8_rom) / 24;
DW O,O,O,O,O,O,I,O, O,O,O,O,O,O,O,O, O,O,O,O,O,O,O,O; //dim
;// DW I,I,I,I,I,I,I,I, O,O,O,O,O,O,O,O, O,O,O,O,O,O,O,O; //bright
CONSTANT MAGENTA_PALINX = ($ - palette8_rom) / 24, PINK_PALINX = MAGENTA_PALINX; easier to spell :P
DW O,O,O,O,O,O,I,O, O,O,O,O,O,O,O,O, O,O,O,O,O,O,I,O; //dim
;// DW I,I,I,I,I,I,I,I, O,O,O,O,O,O,O,O, I,I,I,I,I,I,I,I; //bright
CONSTANT YELLOW_PALINX = ($ - palette8_rom) / 24;
DW O,O,O,O,O,O,I,O, O,O,O,O,O,O,I,O, O,O,O,O,O,O,O,O; //dim
;// DW I,I,I,I,I,I,I,I, I,I,I,I,I,I,I,I, O,O,O,O,O,O,O,O; //bright
CONSTANT WHITE_PALINX = ($ - palette8_rom) / 24;
DW O,O,O,O,O,O,I,O, O,O,O,O,O,O,I,O, O,O,O,O,O,O,I,O; //dim
;// DW I,I,I,I,I,I,I,I, I,I,I,I,I,I,I,I, I,I,I,I,I,I,I,I; //bright
CONSTANT TEST_PALINX = ($ - palette8_rom) / 24; //TEST ONLY; put after END_PALINX?
;bytes 0-7 walk a single 1 bit down, bytes 8-15 walk it back up (last value is 0x80, was mistyped as 0x8), bytes 16-23 are a fixed pattern
DW 0x80,0x40,0x20,0x10,0x08,0x04,0x02,0x01, 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0xE7,0xD9,0x9D,0x7E,0x18,0x24,0x42,0x81; //test bit pattern to watch in debugger
;//TODO: add more as needed ...
CONSTANT END_PALINX = ($ - palette8_rom) / 24;
;RGB color indexes are 3 lsb; controls R/G/B on/off (for easier color combinations/debug)
;//#define BRIGHT(rgb) ((rgb) + 8); brighter variant
;assembly-time checks that each mixed color's index is the bitwise OR of its primaries' indexes
;(holds while the table order is OFF, BLUE, GREEN, CYAN, RED, MAGENTA, YELLOW, WHITE = 0..7)
ERRIF(YELLOW_PALINX != (RED_PALINX | GREEN_PALINX), [ERROR] yellow #v(YELLOW_PALINX) != red #v(RED_PALINX) + green #v(GREEN_PALINX) @__LINE__)
ERRIF(CYAN_PALINX != (GREEN_PALINX | BLUE_PALINX), [ERROR] cyan #v(CYAN_PALINX) != green #v(GREEN_PALINX) + blue #v(BLUE_PALINX) @__LINE__)
ERRIF(MAGENTA_PALINX != (RED_PALINX | BLUE_PALINX), [ERROR] magenta #v(MAGENTA_PALINX) != red #v(RED_PALINX) + blue #v(BLUE_PALINX) @__LINE__)
ERRIF(WHITE_PALINX != (RED_PALINX | GREEN_PALINX | BLUE_PALINX), [ERROR] white #v(WHITE_PALINX) != red #v(RED_PALINX) + green #v(GREEN_PALINX) + blue #v(BLUE_PALINX) @__LINE__)
;// CONSTANT CUSTOM_PALINX = BRIGHT(0); caller-defined palette entry
; CONSTANT FB_PALINX = 8
; CONSTANT BG_PALINX = 9
;with_arg(bgcolor + 0) macro stmt
; stmt, bgcolor + 0
; endm
;fsrxfr: copy one byte from [FSR1] to [FSR0], post-incrementing both pointers
;clobbers WREG, so the assembly-time WREG tracker is reset to "unknown"
fsrxfr macro
MOVIW INDF1++
MOVWI INDF0++
WREG_TRACKER = WREG_UNKN
endm
;memcpy: unrolled byte copy from src to dest using FSR1 (source) and FSR0 (destination)
;params:
; dest = destination address, or FSR0 if the caller already loaded it
; src = source address, or FSR1 if the caller already loaded it
; count = number of bytes; handed to REPEAT, so the copy is expanded inline (2 instr per byte)
;on exit both FSRs have advanced by count and WREG holds the last byte copied
memcpy macro dest, src, count
if dest != FSR0
mov16 FSR0, dest
endif
if src != FSR1
mov16 FSR1, src
endif
REPEAT count, fsrxfr
endm
;INDF takes 1 extra instr cycle to access ROM
;copy from ROM to RAM to avoid this (simplifies parallel bit banging timing)
;CAUTION: this is EXPENSIVE (memcpy by itself is 72 instr); only use during frame setup when IO is idle
;setbg_frompalette: copy palette8_rom entry number WREG into bgcolor[0..23]
;in: WREG = palette index (only the low 4 bits survive the swapf/ANDLW below)
;uses: FSR0, FSR1, WREG; bgcolor[0] doubles as scratch before the copy overwrites it
setbg_frompalette: DROP_CONTEXT;
; ANDLW 0x0F; 4 bpp
;index * 16, parked in bgcolor[0]
swapf WREG, W
ANDLW 0xF0; //x16
MOVWF bgcolor; kludge: use as temp
mov16 FSR1, LITERAL(0x8000 + palette8_rom); //ROM address: NOTE: adds 1 instr cycle overhead each access
; lslf bgcolor, W
; ADDWF bgcolor, W; 3x
; MOVF bgcolor, W;
;index * 8 + index * 16 = index * 24 = offset of the entry within the table
lsrf bgcolor, W; //x8
addwf bgcolor, W; //x24; CAUTION: assumes <= 10 (no 8-bit wrap)
;16-bit add of the offset into FSR1: low byte first, then propagate the carry into the high byte
ADDWF REGLO(FSR1), F;
ifbit CARRY TRUE, dest_arg(F) INCF REGHI(FSR1)
; mov16 FSR#v(BG), LITERAL(bgcolor)
; REPEAT LITERAL(24), with_arg(bgcolor + REPEATER) MOVIW INDF#v(BG)++
;//TODO: use linear addr? 0x2000 skips gaps, but requires extra MOVLW/BSF to set FSR
memcpy LITERAL(bgcolor), FSR1, LITERAL(24);
;moved mov16 FSR#v(BG), LITERAL(bgcolor); //leave FSR pointing to palette entry in RAM; could replace with ADDFSR to save 2 instr
return;
;end of the region disabled by "#if 0"
#endif
;non-ROM version of above:
;lit takes up same/less prog space as above and runs faster (with opp'ty for additional optimization)
;var takes up 50% more prog space but also runs faster
;setbg_fromrgb: expand a 24-bit color into the 24-byte parallel entry at bgcolor
;param rgb: either a LITERAL(0xRRGGBB) or the address of a 3-byte big-endian variable (rgb+0 holds bits 23..16)
;result: bgcolor[23 - n] = 0xFF if color bit n is set, else 0x00, for n = 23..0 (msb is sent first)
;the counter starts one past the msb and is decremented at the top of each pass so that bit 0 is
;also generated; the earlier "bit = 23 / while bit / decrement at bottom" form stopped before bit 0
;and left bgcolor + 23 holding whatever was there before
setbg_fromrgb macro rgb
LOCAL bit = 24;
while bit
bit -= 1;
if ISLIT(rgb)
;literal color: the byte value is known at assembly time
mov8 bgcolor + 23 - bit, LITERAL(BOOL2INT((rgb) & BIT(bit)) * 0xFF)
else
;variable color: clear the byte, then decrement it to 0xFF if the source bit is set
CLRF bgcolor + 23 - bit;
ifbit rgb + 2 - bit / 8, bit % 8, TRUE, dest_arg(F) DECF bgcolor + 23 - bit; big endian
endif
endw
endm
;ws_player thread for this variant: endless cycle of 4 solid fills (dim red, dim green, dim blue, off)
;each step builds bgcolor with setbg_fromrgb, waits 1 sec, then sends it with ws_fillbg
THREAD_DEF ws_player, 4
ws_player: DROP_CONTEXT;
WAIT 1 sec; give power time to settle, set up timer1 outside player loop
play_loop: ;DROP_CONTEXT
; MOVLW RED_PALINX;
; CALL setbg_frompalette; doing this while idle < wait
setbg_fromrgb LITERAL(0x020000); dim red
WAIT 1 sec
CALL ws_fillbg;
; setbit LATA, LEDOUT, TRUE;
; MOVLW GREEN_PALINX;
; CALL setbg_frompalette; doing this while idle < wait
setbg_fromrgb LITERAL(0x000200); dim green
WAIT 1 sec
CALL ws_fillbg;
; setbit LATA, LEDOUT, FALSE;
; MOVLW BLUE_PALINX;
; CALL setbg_frompalette; doing this while idle < wait
setbg_fromrgb LITERAL(0x000002); dim blue
WAIT 1 sec
CALL ws_fillbg;
; setbit LATA, LEDOUT, TRUE;
; MOVLW OFF_PALINX;
; CALL setbg_frompalette; doing this while idle < wait
setbg_fromrgb LITERAL(0); off
WAIT 1 sec
CALL ws_fillbg;
; setbit LATA, LEDOUT, FALSE;
GOTO play_loop
THREAD_END;
;end of the HOIST == 4444+1 variant: restore expand/list settings and close its conditional
EXPAND_POP
LIST_POP
messg end of hoist 4 @216
;#else; too deep :(
#endif
;next variant: only assembled when HOIST == 4444-1
#if HOIST == 4444-1; //hack: 8-bit parallel wsplayer
messg hoist 4: HACK: 8-bit parallel wsplayer @89
LIST_PUSH TRUE
EXPAND_PUSH FALSE
;; 8-bit parallel wsplayer ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;configuration for this variant; here UNIV_SCALE is a fixed #define rather than derived from UNIV_LEN
#define PXOUT RA0
#define UNIV_LEN 1600; //33; //10; //<= 2x banked GPRAM (for in-memory bitmaps), else prog space
#define UNIV_SCALE 8; //octal nodes to scale UNIV_LEN down to 8 bits
;#define RGB_ORDER 213; //0x213; //GRB (normal is 0x123 = RGB)
#define WS_ENV 235; // 2/3/5 @ 8MIPS completely meets WS2811 + WS2812 specs
;//#define WS_ENV 334; //make start pulse longer
;send 1 WS data bit to each IO pin:
;bits are assumed to be in WREG
;2/3/5 env @8 MIPS uses 30% CPU time (3 instr), leaves 70% for caller (7 instr)
;init-time code: ws8_sendbit starts each bit with COMF LATA, which relies on LATA being 0
;NOTE(review): CLRF is given a second operand here - confirm how the project's CLRF handles it
doing_init TRUE
CLRF LATA, 0; //start with WS data lines low
doing_init FALSE
;ws8_sendbit_wreg: send the bit data already in WREG, leaving room in the idle slot for caller glue
;param glue_reserved: instr the caller will supply itself after the envelope; its absolute value is subtracted from the 4-instr slot
;the first two slots are passed as ORG$, so the data in WREG is not reloaded by the envelope
ws8_sendbit_wreg macro glue_reserved
ws8_sendbit ORG$, ORG$, NOP #v(4 - ABS(glue_reserved))
endm
;ws8_sendbit: emit one 10-instruction bit envelope (2 high / 3 data / 5 low) on all of LATA at once
;params are statements dropped into the envelope's idle slots:
; idler1 = 1-instr slot before the data write (runs before MOVWF LATA, so it can load WREG)
; idler2 = 2-instr slot between data write and bit end
; idler4 = 4-instr slot after bit end
;a slot whose statement emitted no code ($ unchanged) is handed to nopif with the slot size;
;presumably nopif pads with NOPs - defined elsewhere, TODO confirm; partially filled slots are not padded
ws8_sendbit macro idler1, idler2, idler4
ERRIF(WS_ENV != 235 || FOSC_FREQ != 8 MIPS, [ERROR] WS envelope WS_ENV !implemented @ FOSC_FREQ - use 235 @8 MIPS @__LINE__)
COMF LATA, F; //bit start; CAUTION: LATA must be 0 prior (which it should be)
; ORG $+1; placeholder
LOCAL here1 = $
idler1
nopif $ == here1, 1
MOVWF LATA; //bit data
; ORG $+2; placeholder
LOCAL here2 = $
idler2
nopif $ == here2, 2
CLRF LATA; //bit end
; ORG $+4; placeholder
LOCAL here3 = $
idler4
nopif $ == here3, 4
endm
;//send colored px to all IO pins:
;//primary colors only
;TODO: custom colors
;each macro below sends one pixel as three byte-sized calls, in the order red byte, green byte, blue byte
;param custom_bits: number of trailing bit envelopes the caller will send itself; that many are
;dropped from the end of the third (blue) byte (sign is ignored via ABS)
;NOTE(review): ws8_byte_#v(BRIGHTNESS) needs a ws8_byte_2 routine; only ws8_byte_255 and ws8_byte_0 are visible nearby - confirm it exists
#define BRIGHTNESS 2; /0xFF
;all three bytes off
ws8_sendpx_off macro custom_bits
call ws8_byte_#v(0); //ws8_bitoff_#v(8);
call ws8_byte_#v(0); //ws8_bitoff_#v(8);
call ws8_bitoff_#v(8 - ABS(custom_bits));
endm
;first byte = BRIGHTNESS, others off
ws8_sendpx_red macro custom_bits
call ws8_byte_#v(BRIGHTNESS);
call ws8_byte_#v(0);
call ws8_bitoff_#v(8 - ABS(custom_bits));
endm
;second byte = BRIGHTNESS, others off
ws8_sendpx_green macro custom_bits
call ws8_byte_#v(0);
call ws8_byte_#v(BRIGHTNESS);
call ws8_bitoff_#v(8 - ABS(custom_bits));
endm
;ws8_sendpx_blue: send one pixel with red and green off and blue on
;param custom_bits: number of trailing bit envelopes the caller will send itself; that many are
;dropped from the end of the blue byte (sign is ignored via ABS)
;full-bright build (BRIGHTNESS == 0xFF): blue byte is all ones
;dim build: blue byte is the fixed pattern 0000 0010 (the BRIGHTNESS value itself is not used here)
;  -> 6 "off" envelopes, then 1 "on", then 1 "off", truncated from the right by custom_bits
;the leading off-run is 8 - MAX(|custom_bits|, 2), written with MIN as 8 - c - (2 - MIN(c, 2));
;the previous 8 - MIN(c, 2) sent 8 + 2 envelopes for c = 0 and 7 + 1 for c = 1
ws8_sendpx_blue macro custom_bits
call ws8_byte_#v(0);
call ws8_byte_#v(0);
#if BRIGHTNESS == 0xFF; //full bright
call ws8_biton_#v(8 - ABS(custom_bits));
#else; //dim
call ws8_bitoff_#v(8 - ABS(custom_bits) - (2 - MIN(ABS(custom_bits), 2)));
CALLIF ABS(custom_bits) < 2, ws8_biton_#v(1);
CALLIF ABS(custom_bits) < 1, ws8_bitoff_#v(1);
#endif
endm
;//send bit or byte:
;TODO: implement other 8-bit values as needed
;constant-data bit chains: every envelope loads WREG itself in the 1-instr slot, so no FSR is used
;labels fall through: entering at ws8_biton_N / ws8_bitoff_N sends N envelopes, then returns
;ws8_byte_#v(v) is the entry point that sends all 8 envelopes for byte value v
;"on" chain: SET8W in the load slot (presumably sets all 8 WREG bits - defined elsewhere, TODO confirm)
ws8_byte_#v(0xFF):
ws8_biton_#v(8): ws8_sendbit SET8W, ORG$, ORG$;
ws8_biton_#v(7): ws8_sendbit SET8W, ORG$, ORG$;
ws8_biton_#v(6): ws8_sendbit SET8W, ORG$, ORG$;
ws8_biton_#v(5): ws8_sendbit SET8W, ORG$, ORG$;
ws8_biton_#v(4): ws8_sendbit SET8W, ORG$, ORG$;
ws8_biton_#v(3): ws8_sendbit SET8W, ORG$, ORG$;
ws8_biton_#v(2): ws8_sendbit SET8W, ORG$, ORG$;
ws8_biton_#v(1): ws8_sendbit SET8W, ORG$, return; //return + next call takes 4 instr
;"off" chain: CLRW in the load slot, so every pin gets a 0 data bit
ws8_byte_#v(0):
ws8_bitoff_#v(8): ws8_sendbit CLRW, ORG$, ORG$;
ws8_bitoff_#v(7): ws8_sendbit CLRW, ORG$, ORG$;
ws8_bitoff_#v(6): ws8_sendbit CLRW, ORG$, ORG$;
ws8_bitoff_#v(5): ws8_sendbit CLRW, ORG$, ORG$;
ws8_bitoff_#v(4): ws8_sendbit CLRW, ORG$, ORG$;
ws8_bitoff_#v(3): ws8_sendbit CLRW, ORG$, ORG$;
ws8_bitoff_#v(2): ws8_sendbit CLRW, ORG$, ORG$;
ws8_bitoff_#v(1): ws8_sendbit CLRW, ORG$, return; //return + next call takes 4 instr
;variable byte/bits from FSR0:
ws8_bytevar0: