Added further optimizations and unit tests.
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
;
|
||||
; 2003-08-20, Ullrich von Bassewitz
|
||||
; 2009-09-13, Christian Krueger -- performance increase (about 20%)
|
||||
; 2009-09-13, Christian Krueger -- performance increase (about 20%), 2013-07-25 improved unrolling
|
||||
; 2015-10-23, Greg King
|
||||
;
|
||||
; void* __fastcall__ memmove (void* dest, const void* src, size_t size);
|
||||
@@ -61,13 +61,10 @@ PageSizeCopy: ; assert Y = 0
|
||||
dec ptr1+1 ; adjust base...
|
||||
dec ptr2+1
|
||||
dey ; in entry case: 0 -> FF
|
||||
lda (ptr1),y ; need to copy this 'intro byte'
|
||||
sta (ptr2),y ; to 'land' later on Y=0! (as a result of the '.repeat'-block!)
|
||||
dey ; FF ->FE
|
||||
@copyBytes:
|
||||
.repeat 2 ; Unroll this a bit to make it faster...
|
||||
lda (ptr1),y
|
||||
sta (ptr2),y
|
||||
.repeat 3 ; unroll this a bit to make it faster...
|
||||
lda (ptr1),y ; important: unrolling three times gives a nice
|
||||
sta (ptr2),y ; 255/3 = 85 loop which ends at 0
|
||||
dey
|
||||
.endrepeat
|
||||
@copyEntry: ; in entry case: 0 -> FF
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
;
|
||||
; Ullrich von Bassewitz, 31.05.1998
|
||||
; Christian Krueger: 2013-Jul-24, minor optimization
|
||||
; Christian Krueger: 2013-Jul-24, minor optimizations
|
||||
;
|
||||
; char* strcat (char* dest, const char* src);
|
||||
;
|
||||
@@ -15,8 +15,12 @@ _strcat:
|
||||
jsr popax ; Get dest
|
||||
sta tmp3 ; Remember for function return
|
||||
tay
|
||||
.if (.cpu .bitand ::CPU_ISET_65SC02)
|
||||
stz ptr2
|
||||
.else
|
||||
lda #0
|
||||
sta ptr2 ; access from page start, y contains low byte
|
||||
.endif
|
||||
stx ptr2+1
|
||||
|
||||
findEndOfDest:
|
||||
|
||||
@@ -14,8 +14,12 @@ _strchr:
|
||||
jsr popax ; get s
|
||||
tay ; low byte of pointer to y
|
||||
stx ptr1+1
|
||||
.if (.cpu .bitand ::CPU_ISET_65SC02)
|
||||
stz ptr1
|
||||
.else
|
||||
lda #0
|
||||
sta ptr1 ; ptr access page wise
|
||||
sta ptr1 ; access from page start, y contains low byte
|
||||
.endif
|
||||
|
||||
Loop: lda (ptr1),y ; Get next char
|
||||
beq EOS ; Jump on end of string
|
||||
|
||||
@@ -1,54 +1,54 @@
|
||||
;
|
||||
; Ullrich von Bassewitz, 11.06.1998
|
||||
; Christian Krueger: 05-Aug-2013, optimization
|
||||
;
|
||||
; size_t strcspn (const char* s1, const char* s2);
|
||||
;
|
||||
|
||||
.export _strcspn
|
||||
.import popax
|
||||
.importzp ptr1, ptr2, tmp1, tmp2, tmp3
|
||||
.import popax, _strlen
|
||||
.importzp ptr1, ptr2, tmp1, tmp2
|
||||
|
||||
_strcspn:
|
||||
sta ptr2 ; Save s2
|
||||
stx ptr2+1
|
||||
jsr popax ; Get s1
|
||||
sta ptr1
|
||||
stx ptr1+1
|
||||
ldx #0 ; low counter byte
|
||||
stx tmp1 ; high counter byte
|
||||
ldy #$00
|
||||
jsr _strlen ; get length in a/x and transfer s2 to ptr1
|
||||
; Note: It does not make sense to
|
||||
; have more than 255 test chars, so
|
||||
; we don't support a high byte here! (ptr1+1 is
|
||||
; also unchanged in strlen then (important!))
|
||||
; -> the original implementation also
|
||||
; ignored this case
|
||||
|
||||
L1: lda (ptr1),y ; get next char from s1
|
||||
beq L6 ; jump if done
|
||||
sta tmp2 ; save char
|
||||
sta tmp1 ; tmp1 = strlen of test chars
|
||||
jsr popax ; get and save s1
|
||||
sta ptr2 ; to ptr2
|
||||
stx ptr2+1
|
||||
ldx #0 ; low counter byte
|
||||
stx tmp2 ; high counter byte
|
||||
|
||||
loadChar:
|
||||
ldy #0
|
||||
lda (ptr2),y ; get next char from s1
|
||||
beq leave ; zero byte: end of s1 reached
|
||||
advance:
|
||||
inc ptr2 ; advance string position to test
|
||||
bne check
|
||||
inc ptr2+1
|
||||
dey ; correct next iny (faster/shorter than bne...)
|
||||
|
||||
checkNext:
|
||||
iny
|
||||
bne L2
|
||||
inc ptr1+1
|
||||
L2: sty tmp3 ; save index into s1
|
||||
check: cpy tmp1 ; compare with length of test character string
|
||||
beq endOfTestChars
|
||||
cmp (ptr1),y ; found matching char?
|
||||
bne checkNext
|
||||
|
||||
ldy #0 ; get index into s2
|
||||
L3: lda (ptr2),y ;
|
||||
beq L4 ; jump if done
|
||||
cmp tmp2
|
||||
beq L6
|
||||
iny
|
||||
bne L3
|
||||
|
||||
; The character was not found in s2. Increment the counter and start over
|
||||
|
||||
L4: ldy tmp3 ; reload index
|
||||
inx
|
||||
bne L1
|
||||
inc tmp1
|
||||
bne L1
|
||||
|
||||
; The character was found, or we reached the end of s1. Return count of
|
||||
; characters
|
||||
|
||||
L6: txa ; get low counter byte
|
||||
ldx tmp1 ; get high counter byte
|
||||
leave: txa ; restore position of finding
|
||||
ldx tmp2 ; and return
|
||||
rts
|
||||
|
||||
|
||||
|
||||
endOfTestChars:
|
||||
inx
|
||||
bne loadChar
|
||||
inc tmp2
|
||||
bne loadChar ; like bra...
|
||||
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
;
|
||||
; Ullrich von Bassewitz, 31.05.1998
|
||||
;
|
||||
; Note: strspn & strcspn call this function internally and rely on
|
||||
; the usage of only ptr1 here! Keep in mind when applying changes
|
||||
; and check the other implementations too!
|
||||
;
|
||||
; int strlen (const char* s);
|
||||
;
|
||||
|
||||
@@ -23,4 +27,3 @@ L1: lda (ptr1),y
|
||||
|
||||
L9: tya ; get low byte of counter, hi's all set
|
||||
rts
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
;
|
||||
; Ullrich von Bassewitz, 31.05.1998
|
||||
; Christian Krueger: 12-Aug-2013, minor optimizations
|
||||
;
|
||||
; char* strncat (char* dest, const char* src, size_t n);
|
||||
;
|
||||
@@ -9,64 +10,65 @@
|
||||
.importzp ptr1, ptr2, ptr3, tmp1, tmp2
|
||||
|
||||
_strncat:
|
||||
eor #$FF ; one's complement to count upwards
|
||||
sta tmp1
|
||||
txa
|
||||
eor #$FF
|
||||
sta tmp2
|
||||
jsr popax ; get src
|
||||
sta ptr1
|
||||
stx ptr1+1
|
||||
jsr popax ; get dest
|
||||
sta ptr2
|
||||
stx ptr2+1
|
||||
sta ptr3 ; remember for function return
|
||||
stx ptr3+1
|
||||
ldy #0
|
||||
eor #$FF ; one's complement to count upwards
|
||||
sta tmp1
|
||||
txa
|
||||
eor #$FF
|
||||
sta tmp2
|
||||
|
||||
jsr popax ; get src
|
||||
sta ptr1
|
||||
stx ptr1+1
|
||||
|
||||
jsr popax ; get dest
|
||||
sta ptr3 ; remember for function return
|
||||
stx ptr3+1
|
||||
stx ptr2+1
|
||||
tay ; low byte as offset in Y
|
||||
.if (.cpu .bitand ::CPU_ISET_65SC02)
|
||||
stz ptr2
|
||||
.else
|
||||
ldx #0
|
||||
stx ptr2 ; destination on page boundary
|
||||
.endif
|
||||
|
||||
; find end of dest
|
||||
|
||||
L1: lda (ptr2),y
|
||||
beq L2
|
||||
iny
|
||||
bne L1
|
||||
inc ptr2+1
|
||||
bne L1
|
||||
L1: lda (ptr2),y
|
||||
beq L2
|
||||
iny
|
||||
bne L1
|
||||
inc ptr2+1
|
||||
bne L1
|
||||
|
||||
; end found, get offset in y into pointer
|
||||
|
||||
L2: tya
|
||||
clc
|
||||
adc ptr2
|
||||
sta ptr2
|
||||
bcc L3
|
||||
inc ptr2+1
|
||||
; end found, apply offset to dest ptr and reset y
|
||||
L2: sty ptr2
|
||||
|
||||
; copy src. We've put the ones complement of the count into the counter, so
|
||||
; we'll increment the counter on top of the loop
|
||||
|
||||
L3: ldy #0
|
||||
ldx tmp1 ; low counter byte
|
||||
L3: ldy #0
|
||||
ldx tmp1 ; low counter byte
|
||||
|
||||
L4: inx
|
||||
bne L5
|
||||
inc tmp2
|
||||
beq L6 ; jump if done
|
||||
L5: lda (ptr1),y
|
||||
sta (ptr2),y
|
||||
beq L7
|
||||
iny
|
||||
bne L4
|
||||
inc ptr1+1
|
||||
inc ptr2+1
|
||||
bne L4
|
||||
L4: inx
|
||||
bne L5
|
||||
inc tmp2
|
||||
beq L6 ; jump if done
|
||||
L5: lda (ptr1),y
|
||||
sta (ptr2),y
|
||||
beq L7
|
||||
iny
|
||||
bne L4
|
||||
inc ptr1+1
|
||||
inc ptr2+1
|
||||
bne L4
|
||||
|
||||
; done, set the trailing zero and return pointer to dest
|
||||
|
||||
L6: lda #0
|
||||
sta (ptr2),y
|
||||
L7: lda ptr3
|
||||
ldx ptr3+1
|
||||
rts
|
||||
L6: lda #0
|
||||
sta (ptr2),y
|
||||
L7: lda ptr3
|
||||
ldx ptr3+1
|
||||
rts
|
||||
|
||||
|
||||
|
||||
@@ -1,56 +1,54 @@
|
||||
;
|
||||
; Ullrich von Bassewitz, 11.06.1998
|
||||
; Christian Krueger: 08-Aug-2013, optimization
|
||||
;
|
||||
; size_t strspn (const char* s1, const char* s2);
|
||||
;
|
||||
|
||||
.export _strspn
|
||||
.import popax
|
||||
.importzp ptr1, ptr2, tmp1, tmp2, tmp3
|
||||
.import popax, _strlen
|
||||
.importzp ptr1, ptr2, tmp1, tmp2
|
||||
|
||||
_strspn:
|
||||
sta ptr2 ; Save s2
|
||||
stx ptr2+1
|
||||
jsr popax ; get s1
|
||||
sta ptr1
|
||||
stx ptr1+1
|
||||
ldx #0 ; low counter byte
|
||||
stx tmp1 ; high counter byte
|
||||
ldy #$00
|
||||
jsr _strlen ; get length in a/x and transfer s2 to ptr1
|
||||
; Note: It does not make sense to
|
||||
; have more than 255 test chars, so
|
||||
; we don't support a high byte here! (ptr1+1 is
|
||||
; also unchanged in strlen then (important!))
|
||||
; -> the original implementation also
|
||||
; ignored this case
|
||||
|
||||
L1: lda (ptr1),y ; get next char from s1
|
||||
beq L6 ; jump if done
|
||||
sta tmp2 ; save char
|
||||
sta tmp1 ; tmp1 = strlen of test chars
|
||||
jsr popax ; get and save s1
|
||||
sta ptr2 ; to ptr2
|
||||
stx ptr2+1
|
||||
ldx #0 ; low counter byte
|
||||
stx tmp2 ; high counter byte
|
||||
|
||||
loadChar:
|
||||
ldy #0
|
||||
lda (ptr2),y ; get next char from s1
|
||||
beq leave ; zero byte: end of s1 reached
|
||||
advance:
|
||||
inc ptr2 ; advance string position to test
|
||||
bne check
|
||||
inc ptr2+1
|
||||
dey ; correct next iny (faster/shorter than bne...)
|
||||
|
||||
checkNext:
|
||||
iny
|
||||
bne L2
|
||||
inc ptr1+1
|
||||
L2: sty tmp3 ; save index into s1
|
||||
check: cpy tmp1 ; compare with length of test character string
|
||||
beq leave
|
||||
cmp (ptr1),y ; found matching char?
|
||||
bne checkNext
|
||||
|
||||
ldy #0 ; get index into s2
|
||||
L3: lda (ptr2),y ;
|
||||
beq L6 ; jump if done
|
||||
cmp tmp2
|
||||
beq L4
|
||||
iny
|
||||
bne L3
|
||||
|
||||
; The character was found in s2. Increment the counter and start over
|
||||
|
||||
L4: ldy tmp3 ; reload index
|
||||
foundTestChar:
|
||||
inx
|
||||
bne L1
|
||||
inc tmp1
|
||||
bne L1
|
||||
bne loadChar
|
||||
inc tmp2
|
||||
bne loadChar ; like bra...
|
||||
|
||||
; The character was not found, or we reached the end of s1. Return count of
|
||||
; characters
|
||||
|
||||
L6: txa ; get low counter byte
|
||||
ldx tmp1 ; get high counter byte
|
||||
leave: txa ; restore position of finding
|
||||
ldx tmp2 ; and return
|
||||
rts
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user