Added further optimizations and unit tests.

This commit is contained in:
IrgendwerA8
2017-03-05 02:09:12 +01:00
parent 09de875330
commit 81115aa826
19 changed files with 327 additions and 177 deletions

View File

@@ -1,6 +1,6 @@
;
; 2003-08-20, Ullrich von Bassewitz
; 2009-09-13, Christian Krueger -- performance increase (about 20%)
; 2009-09-13, Christian Krueger -- performance increase (about 20%), 2013-07-25 improved unrolling
; 2015-10-23, Greg King
;
; void* __fastcall__ memmove (void* dest, const void* src, size_t size);
@@ -61,13 +61,10 @@ PageSizeCopy: ; assert Y = 0
dec ptr1+1 ; adjust base...
dec ptr2+1
dey ; in entry case: 0 -> FF
lda (ptr1),y ; need to copy this 'intro byte'
sta (ptr2),y ; to 'land' later on Y=0! (as a result of the '.repeat'-block!)
dey ; FF ->FE
@copyBytes:
.repeat 2 ; Unroll this a bit to make it faster...
lda (ptr1),y
sta (ptr2),y
.repeat 3 ; unroll this a bit to make it faster...
lda (ptr1),y ; important: unrolling three times gives a nice
sta (ptr2),y ; 255/3 = 85 loop which ends at 0
dey
.endrepeat
@copyEntry: ; in entry case: 0 -> FF

View File

@@ -1,6 +1,6 @@
;
; Ullrich von Bassewitz, 31.05.1998
; Christian Krueger: 2013-Jul-24, minor optimization
; Christian Krueger: 2013-Jul-24, minor optimizations
;
; char* strcat (char* dest, const char* src);
;
@@ -15,8 +15,12 @@ _strcat:
jsr popax ; Get dest
sta tmp3 ; Remember for function return
tay
.if (.cpu .bitand ::CPU_ISET_65SC02)
stz ptr2
.else
lda #0
sta ptr2 ; access from page start, y contains low byte
.endif
stx ptr2+1
findEndOfDest:

View File

@@ -14,8 +14,12 @@ _strchr:
jsr popax ; get s
tay ; low byte of pointer to y
stx ptr1+1
.if (.cpu .bitand ::CPU_ISET_65SC02)
stz ptr1
.else
lda #0
sta ptr1 ; ptr access page wise
sta ptr1 ; access from page start, y contains low byte
.endif
Loop: lda (ptr1),y ; Get next char
beq EOS ; Jump on end of string

View File

@@ -1,54 +1,54 @@
;
; Ullrich von Bassewitz, 11.06.1998
; Christian Krueger: 05-Aug-2013, optimization
;
; size_t strcspn (const char* s1, const char* s2);
;
.export _strcspn
.import popax
.importzp ptr1, ptr2, tmp1, tmp2, tmp3
.import popax, _strlen
.importzp ptr1, ptr2, tmp1, tmp2
_strcspn:
sta ptr2 ; Save s2
stx ptr2+1
jsr popax ; Get s1
sta ptr1
stx ptr1+1
ldx #0 ; low counter byte
stx tmp1 ; high counter byte
ldy #$00
jsr _strlen ; get length in a/x and transfer s2 to ptr1
; Note: It does not make sense to
; have more than 255 test chars, so
; we don't support a high byte here! (ptr1+1 is
; also unchanged in strlen then (important!))
; -> the original implementation also
; ignored this case
L1: lda (ptr1),y ; get next char from s1
beq L6 ; jump if done
sta tmp2 ; save char
sta tmp1 ; tmp1 = strlen of test chars
jsr popax ; get and save s1
sta ptr2 ; to ptr2
stx ptr2+1
ldx #0 ; low counter byte
stx tmp2 ; high counter byte
loadChar:
ldy #0
lda (ptr2),y ; get next char from s1
beq leave ; handly byte of s1
advance:
inc ptr2 ; advance string position to test
bne check
inc ptr2+1
dey ; correct next iny (faster/shorter than bne...)
checkNext:
iny
bne L2
inc ptr1+1
L2: sty tmp3 ; save index into s1
check: cpy tmp1 ; compare with length of test character string
beq endOfTestChars
cmp (ptr1),y ; found matching char?
bne checkNext
ldy #0 ; get index into s2
L3: lda (ptr2),y ;
beq L4 ; jump if done
cmp tmp2
beq L6
iny
bne L3
; The character was not found in s2. Increment the counter and start over
L4: ldy tmp3 ; reload index
inx
bne L1
inc tmp1
bne L1
; The character was found, or we reached the end of s1. Return count of
; characters
L6: txa ; get low counter byte
ldx tmp1 ; get high counter byte
leave: txa ; restore position of finding
ldx tmp2 ; and return
rts
endOfTestChars:
inx
bne loadChar
inc tmp2
bne loadChar ; like bra...

View File

@@ -1,6 +1,10 @@
;
; Ullrich von Bassewitz, 31.05.1998
;
; Note: strspn & strcspn call internally this function and rely on
; the usage of only ptr1 here! Keep in mind when appling changes
; and check the other implementations too!
;
; int strlen (const char* s);
;
@@ -23,4 +27,3 @@ L1: lda (ptr1),y
L9: tya ; get low byte of counter, hi's all set
rts

View File

@@ -1,5 +1,6 @@
;
; Ullrich von Bassewitz, 31.05.1998
; Christian Krueger: 12-Aug-2013, minor optimizations
;
; char* strncat (char* dest, const char* src, size_t n);
;
@@ -9,64 +10,65 @@
.importzp ptr1, ptr2, ptr3, tmp1, tmp2
_strncat:
eor #$FF ; one's complement to count upwards
sta tmp1
txa
eor #$FF
sta tmp2
jsr popax ; get src
sta ptr1
stx ptr1+1
jsr popax ; get dest
sta ptr2
stx ptr2+1
sta ptr3 ; remember for function return
stx ptr3+1
ldy #0
eor #$FF ; one's complement to count upwards
sta tmp1
txa
eor #$FF
sta tmp2
jsr popax ; get src
sta ptr1
stx ptr1+1
jsr popax ; get dest
sta ptr3 ; remember for function return
stx ptr3+1
stx ptr2+1
tay ; low byte as offset in Y
.if (.cpu .bitand ::CPU_ISET_65SC02)
stz ptr2
.else
ldx #0
stx ptr2 ; destination on page boundary
.endif
; find end of dest
L1: lda (ptr2),y
beq L2
iny
bne L1
inc ptr2+1
bne L1
L1: lda (ptr2),y
beq L2
iny
bne L1
inc ptr2+1
bne L1
; end found, get offset in y into pointer
L2: tya
clc
adc ptr2
sta ptr2
bcc L3
inc ptr2+1
; end found, apply offset to dest ptr and reset y
L2: sty ptr2
; copy src. We've put the ones complement of the count into the counter, so
; we'll increment the counter on top of the loop
L3: ldy #0
ldx tmp1 ; low counter byte
L3: ldy #0
ldx tmp1 ; low counter byte
L4: inx
bne L5
inc tmp2
beq L6 ; jump if done
L5: lda (ptr1),y
sta (ptr2),y
beq L7
iny
bne L4
inc ptr1+1
inc ptr2+1
bne L4
L4: inx
bne L5
inc tmp2
beq L6 ; jump if done
L5: lda (ptr1),y
sta (ptr2),y
beq L7
iny
bne L4
inc ptr1+1
inc ptr2+1
bne L4
; done, set the trailing zero and return pointer to dest
L6: lda #0
sta (ptr2),y
L7: lda ptr3
ldx ptr3+1
rts
L6: lda #0
sta (ptr2),y
L7: lda ptr3
ldx ptr3+1
rts

View File

@@ -1,56 +1,54 @@
;
; Ullrich von Bassewitz, 11.06.1998
; Christian Krueger: 08-Aug-2013, optimization
;
; size_t strspn (const char* s1, const char* s2);
;
.export _strspn
.import popax
.importzp ptr1, ptr2, tmp1, tmp2, tmp3
.import popax, _strlen
.importzp ptr1, ptr2, tmp1, tmp2
_strspn:
sta ptr2 ; Save s2
stx ptr2+1
jsr popax ; get s1
sta ptr1
stx ptr1+1
ldx #0 ; low counter byte
stx tmp1 ; high counter byte
ldy #$00
jsr _strlen ; get length in a/x and transfer s2 to ptr1
; Note: It does not make sense to
; have more than 255 test chars, so
; we don't support a high byte here! (ptr1+1 is
; also unchanged in strlen then (important!))
; -> the original implementation also
; ignored this case
L1: lda (ptr1),y ; get next char from s1
beq L6 ; jump if done
sta tmp2 ; save char
sta tmp1 ; tmp1 = strlen of test chars
jsr popax ; get and save s1
sta ptr2 ; to ptr2
stx ptr2+1
ldx #0 ; low counter byte
stx tmp2 ; high counter byte
loadChar:
ldy #0
lda (ptr2),y ; get next char from s1
beq leave ; handly byte of s1
advance:
inc ptr2 ; advance string position to test
bne check
inc ptr2+1
dey ; correct next iny (faster/shorter than bne...)
checkNext:
iny
bne L2
inc ptr1+1
L2: sty tmp3 ; save index into s1
check: cpy tmp1 ; compare with length of test character string
beq leave
cmp (ptr1),y ; found matching char?
bne checkNext
ldy #0 ; get index into s2
L3: lda (ptr2),y ;
beq L6 ; jump if done
cmp tmp2
beq L4
iny
bne L3
; The character was found in s2. Increment the counter and start over
L4: ldy tmp3 ; reload index
foundTestChar:
inx
bne L1
inc tmp1
bne L1
bne loadChar
inc tmp2
bne loadChar ; like bra...
; The character was not found, or we reached the end of s1. Return count of
; characters
L6: txa ; get low counter byte
ldx tmp1 ; get high counter byte
leave: txa ; restore position of finding
ldx tmp2 ; and return
rts