Repository: yohanes/teensy-u2f Branch: master Commit: 2c6ac3780fcc Files: 17 Total size: 299.2 KB Directory structure: gitextract_0rtdnj14/ ├── LICENSE ├── LICENSE-micro-ecc.txt ├── README.md ├── u2f/ │ ├── Makefile.desktop │ ├── asm_arm.h │ ├── asm_arm_mult_square.h │ ├── curve-specific.h │ ├── desktop_test.cpp │ ├── platform-specific.h │ ├── sha256.c │ ├── sha256.h │ ├── types.h │ ├── u2f.ino │ ├── uECC.c │ ├── uECC.h │ └── uECC_vli.h └── usb_desc.h ================================================ FILE CONTENTS ================================================ ================================================ FILE: LICENSE ================================================ Copyright (c) 2015, Yohanes Nugroho All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
================================================ FILE: LICENSE-micro-ecc.txt ================================================ Copyright (c) 2014, Kenneth MacKay All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: README.md ================================================ teensy-u2f ========== U2F implementation for Teensy LC. This implementation is simple and works, but it is a bit insecure in the key handle generation part and the user presence check. The key handle is generated from the private key XOR-ed with a simple fixed key (note: key handle generation is outside of U2F implementation scope). 
An attacker who knows the fixed key (or is able to deduce it from multiple registration requests) can recover the private key and sign any authentication request, although in practice it is not that easy to perform this attack. Because there is no user button on the Teensy LC, I didn't actually implement any button handling logic for the 'user-presence' check. On the first request this implementation will assume the button is not pressed, and on the next request it will assume the user has pressed the button. When logging in to a website, you may need to unplug and replug the Teensy LC. For the ECDSA key generation and signing, this implementation uses the micro-ecc library. License ------- See LICENSE (this project) and LICENSE-micro-ecc.txt (micro-ecc library). ================================================ FILE: u2f/Makefile.desktop ================================================ all: desktop_test uECC.o : uECC.c gcc -Wall -c uECC.c desktop_test: desktop_test.cpp sha256.c u2f.ino uECC.o g++ -Wall -DIS_DESKTOP_TEST=1 desktop_test.cpp sha256.c uECC.o -o desktop_test ================================================ FILE: u2f/asm_arm.h ================================================ /* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. 
*/ #ifndef _UECC_ASM_ARM_H_ #define _UECC_ASM_ARM_H_ #include "asm_arm_mult_square.h" /* uECC_MIN_WORDS: size, in 32-bit words, of the smallest enabled curve. The checks below run from the largest curve to the smallest, so the last matching #if wins. */ #if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1) #define uECC_MIN_WORDS 8 #endif #if uECC_SUPPORTS_secp224r1 #undef uECC_MIN_WORDS #define uECC_MIN_WORDS 7 #endif #if uECC_SUPPORTS_secp192r1 #undef uECC_MIN_WORDS #define uECC_MIN_WORDS 6 #endif #if uECC_SUPPORTS_secp160r1 #undef uECC_MIN_WORDS #define uECC_MIN_WORDS 5 #endif /* Inline-asm operand constraints. REG_RW and REG_WRITE select the low-register constraint (l) only on Thumb-1 builds; the _LO variants select it on Thumb-2 builds as well. Every other build uses the general-register constraint (r). */ #if (uECC_PLATFORM == uECC_arm_thumb) #define REG_RW "+l" #define REG_WRITE "=l" #else #define REG_RW "+r" #define REG_WRITE "=r" #endif #if (uECC_PLATFORM == uECC_arm_thumb || uECC_PLATFORM == uECC_arm_thumb2) #define REG_RW_LO "+l" #define REG_WRITE_LO "=l" #else #define REG_RW_LO "+r" #define REG_WRITE_LO "=r" #endif /* RESUME_SYNTAX is appended to the asm blocks in this file: it switches the assembler back to divided syntax, except on Thumb-2 builds where it expands to nothing. */ #if (uECC_PLATFORM == uECC_arm_thumb2) #define RESUME_SYNTAX #else #define RESUME_SYNTAX ".syntax divided \n\t" #endif #if (uECC_OPTIMIZATION_LEVEL >= 2) /* Unrolled addition: result = left + right over num_words words, processed from the lowest address upwards with the carry propagating to the next word. Returns the carry out of the last word (0 or 1). */ uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *right, wordcount_t num_words) { /* When more than one curve size is compiled in, the asm below is unrolled for uECC_MAX_WORDS and jump is the byte distance used to skip the (uECC_MAX_WORDS - num_words) surplus four-instruction groups that follow label 1. The trailing factors 2 and 4 are presumably the instruction size in bytes, and the +1 presumably the Thumb state bit expected by bx -- NOTE(review): confirm. */ #if (uECC_MAX_WORDS != uECC_MIN_WORDS) #if (uECC_PLATFORM == uECC_arm_thumb) || (uECC_PLATFORM == uECC_arm_thumb2) uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 2 + 1; #else /* ARM */ uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 4; #endif #endif uint32_t carry; uint32_t left_word; uint32_t right_word; __asm__ volatile ( ".syntax unified \n\t" "movs %[carry], #0 \n\t" #if (uECC_MAX_WORDS != uECC_MIN_WORDS) /* jump = address of label 1 + skip distance */ "adr %[left], 1f \n\t" ".align 4 \n\t" "adds %[jump], %[left] \n\t" #endif /* First word: plain add, which also establishes the carry flag for the adcs chain. */ "ldmia %[lptr]!, {%[left]} \n\t" "ldmia %[rptr]!, {%[right]} \n\t" "adds %[left], %[right] \n\t" "stmia %[dptr]!, {%[left]} \n\t" #if (uECC_MAX_WORDS != uECC_MIN_WORDS) "bx %[jump] \n\t" #endif /* REPEAT and DEC are defined outside this view; presumably this expands the add-with-carry group uECC_MAX_WORDS - 1 times -- TODO confirm. */ "1: \n\t" REPEAT(DEC(uECC_MAX_WORDS), "ldmia %[lptr]!, {%[left]} \n\t" "ldmia %[rptr]!, {%[right]} \n\t" "adcs %[left], %[right] \n\t" "stmia %[dptr]!, {%[left]} \n\t") /* carry was zeroed above, so this leaves carry = C flag */ "adcs %[carry], %[carry] \n\t" RESUME_SYNTAX : [dptr] REG_RW_LO (result), [lptr] REG_RW_LO (left), [rptr] 
REG_RW_LO (right), #if (uECC_MAX_WORDS != uECC_MIN_WORDS) [jump] REG_RW_LO (jump), #endif [carry] REG_WRITE_LO (carry), [left] REG_WRITE_LO (left_word), [right] REG_WRITE_LO (right_word) : : "cc", "memory" ); return carry; } #define asm_add 1 /* Unrolled subtraction: result = left - right over num_words words, processed from the lowest address upwards. Returns 1 when the subtraction borrowed out of the last word and 0 otherwise (the inverse of the final carry flag). The jump computation mirrors uECC_vli_add: it skips the surplus four-instruction groups after label 1 when num_words is smaller than uECC_MAX_WORDS. */ uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *right, wordcount_t num_words) { #if (uECC_MAX_WORDS != uECC_MIN_WORDS) #if (uECC_PLATFORM == uECC_arm_thumb) || (uECC_PLATFORM == uECC_arm_thumb2) uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 2 + 1; #else /* ARM */ uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 4; #endif #endif uint32_t carry; uint32_t left_word; uint32_t right_word; __asm__ volatile ( ".syntax unified \n\t" "movs %[carry], #0 \n\t" #if (uECC_MAX_WORDS != uECC_MIN_WORDS) /* jump = address of label 1 + skip distance */ "adr %[left], 1f \n\t" ".align 4 \n\t" "adds %[jump], %[left] \n\t" #endif /* First word: plain subtract, which establishes the carry flag for the sbcs chain. */ "ldmia %[lptr]!, {%[left]} \n\t" "ldmia %[rptr]!, {%[right]} \n\t" "subs %[left], %[right] \n\t" "stmia %[dptr]!, {%[left]} \n\t" #if (uECC_MAX_WORDS != uECC_MIN_WORDS) "bx %[jump] \n\t" #endif "1: \n\t" REPEAT(DEC(uECC_MAX_WORDS), "ldmia %[lptr]!, {%[left]} \n\t" "ldmia %[rptr]!, {%[right]} \n\t" "sbcs %[left], %[right] \n\t" "stmia %[dptr]!, {%[left]} \n\t") /* carry was zeroed above, so this leaves carry = C flag */ "adcs %[carry], %[carry] \n\t" RESUME_SYNTAX : [dptr] REG_RW_LO (result), [lptr] REG_RW_LO (left), [rptr] REG_RW_LO (right), #if (uECC_MAX_WORDS != uECC_MIN_WORDS) [jump] REG_RW_LO (jump), #endif [carry] REG_WRITE_LO (carry), [left] REG_WRITE_LO (left_word), [right] REG_WRITE_LO (right_word) : : "cc", "memory" ); return !carry; /* Note that on ARM, carry flag set means "no borrow" when subtracting (for some reason...) 
*/ } #define asm_sub 1 #endif /* (uECC_OPTIMIZATION_LEVEL >= 2) */ #if (uECC_OPTIMIZATION_LEVEL >= 3) /* FAST_MULT_ASM_5_TO_6: asm fragment that extends a 5-word product to a 6-word product. It branches to label 1 when r3 (num_words) equals 5. Otherwise it loads one word from r1 and r2 into r4 and r5 (the high words), rewinds r0, r1 and r2 by 20 bytes, and for each of the five lower words adds left_high * right[i] + right_high * left[i] into the existing result word, carrying through the rotating accumulator r9, r10, r14. Finally it stores left_high * right_high plus the remaining accumulator as two more result words. On entry r1 and r2 presumably point at word 5 of each operand and r0 just past the 10-word partial product -- NOTE(review): confirm against FAST_MULT_ASM_5 in asm_arm_mult_square.h. */ #define FAST_MULT_ASM_5_TO_6 \ "cmp r3, #5 \n\t" \ "beq 1f \n\t" \ \ /* r4 = left high, r5 = right high */ \ "ldr r4, [r1] \n\t" \ "ldr r5, [r2] \n\t" \ \ "sub r0, #20 \n\t" \ "sub r1, #20 \n\t" \ "sub r2, #20 \n\t" \ \ "ldr r6, [r0] \n\t" \ "ldr r7, [r1], #4 \n\t" \ "ldr r8, [r2], #4 \n\t" \ "mov r14, #0 \n\t" \ "umull r9, r10, r4, r8 \n\t" \ "umull r11, r12, r5, r7 \n\t" \ "adds r9, r9, r6 \n\t" \ "adc r10, r10, #0 \n\t" \ "adds r9, r9, r11 \n\t" \ "adcs r10, r10, r12 \n\t" \ "adc r14, r14, #0 \n\t" \ "str r9, [r0], #4 \n\t" \ \ "ldr r6, [r0] \n\t" \ "adds r10, r10, r6 \n\t" \ "adcs r14, r14, #0 \n\t" \ "ldr r7, [r1], #4 \n\t" \ "ldr r8, [r2], #4 \n\t" \ "mov r9, #0 \n\t" \ "umull r11, r12, r4, r8 \n\t" \ "adds r10, r10, r11 \n\t" \ "adcs r14, r14, r12 \n\t" \ "adc r9, r9, #0 \n\t" \ "umull r11, r12, r5, r7 \n\t" \ "adds r10, r10, r11 \n\t" \ "adcs r14, r14, r12 \n\t" \ "adc r9, r9, #0 \n\t" \ "str r10, [r0], #4 \n\t" \ \ "ldr r6, [r0] \n\t" \ "adds r14, r14, r6 \n\t" \ "adcs r9, r9, #0 \n\t" \ "ldr r7, [r1], #4 \n\t" \ "ldr r8, [r2], #4 \n\t" \ "mov r10, #0 \n\t" \ "umull r11, r12, r4, r8 \n\t" \ "adds r14, r14, r11 \n\t" \ "adcs r9, r9, r12 \n\t" \ "adc r10, r10, #0 \n\t" \ "umull r11, r12, r5, r7 \n\t" \ "adds r14, r14, r11 \n\t" \ "adcs r9, r9, r12 \n\t" \ "adc r10, r10, #0 \n\t" \ "str r14, [r0], #4 \n\t" \ \ "ldr r6, [r0] \n\t" \ "adds r9, r9, r6 \n\t" \ "adcs r10, r10, #0 \n\t" \ "ldr r7, [r1], #4 \n\t" \ "ldr r8, [r2], #4 \n\t" \ "mov r14, #0 \n\t" \ "umull r11, r12, r4, r8 \n\t" \ "adds r9, r9, r11 \n\t" \ "adcs r10, r10, r12 \n\t" \ "adc r14, r14, #0 \n\t" \ "umull r11, r12, r5, r7 \n\t" \ "adds r9, r9, r11 \n\t" \ "adcs r10, r10, r12 \n\t" \ "adc r14, r14, #0 \n\t" \ "str r9, [r0], #4 \n\t" \ \ "ldr r6, [r0] \n\t" \ "adds r10, r10, r6 \n\t" \ "adcs r14, r14, #0 \n\t" \ /* skip past already-loaded (r4, r5) */ \ "ldr r7, [r1], #8 \n\t" \ "ldr r8, [r2], 
#8 \n\t" \ "mov r9, #0 \n\t" \ "umull r11, r12, r4, r8 \n\t" \ "adds r10, r10, r11 \n\t" \ "adcs r14, r14, r12 \n\t" \ "adc r9, r9, #0 \n\t" \ "umull r11, r12, r5, r7 \n\t" \ "adds r10, r10, r11 \n\t" \ "adcs r14, r14, r12 \n\t" \ "adc r9, r9, #0 \n\t" \ "str r10, [r0], #4 \n\t" \ \ "umull r11, r12, r4, r5 \n\t" \ "adds r11, r11, r14 \n\t" \ "adc r12, r12, r9 \n\t" \ "stmia r0!, {r11, r12} \n\t" /* FAST_MULT_ASM_6_TO_7: asm fragment that extends a 6-word product to a 7-word product. It branches to label 1 when r3 (num_words) equals 6; otherwise it follows the same steps as FAST_MULT_ASM_5_TO_6 with a 24-byte rewind and six cross-product columns. */ #define FAST_MULT_ASM_6_TO_7 \ "cmp r3, #6 \n\t" \ "beq 1f \n\t" \ \ /* r4 = left high, r5 = right high */ \ "ldr r4, [r1] \n\t" \ "ldr r5, [r2] \n\t" \ \ "sub r0, #24 \n\t" \ "sub r1, #24 \n\t" \ "sub r2, #24 \n\t" \ \ "ldr r6, [r0] \n\t" \ "ldr r7, [r1], #4 \n\t" \ "ldr r8, [r2], #4 \n\t" \ "mov r14, #0 \n\t" \ "umull r9, r10, r4, r8 \n\t" \ "umull r11, r12, r5, r7 \n\t" \ "adds r9, r9, r6 \n\t" \ "adc r10, r10, #0 \n\t" \ "adds r9, r9, r11 \n\t" \ "adcs r10, r10, r12 \n\t" \ "adc r14, r14, #0 \n\t" \ "str r9, [r0], #4 \n\t" \ \ "ldr r6, [r0] \n\t" \ "adds r10, r10, r6 \n\t" \ "adcs r14, r14, #0 \n\t" \ "ldr r7, [r1], #4 \n\t" \ "ldr r8, [r2], #4 \n\t" \ "mov r9, #0 \n\t" \ "umull r11, r12, r4, r8 \n\t" \ "adds r10, r10, r11 \n\t" \ "adcs r14, r14, r12 \n\t" \ "adc r9, r9, #0 \n\t" \ "umull r11, r12, r5, r7 \n\t" \ "adds r10, r10, r11 \n\t" \ "adcs r14, r14, r12 \n\t" \ "adc r9, r9, #0 \n\t" \ "str r10, [r0], #4 \n\t" \ \ "ldr r6, [r0] \n\t" \ "adds r14, r14, r6 \n\t" \ "adcs r9, r9, #0 \n\t" \ "ldr r7, [r1], #4 \n\t" \ "ldr r8, [r2], #4 \n\t" \ "mov r10, #0 \n\t" \ "umull r11, r12, r4, r8 \n\t" \ "adds r14, r14, r11 \n\t" \ "adcs r9, r9, r12 \n\t" \ "adc r10, r10, #0 \n\t" \ "umull r11, r12, r5, r7 \n\t" \ "adds r14, r14, r11 \n\t" \ "adcs r9, r9, r12 \n\t" \ "adc r10, r10, #0 \n\t" \ "str r14, [r0], #4 \n\t" \ \ "ldr r6, [r0] \n\t" \ "adds r9, r9, r6 \n\t" \ "adcs r10, r10, #0 \n\t" \ "ldr r7, [r1], #4 \n\t" \ "ldr r8, [r2], #4 \n\t" \ "mov r14, #0 \n\t" \ "umull r11, r12, r4, r8 \n\t" \ "adds r9, r9, r11 \n\t" \ "adcs r10, r10, r12 \n\t" \ "adc r14, r14, #0 \n\t" \ 
"umull r11, r12, r5, r7 \n\t" \ "adds r9, r9, r11 \n\t" \ "adcs r10, r10, r12 \n\t" \ "adc r14, r14, #0 \n\t" \ "str r9, [r0], #4 \n\t" \ \ "ldr r6, [r0] \n\t" \ "adds r10, r10, r6 \n\t" \ "adcs r14, r14, #0 \n\t" \ "ldr r7, [r1], #4 \n\t" \ "ldr r8, [r2], #4 \n\t" \ "mov r9, #0 \n\t" \ "umull r11, r12, r4, r8 \n\t" \ "adds r10, r10, r11 \n\t" \ "adcs r14, r14, r12 \n\t" \ "adc r9, r9, #0 \n\t" \ "umull r11, r12, r5, r7 \n\t" \ "adds r10, r10, r11 \n\t" \ "adcs r14, r14, r12 \n\t" \ "adc r9, r9, #0 \n\t" \ "str r10, [r0], #4 \n\t" \ \ "ldr r6, [r0] \n\t" \ "adds r14, r14, r6 \n\t" \ "adcs r9, r9, #0 \n\t" \ /* skip past already-loaded (r4, r5) */ \ "ldr r7, [r1], #8 \n\t" \ "ldr r8, [r2], #8 \n\t" \ "mov r10, #0 \n\t" \ "umull r11, r12, r4, r8 \n\t" \ "adds r14, r14, r11 \n\t" \ "adcs r9, r9, r12 \n\t" \ "adc r10, r10, #0 \n\t" \ "umull r11, r12, r5, r7 \n\t" \ "adds r14, r14, r11 \n\t" \ "adcs r9, r9, r12 \n\t" \ "adc r10, r10, #0 \n\t" \ "str r14, [r0], #4 \n\t" \ \ "umull r11, r12, r4, r5 \n\t" \ "adds r11, r11, r9 \n\t" \ "adc r12, r12, r10 \n\t" \ "stmia r0!, {r11, r12} \n\t" /* FAST_MULT_ASM_7_TO_8: asm fragment that extends a 7-word product to an 8-word product. It branches to label 1 when r3 (num_words) equals 7; otherwise it follows the same steps as the two fragments above with a 28-byte rewind and seven cross-product columns. */ #define FAST_MULT_ASM_7_TO_8 \ "cmp r3, #7 \n\t" \ "beq 1f \n\t" \ \ /* r4 = left high, r5 = right high */ \ "ldr r4, [r1] \n\t" \ "ldr r5, [r2] \n\t" \ \ "sub r0, #28 \n\t" \ "sub r1, #28 \n\t" \ "sub r2, #28 \n\t" \ \ "ldr r6, [r0] \n\t" \ "ldr r7, [r1], #4 \n\t" \ "ldr r8, [r2], #4 \n\t" \ "mov r14, #0 \n\t" \ "umull r9, r10, r4, r8 \n\t" \ "umull r11, r12, r5, r7 \n\t" \ "adds r9, r9, r6 \n\t" \ "adc r10, r10, #0 \n\t" \ "adds r9, r9, r11 \n\t" \ "adcs r10, r10, r12 \n\t" \ "adc r14, r14, #0 \n\t" \ "str r9, [r0], #4 \n\t" \ \ "ldr r6, [r0] \n\t" \ "adds r10, r10, r6 \n\t" \ "adcs r14, r14, #0 \n\t" \ "ldr r7, [r1], #4 \n\t" \ "ldr r8, [r2], #4 \n\t" \ "mov r9, #0 \n\t" \ "umull r11, r12, r4, r8 \n\t" \ "adds r10, r10, r11 \n\t" \ "adcs r14, r14, r12 \n\t" \ "adc r9, r9, #0 \n\t" \ "umull r11, r12, r5, r7 \n\t" \ "adds r10, r10, r11 \n\t" \ "adcs r14, r14, r12 \n\t" \ "adc r9, r9, #0 
\n\t" \ "str r10, [r0], #4 \n\t" \ \ "ldr r6, [r0] \n\t" \ "adds r14, r14, r6 \n\t" \ "adcs r9, r9, #0 \n\t" \ "ldr r7, [r1], #4 \n\t" \ "ldr r8, [r2], #4 \n\t" \ "mov r10, #0 \n\t" \ "umull r11, r12, r4, r8 \n\t" \ "adds r14, r14, r11 \n\t" \ "adcs r9, r9, r12 \n\t" \ "adc r10, r10, #0 \n\t" \ "umull r11, r12, r5, r7 \n\t" \ "adds r14, r14, r11 \n\t" \ "adcs r9, r9, r12 \n\t" \ "adc r10, r10, #0 \n\t" \ "str r14, [r0], #4 \n\t" \ \ "ldr r6, [r0] \n\t" \ "adds r9, r9, r6 \n\t" \ "adcs r10, r10, #0 \n\t" \ "ldr r7, [r1], #4 \n\t" \ "ldr r8, [r2], #4 \n\t" \ "mov r14, #0 \n\t" \ "umull r11, r12, r4, r8 \n\t" \ "adds r9, r9, r11 \n\t" \ "adcs r10, r10, r12 \n\t" \ "adc r14, r14, #0 \n\t" \ "umull r11, r12, r5, r7 \n\t" \ "adds r9, r9, r11 \n\t" \ "adcs r10, r10, r12 \n\t" \ "adc r14, r14, #0 \n\t" \ "str r9, [r0], #4 \n\t" \ \ "ldr r6, [r0] \n\t" \ "adds r10, r10, r6 \n\t" \ "adcs r14, r14, #0 \n\t" \ "ldr r7, [r1], #4 \n\t" \ "ldr r8, [r2], #4 \n\t" \ "mov r9, #0 \n\t" \ "umull r11, r12, r4, r8 \n\t" \ "adds r10, r10, r11 \n\t" \ "adcs r14, r14, r12 \n\t" \ "adc r9, r9, #0 \n\t" \ "umull r11, r12, r5, r7 \n\t" \ "adds r10, r10, r11 \n\t" \ "adcs r14, r14, r12 \n\t" \ "adc r9, r9, #0 \n\t" \ "str r10, [r0], #4 \n\t" \ \ "ldr r6, [r0] \n\t" \ "adds r14, r14, r6 \n\t" \ "adcs r9, r9, #0 \n\t" \ "ldr r7, [r1], #4 \n\t" \ "ldr r8, [r2], #4 \n\t" \ "mov r10, #0 \n\t" \ "umull r11, r12, r4, r8 \n\t" \ "adds r14, r14, r11 \n\t" \ "adcs r9, r9, r12 \n\t" \ "adc r10, r10, #0 \n\t" \ "umull r11, r12, r5, r7 \n\t" \ "adds r14, r14, r11 \n\t" \ "adcs r9, r9, r12 \n\t" \ "adc r10, r10, #0 \n\t" \ "str r14, [r0], #4 \n\t" \ \ "ldr r6, [r0] \n\t" \ "adds r9, r9, r6 \n\t" \ "adcs r10, r10, #0 \n\t" \ /* skip past already-loaded (r4, r5) */ \ "ldr r7, [r1], #8 \n\t" \ "ldr r8, [r2], #8 \n\t" \ "mov r14, #0 \n\t" \ "umull r11, r12, r4, r8 \n\t" \ "adds r9, r9, r11 \n\t" \ "adcs r10, r10, r12 \n\t" \ "adc r14, r14, #0 \n\t" \ "umull r11, r12, r5, r7 \n\t" \ "adds r9, r9, r11 \n\t" \ 
"adcs r10, r10, r12 \n\t" \ "adc r14, r14, #0 \n\t" \ "str r9, [r0], #4 \n\t" \ \ "umull r11, r12, r4, r5 \n\t" \ "adds r11, r11, r10 \n\t" \ "adc r12, r12, r14 \n\t" \ "stmia r0!, {r11, r12} \n\t" #if (uECC_PLATFORM != uECC_arm_thumb) /* Unrolled multiplication (optimization level >= 3, not built for Thumb-1): result = left * right for num_words-word operands. The arguments are pinned to r0-r3 because the asm fragments address those registers by name. FAST_MULT_ASM_<uECC_MIN_WORDS> comes from outside this view and computes the product for the smallest enabled curve; r3 is pushed before it and popped after it, presumably because that fragment overwrites r3 -- TODO confirm. Each following *_TO_* fragment then either branches to label 1, when num_words has been reached, or extends the product by one more operand word. */ uECC_VLI_API void uECC_vli_mult(uint32_t *result, const uint32_t *left, const uint32_t *right, wordcount_t num_words) { register uint32_t *r0 __asm__("r0") = result; register const uint32_t *r1 __asm__("r1") = left; register const uint32_t *r2 __asm__("r2") = right; register uint32_t r3 __asm__("r3") = num_words; __asm__ volatile ( ".syntax unified \n\t" "push {r3} \n\t" #if (uECC_MIN_WORDS == 5) FAST_MULT_ASM_5 "pop {r3} \n\t" #if (uECC_MAX_WORDS > 5) FAST_MULT_ASM_5_TO_6 #endif #if (uECC_MAX_WORDS > 6) FAST_MULT_ASM_6_TO_7 #endif #if (uECC_MAX_WORDS > 7) FAST_MULT_ASM_7_TO_8 #endif #elif (uECC_MIN_WORDS == 6) FAST_MULT_ASM_6 "pop {r3} \n\t" #if (uECC_MAX_WORDS > 6) FAST_MULT_ASM_6_TO_7 #endif #if (uECC_MAX_WORDS > 7) FAST_MULT_ASM_7_TO_8 #endif #elif (uECC_MIN_WORDS == 7) FAST_MULT_ASM_7 "pop {r3} \n\t" #if (uECC_MAX_WORDS > 7) FAST_MULT_ASM_7_TO_8 #endif #elif (uECC_MIN_WORDS == 8) FAST_MULT_ASM_8 "pop {r3} \n\t" #endif /* common exit targeted by the beq 1f in every *_TO_* fragment */ "1: \n\t" RESUME_SYNTAX : "+r" (r0), "+r" (r1), "+r" (r2) : "r" (r3) : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" ); } #define asm_mult 1 #if uECC_SQUARE_FUNC /* FAST_SQUARE_ASM_5_TO_6: asm fragment that extends a 5-word square to a 6-word square. It branches to label 1 when r2 (num_words) equals 5. Otherwise it loads one word from r1 into r3 (the high word), rewinds r0 and r1 by 20 bytes, multiplies the high word by each of the five lower words, doubles that partial sum, adds it into the existing result words, and finally adds high * high as two more result words. */ #define FAST_SQUARE_ASM_5_TO_6 \ "cmp r2, #5 \n\t" \ "beq 1f \n\t" \ \ /* r3 = high */ \ "ldr r3, [r1] \n\t" \ \ "sub r0, #20 \n\t" \ "sub r1, #20 \n\t" \ \ /* Do off-center multiplication */ \ "ldr r14, [r1], #4 \n\t" \ "umull r4, r5, r3, r14 \n\t" \ "ldr r14, [r1], #4 \n\t" \ "umull r7, r6, r3, r14 \n\t" \ "adds r5, r5, r7 \n\t" \ "ldr r14, [r1], #4 \n\t" \ "umull r8, r7, r3, r14 \n\t" \ "adcs r6, r6, r8 \n\t" \ "ldr r14, [r1], #4 \n\t" \ "umull r9, r8, r3, r14 \n\t" \ "adcs r7, r7, r9 \n\t" \ /* Skip already-loaded r3 */ \ "ldr r14, [r1], #8 \n\t" \ "umull r10, r9, r3, r14 \n\t" \ "adcs r8, r8, r10 \n\t" \ "adcs r9, r9, 
#0 \n\t" \ \ /* Multiply by 2 */ \ "mov r10, #0 \n\t" \ "adds r4, r4, r4 \n\t" \ "adcs r5, r5, r5 \n\t" \ "adcs r6, r6, r6 \n\t" \ "adcs r7, r7, r7 \n\t" \ "adcs r8, r8, r8 \n\t" \ "adcs r9, r9, r9 \n\t" \ "adcs r10, r10, #0 \n\t" \ \ /* Add into previous */ \ "ldr r14, [r0] \n\t" \ "adds r4, r4, r14 \n\t" \ "str r4, [r0], #4 \n\t" \ "ldr r14, [r0] \n\t" \ "adcs r5, r5, r14 \n\t" \ "str r5, [r0], #4 \n\t" \ "ldr r14, [r0] \n\t" \ "adcs r6, r6, r14 \n\t" \ "str r6, [r0], #4 \n\t" \ "ldr r14, [r0] \n\t" \ "adcs r7, r7, r14 \n\t" \ "str r7, [r0], #4 \n\t" \ "ldr r14, [r0] \n\t" \ "adcs r8, r8, r14 \n\t" \ "str r8, [r0], #4 \n\t" \ "adcs r9, r9, #0 \n\t" \ "adcs r10, r10, #0 \n\t" \ \ /* Perform center multiplication */ \ "umull r4, r5, r3, r3 \n\t" \ "adds r4, r4, r9 \n\t" \ "adc r5, r5, r10 \n\t" \ "stmia r0!, {r4, r5} \n\t" /* FAST_SQUARE_ASM_6_TO_7: asm fragment that extends a 6-word square to a 7-word square. It branches to label 1 when r2 (num_words) equals 6; otherwise it follows the same steps as FAST_SQUARE_ASM_5_TO_6 with a 24-byte rewind and six off-center products. */ #define FAST_SQUARE_ASM_6_TO_7 \ "cmp r2, #6 \n\t" \ "beq 1f \n\t" \ \ /* r3 = high */ \ "ldr r3, [r1] \n\t" \ \ "sub r0, #24 \n\t" \ "sub r1, #24 \n\t" \ \ /* Do off-center multiplication */ \ "ldr r14, [r1], #4 \n\t" \ "umull r4, r5, r3, r14 \n\t" \ "ldr r14, [r1], #4 \n\t" \ "umull r7, r6, r3, r14 \n\t" \ "adds r5, r5, r7 \n\t" \ "ldr r14, [r1], #4 \n\t" \ "umull r8, r7, r3, r14 \n\t" \ "adcs r6, r6, r8 \n\t" \ "ldr r14, [r1], #4 \n\t" \ "umull r9, r8, r3, r14 \n\t" \ "adcs r7, r7, r9 \n\t" \ "ldr r14, [r1], #4 \n\t" \ "umull r10, r9, r3, r14 \n\t" \ "adcs r8, r8, r10 \n\t" \ /* Skip already-loaded r3 */ \ "ldr r14, [r1], #8 \n\t" \ "umull r11, r10, r3, r14 \n\t" \ "adcs r9, r9, r11 \n\t" \ "adcs r10, r10, #0 \n\t" \ \ /* Multiply by 2 */ \ "mov r11, #0 \n\t" \ "adds r4, r4, r4 \n\t" \ "adcs r5, r5, r5 \n\t" \ "adcs r6, r6, r6 \n\t" \ "adcs r7, r7, r7 \n\t" \ "adcs r8, r8, r8 \n\t" \ "adcs r9, r9, r9 \n\t" \ "adcs r10, r10, r10 \n\t" \ "adcs r11, r11, #0 \n\t" \ \ /* Add into previous */ \ "ldr r14, [r0] \n\t" \ "adds r4, r4, r14 \n\t" \ "str r4, [r0], #4 \n\t" \ "ldr r14, [r0] \n\t" \ "adcs r5, r5, r14 \n\t" \ "str r5, [r0], #4 \n\t" \ "ldr 
r14, [r0] \n\t" \ "adcs r6, r6, r14 \n\t" \ "str r6, [r0], #4 \n\t" \ "ldr r14, [r0] \n\t" \ "adcs r7, r7, r14 \n\t" \ "str r7, [r0], #4 \n\t" \ "ldr r14, [r0] \n\t" \ "adcs r8, r8, r14 \n\t" \ "str r8, [r0], #4 \n\t" \ "ldr r14, [r0] \n\t" \ "adcs r9, r9, r14 \n\t" \ "str r9, [r0], #4 \n\t" \ "adcs r10, r10, #0 \n\t" \ "adcs r11, r11, #0 \n\t" \ \ /* Perform center multiplication */ \ "umull r4, r5, r3, r3 \n\t" \ "adds r4, r4, r10 \n\t" \ "adc r5, r5, r11 \n\t" \ "stmia r0!, {r4, r5} \n\t" /* FAST_SQUARE_ASM_7_TO_8: asm fragment that extends a 7-word square to an 8-word square. It branches to label 1 when r2 (num_words) equals 7; otherwise it follows the same steps as the two fragments above with a 28-byte rewind and seven off-center products. */ #define FAST_SQUARE_ASM_7_TO_8 \ "cmp r2, #7 \n\t" \ "beq 1f \n\t" \ \ /* r3 = high */ \ "ldr r3, [r1] \n\t" \ \ "sub r0, #28 \n\t" \ "sub r1, #28 \n\t" \ \ /* Do off-center multiplication */ \ "ldr r14, [r1], #4 \n\t" \ "umull r4, r5, r3, r14 \n\t" \ "ldr r14, [r1], #4 \n\t" \ "umull r7, r6, r3, r14 \n\t" \ "adds r5, r5, r7 \n\t" \ "ldr r14, [r1], #4 \n\t" \ "umull r8, r7, r3, r14 \n\t" \ "adcs r6, r6, r8 \n\t" \ "ldr r14, [r1], #4 \n\t" \ "umull r9, r8, r3, r14 \n\t" \ "adcs r7, r7, r9 \n\t" \ "ldr r14, [r1], #4 \n\t" \ "umull r10, r9, r3, r14 \n\t" \ "adcs r8, r8, r10 \n\t" \ "ldr r14, [r1], #4 \n\t" \ "umull r11, r10, r3, r14 \n\t" \ "adcs r9, r9, r11 \n\t" \ /* Skip already-loaded r3 */ \ "ldr r14, [r1], #8 \n\t" \ "umull r12, r11, r3, r14 \n\t" \ "adcs r10, r10, r12 \n\t" \ "adcs r11, r11, #0 \n\t" \ \ /* Multiply by 2 */ \ "mov r12, #0 \n\t" \ "adds r4, r4, r4 \n\t" \ "adcs r5, r5, r5 \n\t" \ "adcs r6, r6, r6 \n\t" \ "adcs r7, r7, r7 \n\t" \ "adcs r8, r8, r8 \n\t" \ "adcs r9, r9, r9 \n\t" \ "adcs r10, r10, r10 \n\t" \ "adcs r11, r11, r11 \n\t" \ "adcs r12, r12, #0 \n\t" \ \ /* Add into previous */ \ "ldr r14, [r0] \n\t" \ "adds r4, r4, r14 \n\t" \ "str r4, [r0], #4 \n\t" \ "ldr r14, [r0] \n\t" \ "adcs r5, r5, r14 \n\t" \ "str r5, [r0], #4 \n\t" \ "ldr r14, [r0] \n\t" \ "adcs r6, r6, r14 \n\t" \ "str r6, [r0], #4 \n\t" \ "ldr r14, [r0] \n\t" \ "adcs r7, r7, r14 \n\t" \ "str r7, [r0], #4 \n\t" \ "ldr r14, [r0] \n\t" \ "adcs r8, r8, r14 \n\t" \ "str r8, [r0], #4 \n\t" \ 
"ldr r14, [r0] \n\t" \ "adcs r9, r9, r14 \n\t" \ "str r9, [r0], #4 \n\t" \ "ldr r14, [r0] \n\t" \ "adcs r10, r10, r14 \n\t" \ "str r10, [r0], #4 \n\t" \ "adcs r11, r11, #0 \n\t" \ "adcs r12, r12, #0 \n\t" \ \ /* Perform center multiplication */ \ "umull r4, r5, r3, r3 \n\t" \ "adds r4, r4, r11 \n\t" \ "adc r5, r5, r12 \n\t" \ "stmia r0!, {r4, r5} \n\t" /* Unrolled squaring (optimization level >= 3, not built for Thumb-1): result = left * left for a num_words-word operand. The arguments are pinned to r0-r2 because the asm fragments address those registers by name. FAST_SQUARE_ASM_<uECC_MIN_WORDS> comes from outside this view; r1 and r2 are pushed before it and popped after it, and when larger sizes are enabled r1 is then advanced by uECC_MIN_WORDS * 4 bytes so that the first *_TO_* fragment finds the next operand word at r1. Each *_TO_* fragment either branches to label 1, when num_words has been reached, or extends the square by one more operand word. */ uECC_VLI_API void uECC_vli_square(uECC_word_t *result, const uECC_word_t *left, wordcount_t num_words) { register uint32_t *r0 __asm__("r0") = result; register const uint32_t *r1 __asm__("r1") = left; register uint32_t r2 __asm__("r2") = num_words; __asm__ volatile ( ".syntax unified \n\t" "push {r1, r2} \n\t" #if (uECC_MIN_WORDS == 5) FAST_SQUARE_ASM_5 "pop {r1, r2} \n\t" #if (uECC_MAX_WORDS > 5) "add r1, #20 \n\t" FAST_SQUARE_ASM_5_TO_6 #endif #if (uECC_MAX_WORDS > 6) FAST_SQUARE_ASM_6_TO_7 #endif #if (uECC_MAX_WORDS > 7) FAST_SQUARE_ASM_7_TO_8 #endif #elif (uECC_MIN_WORDS == 6) FAST_SQUARE_ASM_6 "pop {r1, r2} \n\t" #if (uECC_MAX_WORDS > 6) "add r1, #24 \n\t" FAST_SQUARE_ASM_6_TO_7 #endif #if (uECC_MAX_WORDS > 7) FAST_SQUARE_ASM_7_TO_8 #endif #elif (uECC_MIN_WORDS == 7) FAST_SQUARE_ASM_7 "pop {r1, r2} \n\t" #if (uECC_MAX_WORDS > 7) "add r1, #28 \n\t" FAST_SQUARE_ASM_7_TO_8 #endif #elif (uECC_MIN_WORDS == 8) FAST_SQUARE_ASM_8 "pop {r1, r2} \n\t" #endif /* common exit targeted by the beq 1f in every *_TO_* fragment */ "1: \n\t" RESUME_SYNTAX : "+r" (r0), "+r" (r1) : "r" (r2) : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" ); } #define asm_square 1 #endif /* uECC_SQUARE_FUNC */ #endif /* uECC_PLATFORM != uECC_arm_thumb */ #endif /* (uECC_OPTIMIZATION_LEVEL >= 3) */ /* ---- "Small" implementations ---- */ #if !asm_add /* Looped addition, compiled only when the unrolled version did not define asm_add: result = left + right over num_words words, lowest address first. Returns the final carry (0 or 1). The carry bit is parked in a register between iterations: lsrs moves it into the C flag, adcs consumes it, and the second adcs captures the new C flag before the counter update changes the flags. The loop body runs before the counter is tested, so num_words is expected to be at least 1. */ uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *right, wordcount_t num_words) { uint32_t carry = 0; uint32_t left_word; uint32_t right_word; __asm__ volatile ( ".syntax unified \n\t" "1: \n\t" "ldmia %[lptr]!, {%[left]} \n\t" /* Load left word. 
*/ "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */ "lsrs %[carry], #1 \n\t" /* Set up carry flag (carry = 0 after this). */ "adcs %[left], %[left], %[right] \n\t" /* Add with carry. */ "adcs %[carry], %[carry], %[carry] \n\t" /* Store carry bit. */ "stmia %[dptr]!, {%[left]} \n\t" /* Store result word. */ "subs %[ctr], #1 \n\t" /* Decrement counter. */ "bne 1b \n\t" /* Loop until counter == 0. */ RESUME_SYNTAX : [dptr] REG_RW (result), [lptr] REG_RW (left), [rptr] REG_RW (right), [ctr] REG_RW (num_words), [carry] REG_RW (carry), [left] REG_WRITE (left_word), [right] REG_WRITE (right_word) : : "cc", "memory" ); return carry; } #define asm_add 1 #endif #if !asm_sub /* Looped subtraction, compiled only when the unrolled version did not define asm_sub: result = left - right over num_words words, lowest address first. Returns 1 when the subtraction borrowed out of the last word and 0 otherwise. The saved flag starts at 1 because a set carry flag means no borrow for sbcs; it is moved into the C flag by lsrs before each sbcs and captured again by adcs afterwards. The loop body runs before the counter is tested, so num_words is expected to be at least 1. */ uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *right, wordcount_t num_words) { uint32_t carry = 1; /* carry = 1 initially (means don't borrow) */ uint32_t left_word; uint32_t right_word; __asm__ volatile ( ".syntax unified \n\t" "1: \n\t" "ldmia %[lptr]!, {%[left]} \n\t" /* Load left word. */ "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */ "lsrs %[carry], #1 \n\t" /* Set up carry flag (carry = 0 after this). */ "sbcs %[left], %[left], %[right] \n\t" /* Subtract with borrow. */ "adcs %[carry], %[carry], %[carry] \n\t" /* Store carry bit. */ "stmia %[dptr]!, {%[left]} \n\t" /* Store result word. */ "subs %[ctr], #1 \n\t" /* Decrement counter. */ "bne 1b \n\t" /* Loop until counter == 0. 
*/ RESUME_SYNTAX : [dptr] REG_RW (result), [lptr] REG_RW (left), [rptr] REG_RW (right), [ctr] REG_RW (num_words), [carry] REG_RW (carry), [left] REG_WRITE (left_word), [right] REG_WRITE (right_word) : : "cc", "memory" ); return !carry; } #define asm_sub 1 #endif #if !asm_mult /* Looped multiplication, compiled only when the unrolled version did not define asm_mult: result = left * right for num_words-word operands. The product is built one result word (column k) at a time: every left[i] * right[k - i] that belongs to the column is added into the three-word accumulator (c0, c1, c2), c0 is stored as result[k], and the accumulator is shifted down by one word. k and i are byte offsets, which is why they advance by 4 and are compared against (num_words - 1) * 4. */ uECC_VLI_API void uECC_vli_mult(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *right, wordcount_t num_words) { #if (uECC_PLATFORM != uECC_arm_thumb) uint32_t c0 = 0; uint32_t c1 = 0; uint32_t c2 = 0; uint32_t k = 0; uint32_t i; uint32_t t0, t1; __asm__ volatile ( ".syntax unified \n\t" "1: \n\t" /* outer loop (k < num_words) */ "movs %[i], #0 \n\t" /* i = 0 */ "b 3f \n\t" "2: \n\t" /* outer loop (k >= num_words) */ "movs %[i], %[k] \n\t" /* i = k */ "subs %[i], %[last_word] \n\t" /* i = k - (num_words - 1) (times 4) */ "3: \n\t" /* inner loop */ "subs %[t0], %[k], %[i] \n\t" /* t0 = k-i */ "ldr %[t1], [%[right], %[t0]] \n\t" /* t1 = right[k - i] */ "ldr %[t0], [%[left], %[i]] \n\t" /* t0 = left[i] */ "umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = left[i] * right[k - i] */ "adds %[c0], %[c0], %[t0] \n\t" /* add low word to c0 */ "adcs %[c1], %[c1], %[t1] \n\t" /* add high word to c1, including carry */ "adcs %[c2], %[c2], #0 \n\t" /* add carry to c2 */ "adds %[i], #4 \n\t" /* i += 4 */ "cmp %[i], %[last_word] \n\t" /* i > (num_words - 1) (times 4)? */ "bgt 4f \n\t" /* if so, exit the loop */ "cmp %[i], %[k] \n\t" /* i <= k? */ "ble 3b \n\t" /* if so, continue looping */ "4: \n\t" /* end inner loop */ "str %[c0], [%[result], %[k]] \n\t" /* result[k] = c0 */ "mov %[c0], %[c1] \n\t" /* c0 = c1 */ "mov %[c1], %[c2] \n\t" /* c1 = c2 */ "movs %[c2], #0 \n\t" /* c2 = 0 */ "adds %[k], #4 \n\t" /* k += 4 */ "cmp %[k], %[last_word] \n\t" /* k <= (num_words - 1) (times 4) ? */ "ble 1b \n\t" /* if so, loop back, start with i = 0 */ "cmp %[k], %[last_word], lsl #1 \n\t" /* k <= (num_words * 2 - 2) (times 4) ? 
*/ "ble 2b \n\t" /* if so, loop back, start with i = (k + 1) - num_words */ /* end outer loop */ "str %[c0], [%[result], %[k]] \n\t" /* result[num_words * 2 - 1] = c0 */ RESUME_SYNTAX : [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2), [k] "+r" (k), [i] "=&r" (i), [t0] "=&r" (t0), [t1] "=&r" (t1) : [result] "r" (result), [left] "r" (left), [right] "r" (right), [last_word] "r" ((num_words - 1) * 4) : "cc", "memory" ); #else /* Thumb-1 */ /* Same column-by-column scheme as the branch above. This path does not use umull: each 64-bit product is assembled from four 16x16-bit muls on the halves of the two words. The accumulator words and k are pushed on the stack while the product is computed because their registers are reused as scratch. The register holding result is also used as scratch; result itself stays on the stack from the initial push and is reloaded from there before each store. The loop bounds live in r8 and r9. */ uint32_t r4, r5, r6, r7; __asm__ volatile ( ".syntax unified \n\t" "subs %[r3], #1 \n\t" /* r3 = num_words - 1 */ "lsls %[r3], #2 \n\t" /* r3 = (num_words - 1) * 4 */ "mov r8, %[r3] \n\t" /* r8 = (num_words - 1) * 4 */ "lsls %[r3], #1 \n\t" /* r3 = (num_words - 1) * 8 */ "mov r9, %[r3] \n\t" /* r9 = (num_words - 1) * 8 */ "movs %[r3], #0 \n\t" /* c0 = 0 */ "movs %[r4], #0 \n\t" /* c1 = 0 */ "movs %[r5], #0 \n\t" /* c2 = 0 */ "movs %[r6], #0 \n\t" /* k = 0 */ "push {%[r0]} \n\t" /* keep result on the stack */ "1: \n\t" /* outer loop (k < num_words) */ "movs %[r7], #0 \n\t" /* r7 = i = 0 */ "b 3f \n\t" "2: \n\t" /* outer loop (k >= num_words) */ "movs %[r7], %[r6] \n\t" /* r7 = k */ "mov %[r0], r8 \n\t" /* r0 = (num_words - 1) * 4 */ "subs %[r7], %[r0] \n\t" /* r7 = i = k - (num_words - 1) (times 4) */ "3: \n\t" /* inner loop */ "push {%[r6]} \n\t" "push {%[r5]} \n\t" "push {%[r4]} \n\t" "push {%[r3]} \n\t" /* push things, r3 (c0) is at the top of stack. 
*/ "subs %[r0], %[r6], %[r7] \n\t" /* r0 = k - i */ "ldr %[r4], [%[r2], %[r0]] \n\t" /* r4 = right[k - i] */ "ldr %[r0], [%[r1], %[r7]] \n\t" /* r0 = left[i] */ "lsrs %[r3], %[r0], #16 \n\t" /* r3 = a1 */ "uxth %[r0], %[r0] \n\t" /* r0 = a0 */ "lsrs %[r5], %[r4], #16 \n\t" /* r5 = b1 */ "uxth %[r4], %[r4] \n\t" /* r4 = b0 */ "movs %[r6], %[r3] \n\t" /* r6 = a1 */ "muls %[r6], %[r5], %[r6] \n\t" /* r6 = a1 * b1 */ "muls %[r3], %[r4], %[r3] \n\t" /* r3 = b0 * a1 */ "muls %[r5], %[r0], %[r5] \n\t" /* r5 = a0 * b1 */ "muls %[r0], %[r4], %[r0] \n\t" /* r0 = a0 * b0 */ "movs %[r4], #0 \n\t" /* r4 = 0 */ "adds %[r3], %[r5] \n\t" /* r3 = b0 * a1 + a0 * b1 */ "adcs %[r4], %[r4] \n\t" /* r4 = carry */ "lsls %[r4], #16 \n\t" /* r4 = carry << 16 */ "adds %[r6], %[r4] \n\t" /* r6 = a1 * b1 + carry */ "lsls %[r4], %[r3], #16 \n\t" /* r4 = (b0 * a1 + a0 * b1) << 16 */ "lsrs %[r3], #16 \n\t" /* r3 = (b0 * a1 + a0 * b1) >> 16 */ "adds %[r0], %[r4] \n\t" /* r0 = low word = a0 * b0 + ((b0 * a1 + a0 * b1) << 16) */ "adcs %[r6], %[r3] \n\t" /* r6 = high word = a1 * b1 + carry + ((b0 * a1 + a0 * b1) >> 16) */ "pop {%[r3]} \n\t" /* r3 = c0 */ "pop {%[r4]} \n\t" /* r4 = c1 */ "pop {%[r5]} \n\t" /* r5 = c2 */ "adds %[r3], %[r0] \n\t" /* add low word to c0 */ "adcs %[r4], %[r6] \n\t" /* add high word to c1, including carry */ "movs %[r0], #0 \n\t" /* r0 = 0 (does not affect carry bit) */ "adcs %[r5], %[r0] \n\t" /* add carry to c2 */ "pop {%[r6]} \n\t" /* r6 = k */ "adds %[r7], #4 \n\t" /* i += 4 */ "cmp %[r7], r8 \n\t" /* i > (num_words - 1) (times 4)? */ "bgt 4f \n\t" /* if so, exit the loop */ "cmp %[r7], %[r6] \n\t" /* i <= k? 
*/ "ble 3b \n\t" /* if so, continue looping */ "4: \n\t" /* end inner loop */ "ldr %[r0], [sp, #0] \n\t" /* r0 = result */ "str %[r3], [%[r0], %[r6]] \n\t" /* result[k] = c0 */ "mov %[r3], %[r4] \n\t" /* c0 = c1 */ "mov %[r4], %[r5] \n\t" /* c1 = c2 */ "movs %[r5], #0 \n\t" /* c2 = 0 */ "adds %[r6], #4 \n\t" /* k += 4 */ "cmp %[r6], r8 \n\t" /* k <= (num_words - 1) (times 4) ? */ "ble 1b \n\t" /* if so, loop back, start with i = 0 */ "cmp %[r6], r9 \n\t" /* k <= (num_words * 2 - 2) (times 4) ? */ "ble 2b \n\t" /* if so, loop back, with i = (k + 1) - num_words */ /* end outer loop */ "str %[r3], [%[r0], %[r6]] \n\t" /* result[num_words * 2 - 1] = c0 */ "pop {%[r0]} \n\t" /* pop result off the stack */ ".syntax divided \n\t" : [r3] "+l" (num_words), [r4] "=&l" (r4), [r5] "=&l" (r5), [r6] "=&l" (r6), [r7] "=&l" (r7) : [r0] "l" (result), [r1] "l" (left), [r2] "l" (right) : "r8", "r9", "cc", "memory" ); #endif } #define asm_mult 1 #endif #if uECC_SQUARE_FUNC #if !asm_square /* Looped squaring, compiled only when the unrolled version did not define asm_square. It follows the column-by-column scheme of the looped multiply, but both factors are loaded from left. The definition continues beyond this part of the file. */ uECC_VLI_API void uECC_vli_square(uECC_word_t *result, const uECC_word_t *left, wordcount_t num_words) { #if (uECC_PLATFORM != uECC_arm_thumb) uint32_t c0 = 0; uint32_t c1 = 0; uint32_t c2 = 0; uint32_t k = 0; uint32_t i, tt; uint32_t t0, t1; __asm__ volatile ( ".syntax unified \n\t" "1: \n\t" /* outer loop (k < num_words) */ "movs %[i], #0 \n\t" /* i = 0 */ "b 3f \n\t" "2: \n\t" /* outer loop (k >= num_words) */ "movs %[i], %[k] \n\t" /* i = k */ "subs %[i], %[last_word] \n\t" /* i = k - (num_words - 1) (times 4) */ "3: \n\t" /* inner loop */ "subs %[tt], %[k], %[i] \n\t" /* tt = k-i */ "ldr %[t1], [%[left], %[tt]] \n\t" /* t1 = left[k - i] */ "ldr %[t0], [%[left], %[i]] \n\t" /* t0 = left[i] */ "umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = left[i] * left[k - i] */ "cmp %[i], %[tt] \n\t" /* (i < k - i) ? 
*/ "bge 4f \n\t" /* if i >= k - i, skip */ "lsls %[t1], #1 \n\t" /* high word << 1 */ "adc %[c2], %[c2], #0 \n\t" /* add carry bit to c2 */ "lsls %[t0], #1 \n\t" /* low word << 1 */ "adc %[t1], %[t1], #0 \n\t" /* add carry bit to high word */ "4: \n\t" "adds %[c0], %[c0], %[t0] \n\t" /* add low word to c0 */ "adcs %[c1], %[c1], %[t1] \n\t" /* add high word to c1, including carry */ "adcs %[c2], %[c2], #0 \n\t" /* add carry to c2 */ "adds %[i], #4 \n\t" /* i += 4 */ "cmp %[i], %[k] \n\t" /* i >= k? */ "bge 5f \n\t" /* if so, exit the loop */ "subs %[tt], %[k], %[i] \n\t" /* tt = k - i */ "cmp %[i], %[tt] \n\t" /* i <= k - i? */ "ble 3b \n\t" /* if so, continue looping */ "5: \n\t" /* end inner loop */ "str %[c0], [%[result], %[k]] \n\t" /* result[k] = c0 */ "mov %[c0], %[c1] \n\t" /* c0 = c1 */ "mov %[c1], %[c2] \n\t" /* c1 = c2 */ "movs %[c2], #0 \n\t" /* c2 = 0 */ "adds %[k], #4 \n\t" /* k += 4 */ "cmp %[k], %[last_word] \n\t" /* k <= (num_words - 1) (times 4) ? */ "ble 1b \n\t" /* if so, loop back, start with i = 0 */ "cmp %[k], %[last_word], lsl #1 \n\t" /* k <= (num_words * 2 - 2) (times 4) ? 
*/ "ble 2b \n\t" /* if so, loop back, start with i = (k + 1) - num_words */ /* end outer loop */ "str %[c0], [%[result], %[k]] \n\t" /* result[num_words * 2 - 1] = c0 */ RESUME_SYNTAX : [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2), [k] "+r" (k), [i] "=&r" (i), [tt] "=&r" (tt), [t0] "=&r" (t0), [t1] "=&r" (t1) : [result] "r" (result), [left] "r" (left), [last_word] "r" ((num_words - 1) * 4) : "cc", "memory" ); #else uint32_t r3, r4, r5, r6, r7; __asm__ volatile ( ".syntax unified \n\t" "subs %[r2], #1 \n\t" /* r2 = num_words - 1 */ "lsls %[r2], #2 \n\t" /* r2 = (num_words - 1) * 4 */ "mov r8, %[r2] \n\t" /* r8 = (num_words - 1) * 4 */ "lsls %[r2], #1 \n\t" /* r2 = (num_words - 1) * 8 */ "mov r9, %[r2] \n\t" /* r9 = (num_words - 1) * 8 */ "movs %[r2], #0 \n\t" /* c0 = 0 */ "movs %[r3], #0 \n\t" /* c1 = 0 */ "movs %[r4], #0 \n\t" /* c2 = 0 */ "movs %[r5], #0 \n\t" /* k = 0 */ "push {%[r0]} \n\t" /* keep result on the stack */ "1: \n\t" /* outer loop (k < num_words) */ "movs %[r6], #0 \n\t" /* r6 = i = 0 */ "b 3f \n\t" "2: \n\t" /* outer loop (k >= num_words) */ "movs %[r6], %[r5] \n\t" /* r6 = k */ "mov %[r0], r8 \n\t" /* r0 = (num_words - 1) * 4 */ "subs %[r6], %[r0] \n\t" /* r6 = i = k - (num_words - 1) (times 4) */ "3: \n\t" /* inner loop */ "push {%[r5]} \n\t" "push {%[r4]} \n\t" "push {%[r3]} \n\t" "push {%[r2]} \n\t" /* push things, r2 (c0) is at the top of stack. 
*/ "subs %[r7], %[r5], %[r6] \n\t" /* r7 = k - i */ "ldr %[r3], [%[r1], %[r7]] \n\t" /* r3 = left[k - i] */ "ldr %[r0], [%[r1], %[r6]] \n\t" /* r0 = left[i] */ "lsrs %[r2], %[r0], #16 \n\t" /* r2 = a1 */ "uxth %[r0], %[r0] \n\t" /* r0 = a0 */ "lsrs %[r4], %[r3], #16 \n\t" /* r4 = b1 */ "uxth %[r3], %[r3] \n\t" /* r3 = b0 */ "movs %[r5], %[r2] \n\t" /* r5 = a1 */ "muls %[r5], %[r4], %[r5] \n\t" /* r5 = a1 * b1 */ "muls %[r2], %[r3], %[r2] \n\t" /* r2 = b0 * a1 */ "muls %[r4], %[r0], %[r4] \n\t" /* r4 = a0 * b1 */ "muls %[r0], %[r3], %[r0] \n\t" /* r0 = a0 * b0 */ "movs %[r3], #0 \n\t" /* r3 = 0 */ "adds %[r2], %[r4] \n\t" /* r2 = b0 * a1 + a0 * b1 */ "adcs %[r3], %[r3] \n\t" /* r3 = carry */ "lsls %[r3], #16 \n\t" /* r3 = carry << 16 */ "adds %[r5], %[r3] \n\t" /* r5 = a1 * b1 + carry */ "lsls %[r3], %[r2], #16 \n\t" /* r3 = (b0 * a1 + a0 * b1) << 16 */ "lsrs %[r2], #16 \n\t" /* r2 = (b0 * a1 + a0 * b1) >> 16 */ "adds %[r0], %[r3] \n\t" /* r0 = low word = a0 * b0 + ((b0 * a1 + a0 * b1) << 16) */ "adcs %[r5], %[r2] \n\t" /* r5 = high word = a1 * b1 + carry + ((b0 * a1 + a0 * b1) >> 16) */ "movs %[r3], #0 \n\t" /* r3 = 0 */ "cmp %[r6], %[r7] \n\t" /* (i < k - i) ? */ "mov %[r7], %[r3] \n\t" /* r7 = 0 (does not affect condition) */ "bge 4f \n\t" /* if i >= k - i, skip */ "lsls %[r5], #1 \n\t" /* high word << 1 */ "adcs %[r7], %[r3] \n\t" /* r7 = carry bit for c2 */ "lsls %[r0], #1 \n\t" /* low word << 1 */ "adcs %[r5], %[r3] \n\t" /* add carry from shift to high word */ "4: \n\t" "pop {%[r2]} \n\t" /* r2 = c0 */ "pop {%[r3]} \n\t" /* r3 = c1 */ "pop {%[r4]} \n\t" /* r4 = c2 */ "adds %[r2], %[r0] \n\t" /* add low word to c0 */ "adcs %[r3], %[r5] \n\t" /* add high word to c1, including carry */ "movs %[r0], #0 \n\t" /* r0 = 0 (does not affect carry bit) */ "adcs %[r4], %[r0] \n\t" /* add carry to c2 */ "adds %[r4], %[r7] \n\t" /* add carry from doubling (if any) */ "pop {%[r5]} \n\t" /* r5 = k */ "adds %[r6], #4 \n\t" /* i += 4 */ "cmp %[r6], %[r5] \n\t" /* i >= k? 
*/ "bge 5f \n\t" /* if so, exit the loop */ "subs %[r7], %[r5], %[r6] \n\t" /* r7 = k - i */ "cmp %[r6], %[r7] \n\t" /* i <= k - i? */ "ble 3b \n\t" /* if so, continue looping */ "5: \n\t" /* end inner loop */ "ldr %[r0], [sp, #0] \n\t" /* r0 = result */ "str %[r2], [%[r0], %[r5]] \n\t" /* result[k] = c0 */ "mov %[r2], %[r3] \n\t" /* c0 = c1 */ "mov %[r3], %[r4] \n\t" /* c1 = c2 */ "movs %[r4], #0 \n\t" /* c2 = 0 */ "adds %[r5], #4 \n\t" /* k += 4 */ "cmp %[r5], r8 \n\t" /* k <= (num_words - 1) (times 4) ? */ "ble 1b \n\t" /* if so, loop back, start with i = 0 */ "cmp %[r5], r9 \n\t" /* k <= (num_words * 2 - 2) (times 4) ? */ "ble 2b \n\t" /* if so, loop back, with i = (k + 1) - num_words */ /* end outer loop */ "str %[r2], [%[r0], %[r5]] \n\t" /* result[num_words * 2 - 1] = c0 */ "pop {%[r0]} \n\t" /* pop result off the stack */ ".syntax divided \n\t" : [r2] "+l" (num_words), [r3] "=&l" (r3), [r4] "=&l" (r4), [r5] "=&l" (r5), [r6] "=&l" (r6), [r7] "=&l" (r7) : [r0] "l" (result), [r1] "l" (left) : "r8", "r9", "cc", "memory" ); #endif } #define asm_square 1 #endif #endif /* uECC_SQUARE_FUNC */ #endif /* _UECC_ASM_ARM_H_ */ ================================================ FILE: u2f/asm_arm_mult_square.h ================================================ /* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. 
*/

#ifndef _UECC_ASM_ARM_MULT_SQUARE_H_
#define _UECC_ASM_ARM_MULT_SQUARE_H_

/* FAST_MULT_ASM_5: fully unrolled multiply, written as a run of adjacent
 * string literals holding assembly text. It names registers directly instead
 * of using asm operands:
 *
 *   r0       destination pointer (written with stmia r0!, re-read with
 *            ldr [r0]); it is moved forward and backward inside the macro
 *   r1       pointer to the first operand (ldmia r1! into r3-r5)
 *   r2       pointer to the second operand (ldmia r2! into r6-r8)
 *   r9-r12,  product and carry accumulators
 *   r14
 *
 * Following the pointer updates, five words are read through each of r1 and
 * r2 and ten words are stored through r0; on exit r0 has advanced by 40 bytes
 * and r1/r2 by 20 bytes each. A block of partial results is stored first and
 * then re-read from [r0] and added in while the remaining columns are summed.
 *
 * NOTE(review): the code that expands this macro is not in this excerpt;
 * presumably it binds r0/r1/r2 to result/left/right and lists the other
 * registers as clobbers - confirm at the use site.
 */
#define FAST_MULT_ASM_5 \
    "add r0, 12 \n\t" \
    "add r2, 12 \n\t" \
    "ldmia r1!, {r3,r4} \n\t" \
    "ldmia r2!, {r6,r7} \n\t" \
    \
    "umull r11, r12, r3, r6 \n\t" \
    "stmia r0!, {r11} \n\t" \
    \
    "mov r10, #0 \n\t" \
    "umull r11, r9, r3, r7 \n\t" \
    "adds r12, r12, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "umull r11, r14, r4, r6 \n\t" \
    "adds r12, r12, r11 \n\t" \
    "adcs r9, r9, r14 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r12} \n\t" \
    \
    "umull r12, r14, r4, r7 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adc r10, r10, r14 \n\t" \
    "stmia r0!, {r9, r10} \n\t" \
    \
    "sub r0, 28 \n\t" \
    "sub r2, 20 \n\t" \
    "ldmia r2!, {r6,r7,r8} \n\t" \
    "ldmia r1!, {r5} \n\t" \
    \
    "umull r11, r12, r3, r6 \n\t" \
    "stmia r0!, {r11} \n\t" \
    \
    "mov r10, #0 \n\t" \
    "umull r11, r9, r3, r7 \n\t" \
    "adds r12, r12, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "umull r11, r14, r4, r6 \n\t" \
    "adds r12, r12, r11 \n\t" \
    "adcs r9, r9, r14 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r12} \n\t" \
    \
    "mov r11, #0 \n\t" \
    "umull r12, r14, r3, r8 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "umull r12, r14, r4, r7 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "umull r12, r14, r5, r6 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "stmia r0!, {r9} \n\t" \
    \
    "ldmia r1!, {r3} \n\t" \
    "mov r12, #0 \n\t" \
    "umull r14, r9, r4, r8 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "umull r14, r9, r5, r7 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "umull r14, r9, r3, r6 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "ldr r14, [r0] \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, #0 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "stmia r0!, {r10} \n\t" \
    \
    "ldmia r1!, {r4} \n\t" \
    "mov r14, #0 \n\t" \
    "umull r9, r10, r5, r8 \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "umull r9, r10, r3, r7 \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "umull r9, r10, r4, r6 \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "ldr r9, [r0] \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, #0 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "stmia r0!, {r11} \n\t" \
    \
    "ldmia r2!, {r6} \n\t" \
    "mov r9, #0 \n\t" \
    "umull r10, r11, r5, r6 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "umull r10, r11, r3, r8 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "umull r10, r11, r4, r7 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "ldr r10, [r0] \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, #0 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "stmia r0!, {r12} \n\t" \
    \
    "ldmia r2!, {r7} \n\t" \
    "mov r10, #0 \n\t" \
    "umull r11, r12, r5, r7 \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "umull r11, r12, r3, r6 \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "umull r11, r12, r4, r8 \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "ldr r11, [r0] \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, #0 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r14} \n\t" \
    \
    "mov r11, #0 \n\t" \
    "umull r12, r14, r3, r7 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "umull r12, r14, r4, r6 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "stmia r0!, {r9} \n\t" \
    \
    "umull r14, r9, r4, r7 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adc r11, r11, r9 \n\t" \
    "stmia r0!, {r10, r11} \n\t"

/* FAST_MULT_ASM_6: same scheme and register roles as the 5-word macro, for
 * six-word operands. Following the pointer updates, six words are read
 * through each of r1 and r2 and twelve words are stored through r0; on exit
 * r0 has advanced by 48 bytes and r1/r2 by 24 bytes each.
 * NOTE(review): binding of r0/r1/r2 and the clobber list live at the use
 * site, which is not in this excerpt - confirm there.
 */
#define FAST_MULT_ASM_6 \
    "add r0, 12 \n\t" \
    "add r2, 12 \n\t" \
    "ldmia r1!, {r3,r4,r5} \n\t" \
    "ldmia r2!, {r6,r7,r8} \n\t" \
    \
    "umull r11, r12, r3, r6 \n\t" \
    "stmia r0!, {r11} \n\t" \
    \
    "mov r10, #0 \n\t" \
    "umull r11, r9, r3, r7 \n\t" \
    "adds r12, r12, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "umull r11, r14, r4, r6 \n\t" \
    "adds r12, r12, r11 \n\t" \
    "adcs r9, r9, r14 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r12} \n\t" \
    \
    "mov r11, #0 \n\t" \
    "umull r12, r14, r3, r8 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "umull r12, r14, r4, r7 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "umull r12, r14, r5, r6 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "stmia r0!, {r9} \n\t" \
    \
    "mov r12, #0 \n\t" \
    "umull r14, r9, r4, r8 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "umull r14, r9, r5, r7 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "stmia r0!, {r10} \n\t" \
    \
    "umull r9, r10, r5, r8 \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adc r12, r12, r10 \n\t" \
    "stmia r0!, {r11, r12} \n\t" \
    \
    "sub r0, 36 \n\t" \
    "sub r2, 24 \n\t" \
    "ldmia r2!, {r6,r7,r8} \n\t" \
    \
    "umull r11, r12, r3, r6 \n\t" \
    "stmia r0!, {r11} \n\t" \
    \
    "mov r10, #0 \n\t" \
    "umull r11, r9, r3, r7 \n\t" \
    "adds r12, r12, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "umull r11, r14, r4, r6 \n\t" \
    "adds r12, r12, r11 \n\t" \
    "adcs r9, r9, r14 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r12} \n\t" \
    \
    "mov r11, #0 \n\t" \
    "umull r12, r14, r3, r8 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "umull r12, r14, r4, r7 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "umull r12, r14, r5, r6 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "stmia r0!, {r9} \n\t" \
    \
    "ldmia r1!, {r3} \n\t" \
    "mov r12, #0 \n\t" \
    "umull r14, r9, r4, r8 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "umull r14, r9, r5, r7 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "umull r14, r9, r3, r6 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "ldr r14, [r0] \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, #0 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "stmia r0!, {r10} \n\t" \
    \
    "ldmia r1!, {r4} \n\t" \
    "mov r14, #0 \n\t" \
    "umull r9, r10, r5, r8 \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "umull r9, r10, r3, r7 \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "umull r9, r10, r4, r6 \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "ldr r9, [r0] \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, #0 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "stmia r0!, {r11} \n\t" \
    \
    "ldmia r1!, {r5} \n\t" \
    "mov r9, #0 \n\t" \
    "umull r10, r11, r3, r8 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "umull r10, r11, r4, r7 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "umull r10, r11, r5, r6 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "ldr r10, [r0] \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, #0 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "stmia r0!, {r12} \n\t" \
    \
    "ldmia r2!, {r6} \n\t" \
    "mov r10, #0 \n\t" \
    "umull r11, r12, r3, r6 \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "umull r11, r12, r4, r8 \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "umull r11, r12, r5, r7 \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "ldr r11, [r0] \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, #0 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r14} \n\t" \
    \
    "ldmia r2!, {r7} \n\t" \
    "mov r11, #0 \n\t" \
    "umull r12, r14, r3, r7 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "umull r12, r14, r4, r6 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "umull r12, r14, r5, r8 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "ldr r12, [r0] \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, #0 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "stmia r0!, {r9} \n\t" \
    \
    "ldmia r2!, {r8} \n\t" \
    "mov r12, #0 \n\t" \
    "umull r14, r9, r3, r8 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "umull r14, r9, r4, r7 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "umull r14, r9, r5, r6 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "ldr r14, [r0] \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, #0 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "stmia r0!, {r10} \n\t" \
    \
    "mov r14, #0 \n\t" \
    "umull r9, r10, r4, r8 \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "umull r9, r10, r5, r7 \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "stmia r0!, {r11} \n\t" \
    \
    "umull r10, r11, r5, r8 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adc r14, r14, r11 \n\t" \
    "stmia r0!, {r12, r14} \n\t"

/* FAST_MULT_ASM_7: same register roles as the smaller multiply macros, for
 * seven-word operands, built in three passes; r0, r1 and r2 are rewound with
 * sub between passes, so some operand words are loaded more than once.
 * Following the pointer updates, on exit r0 has advanced by 56 bytes
 * (fourteen result words) and r1/r2 by 28 bytes each.
 * NOTE(review): binding of r0/r1/r2 and the clobber list live at the use
 * site, which is not in this excerpt - confirm there.
 */
#define FAST_MULT_ASM_7 \
    "add r0, 24 \n\t" \
    "add r2, 24 \n\t" \
    "ldmia r1!, {r3} \n\t" \
    "ldmia r2!, {r6} \n\t" \
    \
    "umull r9, r10, r3, r6 \n\t" \
    "stmia r0!, {r9, r10} \n\t" \
    \
    "sub r0, 20 \n\t" \
    "sub r2, 16 \n\t" \
    "ldmia r2!, {r6, r7, r8} \n\t" \
    "ldmia r1!, {r4, r5} \n\t" \
    \
    "umull r9, r10, r3, r6 \n\t" \
    "stmia r0!, {r9} \n\t" \
    \
    "mov r14, #0 \n\t" \
    "umull r9, r12, r3, r7 \n\t" \
    "adds r10, r10, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "umull r9, r11, r4, r6 \n\t" \
    "adds r10, r10, r9 \n\t" \
    "adcs r12, r12, r11 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "stmia r0!, {r10} \n\t" \
    \
    "mov r9, #0 \n\t" \
    "umull r10, r11, r3, r8 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "umull r10, r11, r4, r7 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "umull r10, r11, r5, r6 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "stmia r0!, {r12} \n\t" \
    \
    "ldmia r1!, {r3} \n\t" \
    "mov r10, #0 \n\t" \
    "umull r11, r12, r4, r8 \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "umull r11, r12, r5, r7 \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "umull r11, r12, r3, r6 \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "ldr r11, [r0] \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, #0 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r14} \n\t" \
    \
    "ldmia r2!, {r6} \n\t" \
    "mov r11, #0 \n\t" \
    "umull r12, r14, r4, r6 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "umull r12, r14, r5, r8 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "umull r12, r14, r3, r7 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "ldr r12, [r0] \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, #0 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "stmia r0!, {r9} \n\t" \
    \
    "mov r12, #0 \n\t" \
    "umull r14, r9, r5, r6 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "umull r14, r9, r3, r8 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "stmia r0!, {r10} \n\t" \
    \
    "umull r9, r10, r3, r6 \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adc r12, r12, r10 \n\t" \
    "stmia r0!, {r11, r12} \n\t" \
    \
    "sub r0, 44 \n\t" \
    "sub r1, 16 \n\t" \
    "sub r2, 28 \n\t" \
    "ldmia r1!, {r3,r4,r5} \n\t" \
    "ldmia r2!, {r6,r7,r8} \n\t" \
    \
    "umull r9, r10, r3, r6 \n\t" \
    "stmia r0!, {r9} \n\t" \
    \
    "mov r14, #0 \n\t" \
    "umull r9, r12, r3, r7 \n\t" \
    "adds r10, r10, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "umull r9, r11, r4, r6 \n\t" \
    "adds r10, r10, r9 \n\t" \
    "adcs r12, r12, r11 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "stmia r0!, {r10} \n\t" \
    \
    "mov r9, #0 \n\t" \
    "umull r10, r11, r3, r8 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "umull r10, r11, r4, r7 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "umull r10, r11, r5, r6 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "stmia r0!, {r12} \n\t" \
    \
    "ldmia r1!, {r3} \n\t" \
    "mov r10, #0 \n\t" \
    "umull r11, r12, r4, r8 \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "umull r11, r12, r5, r7 \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "umull r11, r12, r3, r6 \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "ldr r11, [r0] \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, #0 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r14} \n\t" \
    \
    "ldmia r1!, {r4} \n\t" \
    "mov r11, #0 \n\t" \
    "umull r12, r14, r5, r8 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "umull r12, r14, r3, r7 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "umull r12, r14, r4, r6 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "ldr r12, [r0] \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, #0 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "stmia r0!, {r9} \n\t" \
    \
    "ldmia r1!, {r5} \n\t" \
    "mov r12, #0 \n\t" \
    "umull r14, r9, r3, r8 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "umull r14, r9, r4, r7 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "umull r14, r9, r5, r6 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "ldr r14, [r0] \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, #0 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "stmia r0!, {r10} \n\t" \
    \
    "ldmia r1!, {r3} \n\t" \
    "mov r14, #0 \n\t" \
    "umull r9, r10, r4, r8 \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "umull r9, r10, r5, r7 \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "umull r9, r10, r3, r6 \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "ldr r9, [r0] \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, #0 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "stmia r0!, {r11} \n\t" \
    \
    "ldmia r2!, {r6} \n\t" \
    "mov r9, #0 \n\t" \
    "umull r10, r11, r4, r6 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "umull r10, r11, r5, r8 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "umull r10, r11, r3, r7 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "ldr r10, [r0] \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, #0 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "stmia r0!, {r12} \n\t" \
    \
    "ldmia r2!, {r7} \n\t" \
    "mov r10, #0 \n\t" \
    "umull r11, r12, r4, r7 \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "umull r11, r12, r5, r6 \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "umull r11, r12, r3, r8 \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "ldr r11, [r0] \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, #0 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r14} \n\t" \
    \
    "ldmia r2!, {r8} \n\t" \
    "mov r11, #0 \n\t" \
    "umull r12, r14, r4, r8 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "umull r12, r14, r5, r7 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "umull r12, r14, r3, r6 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "ldr r12, [r0] \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, #0 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "stmia r0!, {r9} \n\t" \
    \
    "ldmia r2!, {r6} \n\t" \
    "mov r12, #0 \n\t" \
    "umull r14, r9, r4, r6 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "umull r14, r9, r5, r8 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "umull r14, r9, r3, r7 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "ldr r14, [r0] \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, #0 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "stmia r0!, {r10} \n\t" \
    \
    "mov r14, #0 \n\t" \
    "umull r9, r10, r5, r6 \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "umull r9, r10, r3, r8 \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "stmia r0!, {r11} \n\t" \
    \
    "umull r10, r11, r3, r6 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adc r14, r14, r11 \n\t" \
    "stmia r0!, {r12, r14} \n\t"

/* FAST_MULT_ASM_8: same register roles as the smaller multiply macros, for
 * eight-word operands, built in three passes; r0, r1 and r2 are rewound with
 * sub between passes, so some operand words are loaded more than once.
 * Following the pointer updates, on exit r0 has advanced by 64 bytes
 * (sixteen result words) and r1/r2 by 32 bytes each.
 * NOTE(review): binding of r0/r1/r2 and the clobber list live at the use
 * site, which is not in this excerpt - confirm there.
 */
#define FAST_MULT_ASM_8 \
    "add r0, 24 \n\t" \
    "add r2, 24 \n\t" \
    "ldmia r1!, {r3,r4} \n\t" \
    "ldmia r2!, {r6,r7} \n\t" \
    \
    "umull r11, r12, r3, r6 \n\t" \
    "stmia r0!, {r11} \n\t" \
    \
    "mov r10, #0 \n\t" \
    "umull r11, r9, r3, r7 \n\t" \
    "adds r12, r12, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "umull r11, r14, r4, r6 \n\t" \
    "adds r12, r12, r11 \n\t" \
    "adcs r9, r9, r14 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r12} \n\t" \
    \
    "umull r12, r14, r4, r7 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adc r10, r10, r14 \n\t" \
    "stmia r0!, {r9, r10} \n\t" \
    \
    "sub r0, 28 \n\t" \
    "sub r2, 20 \n\t" \
    "ldmia r2!, {r6,r7,r8} \n\t" \
    "ldmia r1!, {r5} \n\t" \
    \
    "umull r11, r12, r3, r6 \n\t" \
    "stmia r0!, {r11} \n\t" \
    \
    "mov r10, #0 \n\t" \
    "umull r11, r9, r3, r7 \n\t" \
    "adds r12, r12, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "umull r11, r14, r4, r6 \n\t" \
    "adds r12, r12, r11 \n\t" \
    "adcs r9, r9, r14 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r12} \n\t" \
    \
    "mov r11, #0 \n\t" \
    "umull r12, r14, r3, r8 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "umull r12, r14, r4, r7 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "umull r12, r14, r5, r6 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "stmia r0!, {r9} \n\t" \
    \
    "ldmia r1!, {r3} \n\t" \
    "mov r12, #0 \n\t" \
    "umull r14, r9, r4, r8 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "umull r14, r9, r5, r7 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "umull r14, r9, r3, r6 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "ldr r14, [r0] \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, #0 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "stmia r0!, {r10} \n\t" \
    \
    "ldmia r1!, {r4} \n\t" \
    "mov r14, #0 \n\t" \
    "umull r9, r10, r5, r8 \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "umull r9, r10, r3, r7 \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "umull r9, r10, r4, r6 \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "ldr r9, [r0] \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, #0 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "stmia r0!, {r11} \n\t" \
    \
    "ldmia r2!, {r6} \n\t" \
    "mov r9, #0 \n\t" \
    "umull r10, r11, r5, r6 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "umull r10, r11, r3, r8 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "umull r10, r11, r4, r7 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "ldr r10, [r0] \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, #0 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "stmia r0!, {r12} \n\t" \
    \
    "ldmia r2!, {r7} \n\t" \
    "mov r10, #0 \n\t" \
    "umull r11, r12, r5, r7 \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "umull r11, r12, r3, r6 \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "umull r11, r12, r4, r8 \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "ldr r11, [r0] \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, #0 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r14} \n\t" \
    \
    "mov r11, #0 \n\t" \
    "umull r12, r14, r3, r7 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "umull r12, r14, r4, r6 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "stmia r0!, {r9} \n\t" \
    \
    "umull r14, r9, r4, r7 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adc r11, r11, r9 \n\t" \
    "stmia r0!, {r10, r11} \n\t" \
    \
    "sub r0, 52 \n\t" \
    "sub r1, 20 \n\t" \
    "sub r2, 32 \n\t" \
    "ldmia r1!, {r3,r4,r5} \n\t" \
    "ldmia r2!, {r6,r7,r8} \n\t" \
    \
    "umull r11, r12, r3, r6 \n\t" \
    "stmia r0!, {r11} \n\t" \
    \
    "mov r10, #0 \n\t" \
    "umull r11, r9, r3, r7 \n\t" \
    "adds r12, r12, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "umull r11, r14, r4, r6 \n\t" \
    "adds r12, r12, r11 \n\t" \
    "adcs r9, r9, r14 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r12} \n\t" \
    \
    "mov r11, #0 \n\t" \
    "umull r12, r14, r3, r8 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "umull r12, r14, r4, r7 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "umull r12, r14, r5, r6 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "stmia r0!, {r9} \n\t" \
    \
    "ldmia r1!, {r3} \n\t" \
    "mov r12, #0 \n\t" \
    "umull r14, r9, r4, r8 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "umull r14, r9, r5, r7 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "umull r14, r9, r3, r6 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "ldr r14, [r0] \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, #0 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "stmia r0!, {r10} \n\t" \
    \
    "ldmia r1!, {r4} \n\t" \
    "mov r14, #0 \n\t" \
    "umull r9, r10, r5, r8 \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "umull r9, r10, r3, r7 \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "umull r9, r10, r4, r6 \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "ldr r9, [r0] \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, #0 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "stmia r0!, {r11} \n\t" \
    \
    "ldmia r1!, {r5} \n\t" \
    "mov r9, #0 \n\t" \
    "umull r10, r11, r3, r8 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "umull r10, r11, r4, r7 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "umull r10, r11, r5, r6 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "ldr r10, [r0] \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, #0 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "stmia r0!, {r12} \n\t" \
    \
    "ldmia r1!, {r3} \n\t" \
    "mov r10, #0 \n\t" \
    "umull r11, r12, r4, r8 \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "umull r11, r12, r5, r7 \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "umull r11, r12, r3, r6 \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "ldr r11, [r0] \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, #0 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r14} \n\t" \
    \
    "ldmia r1!, {r4} \n\t" \
    "mov r11, #0 \n\t" \
    "umull r12, r14, r5, r8 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "umull r12, r14, r3, r7 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "umull r12, r14, r4, r6 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "ldr r12, [r0] \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, #0 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "stmia r0!, {r9} \n\t" \
    \
    "ldmia r2!, {r6} \n\t" \
    "mov r12, #0 \n\t" \
    "umull r14, r9, r5, r6 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "umull r14, r9, r3, r8 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "umull r14, r9, r4, r7 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "ldr r14, [r0] \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, #0 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "stmia r0!, {r10} \n\t" \
    \
    "ldmia r2!, {r7} \n\t" \
    "mov r14, #0 \n\t" \
    "umull r9, r10, r5, r7 \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "umull r9, r10, r3, r6 \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "umull r9, r10, r4, r8 \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "ldr r9, [r0] \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adcs r12, r12, #0 \n\t" \
    "adc r14, r14, #0 \n\t" \
    "stmia r0!, {r11} \n\t" \
    \
    "ldmia r2!, {r8} \n\t" \
    "mov r9, #0 \n\t" \
    "umull r10, r11, r5, r8 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "umull r10, r11, r3, r7 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "umull r10, r11, r4, r6 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "ldr r10, [r0] \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r14, r14, #0 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "stmia r0!, {r12} \n\t" \
    \
    "ldmia r2!, {r6} \n\t" \
    "mov r10, #0 \n\t" \
    "umull r11, r12, r5, r6 \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "umull r11, r12, r3, r8 \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "umull r11, r12, r4, r7 \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "ldr r11, [r0] \n\t" \
    "adds r14, r14, r11 \n\t" \
    "adcs r9, r9, #0 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r14} \n\t" \
    \
    "ldmia r2!, {r7} \n\t" \
    "mov r11, #0 \n\t" \
    "umull r12, r14, r5, r7 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "umull r12, r14, r3, r6 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "umull r12, r14, r4, r8 \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, r14 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "ldr r12, [r0] \n\t" \
    "adds r9, r9, r12 \n\t" \
    "adcs r10, r10, #0 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "stmia r0!, {r9} \n\t" \
    \
    "mov r12, #0 \n\t" \
    "umull r14, r9, r3, r7 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "umull r14, r9, r4, r6 \n\t" \
    "adds r10, r10, r14 \n\t" \
    "adcs r11, r11, r9 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "stmia r0!, {r10} \n\t" \
    \
    "umull r9, r10, r4, r7 \n\t" \
    "adds r11, r11, r9 \n\t" \
    "adc r12, r12, r10 \n\t" \
    "stmia r0!, {r11, r12} \n\t"

/* FAST_SQUARE_ASM_5: fully unrolled squaring of a five-word operand, again as
 * a run of string literals that name registers directly.
 *
 *   r0       destination pointer; ten words are stored through it with
 *            stmia r0!
 *   r1       operand pointer on entry; all five words are loaded at once into
 *            r2-r6, after which r1 is reused as a scratch register
 *   r8-r12,  product and carry accumulators
 *   r14
 *
 * Products of two different words are doubled (added to themselves) before
 * being accumulated; products of a word with itself are added once.
 * NOTE(review): binding of r0/r1 and the clobber list live at the use site,
 * which is not in this excerpt - confirm there.
 */
#define FAST_SQUARE_ASM_5 \
    "ldmia r1!, {r2,r3,r4,r5,r6} \n\t" \
    \
    "umull r11, r12, r2, r2 \n\t" \
    "stmia r0!, {r11} \n\t" \
    \
    "mov r9, #0 \n\t" \
    "umull r10, r11, r2, r3 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r8, r11, #0 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r8, r8, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "stmia r0!, {r12} \n\t" \
    \
    "mov r10, #0 \n\t" \
    "umull r11, r12, r2, r4 \n\t" \
    "adds r11, r11, r11 \n\t" \
    "adcs r12, r12, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "adds r8, r8, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "umull r11, r12, r3, r3 \n\t" \
    "adds r8, r8, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r8} \n\t" \
    \
    "mov r12, #0 \n\t" \
    "umull r8, r11, r2, r5 \n\t" \
    "umull r1, r14, r3, r4 \n\t" \
    "adds r8, r8, r1 \n\t" \
    "adcs r11, r11, r14 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "adds r8, r8, r8 \n\t" \
    "adcs r11, r11, r11 \n\t" \
    "adc r12, r12, r12 \n\t" \
    "adds r8, r8, r9 \n\t" \
    "adcs r11, r11, r10 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "stmia r0!, {r8} \n\t" \
    \
    "mov r10, #0 \n\t" \
    "umull r8, r9, r2, r6 \n\t" \
    "umull r1, r14, r3, r5 \n\t" \
    "adds r8, r8, r1 \n\t" \
    "adcs r9, r9, r14 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "adds r8, r8, r8 \n\t" \
    "adcs r9, r9, r9 \n\t" \
    "adc r10, r10, r10 \n\t" \
    "umull r1, r14, r4, r4 \n\t" \
    "adds r8, r8, r1 \n\t" \
    "adcs r9, r9, r14 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "adds r8, r8, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r8} \n\t" \
    \
    "mov r12, #0 \n\t" \
    "umull r8, r11, r3, r6 \n\t" \
    "umull r1, r14, r4, r5 \n\t" \
    "adds r8, r8, r1 \n\t" \
    "adcs r11, r11, r14 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "adds r8, r8, r8 \n\t" \
    "adcs r11, r11, r11 \n\t" \
    "adc r12, r12, r12 \n\t" \
    "adds r8, r8, r9 \n\t" \
    "adcs r11, r11, r10 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "stmia r0!, {r8} \n\t" \
    \
    "mov r8, #0 \n\t" \
    "umull r1, r10, r4, r6 \n\t" \
    "adds r1, r1, r1 \n\t" \
    "adcs r10, r10, r10 \n\t" \
    "adc r8, r8, #0 \n\t" \
    "adds r11, r11, r1 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r8, r8, #0 \n\t" \
    "umull r1, r10, r5, r5 \n\t" \
    "adds r11, r11, r1 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r8, r8, #0 \n\t" \
    "stmia r0!, {r11} \n\t" \
    \
    "mov r11, #0 \n\t" \
    "umull r1, r10, r5, r6 \n\t" \
    "adds r1, r1, r1 \n\t" \
    "adcs r10, r10, r10 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "adds r12, r12, r1 \n\t" \
    "adcs r8, r8, r10 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "stmia r0!, {r12} \n\t" \
    \
    "umull r1, r10, r6, r6 \n\t" \
    "adds r8, r8, r1 \n\t" \
    "adcs r11, r11, r10 \n\t" \
    "stmia r0!, {r8, r11} \n\t"

#define FAST_SQUARE_ASM_6 \
    "ldmia r1!, {r2,r3,r4,r5,r6,r7} \n\t" \
    \
    "umull r11, r12, r2, r2 \n\t" \
    "stmia r0!, {r11} \n\t" \
    \
    "mov r9, #0 \n\t" \
    "umull r10, r11, r2, r3 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r8, r11, #0 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r8, r8, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "stmia r0!, {r12} \n\t" \
    \
    "mov r10, #0
\n\t" \ "umull r11, r12, r2, r4 \n\t" \ "adds r11, r11, r11 \n\t" \ "adcs r12, r12, r12 \n\t" \ "adc r10, r10, #0 \n\t" \ "adds r8, r8, r11 \n\t" \ "adcs r9, r9, r12 \n\t" \ "adc r10, r10, #0 \n\t" \ "umull r11, r12, r3, r3 \n\t" \ "adds r8, r8, r11 \n\t" \ "adcs r9, r9, r12 \n\t" \ "adc r10, r10, #0 \n\t" \ "stmia r0!, {r8} \n\t" \ \ "mov r12, #0 \n\t" \ "umull r8, r11, r2, r5 \n\t" \ "umull r1, r14, r3, r4 \n\t" \ "adds r8, r8, r1 \n\t" \ "adcs r11, r11, r14 \n\t" \ "adc r12, r12, #0 \n\t" \ "adds r8, r8, r8 \n\t" \ "adcs r11, r11, r11 \n\t" \ "adc r12, r12, r12 \n\t" \ "adds r8, r8, r9 \n\t" \ "adcs r11, r11, r10 \n\t" \ "adc r12, r12, #0 \n\t" \ "stmia r0!, {r8} \n\t" \ \ "mov r10, #0 \n\t" \ "umull r8, r9, r2, r6 \n\t" \ "umull r1, r14, r3, r5 \n\t" \ "adds r8, r8, r1 \n\t" \ "adcs r9, r9, r14 \n\t" \ "adc r10, r10, #0 \n\t" \ "adds r8, r8, r8 \n\t" \ "adcs r9, r9, r9 \n\t" \ "adc r10, r10, r10 \n\t" \ "umull r1, r14, r4, r4 \n\t" \ "adds r8, r8, r1 \n\t" \ "adcs r9, r9, r14 \n\t" \ "adc r10, r10, #0 \n\t" \ "adds r8, r8, r11 \n\t" \ "adcs r9, r9, r12 \n\t" \ "adc r10, r10, #0 \n\t" \ "stmia r0!, {r8} \n\t" \ \ "mov r12, #0 \n\t" \ "umull r8, r11, r2, r7 \n\t" \ "umull r1, r14, r3, r6 \n\t" \ "adds r8, r8, r1 \n\t" \ "adcs r11, r11, r14 \n\t" \ "adc r12, r12, #0 \n\t" \ "umull r1, r14, r4, r5 \n\t" \ "adds r8, r8, r1 \n\t" \ "adcs r11, r11, r14 \n\t" \ "adc r12, r12, #0 \n\t" \ "adds r8, r8, r8 \n\t" \ "adcs r11, r11, r11 \n\t" \ "adc r12, r12, r12 \n\t" \ "adds r8, r8, r9 \n\t" \ "adcs r11, r11, r10 \n\t" \ "adc r12, r12, #0 \n\t" \ "stmia r0!, {r8} \n\t" \ \ "mov r10, #0 \n\t" \ "umull r8, r9, r3, r7 \n\t" \ "umull r1, r14, r4, r6 \n\t" \ "adds r8, r8, r1 \n\t" \ "adcs r9, r9, r14 \n\t" \ "adc r10, r10, #0 \n\t" \ "adds r8, r8, r8 \n\t" \ "adcs r9, r9, r9 \n\t" \ "adc r10, r10, r10 \n\t" \ "umull r1, r14, r5, r5 \n\t" \ "adds r8, r8, r1 \n\t" \ "adcs r9, r9, r14 \n\t" \ "adc r10, r10, #0 \n\t" \ "adds r8, r8, r11 \n\t" \ "adcs r9, r9, r12 \n\t" \ "adc r10, 
r10, #0 \n\t" \ "stmia r0!, {r8} \n\t" \ \ "mov r12, #0 \n\t" \ "umull r8, r11, r4, r7 \n\t" \ "umull r1, r14, r5, r6 \n\t" \ "adds r8, r8, r1 \n\t" \ "adcs r11, r11, r14 \n\t" \ "adc r12, r12, #0 \n\t" \ "adds r8, r8, r8 \n\t" \ "adcs r11, r11, r11 \n\t" \ "adc r12, r12, r12 \n\t" \ "adds r8, r8, r9 \n\t" \ "adcs r11, r11, r10 \n\t" \ "adc r12, r12, #0 \n\t" \ "stmia r0!, {r8} \n\t" \ \ "mov r8, #0 \n\t" \ "umull r1, r10, r5, r7 \n\t" \ "adds r1, r1, r1 \n\t" \ "adcs r10, r10, r10 \n\t" \ "adc r8, r8, #0 \n\t" \ "adds r11, r11, r1 \n\t" \ "adcs r12, r12, r10 \n\t" \ "adc r8, r8, #0 \n\t" \ "umull r1, r10, r6, r6 \n\t" \ "adds r11, r11, r1 \n\t" \ "adcs r12, r12, r10 \n\t" \ "adc r8, r8, #0 \n\t" \ "stmia r0!, {r11} \n\t" \ \ "mov r11, #0 \n\t" \ "umull r1, r10, r6, r7 \n\t" \ "adds r1, r1, r1 \n\t" \ "adcs r10, r10, r10 \n\t" \ "adc r11, r11, #0 \n\t" \ "adds r12, r12, r1 \n\t" \ "adcs r8, r8, r10 \n\t" \ "adc r11, r11, #0 \n\t" \ "stmia r0!, {r12} \n\t" \ \ "umull r1, r10, r7, r7 \n\t" \ "adds r8, r8, r1 \n\t" \ "adcs r11, r11, r10 \n\t" \ "stmia r0!, {r8, r11} \n\t" #define FAST_SQUARE_ASM_7 \ "ldmia r1!, {r2} \n\t" \ "add r1, 20 \n\t" \ "ldmia r1!, {r5} \n\t" \ "add r0, 24 \n\t" \ "umull r8, r9, r2, r5 \n\t" \ "stmia r0!, {r8, r9} \n\t" \ "sub r0, 32 \n\t" \ "sub r1, 28 \n\t" \ \ "ldmia r1!, {r2, r3, r4, r5, r6, r7} \n\t" \ \ "umull r11, r12, r2, r2 \n\t" \ "stmia r0!, {r11} \n\t" \ \ "mov r9, #0 \n\t" \ "umull r10, r11, r2, r3 \n\t" \ "adds r12, r12, r10 \n\t" \ "adcs r8, r11, #0 \n\t" \ "adc r9, r9, #0 \n\t" \ "adds r12, r12, r10 \n\t" \ "adcs r8, r8, r11 \n\t" \ "adc r9, r9, #0 \n\t" \ "stmia r0!, {r12} \n\t" \ \ "mov r10, #0 \n\t" \ "umull r11, r12, r2, r4 \n\t" \ "adds r11, r11, r11 \n\t" \ "adcs r12, r12, r12 \n\t" \ "adc r10, r10, #0 \n\t" \ "adds r8, r8, r11 \n\t" \ "adcs r9, r9, r12 \n\t" \ "adc r10, r10, #0 \n\t" \ "umull r11, r12, r3, r3 \n\t" \ "adds r8, r8, r11 \n\t" \ "adcs r9, r9, r12 \n\t" \ "adc r10, r10, #0 \n\t" \ "stmia r0!, {r8} \n\t" \ \ 
"mov r12, #0 \n\t" /* result word 3; r2..r7 hold input words a0..a5 (loaded earlier in this macro) */ \
    "umull r8, r11, r2, r5 \n\t" /* a0*a3 */ \
    "mov r14, r11 \n\t" /* keep the high word to detect a wrap of the 64-bit sum */ \
    "umlal r8, r11, r3, r4 \n\t" /* += a1*a2 */ \
    "cmp r14, r11 \n\t" /* old high word above the new one: the accumulate wrapped */ \
    "it hi \n\t" \
    "adchi r12, r12, #0 \n\t" /* count that wrap in the third accumulator word */ \
    "adds r8, r8, r8 \n\t" /* double the cross products */ \
    "adcs r11, r11, r11 \n\t" \
    "adc r12, r12, r12 \n\t" \
    "adds r8, r8, r9 \n\t" /* add the carry words r9, r10 left by the previous result word */ \
    "adcs r11, r11, r10 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "stmia r0!, {r8} \n\t" \
    \
    "mov r10, #0 \n\t" /* result word 4 */ \
    "umull r8, r9, r2, r6 \n\t" /* a0*a4 */ \
    "mov r14, r9 \n\t" \
    "umlal r8, r9, r3, r5 \n\t" /* += a1*a3 */ \
    "cmp r14, r9 \n\t" \
    "it hi \n\t" \
    "adchi r10, r10, #0 \n\t" \
    "adds r8, r8, r8 \n\t" \
    "adcs r9, r9, r9 \n\t" \
    "adc r10, r10, r10 \n\t" \
    "mov r14, r9 \n\t" \
    "umlal r8, r9, r4, r4 \n\t" /* += a2*a2 (added after the doubling) */ \
    "cmp r14, r9 \n\t" \
    "it hi \n\t" \
    "adchi r10, r10, #0 \n\t" \
    "adds r8, r8, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r8} \n\t" \
    \
    "mov r12, #0 \n\t" /* result word 5 */ \
    "umull r8, r11, r2, r7 \n\t" /* a0*a5 */ \
    "mov r14, r11 \n\t" \
    "umlal r8, r11, r3, r6 \n\t" /* += a1*a4 */ \
    "cmp r14, r11 \n\t" \
    "it hi \n\t" \
    "adchi r12, r12, #0 \n\t" \
    "mov r14, r11 \n\t" \
    "umlal r8, r11, r4, r5 \n\t" /* += a2*a3 */ \
    "cmp r14, r11 \n\t" \
    "it hi \n\t" \
    "adchi r12, r12, #0 \n\t" \
    "adds r8, r8, r8 \n\t" \
    "adcs r11, r11, r11 \n\t" \
    "adc r12, r12, r12 \n\t" \
    "adds r8, r8, r9 \n\t" \
    "adcs r11, r11, r10 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "stmia r0!, {r8} \n\t" \
    \
    "ldmia r1!, {r2} \n\t" /* r2 now holds a6; a0 is not used again */ \
    "mov r10, #0 \n\t" /* result word 6 */ \
    "umull r8, r9, r3, r7 \n\t" /* a1*a5 */ \
    "mov r14, r9 \n\t" \
    "umlal r8, r9, r4, r6 \n\t" /* += a2*a4 */ \
    "cmp r14, r9 \n\t" \
    "it hi \n\t" \
    "adchi r10, r10, #0 \n\t" \
    "ldr r14, [r0] \n\t" /* low word of a0*a6, stored here at the start of the macro */ \
    "adds r8, r8, r14 \n\t" \
    "adcs r9, r9, #0 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "adds r8, r8, r8 \n\t" \
    "adcs r9, r9, r9 \n\t" \
    "adc r10, r10, r10 \n\t" \
    "mov r14, r9 \n\t" \
    "umlal r8, r9, r5, r5 \n\t" /* += a3*a3 */ \
    "cmp r14, r9 \n\t" \
    "it hi \n\t" \
    "adchi r10, r10, #0 \n\t" \
    "adds r8, r8, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r8} \n\t" \
    \
    "mov r12, #0 \n\t" /* result word 7 */ \
    "umull r8, r11, r3, r2 \n\t" /* a1*a6 */ \
    "mov r14, r11 \n\t" \
    "umlal r8, r11, r4, r7 \n\t" /* += a2*a5 */ \
    "cmp r14, r11 \n\t" \
    "it hi \n\t" \
    "adchi r12, r12, #0 \n\t" \
    "mov r14, r11 \n\t" \
    "umlal r8, r11, r5, r6 \n\t" /* += a3*a4 */ \
    "cmp r14, r11 \n\t" \
    "it hi \n\t" \
    "adchi r12, r12, #0 \n\t" \
    "ldr r14, [r0] \n\t" /* high word of a0*a6, stored here at the start of the macro */ \
    "adds r8, r8, r14 \n\t" \
    "adcs r11, r11, #0 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "adds r8, r8, r8 \n\t" \
    "adcs r11, r11, r11 \n\t" \
    "adc r12, r12, r12 \n\t" \
    "adds r8, r8, r9 \n\t" \
    "adcs r11, r11, r10 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "stmia r0!, {r8} \n\t" \
    \
    "mov r10, #0 \n\t" /* result word 8 */ \
    "umull r8, r9, r4, r2 \n\t" /* a2*a6 */ \
    "mov r14, r9 \n\t" \
    "umlal r8, r9, r5, r7 \n\t" /* += a3*a5 */ \
    "cmp r14, r9 \n\t" \
    "it hi \n\t" \
    "adchi r10, r10, #0 \n\t" \
    "adds r8, r8, r8 \n\t" \
    "adcs r9, r9, r9 \n\t" \
    "adc r10, r10, r10 \n\t" \
    "mov r14, r9 \n\t" \
    "umlal r8, r9, r6, r6 \n\t" /* += a4*a4 */ \
    "cmp r14, r9 \n\t" \
    "it hi \n\t" \
    "adchi r10, r10, #0 \n\t" \
    "adds r8, r8, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r8} \n\t" \
    \
    "mov r12, #0 \n\t" /* result word 9 */ \
    "umull r8, r11, r5, r2 \n\t" /* a3*a6 */ \
    "mov r14, r11 \n\t" \
    "umlal r8, r11, r6, r7 \n\t" /* += a4*a5 */ \
    "cmp r14, r11 \n\t" \
    "it hi \n\t" \
    "adchi r12, r12, #0 \n\t" \
    "adds r8, r8, r8 \n\t" \
    "adcs r11, r11, r11 \n\t" \
    "adc r12, r12, r12 \n\t" \
    "adds r8, r8, r9 \n\t" \
    "adcs r11, r11, r10 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "stmia r0!, {r8} \n\t" \
    \
    "mov r8, #0 \n\t" /* result word 10; r1 is reused as a scratch register from here on */ \
    "umull r1, r10, r6, r2 \n\t" /* a4*a6 */ \
    "adds r1, r1, r1 \n\t" /* doubled */ \
    "adcs r10, r10, r10 \n\t" \
    "adc r8, r8, #0 \n\t" \
    "adds r11, r11, r1 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r8, r8, #0 \n\t" \
    "umull r1, r10, r7, r7 \n\t" /* a5*a5 */ \
    "adds r11, r11, r1 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r8, r8, #0 \n\t" \
    "stmia r0!, {r11} \n\t" \
    \
    "mov r11, #0 \n\t" /* result word 11 */ \
    "umull r1, r10, r7, r2 \n\t" /* a5*a6 */ \
    "adds r1, r1, r1 \n\t" /* doubled */ \
    "adcs r10, r10, r10 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "adds r12, r12, r1 \n\t" \
    "adcs r8, r8, r10 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "stmia r0!, {r12} \n\t" \
    \
    "umull r1, r10, r2, r2 \n\t" /* result words 12 and 13: a6*a6 plus the remaining carry */ \
    "adds r8, r8, r1 \n\t" \
    "adcs r11, r11, r10 \n\t" \
    "stmia r0!, {r8, r11} \n\t"

/* FAST_SQUARE_ASM_8: assembly text that squares an 8-word operand.
   Input words are read through r1 (post-incremented loads) and the 16 result
   words are stored through r0 (post-incremented stores). r2-r12 and r14 are
   used as scratch registers, and r1 itself is overwritten as scratch while the
   last result words are produced. */
#define FAST_SQUARE_ASM_8 \
"ldmia r1!, {r2, r3} \n\t" /* r2 = a0, r3 = a1 (a0..a7 are the input words read through r1) */ \
    "add r1, 16 \n\t" /* skip a2..a5 */ \
    "ldmia r1!, {r5, r6} \n\t" /* r5 = a6, r6 = a7 */ \
    "add r0, 24 \n\t" /* point at result word 6 */ \
    \
    "umull r8, r9, r2, r5 \n\t" /* a0*a6 */ \
    "stmia r0!, {r8} \n\t" /* partial result word 6 */ \
    \
    "umull r12, r10, r2, r6 \n\t" /* a0*a7 */ \
    "adds r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r9} \n\t" /* partial result word 7 */ \
    \
    "umull r8, r9, r3, r6 \n\t" /* a1*a7 */ \
    "adds r10, r10, r8 \n\t" \
    "adc r11, r9, #0 \n\t" \
    "stmia r0!, {r10, r11} \n\t" /* partial result words 8 and 9 */ \
    \
    "sub r0, 40 \n\t" /* back to result word 0 */ \
    "sub r1, 32 \n\t" /* back to a0 */ \
    "ldmia r1!, {r2,r3,r4,r5,r6,r7} \n\t" /* r2..r7 = a0..a5 */ \
    \
    "umull r11, r12, r2, r2 \n\t" /* result word 0: a0*a0 */ \
    "stmia r0!, {r11} \n\t" \
    \
    "mov r9, #0 \n\t" /* result word 1: a0*a1 is added twice */ \
    "umull r10, r11, r2, r3 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r8, r11, #0 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "adds r12, r12, r10 \n\t" \
    "adcs r8, r8, r11 \n\t" \
    "adc r9, r9, #0 \n\t" \
    "stmia r0!, {r12} \n\t" \
    \
    "mov r10, #0 \n\t" /* result word 2 */ \
    "umull r11, r12, r2, r4 \n\t" /* a0*a2 */ \
    "adds r11, r11, r11 \n\t" /* doubled */ \
    "adcs r12, r12, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "adds r8, r8, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "umull r11, r12, r3, r3 \n\t" /* a1*a1 */ \
    "adds r8, r8, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r8} \n\t" \
    \
    "mov r12, #0 \n\t" /* result word 3 */ \
    "umull r8, r11, r2, r5 \n\t" /* a0*a3 */ \
    "mov r14, r11 \n\t" /* keep the high word to detect a wrap of the 64-bit sum */ \
    "umlal r8, r11, r3, r4 \n\t" /* += a1*a2 */ \
    "cmp r14, r11 \n\t" /* old high word above the new one: the accumulate wrapped */ \
    "it hi \n\t" \
    "adchi r12, r12, #0 \n\t" /* count that wrap in the third accumulator word */ \
    "adds r8, r8, r8 \n\t" /* double the cross products */ \
    "adcs r11, r11, r11 \n\t" \
    "adc r12, r12, r12 \n\t" \
    "adds r8, r8, r9 \n\t" /* add the carry words left by the previous result word */ \
    "adcs r11, r11, r10 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "stmia r0!, {r8} \n\t" \
    \
    "mov r10, #0 \n\t" /* result word 4 */ \
    "umull r8, r9, r2, r6 \n\t" /* a0*a4 */ \
    "mov r14, r9 \n\t" \
    "umlal r8, r9, r3, r5 \n\t" /* += a1*a3 */ \
    "cmp r14, r9 \n\t" \
    "it hi \n\t" \
    "adchi r10, r10, #0 \n\t" \
    "adds r8, r8, r8 \n\t" \
    "adcs r9, r9, r9 \n\t" \
    "adc r10, r10, r10 \n\t" \
    "mov r14, r9 \n\t" \
    "umlal r8, r9, r4, r4 \n\t" /* += a2*a2 */ \
    "cmp r14, r9 \n\t" \
    "it hi \n\t" \
    "adchi r10, r10, #0 \n\t" \
    "adds r8, r8, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r8} \n\t" \
    \
    "mov r12, #0 \n\t" /* result word 5 */ \
    "umull r8, r11, r2, r7 \n\t" /* a0*a5 */ \
    "mov r14, r11 \n\t" \
    "umlal r8, r11, r3, r6 \n\t" /* += a1*a4 */ \
    "cmp r14, r11 \n\t" \
    "it hi \n\t" \
    "adchi r12, r12, #0 \n\t" \
    "mov r14, r11 \n\t" \
    "umlal r8, r11, r4, r5 \n\t" /* += a2*a3 */ \
    "cmp r14, r11 \n\t" \
    "it hi \n\t" \
    "adchi r12, r12, #0 \n\t" \
    "adds r8, r8, r8 \n\t" \
    "adcs r11, r11, r11 \n\t" \
    "adc r12, r12, r12 \n\t" \
    "adds r8, r8, r9 \n\t" \
    "adcs r11, r11, r10 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "stmia r0!, {r8} \n\t" \
    \
    "ldmia r1!, {r2} \n\t" /* r2 now holds a6; a0 is not used again */ \
    "mov r10, #0 \n\t" /* result word 6 */ \
    "umull r8, r9, r3, r7 \n\t" /* a1*a5 */ \
    "mov r14, r9 \n\t" \
    "umlal r8, r9, r4, r6 \n\t" /* += a2*a4 */ \
    "cmp r14, r9 \n\t" \
    "it hi \n\t" \
    "adchi r10, r10, #0 \n\t" \
    "ldr r14, [r0] \n\t" /* partial word 6 stored at the start of the macro */ \
    "adds r8, r8, r14 \n\t" \
    "adcs r9, r9, #0 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "adds r8, r8, r8 \n\t" \
    "adcs r9, r9, r9 \n\t" \
    "adc r10, r10, r10 \n\t" \
    "mov r14, r9 \n\t" \
    "umlal r8, r9, r5, r5 \n\t" /* += a3*a3 */ \
    "cmp r14, r9 \n\t" \
    "it hi \n\t" \
    "adchi r10, r10, #0 \n\t" \
    "adds r8, r8, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r8} \n\t" \
    \
    "mov r12, #0 \n\t" /* result word 7 */ \
    "umull r8, r11, r3, r2 \n\t" /* a1*a6 */ \
    "mov r14, r11 \n\t" \
    "umlal r8, r11, r4, r7 \n\t" /* += a2*a5 */ \
    "cmp r14, r11 \n\t" \
    "it hi \n\t" \
    "adchi r12, r12, #0 \n\t" \
    "mov r14, r11 \n\t" \
    "umlal r8, r11, r5, r6 \n\t" /* += a3*a4 */ \
    "cmp r14, r11 \n\t" \
    "it hi \n\t" \
    "adchi r12, r12, #0 \n\t" \
    "ldr r14, [r0] \n\t" /* partial word 7 stored at the start of the macro */ \
    "adds r8, r8, r14 \n\t" \
    "adcs r11, r11, #0 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "adds r8, r8, r8 \n\t" \
    "adcs r11, r11, r11 \n\t" \
    "adc r12, r12, r12 \n\t" \
    "adds r8, r8, r9 \n\t" \
    "adcs r11, r11, r10 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "stmia r0!, {r8} \n\t" \
    \
    "ldmia r1!, {r3} \n\t" /* r3 now holds a7; a1 is not used again */ \
    "mov r10, #0 \n\t" /* result word 8 */ \
    "umull r8, r9, r4, r2 \n\t" /* a2*a6 */ \
    "mov r14, r9 \n\t" \
    "umlal r8, r9, r5, r7 \n\t" /* += a3*a5 */ \
    "cmp r14, r9 \n\t" \
    "it hi \n\t" \
    "adchi r10, r10, #0 \n\t" \
    "ldr r14, [r0] \n\t" /* partial word 8 stored at the start of the macro */ \
    "adds r8, r8, r14 \n\t" \
    "adcs r9, r9, #0 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "adds r8, r8, r8 \n\t" \
    "adcs r9, r9, r9 \n\t" \
    "adc r10, r10, r10 \n\t" \
    "mov r14, r9 \n\t" \
    "umlal r8, r9, r6, r6 \n\t" /* += a4*a4 */ \
    "cmp r14, r9 \n\t" \
    "it hi \n\t" \
    "adchi r10, r10, #0 \n\t" \
    "adds r8, r8, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r8} \n\t" \
    \
    "mov r12, #0 \n\t" /* result word 9 */ \
    "umull r8, r11, r4, r3 \n\t" /* a2*a7 */ \
    "mov r14, r11 \n\t" \
    "umlal r8, r11, r5, r2 \n\t" /* += a3*a6 */ \
    "cmp r14, r11 \n\t" \
    "it hi \n\t" \
    "adchi r12, r12, #0 \n\t" \
    "mov r14, r11 \n\t" \
    "umlal r8, r11, r6, r7 \n\t" /* += a4*a5 */ \
    "cmp r14, r11 \n\t" \
    "it hi \n\t" \
    "adchi r12, r12, #0 \n\t" \
    "ldr r14, [r0] \n\t" /* partial word 9 stored at the start of the macro */ \
    "adds r8, r8, r14 \n\t" \
    "adcs r11, r11, #0 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "adds r8, r8, r8 \n\t" \
    "adcs r11, r11, r11 \n\t" \
    "adc r12, r12, r12 \n\t" \
    "adds r8, r8, r9 \n\t" \
    "adcs r11, r11, r10 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "stmia r0!, {r8} \n\t" \
    \
    "mov r10, #0 \n\t" /* result word 10 */ \
    "umull r8, r9, r5, r3 \n\t" /* a3*a7 */ \
    "mov r14, r9 \n\t" \
    "umlal r8, r9, r6, r2 \n\t" /* += a4*a6 */ \
    "cmp r14, r9 \n\t" \
    "it hi \n\t" \
    "adchi r10, r10, #0 \n\t" \
    "adds r8, r8, r8 \n\t" \
    "adcs r9, r9, r9 \n\t" \
    "adc r10, r10, r10 \n\t" \
    "mov r14, r9 \n\t" \
    "umlal r8, r9, r7, r7 \n\t" /* += a5*a5 */ \
    "cmp r14, r9 \n\t" \
    "it hi \n\t" \
    "adchi r10, r10, #0 \n\t" \
    "adds r8, r8, r11 \n\t" \
    "adcs r9, r9, r12 \n\t" \
    "adc r10, r10, #0 \n\t" \
    "stmia r0!, {r8} \n\t" \
    \
    "mov r12, #0 \n\t" /* result word 11 */ \
    "umull r8, r11, r6, r3 \n\t" /* a4*a7 */ \
    "mov r14, r11 \n\t" \
    "umlal r8, r11, r7, r2 \n\t" /* += a5*a6 */ \
    "cmp r14, r11 \n\t" \
    "it hi \n\t" \
    "adchi r12, r12, #0 \n\t" \
    "adds r8, r8, r8 \n\t" \
    "adcs r11, r11, r11 \n\t" \
    "adc r12, r12, r12 \n\t" \
    "adds r8, r8, r9 \n\t" \
    "adcs r11, r11, r10 \n\t" \
    "adc r12, r12, #0 \n\t" \
    "stmia r0!, {r8} \n\t" \
    \
    "mov r8, #0 \n\t" /* result word 12; r1 is reused as a scratch register from here on */ \
    "umull r1, r10, r7, r3 \n\t" /* a5*a7 */ \
    "adds r1, r1, r1 \n\t" /* doubled */ \
    "adcs r10, r10, r10 \n\t" \
    "adc r8, r8, #0 \n\t" \
    "adds r11, r11, r1 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r8, r8, #0 \n\t" \
    "umull r1, r10, r2, r2 \n\t" /* a6*a6 */ \
    "adds r11, r11, r1 \n\t" \
    "adcs r12, r12, r10 \n\t" \
    "adc r8, r8, #0 \n\t" \
    "stmia r0!, {r11} \n\t" \
    \
    "mov r11, #0 \n\t" /* result word 13 */ \
    "umull r1, r10, r2, r3 \n\t" /* a6*a7 */ \
    "adds r1, r1, r1 \n\t" /* doubled */ \
    "adcs r10, r10, r10 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "adds r12, r12, r1 \n\t" \
    "adcs r8, r8, r10 \n\t" \
    "adc r11, r11, #0 \n\t" \
    "stmia r0!, {r12} \n\t" \
    \
    "umull r1, r10, r3, r3 \n\t" /* result words 14 and 15: a7*a7 plus the remaining carry */ \
    "adds r8, r8, r1 \n\t" \
    "adcs r11, r11, r10 \n\t" \
    "stmia r0!, {r8, r11} \n\t"

#endif /* _UECC_ASM_ARM_MULT_SQUARE_H_ */

================================================
FILE: u2f/curve-specific.h
================================================
/* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */

#ifndef _UECC_CURVE_SPECIFIC_H_
#define _UECC_CURVE_SPECIFIC_H_

/* Size in bytes of one field element for each supported curve. */
#define num_bytes_secp160r1 20
#define num_bytes_secp192r1 24
#define num_bytes_secp224r1 28
#define num_bytes_secp256r1 32
#define num_bytes_secp256k1 32

/* The word counts and the BYTES_TO_WORDS_* helpers depend on uECC_WORD_SIZE
   (1, 4 or 8 bytes per word). BYTES_TO_WORDS_* take hexadecimal byte digits
   without the 0x prefix, first argument least significant, and paste them into
   word-sized constants for the array initialisers further down. */
#if (uECC_WORD_SIZE == 1)

#define num_words_secp160r1 20
#define num_words_secp192r1 24
#define num_words_secp224r1 28
#define num_words_secp256r1 32
#define num_words_secp256k1 32

/* One word per byte: the arguments are emitted in the order given. */
#define BYTES_TO_WORDS_8(a, b, c, d, e, f, g, h) \
    0x##a, 0x##b, 0x##c, 0x##d, 0x##e, 0x##f, 0x##g, 0x##h
#define BYTES_TO_WORDS_4(a, b, c, d) 0x##a, 0x##b, 0x##c, 0x##d

#elif (uECC_WORD_SIZE == 4)

#define num_words_secp160r1 5
#define num_words_secp192r1 6
#define num_words_secp224r1 7
#define num_words_secp256r1 8
#define num_words_secp256k1 8

/* Four bytes per word: each group of four is pasted with its last byte as the
   most significant hex digits. */
#define BYTES_TO_WORDS_8(a, b, c, d, e, f, g, h) 0x##d##c##b##a, 0x##h##g##f##e
#define BYTES_TO_WORDS_4(a, b, c, d) 0x##d##c##b##a

#elif (uECC_WORD_SIZE == 8)

/* secp160r1 and secp224r1 do not fill a whole number of 8-byte words, so their
   word counts are rounded up. */
#define num_words_secp160r1 3
#define num_words_secp192r1 3
#define num_words_secp224r1 4
#define num_words_secp256r1 4
#define num_words_secp256k1 4

/* Eight bytes per word; a 4-byte group still yields one (ull-suffixed) word. */
#define BYTES_TO_WORDS_8(a, b, c, d, e, f, g, h) 0x##h##g##f##e##d##c##b##a##ull
#define BYTES_TO_WORDS_4(a, b, c, d) 0x##d##c##b##a##ull

#endif /* uECC_WORD_SIZE */

#if uECC_SUPPORTS_secp160r1 || uECC_SUPPORTS_secp192r1 || \
    uECC_SUPPORTS_secp224r1 || uECC_SUPPORTS_secp256r1
/* Doubles the point held in (X1, Y1, Z1) in place; all arithmetic is done
   modulo curve->p on curve->num_words words.

   The function returns immediately, leaving its arguments untouched, when Z1
   is zero. The step comments use t1, t2, t3 for the X1, Y1, Z1 buffers and
   t4, t5 for the two local scratch values.

   NOTE(review): the slope term is built as 3*(x1^2 - z1^4), which presumably
   relies on the curve coefficient a being -3 (x_side_default in this file also
   hard-codes 3) - confirm before using this with other curves. */
static void double_jacobian_default(uECC_word_t * X1,
                                    uECC_word_t * Y1,
                                    uECC_word_t * Z1,
                                    uECC_Curve curve) {
    /* t1 = X, t2 = Y, t3 = Z */
    uECC_word_t
t4[uECC_MAX_WORDS];
    uECC_word_t t5[uECC_MAX_WORDS];
    wordcount_t num_words = curve->num_words;

    /* Nothing to do when Z1 is zero; the inputs are left unchanged. */
    if (uECC_vli_isZero(Z1, num_words)) {
        return;
    }

    uECC_vli_modSquare_fast(t4, Y1, curve);   /* t4 = y1^2 */
    uECC_vli_modMult_fast(t5, X1, t4, curve); /* t5 = x1*y1^2 = A */
    uECC_vli_modSquare_fast(t4, t4, curve);   /* t4 = y1^4 */
    uECC_vli_modMult_fast(Y1, Y1, Z1, curve); /* t2 = y1*z1 = z3 (kept in the Y1 buffer until the end) */
    uECC_vli_modSquare_fast(Z1, Z1, curve);   /* t3 = z1^2 */

    uECC_vli_modAdd(X1, X1, Z1, curve->p, num_words); /* t1 = x1 + z1^2 */
    uECC_vli_modAdd(Z1, Z1, Z1, curve->p, num_words); /* t3 = 2*z1^2 */
    uECC_vli_modSub(Z1, X1, Z1, curve->p, num_words); /* t3 = x1 - z1^2 */
    uECC_vli_modMult_fast(X1, X1, Z1, curve);                /* t1 = x1^2 - z1^4 */

    uECC_vli_modAdd(Z1, X1, X1, curve->p, num_words); /* t3 = 2*(x1^2 - z1^4) */
    uECC_vli_modAdd(X1, X1, Z1, curve->p, num_words); /* t1 = 3*(x1^2 - z1^4) */
    /* Halve t1 modulo p. An odd value is first made even by adding p; the bit
       carried out of that addition is put back as the top bit after the right
       shift. An even value is simply shifted. */
    if (uECC_vli_testBit(X1, 0)) {
        uECC_word_t l_carry = uECC_vli_add(X1, X1, curve->p, num_words);
        uECC_vli_rshift1(X1, num_words);
        X1[num_words - 1] |= l_carry << (uECC_WORD_BITS - 1);
    } else {
        uECC_vli_rshift1(X1, num_words);
    }
    /* t1 = 3/2*(x1^2 - z1^4) = B */

    uECC_vli_modSquare_fast(Z1, X1, curve);                  /* t3 = B^2 */
    uECC_vli_modSub(Z1, Z1, t5, curve->p, num_words); /* t3 = B^2 - A */
    uECC_vli_modSub(Z1, Z1, t5, curve->p, num_words); /* t3 = B^2 - 2A = x3 */
    uECC_vli_modSub(t5, t5, Z1, curve->p, num_words); /* t5 = A - x3 */
    uECC_vli_modMult_fast(X1, X1, t5, curve);                /* t1 = B * (A - x3) */
    uECC_vli_modSub(t4, X1, t4, curve->p, num_words); /* t4 = B * (A - x3) - y1^4 = y3 */

    /* Move the results into their final buffers: x3 is in Z1, z3 is in Y1 and
       y3 is in t4. The order of these copies matters because each one
       overwrites the source of none of the later ones. */
    uECC_vli_set(X1, Z1, num_words);
    uECC_vli_set(Z1, Y1, num_words);
    uECC_vli_set(Y1, t4, num_words);
}

/* Computes result = x^3 + ax + b. result must not overlap x.
*/
/* Evaluates the right-hand side of the curve equation for a = -3:
   result = x^3 - 3x + b (mod curve->p), using curve->b and curve->num_words.
   x is read again after result has been written, which is why the two buffers
   must not overlap. */
static void x_side_default(uECC_word_t *result, const uECC_word_t *x, uECC_Curve curve) {
    uECC_word_t _3[uECC_MAX_WORDS] = {3}; /* -a = 3; the remaining words are zero-initialised */
    wordcount_t num_words = curve->num_words;

    uECC_vli_modSquare_fast(result, x, curve);                             /* r = x^2 */
    uECC_vli_modSub(result, result, _3, curve->p, num_words);       /* r = x^2 - 3 */
    uECC_vli_modMult_fast(result, result, x, curve);                       /* r = x^3 - 3x */
    uECC_vli_modAdd(result, result, curve->b, curve->p, num_words); /* r = x^3 - 3x + b */
}
#endif /* uECC_SUPPORTS_secp... */

#if uECC_SUPPORT_COMPRESSED_POINT
#if uECC_SUPPORTS_secp160r1 || uECC_SUPPORTS_secp192r1 || \
    uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1
/* Compute a = sqrt(a) (mod curve_p).
   The value in a is replaced by a^((p + 1) / 4) mod p, computed with a
   left-to-right square-and-multiply loop. No check is made here that the
   input actually has a square root. */
static void mod_sqrt_default(uECC_word_t *a, uECC_Curve curve) {
    bitcount_t i;
    uECC_word_t p1[uECC_MAX_WORDS] = {1};       /* becomes p + 1, the source of the exponent bits */
    uECC_word_t l_result[uECC_MAX_WORDS] = {1}; /* running power, starts at 1 */
    wordcount_t num_words = curve->num_words;

    /* When curve->p == 3 (mod 4), we can compute
       sqrt(a) = a^((curve->p + 1) / 4) (mod curve->p). */
    uECC_vli_add(p1, curve->p, p1, num_words); /* p1 = curve_p + 1 */
    /* Bits 1 and 0 of p1 are skipped (the loop stops at i > 1), which is the
       division of the exponent by 4. */
    for (i = uECC_vli_numBits(p1, num_words) - 1; i > 1; --i) {
        uECC_vli_modSquare_fast(l_result, l_result, curve);
        if (uECC_vli_testBit(p1, i)) {
            uECC_vli_modMult_fast(l_result, l_result, a, curve);
        }
    }
    uECC_vli_set(a, l_result, num_words);
}
#endif /* uECC_SUPPORTS_secp...
*/
#endif /* uECC_SUPPORT_COMPRESSED_POINT */

#if uECC_SUPPORTS_secp160r1

#if (uECC_OPTIMIZATION_LEVEL > 0)
static void vli_mmod_fast_secp160r1(uECC_word_t *result, uECC_word_t *product);
#endif

/* Parameter table for secp160r1. Every array is written least-significant byte
   first through BYTES_TO_WORDS_*.
   NOTE(review): struct uECC_Curve_t is declared elsewhere; the members are
   presumably word count, byte count, bit length of n, then p, n, the base
   point (x followed by y) and b, followed by the per-curve function
   pointers - confirm against the struct declaration. */
static const struct uECC_Curve_t curve_secp160r1 = {
    num_words_secp160r1,
    num_bytes_secp160r1,
    161, /* num_n_bits */
    { BYTES_TO_WORDS_8(FF, FF, FF, 7F, FF, FF, FF, FF),
        BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF),
        BYTES_TO_WORDS_4(FF, FF, FF, FF) },
    { BYTES_TO_WORDS_8(57, 22, 75, CA, D3, AE, 27, F9),
        BYTES_TO_WORDS_8(C8, F4, 01, 00, 00, 00, 00, 00),
        BYTES_TO_WORDS_8(00, 00, 00, 00, 01, 00, 00, 00) },
    { BYTES_TO_WORDS_8(82, FC, CB, 13, B9, 8B, C3, 68),
        BYTES_TO_WORDS_8(89, 69, 64, 46, 28, 73, F5, 8E),
        BYTES_TO_WORDS_4(68, B5, 96, 4A),

        BYTES_TO_WORDS_8(32, FB, C5, 7A, 37, 51, 23, 04),
        BYTES_TO_WORDS_8(12, C9, DC, 59, 7D, 94, 68, 31),
        BYTES_TO_WORDS_4(55, 28, A6, 23) },
    { BYTES_TO_WORDS_8(45, FA, 65, C5, AD, D4, D4, 81),
        BYTES_TO_WORDS_8(9F, F8, AC, 65, 8B, 7A, BD, 54),
        BYTES_TO_WORDS_4(FC, BE, 97, 1C) },
    &double_jacobian_default,
#if uECC_SUPPORT_COMPRESSED_POINT
    &mod_sqrt_default,
#endif
    &x_side_default,
#if (uECC_OPTIMIZATION_LEVEL > 0)
    &vli_mmod_fast_secp160r1
#endif
};

/* Returns a pointer to the static secp160r1 parameter table above. */
uECC_Curve uECC_secp160r1(void) { return &curve_secp160r1; }

#if (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp160r1)
/* Computes result = product % curve_p
    see http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf page 354

    Note that this only works if log2(omega) < log2(p) / 2 */
static void omega_mult_secp160r1(uECC_word_t *result, const uECC_word_t *right);
#if uECC_WORD_SIZE == 8
/* 8-byte-word variant. With 3 words per element the 160-bit boundary lies in
   the middle of word num_words - 1, so the upper part of the product starts in
   the top 32 bits of that word. omega_mult_secp160r1 is therefore handed
   product + num_words - 1 and realigns by 32 bits itself, and the shared word
   is masked to its low 32 bits before each addition.
   product is used as scratch space and is overwritten. */
static void vli_mmod_fast_secp160r1(uECC_word_t *result, uECC_word_t *product) {
    uECC_word_t tmp[2 * num_words_secp160r1];
    uECC_word_t copy;

    uECC_vli_clear(tmp, num_words_secp160r1);
    uECC_vli_clear(tmp + num_words_secp160r1, num_words_secp160r1);

    omega_mult_secp160r1(tmp, product + num_words_secp160r1 - 1); /* (Rq, q) = q * c */

    product[num_words_secp160r1 - 1] &= 0xffffffff; /* keep only the low 160 bits of the product */
    copy = tmp[num_words_secp160r1 - 1];            /* saved unmasked for the second multiplication */
    tmp[num_words_secp160r1 - 1] &= 0xffffffff;
    uECC_vli_add(result, product, tmp, num_words_secp160r1); /* (C, r) = r + q */
    uECC_vli_clear(product, num_words_secp160r1);
    tmp[num_words_secp160r1 - 1] = copy;
    omega_mult_secp160r1(product, tmp + num_words_secp160r1 - 1); /* Rq*c */
    uECC_vli_add(result, result, product, num_words_secp160r1); /* (C1, r) = r + Rq*c */

    /* NOTE(review): the comparison is strictly greater than, so a value exactly
       equal to p would be returned unreduced - confirm this cannot occur for
       the inputs callers pass. */
    while (uECC_vli_cmp_unsafe(result, curve_secp160r1.p, num_words_secp160r1) > 0) {
        uECC_vli_sub(result, result, curve_secp160r1.p, num_words_secp160r1);
    }
}

/* Writes right-shifted-by-32-bits times (2^31 + 1) into result.
   Reads num_words + 1 words from right and writes num_words + 1 words to
   result (the last one is the final carry). */
static void omega_mult_secp160r1(uint64_t *result, const uint64_t *right) {
    uint32_t carry;
    unsigned i;

    /* Multiply by (2^31 + 1). */
    carry = 0;
    for (i = 0; i < num_words_secp160r1; ++i) {
        /* Realign: take the top half of this word and the bottom half of the next. */
        uint64_t tmp = (right[i] >> 32) | (right[i + 1] << 32);
        result[i] = (tmp << 31) + tmp + carry;
        /* Carry out = bits shifted off the top by << 31, plus one when the
           64-bit addition wrapped. */
        carry = (tmp >> 33) + (result[i] < tmp || (carry && result[i] == tmp));
    }
    result[i] = carry;
}
#else
/* Variant for the other word sizes. The upper half of the product starts on a
   word boundary here, so it is passed to omega_mult_secp160r1 directly.
   product is used as scratch space and is overwritten. */
static void vli_mmod_fast_secp160r1(uECC_word_t *result, uECC_word_t *product) {
    uECC_word_t tmp[2 * num_words_secp160r1];
    uECC_word_t carry;

    uECC_vli_clear(tmp, num_words_secp160r1);
    uECC_vli_clear(tmp + num_words_secp160r1, num_words_secp160r1);

    omega_mult_secp160r1(tmp, product + num_words_secp160r1); /* (Rq, q) = q * c */

    carry = uECC_vli_add(result, product, tmp, num_words_secp160r1); /* (C, r) = r + q */
    uECC_vli_clear(product, num_words_secp160r1);
    omega_mult_secp160r1(product, tmp + num_words_secp160r1); /* Rq*c */
    carry += uECC_vli_add(result, result, product, num_words_secp160r1); /* (C1, r) = r + Rq*c */

    /* One subtraction of p for every carry produced by the two additions. */
    while (carry > 0) {
        --carry;
        uECC_vli_sub(result, result, curve_secp160r1.p, num_words_secp160r1);
    }
    /* NOTE(review): strictly greater than, so a value equal to p is left as
       is - confirm this cannot occur. */
    if (uECC_vli_cmp_unsafe(result, curve_secp160r1.p, num_words_secp160r1) > 0) {
        uECC_vli_sub(result, result, curve_secp160r1.p, num_words_secp160r1);
    }
}
#endif

#if uECC_WORD_SIZE == 1
/* 1-byte-word variant: adds right * (2^31 + 1) into result. The callers in
   this file clear result beforehand. */
static void omega_mult_secp160r1(uint8_t *result, const uint8_t *right) {
    uint8_t carry;
    uint8_t i;

    /* Multiply by (2^31 + 1).
*/
    /* Build right * 2^31 in place: copy right four bytes up (times 2^32), shift
       that copy right by one bit, then put the bit that fell off into the top
       of byte 3. */
    uECC_vli_set(result + 4, right, num_words_secp160r1); /* 2^32 */
    uECC_vli_rshift1(result + 4, num_words_secp160r1); /* 2^31 */
    result[3] = right[0] << 7; /* get last bit from shift */

    carry = uECC_vli_add(result, result, right, num_words_secp160r1); /* 2^31 + 1 */
    /* Ripple the carry of that addition through the bytes above it. */
    for (i = num_words_secp160r1; carry; ++i) {
        uint16_t sum = (uint16_t)result[i] + carry;
        result[i] = (uint8_t)sum;
        carry = sum >> 8;
    }
}
#elif uECC_WORD_SIZE == 4
/* 4-byte-word variant of the same computation: result receives
   right * (2^31 + 1). The callers in this file clear result beforehand. */
static void omega_mult_secp160r1(uint32_t *result, const uint32_t *right) {
    uint32_t carry;
    unsigned i;

    /* Multiply by (2^31 + 1). */
    /* right * 2^31 is formed as (right copied one word up) shifted right by one
       bit, with the bit that fell off restored in the top of word 0. */
    uECC_vli_set(result + 1, right, num_words_secp160r1); /* 2^32 */
    uECC_vli_rshift1(result + 1, num_words_secp160r1); /* 2^31 */
    result[0] = right[0] << 31; /* get last bit from shift */

    carry = uECC_vli_add(result, result, right, num_words_secp160r1); /* 2^31 + 1 */
    /* Ripple the carry of that addition through the words above it. */
    for (i = num_words_secp160r1; carry; ++i) {
        uint64_t sum = (uint64_t)result[i] + carry;
        result[i] = (uint32_t)sum;
        carry = sum >> 32;
    }
}
#endif /* uECC_WORD_SIZE */

#endif /* (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp160r1) */

#endif /* uECC_SUPPORTS_secp160r1 */

#if uECC_SUPPORTS_secp192r1

#if (uECC_OPTIMIZATION_LEVEL > 0)
static void vli_mmod_fast_secp192r1(uECC_word_t *result, uECC_word_t *product);
#endif

/* Parameter table for secp192r1, arrays written least-significant byte first.
   NOTE(review): the member order follows struct uECC_Curve_t, which is
   declared elsewhere - presumably sizes, bit length of n, p, n, base point
   (x then y), b, then the function pointers; confirm against the struct. */
static const struct uECC_Curve_t curve_secp192r1 = {
    num_words_secp192r1,
    num_bytes_secp192r1,
    192, /* num_n_bits */
    { BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF),
        BYTES_TO_WORDS_8(FE, FF, FF, FF, FF, FF, FF, FF),
        BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF) },
    { BYTES_TO_WORDS_8(31, 28, D2, B4, B1, C9, 6B, 14),
        BYTES_TO_WORDS_8(36, F8, DE, 99, FF, FF, FF, FF),
        BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF) },
    { BYTES_TO_WORDS_8(12, 10, FF, 82, FD, 0A, FF, F4),
        BYTES_TO_WORDS_8(00, 88, A1, 43, EB, 20, BF, 7C),
        BYTES_TO_WORDS_8(F6, 90, 30, B0, 0E, A8, 8D, 18),

        BYTES_TO_WORDS_8(11, 48, 79, 1E, A1, 77, F9, 73),
        BYTES_TO_WORDS_8(D5, CD, 24, 6B, ED, 11, 10, 63),
        BYTES_TO_WORDS_8(78, DA, C8, FF, 95, 2B, 19, 07) },
    { BYTES_TO_WORDS_8(B1, B9, 46, C1, EC, DE, B8, FE),
        BYTES_TO_WORDS_8(49, 30, 24, 72, AB, E9, A7, 0F),
        BYTES_TO_WORDS_8(E7, 80, 9C, E5, 19, 05, 21, 64) },
    &double_jacobian_default,
#if uECC_SUPPORT_COMPRESSED_POINT
    &mod_sqrt_default,
#endif
    &x_side_default,
#if (uECC_OPTIMIZATION_LEVEL > 0)
    &vli_mmod_fast_secp192r1
#endif
};

/* Returns a pointer to the static secp192r1 parameter table above. */
uECC_Curve uECC_secp192r1(void) { return &curve_secp192r1; }

#if (uECC_OPTIMIZATION_LEVEL > 0)
/* Computes result = product % curve_p.
   See algorithm 5 and 6 from http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf */
#if uECC_WORD_SIZE == 1
/* 1-byte-word variant. result starts as the low 24 bytes of product; three
   24-byte terms assembled from the high 24 bytes are then added, and p is
   subtracted until no carry remains and result is below p. */
static void vli_mmod_fast_secp192r1(uint8_t *result, uint8_t *product) {
    uint8_t tmp[num_words_secp192r1];
    uint8_t carry;

    uECC_vli_set(result, product, num_words_secp192r1);

    /* Term 1: the high half of the product, bytes 24..47. */
    uECC_vli_set(tmp, &product[24], num_words_secp192r1);
    carry = uECC_vli_add(result, result, tmp, num_words_secp192r1);

    /* Term 2: bytes 24..39 moved up by 8 bytes, low 8 bytes zero. */
    tmp[0] = tmp[1] = tmp[2] = tmp[3] = tmp[4] = tmp[5] = tmp[6] = tmp[7] = 0;
    tmp[8] = product[24]; tmp[9] = product[25]; tmp[10] = product[26]; tmp[11] = product[27];
    tmp[12] = product[28]; tmp[13] = product[29]; tmp[14] = product[30]; tmp[15] = product[31];
    tmp[16] = product[32]; tmp[17] = product[33]; tmp[18] = product[34]; tmp[19] = product[35];
    tmp[20] = product[36]; tmp[21] = product[37]; tmp[22] = product[38]; tmp[23] = product[39];
    carry += uECC_vli_add(result, result, tmp, num_words_secp192r1);

    /* Term 3: bytes 40..47 placed at offsets 0 and 8, top 8 bytes zero. */
    tmp[0] = tmp[8] = product[40];
    tmp[1] = tmp[9] = product[41];
    tmp[2] = tmp[10] = product[42];
    tmp[3] = tmp[11] = product[43];
    tmp[4] = tmp[12] = product[44];
    tmp[5] = tmp[13] = product[45];
    tmp[6] = tmp[14] = product[46];
    tmp[7] = tmp[15] = product[47];
    tmp[16] = tmp[17] = tmp[18] = tmp[19] = tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0;
    carry += uECC_vli_add(result, result, tmp, num_words_secp192r1);

    /* Each subtraction that borrows cancels one accumulated carry. The loop
       continues while a carry is outstanding or the comparison of p against
       result does not return 1. */
    while (carry || uECC_vli_cmp_unsafe(curve_secp192r1.p, result, num_words_secp192r1) != 1) {
        carry -= uECC_vli_sub(result, result, curve_secp192r1.p, num_words_secp192r1);
    }
}
#elif uECC_WORD_SIZE == 4
static void
vli_mmod_fast_secp192r1(uint32_t *result, uint32_t *product) {
    /* 4-byte-word variant of the secp192r1 reduction: result starts as the low
       six words of product, three terms built from the high six words are
       added, then p is subtracted until the value is in range. */
    uint32_t tmp[num_words_secp192r1];
    int carry;

    uECC_vli_set(result, product, num_words_secp192r1);

    /* Term 1: the high half, words 6..11. */
    uECC_vli_set(tmp, &product[6], num_words_secp192r1);
    carry = uECC_vli_add(result, result, tmp, num_words_secp192r1);

    /* Term 2: words 6..9 moved up by two words, low two words zero. */
    tmp[0] = tmp[1] = 0;
    tmp[2] = product[6];
    tmp[3] = product[7];
    tmp[4] = product[8];
    tmp[5] = product[9];
    carry += uECC_vli_add(result, result, tmp, num_words_secp192r1);

    /* Term 3: words 10..11 placed at offsets 0 and 2, top two words zero. */
    tmp[0] = tmp[2] = product[10];
    tmp[1] = tmp[3] = product[11];
    tmp[4] = tmp[5] = 0;
    carry += uECC_vli_add(result, result, tmp, num_words_secp192r1);

    /* Each subtraction that borrows cancels one accumulated carry. */
    while (carry || uECC_vli_cmp_unsafe(curve_secp192r1.p, result, num_words_secp192r1) != 1) {
        carry -= uECC_vli_sub(result, result, curve_secp192r1.p, num_words_secp192r1);
    }
}
#else
/* 8-byte-word variant of the same reduction, working on three-word halves. */
static void vli_mmod_fast_secp192r1(uint64_t *result, uint64_t *product) {
    uint64_t tmp[num_words_secp192r1];
    int carry;

    uECC_vli_set(result, product, num_words_secp192r1);

    /* Term 1: the high half, words 3..5. */
    uECC_vli_set(tmp, &product[3], num_words_secp192r1);
    carry = (int)uECC_vli_add(result, result, tmp, num_words_secp192r1);

    /* Term 2: words 3..4 moved up by one word, low word zero. */
    tmp[0] = 0;
    tmp[1] = product[3];
    tmp[2] = product[4];
    carry += uECC_vli_add(result, result, tmp, num_words_secp192r1);

    /* Term 3: word 5 placed at offsets 0 and 1, top word zero. */
    tmp[0] = tmp[1] = product[5];
    tmp[2] = 0;
    carry += uECC_vli_add(result, result, tmp, num_words_secp192r1);

    /* Each subtraction that borrows cancels one accumulated carry. */
    while (carry || uECC_vli_cmp_unsafe(curve_secp192r1.p, result, num_words_secp192r1) != 1) {
        carry -= uECC_vli_sub(result, result, curve_secp192r1.p, num_words_secp192r1);
    }
}
#endif /* uECC_WORD_SIZE */
#endif /* (uECC_OPTIMIZATION_LEVEL > 0) */

#endif /* uECC_SUPPORTS_secp192r1 */

#if uECC_SUPPORTS_secp224r1

#if uECC_SUPPORT_COMPRESSED_POINT
static void mod_sqrt_secp224r1(uECC_word_t *a, uECC_Curve curve);
#endif
#if (uECC_OPTIMIZATION_LEVEL > 0)
static void vli_mmod_fast_secp224r1(uECC_word_t *result, uECC_word_t *product);
#endif

/* Parameter table for secp224r1, arrays written least-significant byte first.
   Unlike the two tables before it, this one installs a curve-specific square
   root routine (mod_sqrt_secp224r1).
   NOTE(review): the member order follows struct uECC_Curve_t, which is
   declared elsewhere - presumably sizes, bit length of n, p, n, base point
   (x then y), b, then the function pointers; confirm against the struct. */
static const struct uECC_Curve_t curve_secp224r1 = {
    num_words_secp224r1,
    num_bytes_secp224r1,
    224, /* num_n_bits */
    { BYTES_TO_WORDS_8(01, 00, 00, 00, 00, 00, 00, 00),
        BYTES_TO_WORDS_8(00, 00, 00, 00, FF, FF, FF, FF),
        BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF),
        BYTES_TO_WORDS_4(FF, FF, FF, FF) },
    { BYTES_TO_WORDS_8(3D, 2A, 5C, 5C, 45, 29, DD, 13),
        BYTES_TO_WORDS_8(3E, F0, B8, E0, A2, 16, FF, FF),
        BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF),
        BYTES_TO_WORDS_4(FF, FF, FF, FF) },
    { BYTES_TO_WORDS_8(21, 1D, 5C, 11, D6, 80, 32, 34),
        BYTES_TO_WORDS_8(22, 11, C2, 56, D3, C1, 03, 4A),
        BYTES_TO_WORDS_8(B9, 90, 13, 32, 7F, BF, B4, 6B),
        BYTES_TO_WORDS_4(BD, 0C, 0E, B7),

        BYTES_TO_WORDS_8(34, 7E, 00, 85, 99, 81, D5, 44),
        BYTES_TO_WORDS_8(64, 47, 07, 5A, A0, 75, 43, CD),
        BYTES_TO_WORDS_8(E6, DF, 22, 4C, FB, 23, F7, B5),
        BYTES_TO_WORDS_4(88, 63, 37, BD) },
    { BYTES_TO_WORDS_8(B4, FF, 55, 23, 43, 39, 0B, 27),
        BYTES_TO_WORDS_8(BA, D8, BF, D7, B7, B0, 44, 50),
        BYTES_TO_WORDS_8(56, 32, 41, F5, AB, B3, 04, 0C),
        BYTES_TO_WORDS_4(85, 0A, 05, B4) },
    &double_jacobian_default,
#if uECC_SUPPORT_COMPRESSED_POINT
    &mod_sqrt_secp224r1,
#endif
    &x_side_default,
#if (uECC_OPTIMIZATION_LEVEL > 0)
    &vli_mmod_fast_secp224r1
#endif
};

/* Returns a pointer to the static secp224r1 parameter table above. */
uECC_Curve uECC_secp224r1(void) { return &curve_secp224r1; }

#if uECC_SUPPORT_COMPRESSED_POINT
/* Routine 3.2.4 RS;  from http://www.nsa.gov/ia/_files/nist-routines.pdf */
/* One RS step, modulo the secp224r1 prime:
     d1 = d0^2 + f0,  e1 = 2*d0*e0,  f1 = 4*d0^2*f0.
   The statements are ordered so that each input is read before the output
   sharing its position is written; mod_sqrt_secp224r1_rss relies on this when
   it passes the same buffers as inputs and outputs. */
static void mod_sqrt_secp224r1_rs(uECC_word_t *d1,
                                  uECC_word_t *e1,
                                  uECC_word_t *f1,
                                  const uECC_word_t *d0,
                                  const uECC_word_t *e0,
                                  const uECC_word_t *f0) {
    uECC_word_t t[num_words_secp224r1];

    uECC_vli_modSquare_fast(t, d0, &curve_secp224r1);                    /* t <-- d0 ^ 2 */
    uECC_vli_modMult_fast(e1, d0, e0, &curve_secp224r1);                 /* e1 <-- d0 * e0 */
    uECC_vli_modAdd(d1, t, f0, curve_secp224r1.p, num_words_secp224r1);  /* d1 <-- t  + f0 */
    uECC_vli_modAdd(e1, e1, e1, curve_secp224r1.p, num_words_secp224r1); /* e1 <-- e1 + e1 */
    uECC_vli_modMult_fast(f1, t, f0, &curve_secp224r1);                  /* f1 <-- t  * f0 */
    uECC_vli_modAdd(f1, f1, f1, curve_secp224r1.p, num_words_secp224r1); /* f1 <-- f1 + f1 */
    uECC_vli_modAdd(f1, f1, f1, curve_secp224r1.p, num_words_secp224r1); /* f1 <-- f1 + f1
*/ } /* Routine 3.2.5 RSS; from http://www.nsa.gov/ia/_files/nist-routines.pdf */ static void mod_sqrt_secp224r1_rss(uECC_word_t *d1, uECC_word_t *e1, uECC_word_t *f1, const uECC_word_t *d0, const uECC_word_t *e0, const uECC_word_t *f0, const bitcount_t j) { bitcount_t i; uECC_vli_set(d1, d0, num_words_secp224r1); /* d1 <-- d0 */ uECC_vli_set(e1, e0, num_words_secp224r1); /* e1 <-- e0 */ uECC_vli_set(f1, f0, num_words_secp224r1); /* f1 <-- f0 */ for (i = 1; i <= j; i++) { mod_sqrt_secp224r1_rs(d1, e1, f1, d1, e1, f1); /* RS (d1,e1,f1,d1,e1,f1) */ } } /* Routine 3.2.6 RM; from http://www.nsa.gov/ia/_files/nist-routines.pdf */ static void mod_sqrt_secp224r1_rm(uECC_word_t *d2, uECC_word_t *e2, uECC_word_t *f2, const uECC_word_t *c, const uECC_word_t *d0, const uECC_word_t *e0, const uECC_word_t *d1, const uECC_word_t *e1) { uECC_word_t t1[num_words_secp224r1]; uECC_word_t t2[num_words_secp224r1]; uECC_vli_modMult_fast(t1, e0, e1, &curve_secp224r1); /* t1 <-- e0 * e1 */ uECC_vli_modMult_fast(t1, t1, c, &curve_secp224r1); /* t1 <-- t1 * c */ /* t1 <-- p - t1 */ uECC_vli_modSub(t1, curve_secp224r1.p, t1, curve_secp224r1.p, num_words_secp224r1); uECC_vli_modMult_fast(t2, d0, d1, &curve_secp224r1); /* t2 <-- d0 * d1 */ uECC_vli_modAdd(t2, t2, t1, curve_secp224r1.p, num_words_secp224r1); /* t2 <-- t2 + t1 */ uECC_vli_modMult_fast(t1, d0, e1, &curve_secp224r1); /* t1 <-- d0 * e1 */ uECC_vli_modMult_fast(e2, d1, e0, &curve_secp224r1); /* e2 <-- d1 * e0 */ uECC_vli_modAdd(e2, e2, t1, curve_secp224r1.p, num_words_secp224r1); /* e2 <-- e2 + t1 */ uECC_vli_modSquare_fast(f2, e2, &curve_secp224r1); /* f2 <-- e2^2 */ uECC_vli_modMult_fast(f2, f2, c, &curve_secp224r1); /* f2 <-- f2 * c */ /* f2 <-- p - f2 */ uECC_vli_modSub(f2, curve_secp224r1.p, f2, curve_secp224r1.p, num_words_secp224r1); uECC_vli_set(d2, t2, num_words_secp224r1); /* d2 <-- t2 */ } /* Routine 3.2.7 RP; from http://www.nsa.gov/ia/_files/nist-routines.pdf */ static void mod_sqrt_secp224r1_rp(uECC_word_t *d1, 
uECC_word_t *e1, uECC_word_t *f1, const uECC_word_t *c, const uECC_word_t *r) { wordcount_t i; wordcount_t pow2i = 1; uECC_word_t d0[num_words_secp224r1]; uECC_word_t e0[num_words_secp224r1] = {1}; /* e0 <-- 1 */ uECC_word_t f0[num_words_secp224r1]; uECC_vli_set(d0, r, num_words_secp224r1); /* d0 <-- r */ /* f0 <-- p - c */ uECC_vli_modSub(f0, curve_secp224r1.p, c, curve_secp224r1.p, num_words_secp224r1); for (i = 0; i <= 6; i++) { mod_sqrt_secp224r1_rss(d1, e1, f1, d0, e0, f0, pow2i); /* RSS (d1,e1,f1,d0,e0,f0,2^i) */ mod_sqrt_secp224r1_rm(d1, e1, f1, c, d1, e1, d0, e0); /* RM (d1,e1,f1,c,d1,e1,d0,e0) */ uECC_vli_set(d0, d1, num_words_secp224r1); /* d0 <-- d1 */ uECC_vli_set(e0, e1, num_words_secp224r1); /* e0 <-- e1 */ uECC_vli_set(f0, f1, num_words_secp224r1); /* f0 <-- f1 */ pow2i *= 2; } } /* Compute a = sqrt(a) (mod curve_p). */ /* Routine 3.2.8 mp_mod_sqrt_224; from http://www.nsa.gov/ia/_files/nist-routines.pdf */ static void mod_sqrt_secp224r1(uECC_word_t *a, uECC_Curve curve) { bitcount_t i; uECC_word_t e1[num_words_secp224r1]; uECC_word_t f1[num_words_secp224r1]; uECC_word_t d0[num_words_secp224r1]; uECC_word_t e0[num_words_secp224r1]; uECC_word_t f0[num_words_secp224r1]; uECC_word_t d1[num_words_secp224r1]; /* s = a; using constant instead of random value */ mod_sqrt_secp224r1_rp(d0, e0, f0, a, a); /* RP (d0, e0, f0, c, s) */ mod_sqrt_secp224r1_rs(d1, e1, f1, d0, e0, f0); /* RS (d1, e1, f1, d0, e0, f0) */ for (i = 1; i <= 95; i++) { uECC_vli_set(d0, d1, num_words_secp224r1); /* d0 <-- d1 */ uECC_vli_set(e0, e1, num_words_secp224r1); /* e0 <-- e1 */ uECC_vli_set(f0, f1, num_words_secp224r1); /* f0 <-- f1 */ mod_sqrt_secp224r1_rs(d1, e1, f1, d0, e0, f0); /* RS (d1, e1, f1, d0, e0, f0) */ if (uECC_vli_isZero(d1, num_words_secp224r1)) { /* if d1 == 0 */ break; } } uECC_vli_modInv(f1, e0, curve_secp224r1.p, num_words_secp224r1); /* f1 <-- 1 / e0 */ uECC_vli_modMult_fast(a, d0, f1, &curve_secp224r1); /* a <-- d0 / e0 */ } #endif /* 
uECC_SUPPORT_COMPRESSED_POINT */

#if (uECC_OPTIMIZATION_LEVEL > 0)
/* Computes result = product % curve_p
   from http://www.nsa.gov/ia/_files/nist-routines.pdf

   Three variants follow, one per uECC_WORD_SIZE. Each copies the low
   num_words_secp224r1 words of `product` into `result`, then adds the terms
   labelled s1, s2 and subtracts the terms labelled d1, d2, each assembled in
   `tmp` from the upper part of `product`. A final loop adds or subtracts
   curve_secp224r1.p until the value is back in range. */
#if uECC_WORD_SIZE == 1
/* 8-bit word variant: `product` is indexed byte by byte (28 bytes per operand). */
static void vli_mmod_fast_secp224r1(uint8_t *result, uint8_t *product) {
    uint8_t tmp[num_words_secp224r1];
    int8_t carry; /* signed: net carries (+) and borrows (-) of the steps below */

    /* t */
    uECC_vli_set(result, product, num_words_secp224r1);

    /* s1 */
    tmp[0] = tmp[1] = tmp[2] = tmp[3] = 0;
    tmp[4] = tmp[5] = tmp[6] = tmp[7] = 0;
    tmp[8] = tmp[9] = tmp[10] = tmp[11] = 0;
    tmp[12] = product[28]; tmp[13] = product[29]; tmp[14] = product[30]; tmp[15] = product[31];
    tmp[16] = product[32]; tmp[17] = product[33]; tmp[18] = product[34]; tmp[19] = product[35];
    tmp[20] = product[36]; tmp[21] = product[37]; tmp[22] = product[38]; tmp[23] = product[39];
    tmp[24] = product[40]; tmp[25] = product[41]; tmp[26] = product[42]; tmp[27] = product[43];
    carry = uECC_vli_add(result, result, tmp, num_words_secp224r1);

    /* s2 (tmp[0..11] are still zero from s1) */
    tmp[12] = product[44]; tmp[13] = product[45]; tmp[14] = product[46]; tmp[15] = product[47];
    tmp[16] = product[48]; tmp[17] = product[49]; tmp[18] = product[50]; tmp[19] = product[51];
    tmp[20] = product[52]; tmp[21] = product[53]; tmp[22] = product[54]; tmp[23] = product[55];
    tmp[24] = tmp[25] = tmp[26] = tmp[27] = 0;
    carry += uECC_vli_add(result, result, tmp, num_words_secp224r1);

    /* d1 */
    tmp[0]  = product[28]; tmp[1]  = product[29]; tmp[2]  = product[30]; tmp[3]  = product[31];
    tmp[4]  = product[32]; tmp[5]  = product[33]; tmp[6]  = product[34]; tmp[7]  = product[35];
    tmp[8]  = product[36]; tmp[9]  = product[37]; tmp[10] = product[38]; tmp[11] = product[39];
    tmp[12] = product[40]; tmp[13] = product[41]; tmp[14] = product[42]; tmp[15] = product[43];
    tmp[16] = product[44]; tmp[17] = product[45]; tmp[18] = product[46]; tmp[19] = product[47];
    tmp[20] = product[48]; tmp[21] = product[49]; tmp[22] = product[50]; tmp[23] = product[51];
    tmp[24] = product[52]; tmp[25] = product[53]; tmp[26] = product[54]; tmp[27] = product[55];
    carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1);

    /* d2 */
    tmp[0]  = product[44]; tmp[1]  = product[45]; tmp[2]  = product[46]; tmp[3]  = product[47];
    tmp[4]  = product[48]; tmp[5]  = product[49]; tmp[6]  = product[50]; tmp[7]  = product[51];
    tmp[8]  = product[52]; tmp[9]  = product[53]; tmp[10] = product[54]; tmp[11] = product[55];
    tmp[12] = tmp[13] = tmp[14] = tmp[15] = 0;
    tmp[16] = tmp[17] = tmp[18] = tmp[19] = 0;
    tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0;
    tmp[24] = tmp[25] = tmp[26] = tmp[27] = 0;
    carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1);

    /* Final correction: a negative carry means the running value went below
       zero, so add p back; otherwise subtract p while a carry is pending or
       p is not strictly greater than result. */
    if (carry < 0) {
        do {
            carry += uECC_vli_add(result, result, curve_secp224r1.p, num_words_secp224r1);
        } while (carry < 0);
    } else {
        while (carry || uECC_vli_cmp_unsafe(curve_secp224r1.p, result, num_words_secp224r1) != 1) {
            carry -= uECC_vli_sub(result, result, curve_secp224r1.p, num_words_secp224r1);
        }
    }
}

#elif uECC_WORD_SIZE == 4
/* 32-bit word variant: `product` is indexed in 32-bit words (7 words per operand). */
static void vli_mmod_fast_secp224r1(uint32_t *result, uint32_t *product)
{
    uint32_t tmp[num_words_secp224r1];
    int carry; /* signed: net carries (+) and borrows (-) of the steps below */

    /* t */
    uECC_vli_set(result, product, num_words_secp224r1);

    /* s1 */
    tmp[0] = tmp[1] = tmp[2] = 0;
    tmp[3] = product[7];
    tmp[4] = product[8];
    tmp[5] = product[9];
    tmp[6] = product[10];
    carry = uECC_vli_add(result, result, tmp, num_words_secp224r1);

    /* s2 (tmp[0..2] are still zero from s1) */
    tmp[3] = product[11];
    tmp[4] = product[12];
    tmp[5] = product[13];
    tmp[6] = 0;
    carry += uECC_vli_add(result, result, tmp, num_words_secp224r1);

    /* d1 */
    tmp[0] = product[7];
    tmp[1] = product[8];
    tmp[2] = product[9];
    tmp[3] = product[10];
    tmp[4] = product[11];
    tmp[5] = product[12];
    tmp[6] = product[13];
    carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1);

    /* d2 */
    tmp[0] = product[11];
    tmp[1] = product[12];
    tmp[2] = product[13];
    tmp[3] = tmp[4] = tmp[5] = tmp[6] = 0;
    carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1);

    /* Final correction, same scheme as the 8-bit variant. */
    if (carry < 0) {
        do {
            carry += uECC_vli_add(result, result, curve_secp224r1.p, num_words_secp224r1);
        } while (carry < 0);
    } else {
        while (carry || uECC_vli_cmp_unsafe(curve_secp224r1.p, result, num_words_secp224r1) != 1) {
            carry -= uECC_vli_sub(result, result, curve_secp224r1.p, num_words_secp224r1);
        }
    }
}

#else
/* 64-bit word variant. The terms are the same as in the 32-bit variant, but
   they do not line up with 64-bit word boundaries, so they are built with
   32-bit masks and shifts. The top result word is masked to its low 32 bits
   and the return values of the two additions are ignored; presumably the
   overflow of s1/s2 lands in the upper half of that top word instead of
   leaving the array -- NOTE(review): confirm. Only the subtractions update
   `carry`. */
static void vli_mmod_fast_secp224r1(uint64_t *result, uint64_t *product)
{
    uint64_t tmp[num_words_secp224r1];
    int carry = 0;

    /* t */
    uECC_vli_set(result, product, num_words_secp224r1);
    result[num_words_secp224r1 - 1] &= 0xffffffff;

    /* s1 */
    tmp[0] = 0;
    tmp[1] = product[3] & 0xffffffff00000000ull;
    tmp[2] = product[4];
    tmp[3] = product[5] & 0xffffffff;
    uECC_vli_add(result, result, tmp, num_words_secp224r1);

    /* s2 (tmp[0] is still zero from s1) */
    tmp[1] = product[5] & 0xffffffff00000000ull;
    tmp[2] = product[6];
    tmp[3] = 0;
    uECC_vli_add(result, result, tmp, num_words_secp224r1);

    /* d1 */
    tmp[0] = (product[3] >> 32) | (product[4] << 32);
    tmp[1] = (product[4] >> 32) | (product[5] << 32);
    tmp[2] = (product[5] >> 32) | (product[6] << 32);
    tmp[3] = product[6] >> 32;
    carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1);

    /* d2 */
    tmp[0] = (product[5] >> 32) | (product[6] << 32);
    tmp[1] = product[6] >> 32;
    tmp[2] = tmp[3] = 0;
    carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1);

    /* Final correction. The subtract loop here has no carry term: it only
       compares result against p. */
    if (carry < 0) {
        do {
            carry += uECC_vli_add(result, result, curve_secp224r1.p, num_words_secp224r1);
        } while (carry < 0);
    } else {
        while (uECC_vli_cmp_unsafe(curve_secp224r1.p, result, num_words_secp224r1) != 1) {
            uECC_vli_sub(result, result, curve_secp224r1.p, num_words_secp224r1);
        }
    }
}
#endif /* uECC_WORD_SIZE */

#endif /* (uECC_OPTIMIZATION_LEVEL > 0) */

#endif /* uECC_SUPPORTS_secp224r1 */

#if uECC_SUPPORTS_secp256r1

#if (uECC_OPTIMIZATION_LEVEL > 0)
static void vli_mmod_fast_secp256r1(uECC_word_t *result, uECC_word_t *product);
#endif

/* Static description of secp256r1. All multi-word constants are written
   least-significant byte first. NOTE(review): struct uECC_Curve_t is declared
   outside this section; judging by the `.p` and `->b` accesses elsewhere in
   this file the four brace groups are presumably p, n, G (x then y) and b --
   confirm against the declaration. */
static const struct uECC_Curve_t curve_secp256r1 = {
    num_words_secp256r1,
    num_bytes_secp256r1,
    256, /* num_n_bits */
    { BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF),
      BYTES_TO_WORDS_8(FF, FF, FF, FF, 00, 00, 00, 00),
      BYTES_TO_WORDS_8(00, 00, 00, 00, 00, 00, 00, 00),
      BYTES_TO_WORDS_8(01, 00, 00, 00, FF, FF, FF, FF) },
    { BYTES_TO_WORDS_8(51, 25, 63, FC, C2, CA, B9, F3),
      BYTES_TO_WORDS_8(84, 9E, 17, A7, AD, FA, E6, BC),
      BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF),
      BYTES_TO_WORDS_8(00, 00, 00, 00, FF, FF, FF, FF) },
    { BYTES_TO_WORDS_8(96, C2, 98, D8, 45, 39, A1, F4),
      BYTES_TO_WORDS_8(A0, 33, EB, 2D, 81, 7D, 03, 77),
      BYTES_TO_WORDS_8(F2, 40, A4, 63, E5, E6, BC, F8),
      BYTES_TO_WORDS_8(47, 42, 2C, E1, F2, D1, 17, 6B),

      BYTES_TO_WORDS_8(F5, 51, BF, 37, 68, 40, B6, CB),
      BYTES_TO_WORDS_8(CE, 5E, 31, 6B, 57, 33, CE, 2B),
      BYTES_TO_WORDS_8(16, 9E, 0F, 7C, 4A, EB, E7, 8E),
      BYTES_TO_WORDS_8(9B, 7F, 1A, FE, E2, 42, E3, 4F) },
    { BYTES_TO_WORDS_8(4B, 60, D2, 27, 3E, 3C, CE, 3B),
      BYTES_TO_WORDS_8(F6, B0, 53, CC, B0, 06, 1D, 65),
      BYTES_TO_WORDS_8(BC, 86, 98, 76, 55, BD, EB, B3),
      BYTES_TO_WORDS_8(E7, 93, 3A, AA, D8, 35, C6, 5A) },
    &double_jacobian_default,
#if uECC_SUPPORT_COMPRESSED_POINT
    &mod_sqrt_default,
#endif
    &x_side_default,
#if (uECC_OPTIMIZATION_LEVEL > 0)
    &vli_mmod_fast_secp256r1
#endif
};

/* Returns a pointer to the static secp256r1 curve description above. */
uECC_Curve uECC_secp256r1(void) { return &curve_secp256r1; }


#if (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp256r1)
/* Computes result = product % curve_p
   from http://www.nsa.gov/ia/_files/nist-routines.pdf

   Three variants follow, one per uECC_WORD_SIZE. Each copies the low
   num_words_secp256r1 words of `product` into `result`, adds the terms
   labelled s1..s4 (s1 and s2 are doubled first by adding tmp to itself) and
   subtracts the terms labelled d1..d4. `carry` tracks the net overflow, and a
   final loop adds or subtracts curve_secp256r1.p until the value is back in
   range. */
#if uECC_WORD_SIZE == 1
/* 8-bit word variant: `product` is indexed byte by byte (32 bytes per operand). */
static void vli_mmod_fast_secp256r1(uint8_t *result, uint8_t *product) {
    uint8_t tmp[num_words_secp256r1];
    int8_t carry; /* signed: net carries (+) and borrows (-) of the steps below */

    /* t */
    uECC_vli_set(result, product, num_words_secp256r1);

    /* s1 */
    tmp[0] = tmp[1] = tmp[2] = tmp[3] = 0;
    tmp[4] = tmp[5] = tmp[6] = tmp[7] = 0;
    tmp[8] = tmp[9] = tmp[10] = tmp[11] = 0;
    tmp[12] = product[44]; tmp[13] = product[45]; tmp[14] = product[46]; tmp[15] = product[47];
    tmp[16] = product[48]; tmp[17] = product[49]; tmp[18] = product[50]; tmp[19] = product[51];
    tmp[20] = product[52]; tmp[21] = product[53]; tmp[22] = product[54]; tmp[23] = product[55];
    tmp[24] = product[56]; tmp[25] = product[57]; tmp[26] = product[58]; tmp[27] = product[59];
    tmp[28] = product[60]; tmp[29] = product[61]; tmp[30] = product[62]; tmp[31] = product[63];
    carry = uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1); /* tmp <-- 2 * s1 */
    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);

    /* s2 (tmp[0..11] hold the doubled zeros from s1, i.e. still zero) */
    tmp[12] = product[48]; tmp[13] = product[49]; tmp[14] = product[50]; tmp[15] = product[51];
    tmp[16] = product[52]; tmp[17] = product[53]; tmp[18] = product[54]; tmp[19] = product[55];
    tmp[20] = product[56]; tmp[21] = product[57]; tmp[22] = product[58]; tmp[23] = product[59];
    tmp[24] = product[60]; tmp[25] = product[61]; tmp[26] = product[62]; tmp[27] = product[63];
    tmp[28] = tmp[29] = tmp[30] = tmp[31] = 0;
    carry += uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1); /* tmp <-- 2 * s2 */
    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);

    /* s3 */
    tmp[0]  = product[32]; tmp[1]  = product[33]; tmp[2]  = product[34]; tmp[3]  = product[35];
    tmp[4]  = product[36]; tmp[5]  = product[37]; tmp[6]  = product[38]; tmp[7]  = product[39];
    tmp[8]  = product[40]; tmp[9]  = product[41]; tmp[10] = product[42]; tmp[11] = product[43];
    tmp[12] = tmp[13] = tmp[14] = tmp[15] = 0;
    tmp[16] = tmp[17] = tmp[18] = tmp[19] = 0;
    tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0;
    tmp[24] = product[56]; tmp[25] = product[57]; tmp[26] = product[58]; tmp[27] = product[59];
    tmp[28] = product[60]; tmp[29] = product[61]; tmp[30] = product[62]; tmp[31] = product[63];
    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);

    /* s4 */
    tmp[0]  = product[36]; tmp[1]  = product[37]; tmp[2]  = product[38]; tmp[3]  = product[39];
    tmp[4]  = product[40]; tmp[5]  = product[41]; tmp[6]  = product[42]; tmp[7]  = product[43];
    tmp[8]  = product[44]; tmp[9]  = product[45]; tmp[10] = product[46]; tmp[11] = product[47];
    tmp[12] = product[52]; tmp[13] = product[53]; tmp[14] = product[54]; tmp[15] = product[55];
    tmp[16] = product[56]; tmp[17] = product[57]; tmp[18] = product[58]; tmp[19] = product[59];
    tmp[20] = product[60]; tmp[21] = product[61]; tmp[22] = product[62]; tmp[23] = product[63];
    tmp[24] = product[52]; tmp[25] = product[53]; tmp[26] = product[54]; tmp[27] = product[55];
    tmp[28] = product[32]; tmp[29] = product[33]; tmp[30] = product[34]; tmp[31] = product[35];
    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);

    /* d1 */
    tmp[0]  = product[44]; tmp[1]  = product[45]; tmp[2]  = product[46]; tmp[3]  = product[47];
    tmp[4]  = product[48]; tmp[5]  = product[49]; tmp[6]  = product[50]; tmp[7]  = product[51];
    tmp[8]  = product[52]; tmp[9]  = product[53]; tmp[10] = product[54]; tmp[11] = product[55];
    tmp[12] = tmp[13] = tmp[14] = tmp[15] = 0;
    tmp[16] = tmp[17] = tmp[18] = tmp[19] = 0;
    tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0;
    tmp[24] = product[32]; tmp[25] = product[33]; tmp[26] = product[34]; tmp[27] = product[35];
    tmp[28] = product[40]; tmp[29] = product[41]; tmp[30] = product[42]; tmp[31] = product[43];
    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);

    /* d2 */
    tmp[0]  = product[48]; tmp[1]  = product[49]; tmp[2]  = product[50]; tmp[3]  = product[51];
    tmp[4]  = product[52]; tmp[5]  = product[53]; tmp[6]  = product[54]; tmp[7]  = product[55];
    tmp[8]  = product[56]; tmp[9]  = product[57]; tmp[10] = product[58]; tmp[11] = product[59];
    tmp[12] = product[60]; tmp[13] = product[61]; tmp[14] = product[62]; tmp[15] = product[63];
    tmp[16] = tmp[17] = tmp[18] = tmp[19] = 0;
    tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0;
    tmp[24] = product[36]; tmp[25] = product[37]; tmp[26] = product[38]; tmp[27] = product[39];
    tmp[28] = product[44]; tmp[29] = product[45]; tmp[30] = product[46]; tmp[31] = product[47];
    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);

    /* d3 */
    tmp[0]  = product[52]; tmp[1]  = product[53]; tmp[2]  = product[54]; tmp[3]  = product[55];
    tmp[4]  = product[56]; tmp[5]  = product[57]; tmp[6]  = product[58]; tmp[7]  = product[59];
    tmp[8]  = product[60]; tmp[9]  = product[61]; tmp[10] = product[62]; tmp[11] = product[63];
    tmp[12] = product[32]; tmp[13] = product[33]; tmp[14] = product[34]; tmp[15] = product[35];
    tmp[16] = product[36]; tmp[17] = product[37]; tmp[18] = product[38]; tmp[19] = product[39];
    tmp[20] = product[40]; tmp[21] = product[41]; tmp[22] = product[42]; tmp[23] = product[43];
    tmp[24] = tmp[25] = tmp[26] = tmp[27] = 0;
    tmp[28] = product[48]; tmp[29] = product[49]; tmp[30] = product[50]; tmp[31] = product[51];
    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);

    /* d4 */
    tmp[0]  = product[56]; tmp[1]  = product[57]; tmp[2]  = product[58]; tmp[3]  = product[59];
    tmp[4]  = product[60]; tmp[5]  = product[61]; tmp[6]  = product[62]; tmp[7]  = product[63];
    tmp[8] = tmp[9] = tmp[10] = tmp[11] = 0;
    tmp[12] = product[36]; tmp[13] = product[37]; tmp[14] = product[38]; tmp[15] = product[39];
    tmp[16] = product[40]; tmp[17] = product[41]; tmp[18] = product[42]; tmp[19] = product[43];
    tmp[20] = product[44]; tmp[21] = product[45]; tmp[22] = product[46]; tmp[23] = product[47];
    tmp[24] = tmp[25] = tmp[26] = tmp[27] = 0;
    tmp[28] = product[52]; tmp[29] = product[53]; tmp[30] = product[54]; tmp[31] = product[55];
    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);

    /* Final correction: a negative carry means the running value went below
       zero, so add p back; otherwise subtract p while a carry is pending or
       p is not strictly greater than result. */
    if (carry < 0) {
        do {
            carry += uECC_vli_add(result, result, curve_secp256r1.p, num_words_secp256r1);
        } while (carry < 0);
    } else {
        while (carry || uECC_vli_cmp_unsafe(curve_secp256r1.p, result, num_words_secp256r1) != 1) {
            carry -= uECC_vli_sub(result, result, curve_secp256r1.p, num_words_secp256r1);
        }
    }
}
#elif uECC_WORD_SIZE == 4
/* 32-bit word variant: `product` is indexed in 32-bit words (8 words per operand). */
static void vli_mmod_fast_secp256r1(uint32_t *result, uint32_t *product) {
    uint32_t tmp[num_words_secp256r1];
    int carry; /* signed: net carries (+) and borrows (-) of the steps below */

    /* t */
    uECC_vli_set(result, product, num_words_secp256r1);

    /* s1 */
    tmp[0] = tmp[1] = tmp[2] = 0;
    tmp[3] = product[11];
    tmp[4] = product[12];
    tmp[5] = product[13];
    tmp[6] = product[14];
    tmp[7] = product[15];
    carry = uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1); /* tmp <-- 2 * s1 */
    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);

    /* s2 (tmp[0..2] are still zero) */
    tmp[3] = product[12];
    tmp[4] = product[13];
    tmp[5] = product[14];
    tmp[6] = product[15];
    tmp[7] = 0;
    carry += uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1); /* tmp <-- 2 * s2 */
    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);

    /* s3 */
    tmp[0] = product[8];
    tmp[1] = product[9];
    tmp[2] = product[10];
    tmp[3] = tmp[4] = tmp[5] = 0;
    tmp[6] = product[14];
    tmp[7] = product[15];
    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);

    /* s4 */
    tmp[0] = product[9];
    tmp[1] = product[10];
    tmp[2] = product[11];
    tmp[3] = product[13];
    tmp[4] = product[14];
    tmp[5] = product[15];
    tmp[6] = product[13];
    tmp[7] = product[8];
    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);

    /* d1 */
    tmp[0] = product[11];
    tmp[1] = product[12];
    tmp[2] = product[13];
    tmp[3] = tmp[4] = tmp[5] = 0;
    tmp[6] = product[8];
    tmp[7] = product[10];
    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);

    /* d2 */
    tmp[0] = product[12];
    tmp[1] = product[13];
    tmp[2] = product[14];
    tmp[3] = product[15];
    tmp[4] = tmp[5] = 0;
    tmp[6] = product[9];
    tmp[7] = product[11];
    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);

    /* d3 */
    tmp[0] = product[13];
    tmp[1] = product[14];
    tmp[2] = product[15];
    tmp[3] = product[8];
    tmp[4] = product[9];
    tmp[5] = product[10];
    tmp[6] = 0;
    tmp[7] = product[12];
    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);

    /* d4 */
    tmp[0] = product[14];
    tmp[1] = product[15];
    tmp[2] = 0;
    tmp[3] = product[9];
    tmp[4] = product[10];
    tmp[5] = product[11];
    tmp[6] = 0;
    tmp[7] = product[13];
    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);

    /* Final correction, same scheme as the 8-bit variant. */
    if (carry < 0) {
        do {
            carry += uECC_vli_add(result, result, curve_secp256r1.p, num_words_secp256r1);
        } while (carry < 0);
    } else {
        while (carry || uECC_vli_cmp_unsafe(curve_secp256r1.p, result, num_words_secp256r1) != 1) {
            carry -= uECC_vli_sub(result, result, curve_secp256r1.p, num_words_secp256r1);
        }
    }
}
#else
/* 64-bit word variant: the same terms as the 32-bit variant, assembled from
   64-bit words of `product` with 32-bit masks and shifts. */
static void vli_mmod_fast_secp256r1(uint64_t *result, uint64_t *product) {
    uint64_t tmp[num_words_secp256r1];
    int carry; /* signed: net carries (+) and borrows (-) of the steps below */

    /* t */
    uECC_vli_set(result, product, num_words_secp256r1);

    /* s1 */
    tmp[0] = 0;
    tmp[1] = product[5] & 0xffffffff00000000ull;
    tmp[2] = product[6];
    tmp[3] = product[7];
    carry = (int)uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1); /* tmp <-- 2 * s1 */
    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);

    /* s2 (tmp[0] is still zero) */
    tmp[1] = product[6] << 32;
    tmp[2] = (product[6] >> 32) | (product[7] << 32);
    tmp[3] = product[7] >> 32;
    carry += uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1); /* tmp <-- 2 * s2 */
    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);

    /* s3 */
    tmp[0] = product[4];
    tmp[1] = product[5] & 0xffffffff;
    tmp[2] = 0;
    tmp[3] = product[7];
    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);

    /* s4 */
    tmp[0] = (product[4] >> 32) | (product[5] << 32);
    tmp[1] = (product[5] >> 32) | (product[6] & 0xffffffff00000000ull);
    tmp[2] = product[7];
    tmp[3] = (product[6] >> 32) | (product[4] << 32);
    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);

    /* d1 */
    tmp[0] = (product[5] >> 32) | (product[6] << 32);
    tmp[1] = (product[6] >> 32);
    tmp[2] = 0;
    tmp[3] = (product[4] & 0xffffffff) | (product[5] << 32);
    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);

    /* d2 */
    tmp[0] = product[6];
    tmp[1] = product[7];
    tmp[2] = 0;
    tmp[3] = (product[4] >> 32) | (product[5] & 0xffffffff00000000ull);
    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);

    /* d3 */
    tmp[0] = (product[6] >> 32) | (product[7] << 32);
    tmp[1] = (product[7] >> 32) | (product[4] << 32);
    tmp[2] = (product[4] >> 32) | (product[5] << 32);
    tmp[3] = (product[6] << 32);
    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);

    /* d4 */
    tmp[0] = product[7];
    tmp[1] = product[4] & 0xffffffff00000000ull;
    tmp[2] = product[5];
    tmp[3] = product[6] & 0xffffffff00000000ull;
    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);

    /* Final correction, same scheme as the 8-bit variant. */
    if (carry < 0) {
        do {
            carry += uECC_vli_add(result, result, curve_secp256r1.p, num_words_secp256r1);
        } while (carry < 0);
    } else {
        while (carry || uECC_vli_cmp_unsafe(curve_secp256r1.p, result, num_words_secp256r1) != 1) {
            carry -= uECC_vli_sub(result, result, curve_secp256r1.p, num_words_secp256r1);
        }
    }
}
#endif /* uECC_WORD_SIZE */
#endif /* (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp256r1) */

#endif /*
uECC_SUPPORTS_secp256r1 */

#if uECC_SUPPORTS_secp256k1

/* Forward declarations: the curve table below stores pointers to these
   functions, which are defined after it. */
static void double_jacobian_secp256k1(uECC_word_t * X1,
                                      uECC_word_t * Y1,
                                      uECC_word_t * Z1,
                                      uECC_Curve curve);
static void x_side_secp256k1(uECC_word_t *result, const uECC_word_t *x, uECC_Curve curve);
#if (uECC_OPTIMIZATION_LEVEL > 0)
static void vli_mmod_fast_secp256k1(uECC_word_t *result, uECC_word_t *product);
#endif

/* Static description of secp256k1. All multi-word constants are written
   least-significant byte first. Unlike the other curves in this file it
   installs its own doubling and x-side routines. NOTE(review): struct
   uECC_Curve_t is declared outside this section; judging by the `->p` and
   `->b` accesses below the four brace groups are presumably p, n, G (x then
   y) and b -- confirm against the declaration. */
static const struct uECC_Curve_t curve_secp256k1 = {
    num_words_secp256k1,
    num_bytes_secp256k1,
    256, /* num_n_bits */
    { BYTES_TO_WORDS_8(2F, FC, FF, FF, FE, FF, FF, FF),
      BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF),
      BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF),
      BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF) },
    { BYTES_TO_WORDS_8(41, 41, 36, D0, 8C, 5E, D2, BF),
      BYTES_TO_WORDS_8(3B, A0, 48, AF, E6, DC, AE, BA),
      BYTES_TO_WORDS_8(FE, FF, FF, FF, FF, FF, FF, FF),
      BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF) },
    { BYTES_TO_WORDS_8(98, 17, F8, 16, 5B, 81, F2, 59),
      BYTES_TO_WORDS_8(D9, 28, CE, 2D, DB, FC, 9B, 02),
      BYTES_TO_WORDS_8(07, 0B, 87, CE, 95, 62, A0, 55),
      BYTES_TO_WORDS_8(AC, BB, DC, F9, 7E, 66, BE, 79),

      BYTES_TO_WORDS_8(B8, D4, 10, FB, 8F, D0, 47, 9C),
      BYTES_TO_WORDS_8(19, 54, 85, A6, 48, B4, 17, FD),
      BYTES_TO_WORDS_8(A8, 08, 11, 0E, FC, FB, A4, 5D),
      BYTES_TO_WORDS_8(65, C4, A3, 26, 77, DA, 3A, 48) },
    { BYTES_TO_WORDS_8(07, 00, 00, 00, 00, 00, 00, 00),
      BYTES_TO_WORDS_8(00, 00, 00, 00, 00, 00, 00, 00),
      BYTES_TO_WORDS_8(00, 00, 00, 00, 00, 00, 00, 00),
      BYTES_TO_WORDS_8(00, 00, 00, 00, 00, 00, 00, 00) },
    &double_jacobian_secp256k1,
#if uECC_SUPPORT_COMPRESSED_POINT
    &mod_sqrt_default,
#endif
    &x_side_secp256k1,
#if (uECC_OPTIMIZATION_LEVEL > 0)
    &vli_mmod_fast_secp256k1
#endif
};

/* Returns a pointer to the static secp256k1 curve description above. */
uECC_Curve uECC_secp256k1(void) { return &curve_secp256k1; }


/* Double in place
 *
 * Replaces the point (X1, Y1, Z1) with its double, reusing the three input
 * buffers as working registers (t1 = X1, t2 = Y1, t3 = Z1) plus two locals.
 * Returns without touching anything when Z1 is zero. */
static void double_jacobian_secp256k1(uECC_word_t * X1,
                                      uECC_word_t * Y1,
                                      uECC_word_t * Z1,
                                      uECC_Curve curve) {
    /* t1 = X, t2 = Y, t3 = Z */
    uECC_word_t t4[num_words_secp256k1];
    uECC_word_t t5[num_words_secp256k1];

    if (uECC_vli_isZero(Z1, num_words_secp256k1)) {
        return;
    }

    uECC_vli_modSquare_fast(t5, Y1, curve);   /* t5 = y1^2 */
    uECC_vli_modMult_fast(t4, X1, t5, curve); /* t4 = x1*y1^2 = A */
    uECC_vli_modSquare_fast(X1, X1, curve);   /* t1 = x1^2 */
    uECC_vli_modSquare_fast(t5, t5, curve);   /* t5 = y1^4 */
    uECC_vli_modMult_fast(Z1, Y1, Z1, curve); /* t3 = y1*z1 = z3 */

    uECC_vli_modAdd(Y1, X1, X1, curve->p, num_words_secp256k1); /* t2 = 2*x1^2 */
    uECC_vli_modAdd(Y1, Y1, X1, curve->p, num_words_secp256k1); /* t2 = 3*x1^2 */
    /* Halve t2 modulo p: when it is odd, add p first so the sum is even, and
       shift the carry out of that addition back in as the top bit. */
    if (uECC_vli_testBit(Y1, 0)) {
        uECC_word_t carry = uECC_vli_add(Y1, Y1, curve->p, num_words_secp256k1);
        uECC_vli_rshift1(Y1, num_words_secp256k1);
        Y1[num_words_secp256k1 - 1] |= carry << (uECC_WORD_BITS - 1);
    } else {
        uECC_vli_rshift1(Y1, num_words_secp256k1);
    }
    /* t2 = 3/2*(x1^2) = B */

    uECC_vli_modSquare_fast(X1, Y1, curve);                     /* t1 = B^2 */
    uECC_vli_modSub(X1, X1, t4, curve->p, num_words_secp256k1); /* t1 = B^2 - A */
    uECC_vli_modSub(X1, X1, t4, curve->p, num_words_secp256k1); /* t1 = B^2 - 2A = x3 */

    uECC_vli_modSub(t4, t4, X1, curve->p, num_words_secp256k1); /* t4 = A - x3 */
    uECC_vli_modMult_fast(Y1, Y1, t4, curve);                   /* t2 = B * (A - x3) */
    uECC_vli_modSub(Y1, Y1, t5, curve->p, num_words_secp256k1); /* t2 = B * (A - x3) - y1^4 = y3 */
}

/* Computes result = x^3 + b. result must not overlap x.
*/ static void x_side_secp256k1(uECC_word_t *result, const uECC_word_t *x, uECC_Curve curve) { uECC_vli_modSquare_fast(result, x, curve); /* r = x^2 */ uECC_vli_modMult_fast(result, result, x, curve); /* r = x^3 */ uECC_vli_modAdd(result, result, curve->b, curve->p, num_words_secp256k1); /* r = x^3 + b */ } #if (uECC_OPTIMIZATION_LEVEL > 0) static void omega_mult_secp256k1(uECC_word_t *result, const uECC_word_t *right); static void vli_mmod_fast_secp256k1(uECC_word_t *result, uECC_word_t *product) { uECC_word_t tmp[2 * num_words_secp256k1]; uECC_word_t carry; uECC_vli_clear(tmp, num_words_secp256k1); uECC_vli_clear(tmp + num_words_secp256k1, num_words_secp256k1); omega_mult_secp256k1(tmp, product + num_words_secp256k1); /* (Rq, q) = q * c */ carry = uECC_vli_add(result, product, tmp, num_words_secp256k1); /* (C, r) = r + q */ uECC_vli_clear(product, num_words_secp256k1); omega_mult_secp256k1(product, tmp + num_words_secp256k1); /* Rq*c */ carry += uECC_vli_add(result, result, product, num_words_secp256k1); /* (C1, r) = r + Rq*c */ while (carry > 0) { --carry; uECC_vli_sub(result, result, curve_secp256k1.p, num_words_secp256k1); } if (uECC_vli_cmp_unsafe(result, curve_secp256k1.p, num_words_secp256k1) > 0) { uECC_vli_sub(result, result, curve_secp256k1.p, num_words_secp256k1); } } #if uECC_WORD_SIZE == 1 static void omega_mult_secp256k1(uint8_t * result, const uint8_t * right) { /* Multiply by (2^32 + 2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). */ uECC_word_t r0 = 0; uECC_word_t r1 = 0; uECC_word_t r2 = 0; wordcount_t k; /* Multiply by (2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). 
*/ muladd(0xD1, right[0], &r0, &r1, &r2); result[0] = r0; r0 = r1; r1 = r2; /* r2 is still 0 */ for (k = 1; k < num_words_secp256k1; ++k) { muladd(0x03, right[k - 1], &r0, &r1, &r2); muladd(0xD1, right[k], &r0, &r1, &r2); result[k] = r0; r0 = r1; r1 = r2; r2 = 0; } muladd(0x03, right[num_words_secp256k1 - 1], &r0, &r1, &r2); result[num_words_secp256k1] = r0; result[num_words_secp256k1 + 1] = r1; /* add the 2^32 multiple */ result[4 + num_words_secp256k1] = uECC_vli_add(result + 4, result + 4, right, num_words_secp256k1); } #elif uECC_WORD_SIZE == 4 static void omega_mult_secp256k1(uint32_t * result, const uint32_t * right) { /* Multiply by (2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). */ uint32_t carry = 0; wordcount_t k; for (k = 0; k < num_words_secp256k1; ++k) { uint64_t p = (uint64_t)0x3D1 * right[k] + carry; result[k] = p; carry = p >> 32; } result[num_words_secp256k1] = carry; /* add the 2^32 multiple */ result[1 + num_words_secp256k1] = uECC_vli_add(result + 1, result + 1, right, num_words_secp256k1); } #else static void omega_mult_secp256k1(uint64_t * result, const uint64_t * right) { uECC_word_t r0 = 0; uECC_word_t r1 = 0; uECC_word_t r2 = 0; wordcount_t k; /* Multiply by (2^32 + 2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). 
*/ for (k = 0; k < num_words_secp256k1; ++k) { muladd(0x1000003D1ull, right[k], &r0, &r1, &r2); result[k] = r0; r0 = r1; r1 = r2; r2 = 0; } result[num_words_secp256k1] = r0; } #endif /* uECC_WORD_SIZE */ #endif /* (uECC_OPTIMIZATION_LEVEL > 0) */ #endif /* uECC_SUPPORTS_secp256k1 */ #endif /* _UECC_CURVE_SPECIFIC_H_ */ ================================================ FILE: u2f/desktop_test.cpp ================================================ //to prevent arduino IDE from compiling this #ifdef IS_DESKTOP_TEST //test in desktop #define _POSIX_C_SOURCE 200809L #include #include #include #include #include #include //for storing fake eprom #include //for fake input #include #include #include #include #include #define DESKTOP_TEST //fake eprom #define F(X) X typedef unsigned char byte; //this is used for random enum OUTPUT_FORMAT_ENUM { HEX = 1 }; std::vector fake_input; int current_fake_input; int hexchar2int(int c) { if (c<='9') return c-'0'; return 10 + (c-'A'); } void hex2bytes(const std::string & inp, unsigned char **res, int *len) { std::string tmp; //ignore non hex characters (e.g: space, tab) for (size_t i = 0; i < inp.size(); i++) { int c = toupper(inp[i]); if ((c>='0' && c<='9') || (c>='A' && c<='F')) { tmp += (char)c; } } *len = tmp.size()/2; unsigned char *tmpres = (unsigned char *)malloc(*len); size_t j =0; for (size_t i = 0; i < tmp.size(); i+=2, j++) { unsigned int c1 = hexchar2int(tmp[i]); unsigned int c2 = hexchar2int(tmp[i+1]); unsigned int c = (c1 << 4) + c2; tmpres[j] = c; } *res = tmpres; } int get_next_fake_input(unsigned char **res) { if (current_fake_input >= fake_input.size()) return -1; int len; printf("CURRENT FAKE INPUT: %s\n", fake_input[current_fake_input].c_str()); hex2bytes(fake_input[current_fake_input++], res, &len); return len; } void read_file(const std::string & filename) { std::ifstream file(filename); std::string temp; while(std::getline(file, temp)) { fake_input.push_back(temp); } current_fake_input = 0; } int RNG(uint8_t *dest, 
unsigned size) { for (int i =0; i < size; i++) { dest[i] = rand() % 255; } return 1; } long system_millis() { #if 0 long ms; // Milliseconds time_t s; // Seconds struct timespec spec; clock_gettime(CLOCK_REALTIME, &spec); s = spec.tv_sec; ms = round(spec.tv_nsec / 1.0e6); // Convert nanoseconds to milliseconds return s*1000 + ms; #endif struct timeval tv; gettimeofday(&tv, NULL); long time_in_mill = (tv.tv_sec) * 1000 + (tv.tv_usec) / 1000 ; // convert tv_sec & tv_usec to millisecond return time_in_mill; } int millis() { return 0; } void delayMicroseconds(int micro) { } class EEPROMClass { std::map values; public: void get(int address, unsigned int &value) { value = values[address]; } void put(int address, int value) { values[address] = value; } }; class SerialClass { public: void begin(int speed) { } void print(const char *msg) { printf("%s", msg); } void println() { printf("\n"); } void println(const char *msg) { printf("%s\n", msg); } void println(int number) { printf("%d\n", number); } void print(int number, OUTPUT_FORMAT_ENUM e) { printf("%02x", number); } void println(int number, OUTPUT_FORMAT_ENUM e) { printf("%02x", number); } }; class RawHIDClass { public: void send(byte *buffer, int to) { printf("HID SEND: "); for (int i =0; i < 64; i++) { printf("%02x ", buffer[i]); } printf("\n"); } int recv(byte *buffer, int timeout) { unsigned char *inp; int len = get_next_fake_input(&inp); if (len==-1) { printf("END OF INPUT\n"); exit(0); } printf("HID READ: "); for (int i =0; i < len; i++) { printf("%02x ", inp[i]); } printf("\n"); memcpy(buffer, inp, len); free(inp); return len; } }; SerialClass Serial; RawHIDClass RawHID; EEPROMClass EEPROM; #include "u2f.ino" int main(int argc, char *argv[]) { if (argc<2) { printf("usage desktop_test \n"); return 0; } read_file(argv[1]); setup(); while (1) { loop(); } } #endif ================================================ FILE: u2f/platform-specific.h ================================================ /* Copyright 2015, Kenneth 
MacKay. Licensed under the BSD 2-clause license. */ #ifndef _UECC_PLATFORM_SPECIFIC_H_ #define _UECC_PLATFORM_SPECIFIC_H_ #include "types.h" #if (defined(_WIN32) || defined(_WIN64)) /* Windows */ #define WIN32_LEAN_AND_MEAN #include #include static int default_RNG(uint8_t *dest, unsigned size) { HCRYPTPROV prov; if (!CryptAcquireContext(&prov, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) { return 0; } CryptGenRandom(prov, size, (BYTE *)dest); CryptReleaseContext(prov, 0); return 1; } #define default_RNG_defined 1 #elif defined(unix) || defined(__linux__) || defined(__unix__) || defined(__unix) || \ (defined(__APPLE__) && defined(__MACH__)) || defined(uECC_POSIX) /* Some POSIX-like system with /dev/urandom or /dev/random. */ #include #include #include #ifndef O_CLOEXEC #define O_CLOEXEC 0 #endif static int default_RNG(uint8_t *dest, unsigned size) { int fd = open("/dev/urandom", O_RDONLY | O_CLOEXEC); if (fd == -1) { fd = open("/dev/random", O_RDONLY | O_CLOEXEC); if (fd == -1) { return 0; } } char *ptr = (char *)dest; size_t left = size; while (left > 0) { ssize_t bytes_read = read(fd, ptr, left); if (bytes_read <= 0) { // read failed close(fd); return 0; } left -= bytes_read; ptr += bytes_read; } close(fd); return 1; } #define default_RNG_defined 1 #endif /* platform */ #endif /* _UECC_PLATFORM_SPECIFIC_H_ */ ================================================ FILE: u2f/sha256.c ================================================ /********************************************************************* * Filename: sha256.c * Author: Brad Conte (brad AT bradconte.com) * Copyright: * Disclaimer: This code is presented "as is" without any guarantees. * Details: Implementation of the SHA-256 hashing algorithm. SHA-256 is one of the three algorithms in the SHA2 specification. The others, SHA-384 and SHA-512, are not offered in this implementation. 
Algorithm specification can be found here: * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2withchangenotice.pdf This implementation uses little endian byte order. *********************************************************************/ /*************************** HEADER FILES ***************************/ #include #include //#include #include "sha256.h" #ifdef __cplusplus extern "C" { #endif /****************************** MACROS ******************************/ #define ROTLEFT(a,b) (((a) << (b)) | ((a) >> (32-(b)))) #define ROTRIGHT(a,b) (((a) >> (b)) | ((a) << (32-(b)))) #define CH(x,y,z) (((x) & (y)) ^ (~(x) & (z))) #define MAJ(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) #define EP0(x) (ROTRIGHT(x,2) ^ ROTRIGHT(x,13) ^ ROTRIGHT(x,22)) #define EP1(x) (ROTRIGHT(x,6) ^ ROTRIGHT(x,11) ^ ROTRIGHT(x,25)) #define SIG0(x) (ROTRIGHT(x,7) ^ ROTRIGHT(x,18) ^ ((x) >> 3)) #define SIG1(x) (ROTRIGHT(x,17) ^ ROTRIGHT(x,19) ^ ((x) >> 10)) /**************************** VARIABLES *****************************/ static const WORD k[64] = { 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5, 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174, 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da, 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967, 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85, 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,0xd192e819,0xd6990624,0xf40e3585,0x106aa070, 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3, 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 }; /*********************** FUNCTION DEFINITIONS ***********************/ void sha256_transform(SHA256_CTX *ctx, const BYTE data[]) { WORD a, b, c, d, e, f, g, h, i, j, t1, t2, m[64]; for (i = 0, j = 0; i < 16; ++i, j += 4) m[i] = 
(data[j] << 24) | (data[j + 1] << 16) | (data[j + 2] << 8) | (data[j + 3]); for ( ; i < 64; ++i) m[i] = SIG1(m[i - 2]) + m[i - 7] + SIG0(m[i - 15]) + m[i - 16]; a = ctx->state[0]; b = ctx->state[1]; c = ctx->state[2]; d = ctx->state[3]; e = ctx->state[4]; f = ctx->state[5]; g = ctx->state[6]; h = ctx->state[7]; for (i = 0; i < 64; ++i) { t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i]; t2 = EP0(a) + MAJ(a,b,c); h = g; g = f; f = e; e = d + t1; d = c; c = b; b = a; a = t1 + t2; } ctx->state[0] += a; ctx->state[1] += b; ctx->state[2] += c; ctx->state[3] += d; ctx->state[4] += e; ctx->state[5] += f; ctx->state[6] += g; ctx->state[7] += h; } void sha256_init(SHA256_CTX *ctx) { ctx->datalen = 0; ctx->bitlen = 0; ctx->state[0] = 0x6a09e667; ctx->state[1] = 0xbb67ae85; ctx->state[2] = 0x3c6ef372; ctx->state[3] = 0xa54ff53a; ctx->state[4] = 0x510e527f; ctx->state[5] = 0x9b05688c; ctx->state[6] = 0x1f83d9ab; ctx->state[7] = 0x5be0cd19; } void sha256_update(SHA256_CTX *ctx, const BYTE data[], size_t len) { WORD i; for (i = 0; i < len; ++i) { ctx->data[ctx->datalen] = data[i]; ctx->datalen++; if (ctx->datalen == 64) { sha256_transform(ctx, ctx->data); ctx->bitlen += 512; ctx->datalen = 0; } } } void sha256_final(SHA256_CTX *ctx, BYTE hash[]) { WORD i; i = ctx->datalen; // Pad whatever data is left in the buffer. if (ctx->datalen < 56) { ctx->data[i++] = 0x80; while (i < 56) ctx->data[i++] = 0x00; } else { ctx->data[i++] = 0x80; while (i < 64) ctx->data[i++] = 0x00; sha256_transform(ctx, ctx->data); memset(ctx->data, 0, 56); } // Append to the padding the total message's length in bits and transform. 
ctx->bitlen += ctx->datalen * 8; ctx->data[63] = ctx->bitlen; ctx->data[62] = ctx->bitlen >> 8; ctx->data[61] = ctx->bitlen >> 16; ctx->data[60] = ctx->bitlen >> 24; ctx->data[59] = ctx->bitlen >> 32; ctx->data[58] = ctx->bitlen >> 40; ctx->data[57] = ctx->bitlen >> 48; ctx->data[56] = ctx->bitlen >> 56; sha256_transform(ctx, ctx->data); // Since this implementation uses little endian byte ordering and SHA uses big endian, // reverse all the bytes when copying the final state to the output hash. for (i = 0; i < 4; ++i) { hash[i] = (ctx->state[0] >> (24 - i * 8)) & 0x000000ff; hash[i + 4] = (ctx->state[1] >> (24 - i * 8)) & 0x000000ff; hash[i + 8] = (ctx->state[2] >> (24 - i * 8)) & 0x000000ff; hash[i + 12] = (ctx->state[3] >> (24 - i * 8)) & 0x000000ff; hash[i + 16] = (ctx->state[4] >> (24 - i * 8)) & 0x000000ff; hash[i + 20] = (ctx->state[5] >> (24 - i * 8)) & 0x000000ff; hash[i + 24] = (ctx->state[6] >> (24 - i * 8)) & 0x000000ff; hash[i + 28] = (ctx->state[7] >> (24 - i * 8)) & 0x000000ff; } } #ifdef __cplusplus } #endif ================================================ FILE: u2f/sha256.h ================================================ /********************************************************************* * Filename: sha256.h * Author: Brad Conte (brad AT bradconte.com) * Copyright: * Disclaimer: This code is presented "as is" without any guarantees. * Details: Defines the API for the corresponding SHA1 implementation. 
*********************************************************************/ #ifndef SHA256_H #define SHA256_H /*************************** HEADER FILES ***************************/ #include /****************************** MACROS ******************************/ #define SHA256_BLOCK_SIZE 32 // SHA256 outputs a 32 byte digest /**************************** DATA TYPES ****************************/ typedef unsigned char BYTE; // 8-bit byte typedef unsigned int WORD; // 32-bit word, change to "long" for 16-bit machines typedef struct { BYTE data[64]; WORD datalen; unsigned long long bitlen; WORD state[8]; } SHA256_CTX; /*********************** FUNCTION DECLARATIONS **********************/ #ifdef __cplusplus extern "C" { #endif void sha256_init(SHA256_CTX *ctx); void sha256_update(SHA256_CTX *ctx, const BYTE data[], size_t len); void sha256_final(SHA256_CTX *ctx, BYTE hash[]); #ifdef __cplusplus } #endif #endif // SHA256_H ================================================ FILE: u2f/types.h ================================================ /* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. 
*/ #ifndef _UECC_TYPES_H_ #define _UECC_TYPES_H_ #ifndef uECC_PLATFORM #if __AVR__ #define uECC_PLATFORM uECC_avr #elif defined(__thumb2__) || defined(_M_ARMT) /* I think MSVC only supports Thumb-2 targets */ #define uECC_PLATFORM uECC_arm_thumb2 #elif defined(__thumb__) #define uECC_PLATFORM uECC_arm_thumb #elif defined(__arm__) || defined(_M_ARM) #define uECC_PLATFORM uECC_arm #elif defined(__aarch64__) #define uECC_PLATFORM uECC_arm64 #elif defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__I86__) #define uECC_PLATFORM uECC_x86 #elif defined(__amd64__) || defined(_M_X64) #define uECC_PLATFORM uECC_x86_64 #else #define uECC_PLATFORM uECC_arch_other #endif #endif #ifndef uECC_WORD_SIZE #if uECC_PLATFORM == uECC_avr #define uECC_WORD_SIZE 1 #elif (uECC_PLATFORM == uECC_x86_64 || uECC_PLATFORM == uECC_arm64) #define uECC_WORD_SIZE 8 #else #define uECC_WORD_SIZE 4 #endif #endif #if (uECC_WORD_SIZE != 1) && (uECC_WORD_SIZE != 4) && (uECC_WORD_SIZE != 8) #error "Unsupported value for uECC_WORD_SIZE" #endif #if ((uECC_PLATFORM == uECC_avr) && (uECC_WORD_SIZE != 1)) #pragma message ("uECC_WORD_SIZE must be 1 for AVR") #undef uECC_WORD_SIZE #define uECC_WORD_SIZE 1 #endif #if ((uECC_PLATFORM == uECC_arm || uECC_PLATFORM == uECC_arm_thumb || \ uECC_PLATFORM == uECC_arm_thumb2) && \ (uECC_WORD_SIZE != 4)) #pragma message ("uECC_WORD_SIZE must be 4 for ARM") #undef uECC_WORD_SIZE #define uECC_WORD_SIZE 4 #endif #if defined(__SIZEOF_INT128__) || ((__clang_major__ * 100 + __clang_minor__) >= 302) #define SUPPORTS_INT128 1 #else #define SUPPORTS_INT128 0 #endif typedef int8_t wordcount_t; typedef int16_t bitcount_t; typedef int8_t cmpresult_t; #if (uECC_WORD_SIZE == 1) typedef uint8_t uECC_word_t; typedef uint16_t uECC_dword_t; #define HIGH_BIT_SET 0x80 #define uECC_WORD_BITS 8 #define uECC_WORD_BITS_SHIFT 3 #define uECC_WORD_BITS_MASK 0x07 #elif (uECC_WORD_SIZE == 4) typedef uint32_t uECC_word_t; typedef uint64_t uECC_dword_t; #define HIGH_BIT_SET 0x80000000 
#define uECC_WORD_BITS 32 #define uECC_WORD_BITS_SHIFT 5 #define uECC_WORD_BITS_MASK 0x01F #elif (uECC_WORD_SIZE == 8) typedef uint64_t uECC_word_t; #if SUPPORTS_INT128 typedef unsigned __int128 uECC_dword_t; #endif #define HIGH_BIT_SET 0x8000000000000000ull #define uECC_WORD_BITS 64 #define uECC_WORD_BITS_SHIFT 6 #define uECC_WORD_BITS_MASK 0x03F #endif /* uECC_WORD_SIZE */ #endif /* _UECC_TYPES_H_ */ ================================================ FILE: u2f/u2f.ino ================================================ #ifndef DESKTOP_TEST #include #endif #include #include "sha256.h" #include "uECC.h" #undef DEBUG #define DEBUG #define CID_BROADCAST 0xffffffff // Broadcast channel id #define TYPE_MASK 0x80 // Frame type mask #define TYPE_INIT 0x80 // Initial frame identifier #define TYPE_CONT 0x00 // Continuation frame identifier #define U2FHID_PING (TYPE_INIT | 0x01) // Echo data through local processor only #define U2FHID_MSG (TYPE_INIT | 0x03) // Send U2F message frame #define U2FHID_LOCK (TYPE_INIT | 0x04) // Send lock channel command #define U2FHID_INIT (TYPE_INIT | 0x06) // Channel initialization #define U2FHID_WINK (TYPE_INIT | 0x08) // Send device identification wink #define U2FHID_ERROR (TYPE_INIT | 0x3f) // Error response // Errors #define ERR_NONE 0 #define ERR_INVALID_CMD 1 #define ERR_INVALID_PAR 2 #define ERR_INVALID_LEN 3 #define ERR_INVALID_SEQ 4 #define ERR_MSG_TIMEOUT 5 #define ERR_CHANNEL_BUSY 6 #define ERR_LOCK_REQUIRED 10 #define ERR_INVALID_CID 11 #define ERR_OTHER 127 #define U2F_INS_REGISTER 0x01 #define U2F_INS_AUTHENTICATE 0x02 #define U2F_INS_VERSION 0x03 #define STATE_CHANNEL_AVAILABLE 0 #define STATE_CHANNEL_WAIT_PACKET 1 #define STATE_CHANNEL_WAIT_CONT 2 #define STATE_CHANNEL_TIMEOUT 3 #define STATE_LARGE_PACKET 4 #define MAX_TOTAL_PACKET 7609 #define MAX_INITIAL_PACKET 57 #define MAX_CONTINUATION_PACKET 59 #define SET_MSG_LEN(b, v) do { (b)[5] = ((v) >> 8) & 0xff; (b)[6] = (v) & 0xff; } while(0) #define U2FHID_IF_VERSION 2 // Current 
interface implementation version byte expected_next_packet; int large_data_len; int large_data_offset; byte large_buffer[1024]; byte large_resp_buffer[1024]; byte recv_buffer[64]; byte resp_buffer[64]; byte handle[64]; byte sha256_hash[32]; #define MAX_CHANNEL 4 const char attestation_key[] = "\xf3\xfc\xcc\x0d\x00\xd8\x03\x19\x54\xf9" "\x08\x64\xd4\x3c\x24\x7f\x4b\xf5\xf0\x66\x5c\x6b\x50\xcc" "\x17\x74\x9a\x27\xd1\xcf\x76\x64"; const char attestation_der[] = "\x30\x82\x01\x3c\x30\x81\xe4\xa0\x03\x02" "\x01\x02\x02\x0a\x47\x90\x12\x80\x00\x11\x55\x95\x73\x52" "\x30\x0a\x06\x08\x2a\x86\x48\xce\x3d\x04\x03\x02\x30\x17" "\x31\x15\x30\x13\x06\x03\x55\x04\x03\x13\x0c\x47\x6e\x75" "\x62\x62\x79\x20\x50\x69\x6c\x6f\x74\x30\x1e\x17\x0d\x31" "\x32\x30\x38\x31\x34\x31\x38\x32\x39\x33\x32\x5a\x17\x0d" "\x31\x33\x30\x38\x31\x34\x31\x38\x32\x39\x33\x32\x5a\x30" "\x31\x31\x2f\x30\x2d\x06\x03\x55\x04\x03\x13\x26\x50\x69" "\x6c\x6f\x74\x47\x6e\x75\x62\x62\x79\x2d\x30\x2e\x34\x2e" "\x31\x2d\x34\x37\x39\x30\x31\x32\x38\x30\x30\x30\x31\x31" "\x35\x35\x39\x35\x37\x33\x35\x32\x30\x59\x30\x13\x06\x07" "\x2a\x86\x48\xce\x3d\x02\x01\x06\x08\x2a\x86\x48\xce\x3d" "\x03\x01\x07\x03\x42\x00\x04\x8d\x61\x7e\x65\xc9\x50\x8e" "\x64\xbc\xc5\x67\x3a\xc8\x2a\x67\x99\xda\x3c\x14\x46\x68" "\x2c\x25\x8c\x46\x3f\xff\xdf\x58\xdf\xd2\xfa\x3e\x6c\x37" "\x8b\x53\xd7\x95\xc4\xa4\xdf\xfb\x41\x99\xed\xd7\x86\x2f" "\x23\xab\xaf\x02\x03\xb4\xb8\x91\x1b\xa0\x56\x99\x94\xe1" "\x01\x30\x0a\x06\x08\x2a\x86\x48\xce\x3d\x04\x03\x02\x03" "\x47\x00\x30\x44\x02\x20\x60\xcd\xb6\x06\x1e\x9c\x22\x26" "\x2d\x1a\xac\x1d\x96\xd8\xc7\x08\x29\xb2\x36\x65\x31\xdd" "\xa2\x68\x83\x2c\xb8\x36\xbc\xd3\x0d\xfa\x02\x20\x63\x1b" "\x14\x59\xf0\x9e\x63\x30\x05\x57\x22\xc8\xd8\x9b\x7f\x48" "\x88\x3b\x90\x89\xb8\x8d\x60\xd1\xd9\x79\x59\x02\xb3\x04" "\x10\xdf"; //key handle: (private key + app parameter) ^ this array const char handlekey[] = "-YOHANES-NUGROHO-YOHANES-NUGROHO-"; const struct uECC_Curve_t * curve = uECC_secp256r1(); //P-256 
uint8_t private_k[36]; //32 uint8_t public_k[68]; //64 struct ch_state { int cid; byte state; int last_millis; }; ch_state channel_states[MAX_CHANNEL]; #ifdef DESKTOP_TEST extern int RNG(uint8_t *dest, unsigned size); #else extern "C" { static int RNG(uint8_t *dest, unsigned size) { // Use the least-significant bits from the ADC for an unconnected pin (or connected to a source of // random noise). This can take a long time to generate random data if the result of analogRead(0) // doesn't change very frequently. while (size) { uint8_t val = 0; for (unsigned i = 0; i < 8; ++i) { int init = analogRead(0); int count = 0; while (analogRead(0) == init) { ++count; } if (count == 0) { val = (val << 1) | (init & 0x01); } else { val = (val << 1) | (count & 0x01); } } *dest = val; ++dest; --size; } // NOTE: it would be a good idea to hash the resulting random data using SHA-256 or similar. return 1; } } // extern "C" #endif #define TIMEOUT_VALUE 1000 typedef struct SHA256_HashContext { uECC_HashContext uECC; SHA256_CTX ctx; } SHA256_HashContext; void init_SHA256(uECC_HashContext *base) { SHA256_HashContext *context = (SHA256_HashContext *)base; sha256_init(&context->ctx); } void update_SHA256(uECC_HashContext *base, const uint8_t *message, unsigned message_size) { SHA256_HashContext *context = (SHA256_HashContext *)base; sha256_update(&context->ctx, message, message_size); } void finish_SHA256(uECC_HashContext *base, uint8_t *hash_result) { SHA256_HashContext *context = (SHA256_HashContext *)base; sha256_final(&context->ctx, hash_result); } void setup() { Serial.begin(9600); Serial.println(F("U2F")); uECC_set_rng(&RNG); } void cleanup_timeout() { int i; for (i = 0; i < MAX_CHANNEL; i++) { //free channel that is inactive ch_state &c = channel_states[i]; int m = millis(); if (c.state != STATE_CHANNEL_AVAILABLE) { if ((m - c.last_millis) > TIMEOUT_VALUE) { c.state = STATE_CHANNEL_AVAILABLE; } } } } int allocate_new_channel() { int i; //alloace new channel_id int channel_id = 1; 
do { bool found = false; for (i = 0; i < MAX_CHANNEL; i++) { if (channel_states[i].state != STATE_CHANNEL_AVAILABLE) { if (channel_states[i].cid == channel_id) { found = true; channel_id++; break; } } } if (!found) break; } while (true); return channel_id; } int allocate_channel(int channel_id) { int i; if (channel_id==0) { channel_id = allocate_new_channel(); } bool has_free_slots = false; for (i = 0; i < MAX_CHANNEL; i++) { if (channel_states[i].state == STATE_CHANNEL_AVAILABLE) { has_free_slots = true; break; } } if (!has_free_slots) cleanup_timeout(); for (i = 0; i < MAX_CHANNEL; i++) { ch_state &c = channel_states[i]; if (c.state == STATE_CHANNEL_AVAILABLE) { c.cid = channel_id; c.state = STATE_CHANNEL_WAIT_PACKET; c.last_millis = millis(); return channel_id; } } return 0; } int initResponse(byte *buffer) { #ifdef DEBUG Serial.print("INIT RESPONSE"); #endif int cid = *(int*)buffer; #ifdef DEBUG Serial.println(cid, HEX); #endif int len = buffer[5] << 8 | buffer[6]; int i; memcpy(resp_buffer, buffer, 5); SET_MSG_LEN(resp_buffer, 17); memcpy(resp_buffer + 7, buffer + 7, len); //nonce i = 7 + len; if (cid==-1) { cid = allocate_channel(0); } else { #ifdef DEBUG Serial.println("using existing CID"); #endif allocate_channel(cid); } memcpy(resp_buffer + i, &cid, 4); i += 4; resp_buffer[i++] = U2FHID_IF_VERSION; resp_buffer[i++] = 1; //major resp_buffer[i++] = 0; resp_buffer[i++] = 1; //build //resp_buffer[i++] = CAPABILITY_WINK; //capabilities resp_buffer[i++] = 0; //capabilities #ifdef DEBUG Serial.println("SENT RESPONSE 1"); #endif RawHID.send(resp_buffer, 100); #ifdef DEBUG Serial.println(cid, HEX); #endif return cid; } void errorResponse(byte *buffer, int code) { memcpy(resp_buffer, buffer, 4); resp_buffer[4] = U2FHID_ERROR; SET_MSG_LEN(resp_buffer, 1); resp_buffer[7] = code & 0xff; #ifdef DEBUG Serial.print("SENT RESPONSE error:"); Serial.println(code); #endif RawHID.send(resp_buffer, 100); } //find channel index and update last access int find_channel_index(int 
channel_id) { int i; for (i = 0; i < MAX_CHANNEL; i++) { if (channel_states[i].cid==channel_id) { channel_states[i].last_millis = millis(); return i; } } return -1; } #define IS_CONTINUATION_PACKET(x) ( (x) < 0x80) #define IS_NOT_CONTINUATION_PACKET(x) ( (x) >= 0x80) #define SW_NO_ERROR 0x9000 #define SW_CONDITIONS_NOT_SATISFIED 0x6985 #define SW_WRONG_DATA 0x6A80 #define SW_WRONG_LENGTH 0x6700 #define SW_INS_NOT_SUPPORTED 0x6D00 #define SW_CLA_NOT_SUPPORTED 0x6E00 #define APPEND_SW(x, v1, v2) do { (*x++)=v1; (*x++)=v2;} while (0) #define APPEND_SW_NO_ERROR(x) do { (*x++)=0x90; (*x++)=0x00;} while (0) void respondErrorPDU(byte *buffer, int err) { SET_MSG_LEN(buffer, 2); //len("") + 2 byte SW byte *datapart = buffer + 7; APPEND_SW(datapart, (err >> 8) & 0xff, err & 0xff); RawHID.send(buffer, 100); } void sendLargeResponse(byte *request, int len) { #ifdef DEBUG Serial.print("Sending large response "); Serial.println(len); for (int i = 0; i < len; i++) { Serial.print(large_resp_buffer[i], HEX); Serial.print(" "); } Serial.println("\n--\n"); #endif memcpy(resp_buffer, request, 4); //copy cid resp_buffer[4] = U2FHID_MSG; int r = len; if (r>MAX_INITIAL_PACKET) { r = MAX_INITIAL_PACKET; } SET_MSG_LEN(resp_buffer, len); memcpy(resp_buffer + 7, large_resp_buffer, r); RawHID.send(resp_buffer, 100); len -= r; byte p = 0; int offset = MAX_INITIAL_PACKET; while (len > 0) { //memcpy(resp_buffer, request, 4); //copy cid, doesn't need to recopy resp_buffer[4] = p++; memcpy(resp_buffer + 5, large_resp_buffer + offset, MAX_CONTINUATION_PACKET); RawHID.send(resp_buffer, 100); len-= MAX_CONTINUATION_PACKET; offset += MAX_CONTINUATION_PACKET; delayMicroseconds(2500); } } int getCounter() { unsigned int eeAddress = 0; //EEPROM address to start reading from unsigned int counter; EEPROM.get( eeAddress, counter ); return counter; } void setCounter(int counter) { unsigned int eeAddress = 0; //EEPROM address to start reading from EEPROM.put( eeAddress, counter ); } #ifdef SIMULATE_BUTTON 
//for now just simulate this int button_pressed = 0; #endif void processMessage(byte *buffer) { int len = buffer[5] << 8 | buffer[6]; #ifdef DEBUG Serial.println(F("Got message")); Serial.println(len); Serial.println(F("Data:")); #endif byte *message = buffer + 7; #ifdef DEBUG for (int i = 7; i < 7+len; i++) { Serial.print(buffer[i], HEX); } Serial.println(F("")); #endif //todo: check CLA = 0 byte CLA = message[0]; if (CLA!=0) { respondErrorPDU(buffer, SW_CLA_NOT_SUPPORTED); return; } byte INS = message[1]; byte P1 = message[2]; //byte P2 = message[3]; int reqlength = (message[4] << 16) | (message[5] << 8) | message[6]; switch (INS) { case U2F_INS_REGISTER: { if (reqlength!=64) { respondErrorPDU(buffer, SW_WRONG_LENGTH); return; } #ifdef SIMULATE_BUTTON if (!button_pressed) { respondErrorPDU(buffer, SW_CONDITIONS_NOT_SATISFIED); button_pressed = 1; return; } #endif byte *datapart = message + 7; byte *challenge_parameter = datapart; byte *application_parameter = datapart+32; memset(public_k, 0, sizeof(public_k)); memset(private_k, 0, sizeof(private_k)); uECC_make_key(public_k + 1, private_k, curve); //so we ca insert 0x04 public_k[0] = 0x04; #ifdef DEBUG Serial.println(F("Public K")); for (size_t i =0; i < sizeof(public_k); i++) { Serial.print(public_k[i], HEX); Serial.print(" "); } Serial.println(""); Serial.println(F("Private K")); for (size_t i =0; i < sizeof(private_k); i++) { Serial.print(private_k[i], HEX); Serial.print(" "); } Serial.println(""); #endif //construct hash memcpy(handle, application_parameter, 32); memcpy(handle+32, private_k, 32); for (int i =0; i < 64; i++) { handle[i] ^= handlekey[i%(sizeof(handlekey)-1)]; } SHA256_CTX ctx; sha256_init(&ctx); large_resp_buffer[0] = 0x00; sha256_update(&ctx, large_resp_buffer, 1); #ifdef DEBUG Serial.println(F("App Parameter:")); for (int i =0; i < 32; i++) { Serial.print(application_parameter[i], HEX); Serial.print(" "); } Serial.println(""); #endif sha256_update(&ctx, application_parameter, 32); #ifdef DEBUG 
Serial.println(F("Chal Parameter:")); for (int i =0; i < 32; i++) { Serial.print(challenge_parameter[i], HEX); Serial.print(" "); } Serial.println(""); #endif sha256_update(&ctx, challenge_parameter, 32); #ifdef DEBUG Serial.println(F("Handle Parameter:")); for (int i =0; i < 64; i++) { Serial.print(handle[i], HEX); Serial.print(" "); } Serial.println(""); #endif sha256_update(&ctx, handle, 64); sha256_update(&ctx, public_k, 65); #ifdef DEBUG Serial.println(F("Public key:")); for (int i =0; i < 65; i++) { Serial.print(public_k[i], HEX); Serial.print(" "); } Serial.println(""); #endif sha256_final(&ctx, sha256_hash); #ifdef DEBUG Serial.println(F("Hash:")); for (int i =0; i < 32; i++) { Serial.print(sha256_hash[i], HEX); Serial.print(" "); } Serial.println(""); #endif uint8_t *signature = resp_buffer; //temporary uint8_t tmp[32 + 32 + 64]; SHA256_HashContext ectx = {{&init_SHA256, &update_SHA256, &finish_SHA256, 64, 32, tmp}}; uECC_sign_deterministic((uint8_t *)attestation_key, sha256_hash, 32, &ectx.uECC, signature, curve); int len = 0; large_resp_buffer[len++] = 0x05; memcpy(large_resp_buffer + len, public_k, 65); len+=65; large_resp_buffer[len++] = 64; //length of handle memcpy(large_resp_buffer+len, handle, 64); len += 64; memcpy(large_resp_buffer+len, attestation_der, sizeof(attestation_der)); len += sizeof(attestation_der)-1; //convert signature format //http://bitcoin.stackexchange.com/questions/12554/why-the-signature-is-always-65-13232-bytes-long large_resp_buffer[len++] = 0x30; //header: compound structure uint8_t *total_len = &large_resp_buffer[len]; large_resp_buffer[len++] = 0x44; //total length (32 + 32 + 2 + 2) large_resp_buffer[len++] = 0x02; //header: integer if (signature[0]>0x7f) { large_resp_buffer[len++] = 33; //33 byte large_resp_buffer[len++] = 0; (*total_len)++; //update total length } else { large_resp_buffer[len++] = 32; //32 byte } memcpy(large_resp_buffer+len, signature, 32); //R value len +=32; large_resp_buffer[len++] = 0x02; //header: 
integer if (signature[32]>0x7f) { large_resp_buffer[len++] = 33; //32 byte large_resp_buffer[len++] = 0; (*total_len)++; //update total length } else { large_resp_buffer[len++] = 32; //32 byte } memcpy(large_resp_buffer+len, signature+32, 32); //R value len +=32; byte *last = large_resp_buffer+len; APPEND_SW_NO_ERROR(last); len += 2; #ifdef SIMULATE_BUTTON button_pressed = 0; #endif sendLargeResponse(buffer, len); } break; case U2F_INS_AUTHENTICATE: { //minimum is 64 + 1 + 64 if (reqlength!=(64+1+64)) { respondErrorPDU(buffer, SW_WRONG_LENGTH); return; } byte *datapart = message + 7; byte *challenge_parameter = datapart; byte *application_parameter = datapart+32; byte handle_len = datapart[64]; byte *client_handle = datapart+65; if (handle_len!=64) { //not from this device respondErrorPDU(buffer, SW_WRONG_DATA); return; } #ifdef SIMULATE_BUTTON if (!button_pressed) { respondErrorPDU(buffer, SW_CONDITIONS_NOT_SATISFIED); button_pressed = 1; return; } #endif memcpy(handle, client_handle, 64); for (int i =0; i < 64; i++) { handle[i] ^= handlekey[i%(sizeof(handlekey)-1)]; } uint8_t *key = handle + 32; if (memcmp(handle, application_parameter, 32)!=0) { //this handle is not from us respondErrorPDU(buffer, SW_WRONG_DATA); return; } if (P1==0x07) { //check-only respondErrorPDU(buffer, SW_CONDITIONS_NOT_SATISFIED); } else if (P1==0x03) { //enforce-user-presence-and-sign int counter = getCounter(); SHA256_CTX ctx; sha256_init(&ctx); sha256_update(&ctx, application_parameter, 32); large_resp_buffer[0] = 0x01; // user_presence int ctr = ((counter>>24)&0xff) | // move byte 3 to byte 0 ((counter<<8)&0xff0000) | // move byte 1 to byte 2 ((counter>>8)&0xff00) | // move byte 2 to byte 1 ((counter<<24)&0xff000000); // byte 0 to byte 3 memcpy(large_resp_buffer + 1, &ctr, 4); sha256_update(&ctx, large_resp_buffer, 5); //user presence + ctr sha256_update(&ctx, challenge_parameter, 32); sha256_final(&ctx, sha256_hash); uint8_t *signature = resp_buffer; //temporary uint8_t tmp[32 + 32 + 
64]; SHA256_HashContext ectx = {{&init_SHA256, &update_SHA256, &finish_SHA256, 64, 32, tmp}}; uECC_sign_deterministic((uint8_t *)key, sha256_hash, 32, &ectx.uECC, signature, curve); int len = 5; //convert signature format //http://bitcoin.stackexchange.com/questions/12554/why-the-signature-is-always-65-13232-bytes-long large_resp_buffer[len++] = 0x30; //header: compound structure uint8_t *total_len = &large_resp_buffer[len]; large_resp_buffer[len++] = 0x44; //total length (32 + 32 + 2 + 2) large_resp_buffer[len++] = 0x02; //header: integer if (signature[0]>0x7f) { large_resp_buffer[len++] = 33; //33 byte large_resp_buffer[len++] = 0; (*total_len)++; //update total length } else { large_resp_buffer[len++] = 32; //32 byte } memcpy(large_resp_buffer+len, signature, 32); //R value len +=32; large_resp_buffer[len++] = 0x02; //header: integer if (signature[32]>0x7f) { large_resp_buffer[len++] = 33; //32 byte large_resp_buffer[len++] = 0; (*total_len)++; //update total length } else { large_resp_buffer[len++] = 32; //32 byte } memcpy(large_resp_buffer+len, signature+32, 32); //R value len +=32; byte *last = large_resp_buffer+len; APPEND_SW_NO_ERROR(last); len += 2; #ifdef DEBUG Serial.print("Len to send "); Serial.println(len); #endif #ifdef SIMULATE_BUTTON button_pressed = 0; #endif sendLargeResponse(buffer, len); setCounter(counter+1); } else { //return error } } break; case U2F_INS_VERSION: { if (reqlength!=0) { respondErrorPDU(buffer, SW_WRONG_LENGTH); return; } //reuse input buffer for sending SET_MSG_LEN(buffer, 8); //len("U2F_V2") + 2 byte SW byte *datapart = buffer + 7; memcpy(datapart, "U2F_V2", 6); datapart += 6; APPEND_SW_NO_ERROR(datapart); RawHID.send(buffer, 100); } break; default: { respondErrorPDU(buffer, SW_INS_NOT_SUPPORTED); } ; } } void processPacket(byte *buffer) { #ifdef DEBUG Serial.print("Process CMD "); #endif unsigned char cmd = buffer[4]; //cmd or continuation #ifdef DEBUG Serial.println((int)cmd, HEX); #endif int len = buffer[5] << 8 | 
buffer[6]; if (cmd > U2FHID_INIT || cmd==U2FHID_LOCK) { errorResponse(recv_buffer, ERR_INVALID_CMD); return; } if (cmd==U2FHID_PING) { if (len <= MAX_INITIAL_PACKET) { #ifdef DEBUG Serial.println("Sending ping response"); #endif RawHID.send(buffer, 100); } else { //large packet //send first one #ifdef DEBUG Serial.println("SENT RESPONSE 3"); #endif RawHID.send(buffer, 100); len -= MAX_INITIAL_PACKET; byte p = 0; int offset = 7 + MAX_INITIAL_PACKET; while (len > 0) { memcpy(resp_buffer, buffer, 4); //copy cid resp_buffer[4] = p++; memcpy(resp_buffer + 5, buffer + offset, MAX_CONTINUATION_PACKET); RawHID.send(resp_buffer, 100); len-= MAX_CONTINUATION_PACKET; offset += MAX_CONTINUATION_PACKET; delayMicroseconds(2500); } #ifdef DEBUG Serial.println("Sending large ping response"); #endif } } if (cmd==U2FHID_MSG) { processMessage(buffer); } } void setOtherTimeout() { //we can process the data //but if we find another channel is waiting for continuation, we set it as timeout for (int i = 0; i < MAX_CHANNEL; i++) { if (channel_states[i].state==STATE_CHANNEL_WAIT_CONT) { #ifdef DEBUG Serial.println("Set other timeout"); #endif channel_states[i].state= STATE_CHANNEL_TIMEOUT; } } } int cont_start = 0; #ifndef DESKTOP_TEST #ifdef DEBUG void dump_hex(byte *buffer, int len) { for (int i = 0 ; i < len; i++) { if (buffer[i] <= 0xf) { Serial.print(0); } Serial.print(buffer[i], HEX); Serial.print(" "); } Serial.println(); } #endif #endif void loop() { int n; n = RawHID.recv(recv_buffer, 0); // 0 timeout = do not wait if (n > 0) { #ifdef DEBUG #ifndef DESKTOP_TEST Serial.print("RAW_RECV: "); dump_hex(recv_buffer, n); #endif Serial.print(F("\n\nReceived packet, CID: ")); #endif //int cid = *(int*)recv_buffer; int cid; //handle strict-aliasing warning memcpy(&cid, recv_buffer, sizeof(cid)); #ifdef DEBUG Serial.println(cid, HEX); #endif if (cid==0) { errorResponse(recv_buffer, ERR_INVALID_CID); return; } unsigned char cmd_or_cont = recv_buffer[4]; //cmd or continuation int len = 
(recv_buffer[5]) << 8 | recv_buffer[6]; #ifdef DEBUG if (IS_NOT_CONTINUATION_PACKET(cmd_or_cont)) { Serial.print(F("LEN ")); Serial.println((int)len); } #endif //don't care about cid if (cmd_or_cont==U2FHID_INIT) { setOtherTimeout(); cid = initResponse(recv_buffer); int cidx = find_channel_index(cid); channel_states[cidx].state= STATE_CHANNEL_WAIT_PACKET; return; } if (cid==-1) { errorResponse(recv_buffer, ERR_INVALID_CID); return; } int cidx = find_channel_index(cid); if (cidx==-1) { #ifdef DEBUG Serial.println("allocating new CID"); #endif allocate_channel(cid); cidx = find_channel_index(cid); if (cidx==-1) { errorResponse(recv_buffer, ERR_INVALID_CID); return; } } if (IS_NOT_CONTINUATION_PACKET(cmd_or_cont)) { if (len > MAX_TOTAL_PACKET) { errorResponse(recv_buffer, ERR_INVALID_LEN); //invalid length return; } if (len > MAX_INITIAL_PACKET) { //if another channel is waiting for continuation, we respond with busy for (int i = 0; i < MAX_CHANNEL; i++) { if (channel_states[i].state==STATE_CHANNEL_WAIT_CONT) { if (i==cidx) { errorResponse(recv_buffer, ERR_INVALID_SEQ); //invalid sequence channel_states[i].state= STATE_CHANNEL_WAIT_PACKET; } else { errorResponse(recv_buffer, ERR_CHANNEL_BUSY); return; } return; } } //no other channel is waiting channel_states[cidx].state=STATE_CHANNEL_WAIT_CONT; cont_start = millis(); memcpy(large_buffer, recv_buffer, 64); large_data_len = len; large_data_offset = MAX_INITIAL_PACKET; expected_next_packet = 0; return; } setOtherTimeout(); processPacket(recv_buffer); channel_states[cidx].state= STATE_CHANNEL_WAIT_PACKET; } else { if (channel_states[cidx].state!=STATE_CHANNEL_WAIT_CONT) { #ifdef DEBUG Serial.println("ignoring stray packet"); Serial.println(cid, HEX); #endif return; } //this is a continuation if (cmd_or_cont != expected_next_packet) { errorResponse(recv_buffer, ERR_INVALID_SEQ); //invalid sequence channel_states[cidx].state= STATE_CHANNEL_WAIT_PACKET; return; } else { memcpy(large_buffer + large_data_offset + 7, 
recv_buffer + 5, MAX_CONTINUATION_PACKET); large_data_offset += MAX_CONTINUATION_PACKET; if (large_data_offset < large_data_len) { expected_next_packet++; #ifdef DEBUG Serial.println("Expecting next cont"); #endif return; } #ifdef DEBUG Serial.println("Completed"); #endif channel_states[cidx].state= STATE_CHANNEL_WAIT_PACKET; processPacket(large_buffer); return; } } } else { for (int i = 0; i < MAX_CHANNEL; i++) { if (channel_states[i].state==STATE_CHANNEL_TIMEOUT) { #ifdef DEBUG Serial.println("send timeout"); Serial.println(channel_states[i].cid, HEX); #endif memcpy(recv_buffer, &channel_states[i].cid, 4); errorResponse(recv_buffer, ERR_MSG_TIMEOUT); channel_states[i].state= STATE_CHANNEL_WAIT_PACKET; } if (channel_states[i].state==STATE_CHANNEL_WAIT_CONT) { int now = millis(); if ((now - channel_states[i].last_millis)>500) { #ifdef DEBUG Serial.println("SET timeout"); #endif channel_states[i].state=STATE_CHANNEL_TIMEOUT; } } } } } ================================================ FILE: u2f/uECC.c ================================================ /* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */ #include "uECC.h" #include "uECC_vli.h" #ifndef uECC_RNG_MAX_TRIES #define uECC_RNG_MAX_TRIES 64 #endif #if uECC_ENABLE_VLI_API #define uECC_VLI_API #else #define uECC_VLI_API static #endif #define CONCATX(a, ...) a ## __VA_ARGS__ #define CONCAT(a, ...) CONCATX(a, __VA_ARGS__) #define STRX(a) #a #define STR(a) STRX(a) #define EVAL(...) EVAL1(EVAL1(EVAL1(EVAL1(__VA_ARGS__)))) #define EVAL1(...) EVAL2(EVAL2(EVAL2(EVAL2(__VA_ARGS__)))) #define EVAL2(...) EVAL3(EVAL3(EVAL3(EVAL3(__VA_ARGS__)))) #define EVAL3(...) EVAL4(EVAL4(EVAL4(EVAL4(__VA_ARGS__)))) #define EVAL4(...) 
__VA_ARGS__ #define DEC_1 0 #define DEC_2 1 #define DEC_3 2 #define DEC_4 3 #define DEC_5 4 #define DEC_6 5 #define DEC_7 6 #define DEC_8 7 #define DEC_9 8 #define DEC_10 9 #define DEC_11 10 #define DEC_12 11 #define DEC_13 12 #define DEC_14 13 #define DEC_15 14 #define DEC_16 15 #define DEC_17 16 #define DEC_18 17 #define DEC_19 18 #define DEC_20 19 #define DEC_21 20 #define DEC_22 21 #define DEC_23 22 #define DEC_24 23 #define DEC_25 24 #define DEC_26 25 #define DEC_27 26 #define DEC_28 27 #define DEC_29 28 #define DEC_30 29 #define DEC_31 30 #define DEC_32 31 #define DEC(N) CONCAT(DEC_, N) #define SECOND_ARG(_, val, ...) val #define SOME_CHECK_0 ~, 0 #define GET_SECOND_ARG(...) SECOND_ARG(__VA_ARGS__, SOME,) #define SOME_OR_0(N) GET_SECOND_ARG(CONCAT(SOME_CHECK_, N)) #define EMPTY(...) #define DEFER(...) __VA_ARGS__ EMPTY() #define REPEAT_NAME_0() REPEAT_0 #define REPEAT_NAME_SOME() REPEAT_SOME #define REPEAT_0(...) #define REPEAT_SOME(N, stuff) DEFER(CONCAT(REPEAT_NAME_, SOME_OR_0(DEC(N))))()(DEC(N), stuff) stuff #define REPEAT(N, stuff) EVAL(REPEAT_SOME(N, stuff)) #define REPEATM_NAME_0() REPEATM_0 #define REPEATM_NAME_SOME() REPEATM_SOME #define REPEATM_0(...) #define REPEATM_SOME(N, macro) macro(N) \ DEFER(CONCAT(REPEATM_NAME_, SOME_OR_0(DEC(N))))()(DEC(N), macro) #define REPEATM(N, macro) EVAL(REPEATM_SOME(N, macro)) #include "platform-specific.h" #if (uECC_WORD_SIZE == 1) #if uECC_SUPPORTS_secp160r1 #define uECC_MAX_WORDS 21 /* Due to the size of curve_n. */ #endif #if uECC_SUPPORTS_secp192r1 #undef uECC_MAX_WORDS #define uECC_MAX_WORDS 24 #endif #if uECC_SUPPORTS_secp224r1 #undef uECC_MAX_WORDS #define uECC_MAX_WORDS 28 #endif #if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1) #undef uECC_MAX_WORDS #define uECC_MAX_WORDS 32 #endif #elif (uECC_WORD_SIZE == 4) #if uECC_SUPPORTS_secp160r1 #define uECC_MAX_WORDS 6 /* Due to the size of curve_n. 
/* Number of native words / bytes needed to hold num_bits bits (rounded up). */
#define BITS_TO_WORDS(num_bits) ((num_bits + ((uECC_WORD_SIZE * 8) - 1)) / (uECC_WORD_SIZE * 8))
#define BITS_TO_BYTES(num_bits) ((num_bits + 7) / 8)

/* Description of one supported curve. All multi-word values are stored as
   arrays of native words sized for the largest enabled curve. */
struct uECC_Curve_t {
    wordcount_t num_words;   /* words per field element / coordinate */
    wordcount_t num_bytes;   /* bytes per coordinate in the external byte encoding */
    bitcount_t num_n_bits;   /* bit length of n */
    uECC_word_t p[uECC_MAX_WORDS];     /* modulus used for all point coordinate arithmetic */
    uECC_word_t n[uECC_MAX_WORDS];     /* upper bound for private keys / scalars */
    uECC_word_t G[uECC_MAX_WORDS * 2]; /* point (x then y) multiplied to derive public keys */
    uECC_word_t b[uECC_MAX_WORDS];     /* presumably the curve constant b - confirm in curve-specific.h */
    /* Doubles the Jacobian point (X1, Y1, Z1) in place. */
    void (*double_jacobian)(uECC_word_t * X1,
                            uECC_word_t * Y1,
                            uECC_word_t * Z1,
                            uECC_Curve curve);
#if uECC_SUPPORT_COMPRESSED_POINT
    /* In-place modular square root; used when decompressing a point. */
    void (*mod_sqrt)(uECC_word_t *a, uECC_Curve curve);
#endif
    /* Evaluates the right-hand side of the curve equation (x^3 + ax + b) at x. */
    void (*x_side)(uECC_word_t *result, const uECC_word_t *x, uECC_Curve curve);
#if (uECC_OPTIMIZATION_LEVEL > 0)
    /* Curve-specific reduction of a double-width product modulo p. */
    void (*mmod_fast)(uECC_word_t *result, uECC_word_t *product);
#endif
};

/* Forward declaration: the platform assembly included below may refer to it. */
static cmpresult_t uECC_vli_cmp_unsafe(const uECC_word_t *left,
                                       const uECC_word_t *right,
                                       wordcount_t num_words);

#if (uECC_PLATFORM == uECC_arm || uECC_PLATFORM == uECC_arm_thumb || \
        uECC_PLATFORM == uECC_arm_thumb2)
    #include "asm_arm.h"
#endif

#if (uECC_PLATFORM == uECC_avr)
    #include "asm_avr.inc"
#endif

/* Active random number source. It is 0 when the platform provides no default
   and the application has not installed one; code that needs randomness
   checks for that. */
#if default_RNG_defined
static uECC_RNG_Function g_rng_function = &default_RNG;
#else
static uECC_RNG_Function g_rng_function = 0;
#endif

/* Installs rng_function as the random number source used by this file. */
void uECC_set_rng(uECC_RNG_Function rng_function) {
    g_rng_function = rng_function;
}
uECC_vli_clear(uECC_word_t *vli, wordcount_t num_words) { wordcount_t i; for (i = 0; i < num_words; ++i) { vli[i] = 0; } } #endif /* !asm_clear */ /* Constant-time comparison to zero - secure way to compare long integers */ /* Returns 1 if vli == 0, 0 otherwise. */ uECC_VLI_API uECC_word_t uECC_vli_isZero(const uECC_word_t *vli, wordcount_t num_words) { uECC_word_t bits = 0; wordcount_t i; for (i = 0; i < num_words; ++i) { bits |= vli[i]; } return (bits == 0); } /* Returns nonzero if bit 'bit' of vli is set. */ uECC_VLI_API uECC_word_t uECC_vli_testBit(const uECC_word_t *vli, bitcount_t bit) { return (vli[bit >> uECC_WORD_BITS_SHIFT] & ((uECC_word_t)1 << (bit & uECC_WORD_BITS_MASK))); } /* Counts the number of words in vli. */ static wordcount_t vli_numDigits(const uECC_word_t *vli, const wordcount_t max_words) { wordcount_t i; /* Search from the end until we find a non-zero digit. We do it in reverse because we expect that most digits will be nonzero. */ for (i = max_words - 1; i >= 0 && vli[i] == 0; --i) { } return (i + 1); } /* Counts the number of bits required to represent vli. */ uECC_VLI_API bitcount_t uECC_vli_numBits(const uECC_word_t *vli, const wordcount_t max_words) { uECC_word_t i; uECC_word_t digit; wordcount_t num_digits = vli_numDigits(vli, max_words); if (num_digits == 0) { return 0; } digit = vli[num_digits - 1]; for (i = 0; digit; ++i) { digit >>= 1; } return (((bitcount_t)(num_digits - 1) << uECC_WORD_BITS_SHIFT) + i); } /* Sets dest = src. */ #if !asm_set uECC_VLI_API void uECC_vli_set(uECC_word_t *dest, const uECC_word_t *src, wordcount_t num_words) { wordcount_t i; for (i = 0; i < num_words; ++i) { dest[i] = src[i]; } } #endif /* !asm_set */ /* Returns sign of left - right. 
*/ static cmpresult_t uECC_vli_cmp_unsafe(const uECC_word_t *left, const uECC_word_t *right, wordcount_t num_words) { wordcount_t i; for (i = num_words - 1; i >= 0; --i) { if (left[i] > right[i]) { return 1; } else if (left[i] < right[i]) { return -1; } } return 0; } /* Constant-time comparison function - secure way to compare long integers */ /* Returns one if left == right, zero otherwise. */ uECC_VLI_API uECC_word_t uECC_vli_equal(const uECC_word_t *left, const uECC_word_t *right, wordcount_t num_words) { uECC_word_t diff = 0; wordcount_t i; for (i = num_words - 1; i >= 0; --i) { diff |= (left[i] ^ right[i]); } return (diff == 0); } uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *right, wordcount_t num_words); /* Returns sign of left - right, in constant time. */ uECC_VLI_API cmpresult_t uECC_vli_cmp(const uECC_word_t *left, const uECC_word_t *right, wordcount_t num_words) { uECC_word_t tmp[uECC_MAX_WORDS]; uECC_word_t neg = !!uECC_vli_sub(tmp, left, right, num_words); uECC_word_t equal = uECC_vli_isZero(tmp, num_words); return (!equal - 2 * neg); } /* Computes vli = vli >> 1. */ #if !asm_rshift1 uECC_VLI_API void uECC_vli_rshift1(uECC_word_t *vli, wordcount_t num_words) { uECC_word_t *end = vli; uECC_word_t carry = 0; vli += num_words; while (vli-- > end) { uECC_word_t temp = *vli; *vli = (temp >> 1) | carry; carry = temp << (uECC_WORD_BITS - 1); } } #endif /* !asm_rshift1 */ /* Computes result = left + right, returning carry. Can modify in place. */ #if !asm_add uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *right, wordcount_t num_words) { uECC_word_t carry = 0; wordcount_t i; for (i = 0; i < num_words; ++i) { uECC_word_t sum = left[i] + right[i] + carry; if (sum != left[i]) { carry = (sum < left[i]); } result[i] = sum; } return carry; } #endif /* !asm_add */ /* Computes result = left - right, returning borrow. Can modify in place. 
/* Computes result = left - right, returning borrow. Can modify in place. */
#if !asm_sub
uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result,
                                      const uECC_word_t *left,
                                      const uECC_word_t *right,
                                      wordcount_t num_words) {
    uECC_word_t borrow = 0;
    wordcount_t i;
    for (i = 0; i < num_words; ++i) {
        uECC_word_t diff = left[i] - right[i] - borrow;
        /* diff == left[i] means right[i] + borrow wrapped to a multiple of the
           word size, in which case the incoming borrow is passed on as-is. */
        if (diff != left[i]) {
            borrow = (diff > left[i]);
        }
        result[i] = diff;
    }
    return borrow;
}
#endif /* !asm_sub */

#if !asm_mult || (uECC_SQUARE_FUNC && !asm_square) || \
    (uECC_SUPPORTS_secp256k1 && (uECC_OPTIMIZATION_LEVEL > 0) && \
        ((uECC_WORD_SIZE == 1) || (uECC_WORD_SIZE == 8)))
/* Adds the double-width product a * b to the three-word accumulator
   (*r2, *r1, *r0), where *r0 is the least significant word. */
static void muladd(uECC_word_t a,
                   uECC_word_t b,
                   uECC_word_t *r0,
                   uECC_word_t *r1,
                   uECC_word_t *r2) {
#if uECC_WORD_SIZE == 8 && !SUPPORTS_INT128
    /* No 128-bit type available: build the 128-bit product from four 32x32
       partial products. */
    uint64_t a0 = a & 0xffffffffull;
    uint64_t a1 = a >> 32;
    uint64_t b0 = b & 0xffffffffull;
    uint64_t b1 = b >> 32;

    uint64_t i0 = a0 * b0;
    uint64_t i1 = a0 * b1;
    uint64_t i2 = a1 * b0;
    uint64_t i3 = a1 * b1;

    uint64_t p0, p1;

    i2 += (i0 >> 32);
    i2 += i1;
    if (i2 < i1) { /* overflow */
        i3 += 0x100000000ull;
    }

    /* (p1, p0) is the full product, p0 being the low half. */
    p0 = (i0 & 0xffffffffull) | (i2 << 32);
    p1 = i3 + (i2 >> 32);

    *r0 += p0;
    *r1 += (p1 + (*r0 < p0));
    *r2 += ((*r1 < p1) || (*r1 == p1 && *r0 < p0));
#else
    uECC_dword_t p = (uECC_dword_t)a * b;
    uECC_dword_t r01 = ((uECC_dword_t)(*r1) << uECC_WORD_BITS) | *r0;
    r01 += p;
    *r2 += (r01 < p); /* carry out of the double-word addition */
    *r1 = r01 >> uECC_WORD_BITS;
    *r0 = (uECC_word_t)r01;
#endif
}
#endif /* muladd needed */
#if !asm_mult
/* Computes result = left * right. result holds 2 * num_words words. */
uECC_VLI_API void uECC_vli_mult(uECC_word_t *result,
                                const uECC_word_t *left,
                                const uECC_word_t *right,
                                wordcount_t num_words) {
    /* (r2, r1, r0) is a three-word column accumulator. */
    uECC_word_t r0 = 0;
    uECC_word_t r1 = 0;
    uECC_word_t r2 = 0;
    wordcount_t i, k;

    /* Compute each digit of result in sequence, maintaining the carries. */
    for (k = 0; k < num_words; ++k) {
        for (i = 0; i <= k; ++i) {
            muladd(left[i], right[k - i], &r0, &r1, &r2);
        }
        result[k] = r0;
        r0 = r1;
        r1 = r2;
        r2 = 0;
    }
    /* Upper half: only index pairs that stay inside both operands. */
    for (k = num_words; k < num_words * 2 - 1; ++k) {
        for (i = (k + 1) - num_words; i < num_words; ++i) {
            muladd(left[i], right[k - i], &r0, &r1, &r2);
        }
        result[k] = r0;
        r0 = r1;
        r1 = r2;
        r2 = 0;
    }
    result[num_words * 2 - 1] = r0;
}
#endif /* !asm_mult */

#if uECC_SQUARE_FUNC

#if !asm_square
/* Adds 2 * a * b to the three-word accumulator (*r2, *r1, *r0). */
static void mul2add(uECC_word_t a,
                    uECC_word_t b,
                    uECC_word_t *r0,
                    uECC_word_t *r1,
                    uECC_word_t *r2) {
#if uECC_WORD_SIZE == 8 && !SUPPORTS_INT128
    uint64_t a0 = a & 0xffffffffull;
    uint64_t a1 = a >> 32;
    uint64_t b0 = b & 0xffffffffull;
    uint64_t b1 = b >> 32;

    uint64_t i0 = a0 * b0;
    uint64_t i1 = a0 * b1;
    uint64_t i2 = a1 * b0;
    uint64_t i3 = a1 * b1;

    uint64_t p0, p1;

    i2 += (i0 >> 32);
    i2 += i1;
    if (i2 < i1)
    { /* overflow */
        i3 += 0x100000000ull;
    }

    p0 = (i0 & 0xffffffffull) | (i2 << 32);
    p1 = i3 + (i2 >> 32);

    /* Double the product; the bit shifted out of p1 goes straight to r2. */
    *r2 += (p1 >> 63);
    p1 = (p1 << 1) | (p0 >> 63);
    p0 <<= 1;

    *r0 += p0;
    *r1 += (p1 + (*r0 < p0));
    *r2 += ((*r1 < p1) || (*r1 == p1 && *r0 < p0));
#else
    uECC_dword_t p = (uECC_dword_t)a * b;
    uECC_dword_t r01 = ((uECC_dword_t)(*r1) << uECC_WORD_BITS) | *r0;
    /* Top bit of p would be lost by the doubling, so account for it first. */
    *r2 += (p >> (uECC_WORD_BITS * 2 - 1));
    p *= 2;
    r01 += p;
    *r2 += (r01 < p);
    *r1 = r01 >> uECC_WORD_BITS;
    *r0 = (uECC_word_t)r01;
#endif
}

/* Computes result = left^2. result holds 2 * num_words words. */
uECC_VLI_API void uECC_vli_square(uECC_word_t *result,
                                  const uECC_word_t *left,
                                  wordcount_t num_words) {
    uECC_word_t r0 = 0;
    uECC_word_t r1 = 0;
    uECC_word_t r2 = 0;

    wordcount_t i, k;

    for (k = 0; k < num_words * 2 - 1; ++k) {
        uECC_word_t min = (k < num_words ? 0 : (k + 1) - num_words);
        /* Only visit pairs with i <= k - i: off-diagonal products are added
           twice via mul2add, the diagonal term once via muladd. */
        for (i = min; i <= k && i <= k - i; ++i) {
            if (i < k-i) {
                mul2add(left[i], left[k - i], &r0, &r1, &r2);
            } else {
                muladd(left[i], left[k - i], &r0, &r1, &r2);
            }
        }
        result[k] = r0;
        r0 = r1;
        r1 = r2;
        r2 = 0;
    }

    result[num_words * 2 - 1] = r0;
}
#endif /* !asm_square */

#else /* uECC_SQUARE_FUNC */

#if uECC_ENABLE_VLI_API
/* Without a dedicated squaring routine, square by multiplying left by itself. */
uECC_VLI_API void uECC_vli_square(uECC_word_t *result,
                                  const uECC_word_t *left,
                                  wordcount_t num_words) {
    uECC_vli_mult(result, left, left, num_words);
}
#endif /* uECC_ENABLE_VLI_API */

#endif /* uECC_SQUARE_FUNC */

/* Computes result = (left + right) % mod.
   Assumes that left < mod and right < mod, and that result does not overlap mod. */
uECC_VLI_API void uECC_vli_modAdd(uECC_word_t *result,
                                  const uECC_word_t *left,
                                  const uECC_word_t *right,
                                  const uECC_word_t *mod,
                                  wordcount_t num_words) {
    uECC_word_t carry = uECC_vli_add(result, left, right, num_words);
    if (carry || uECC_vli_cmp_unsafe(mod, result, num_words) != 1) {
        /* result >= mod (result = mod + remainder), so subtract mod to get remainder. */
        uECC_vli_sub(result, result, mod, num_words);
    }
}

/* Computes result = (left - right) % mod.
   Assumes that left < mod and right < mod, and that result does not overlap mod. */
uECC_VLI_API void uECC_vli_modSub(uECC_word_t *result,
                                  const uECC_word_t *left,
                                  const uECC_word_t *right,
                                  const uECC_word_t *mod,
                                  wordcount_t num_words) {
    uECC_word_t l_borrow = uECC_vli_sub(result, left, right, num_words);
    if (l_borrow) {
        /* In this case, result == -diff == (max int) - diff. Since -x % d == d - x,
           we can get the correct result from result + mod (with overflow). */
        uECC_vli_add(result, result, mod, num_words);
    }
}
/* Computes result = product % mod, where product is 2N words long. */
/* Currently only designed to work for curve_p or curve_n. */
/* Note: product is also used as working storage and is overwritten. */
uECC_VLI_API void uECC_vli_mmod(uECC_word_t *result,
                                uECC_word_t *product,
                                const uECC_word_t *mod,
                                wordcount_t num_words) {
    uECC_word_t mod_multiple[2 * uECC_MAX_WORDS];
    uECC_word_t tmp[2 * uECC_MAX_WORDS];
    /* v[index] is the current remainder; v[1 - index] receives each trial
       subtraction. */
    uECC_word_t *v[2] = {tmp, product};
    uECC_word_t index;

    /* Shift mod so its highest set bit is at the maximum position. */
    bitcount_t shift = (num_words * 2 * uECC_WORD_BITS) - uECC_vli_numBits(mod, num_words);
    wordcount_t word_shift = shift / uECC_WORD_BITS;
    wordcount_t bit_shift = shift % uECC_WORD_BITS;
    uECC_word_t carry = 0;
    uECC_vli_clear(mod_multiple, word_shift);
    if (bit_shift > 0) {
        for(index = 0; index < (uECC_word_t)num_words; ++index) {
            mod_multiple[word_shift + index] = (mod[index] << bit_shift) | carry;
            carry = mod[index] >> (uECC_WORD_BITS - bit_shift);
        }
    } else {
        uECC_vli_set(mod_multiple + word_shift, mod, num_words);
    }

    /* Shift-and-subtract division: one trial subtraction per bit position. */
    for (index = 1; shift >= 0; --shift) {
        uECC_word_t borrow = 0;
        wordcount_t i;
        for (i = 0; i < num_words * 2; ++i) {
            uECC_word_t diff = v[index][i] - mod_multiple[i] - borrow;
            if (diff != v[index][i]) {
                borrow = (diff > v[index][i]);
            }
            v[1 - index][i] = diff;
        }
        /* Keep the difference only when it did not go negative; done by index
           arithmetic instead of a branch. */
        index = !(index ^ borrow); /* Swap the index if there was no borrow */
        /* mod_multiple >>= 1 across both halves, carrying the low bit of the
           upper half into the top bit of the lower half. */
        uECC_vli_rshift1(mod_multiple, num_words);
        mod_multiple[num_words - 1] |= mod_multiple[num_words] << (uECC_WORD_BITS - 1);
        uECC_vli_rshift1(mod_multiple + num_words, num_words);
    }
    uECC_vli_set(result, v[index], num_words);
}
*/ uECC_VLI_API void uECC_vli_modMult(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *right, const uECC_word_t *mod, wordcount_t num_words) { uECC_word_t product[2 * uECC_MAX_WORDS]; uECC_vli_mult(product, left, right, num_words); uECC_vli_mmod(result, product, mod, num_words); } uECC_VLI_API void uECC_vli_modMult_fast(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *right, uECC_Curve curve) { uECC_word_t product[2 * uECC_MAX_WORDS]; uECC_vli_mult(product, left, right, curve->num_words); #if (uECC_OPTIMIZATION_LEVEL > 0) curve->mmod_fast(result, product); #else uECC_vli_mmod(result, product, curve->p, curve->num_words); #endif } #if uECC_SQUARE_FUNC #if uECC_ENABLE_VLI_API /* Computes result = left^2 % mod. */ uECC_VLI_API void uECC_vli_modSquare(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *mod, wordcount_t num_words) { uECC_word_t product[2 * uECC_MAX_WORDS]; uECC_vli_square(product, left, num_words); uECC_vli_mmod(result, product, mod, num_words); } #endif /* uECC_ENABLE_VLI_API */ uECC_VLI_API void uECC_vli_modSquare_fast(uECC_word_t *result, const uECC_word_t *left, uECC_Curve curve) { uECC_word_t product[2 * uECC_MAX_WORDS]; uECC_vli_square(product, left, curve->num_words); #if (uECC_OPTIMIZATION_LEVEL > 0) curve->mmod_fast(result, product); #else uECC_vli_mmod(result, product, curve->p, curve->num_words); #endif } #else /* uECC_SQUARE_FUNC */ #if uECC_ENABLE_VLI_API uECC_VLI_API void uECC_vli_modSquare(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *mod, wordcount_t num_words) { uECC_vli_modMult(result, left, left, mod, num_words); } #endif /* uECC_ENABLE_VLI_API */ uECC_VLI_API void uECC_vli_modSquare_fast(uECC_word_t *result, const uECC_word_t *left, uECC_Curve curve) { uECC_vli_modMult_fast(result, left, left, curve); } #endif /* uECC_SQUARE_FUNC */ #define EVEN(vli) (!(vli[0] & 1)) static void vli_modInv_update(uECC_word_t *uv, const uECC_word_t *mod, wordcount_t num_words) { 
/* Helper for uECC_vli_modInv: computes uv = uv / 2 (mod mod). An odd uv is
   first made even by adding mod; a carry out of that addition is restored as
   the top bit after the shift. */
static void vli_modInv_update(uECC_word_t *uv,
                              const uECC_word_t *mod,
                              wordcount_t num_words) {
    uECC_word_t carry = 0;
    if (!EVEN(uv)) {
        carry = uECC_vli_add(uv, uv, mod, num_words);
    }
    uECC_vli_rshift1(uv, num_words);
    if (carry) {
        uv[num_words - 1] |= HIGH_BIT_SET;
    }
}

/* Computes result = (1 / input) % mod. All VLIs are the same size.
   See "From Euclid's GCD to Montgomery Multiplication to the Great Divide"
   A zero input yields a zero result. The loop uses the early-exit comparison
   and branches on the values, so it is not constant-time. */
uECC_VLI_API void uECC_vli_modInv(uECC_word_t *result,
                                  const uECC_word_t *input,
                                  const uECC_word_t *mod,
                                  wordcount_t num_words) {
    uECC_word_t a[uECC_MAX_WORDS], b[uECC_MAX_WORDS], u[uECC_MAX_WORDS], v[uECC_MAX_WORDS];
    cmpresult_t cmpResult;

    if (uECC_vli_isZero(input, num_words)) {
        uECC_vli_clear(result, num_words);
        return;
    }

    /* Start with a = input, b = mod, u = 1, v = 0. */
    uECC_vli_set(a, input, num_words);
    uECC_vli_set(b, mod, num_words);
    uECC_vli_clear(u, num_words);
    u[0] = 1;
    uECC_vli_clear(v, num_words);
    /* Binary GCD on (a, b); every halving/subtraction applied to a or b is
       mirrored on u or v modulo mod. */
    while ((cmpResult = uECC_vli_cmp_unsafe(a, b, num_words)) != 0) {
        if (EVEN(a)) {
            uECC_vli_rshift1(a, num_words);
            vli_modInv_update(u, mod, num_words);
        } else if (EVEN(b)) {
            uECC_vli_rshift1(b, num_words);
            vli_modInv_update(v, mod, num_words);
        } else if (cmpResult > 0) {
            uECC_vli_sub(a, a, b, num_words);
            uECC_vli_rshift1(a, num_words);
            /* Keep u - v non-negative by adding mod first when u < v. */
            if (uECC_vli_cmp_unsafe(u, v, num_words) < 0) {
                uECC_vli_add(u, u, mod, num_words);
            }
            uECC_vli_sub(u, u, v, num_words);
            vli_modInv_update(u, mod, num_words);
        } else {
            uECC_vli_sub(b, b, a, num_words);
            uECC_vli_rshift1(b, num_words);
            if (uECC_vli_cmp_unsafe(v, u, num_words) < 0) {
                uECC_vli_add(v, v, mod, num_words);
            }
            uECC_vli_sub(v, v, u, num_words);
            vli_modInv_update(v, mod, num_words);
        }
    }
    uECC_vli_set(result, u, num_words);
}

/* ------ Point operations ------ */

#include "curve-specific.h"

/* Returns 1 if 'point' is the point at infinity, 0 otherwise.
   The point at infinity is represented by all-zero x and y coordinates. */
#define EccPoint_isZero(point, curve) uECC_vli_isZero((point), (curve)->num_words * 2)
/* Point multiplication algorithm using Montgomery's ladder with co-Z coordinates.
From http://eprint.iacr.org/2011/338.pdf
*/

/* Modify (x1, y1) => (x1 * z^2, y1 * z^3) */
static void apply_z(uECC_word_t * X1,
                    uECC_word_t * Y1,
                    const uECC_word_t * const Z,
                    uECC_Curve curve) {
    uECC_word_t t1[uECC_MAX_WORDS];

    uECC_vli_modSquare_fast(t1, Z, curve);    /* z^2 */
    uECC_vli_modMult_fast(X1, X1, t1, curve); /* x1 * z^2 */
    uECC_vli_modMult_fast(t1, t1, Z, curve);  /* z^3 */
    uECC_vli_modMult_fast(Y1, Y1, t1, curve); /* y1 * z^3 */
}

/* P = (x1, y1) => 2P, (x2, y2) => P'
   initial_Z may be null, in which case Z = 1 is used. */
static void XYcZ_initial_double(uECC_word_t * X1,
                                uECC_word_t * Y1,
                                uECC_word_t * X2,
                                uECC_word_t * Y2,
                                const uECC_word_t * const initial_Z,
                                uECC_Curve curve) {
    uECC_word_t z[uECC_MAX_WORDS];
    wordcount_t num_words = curve->num_words;
    if (initial_Z) {
        uECC_vli_set(z, initial_Z, num_words);
    } else {
        uECC_vli_clear(z, num_words);
        z[0] = 1;
    }

    /* Keep a copy of P in (X2, Y2) before (X1, Y1) is doubled. */
    uECC_vli_set(X2, X1, num_words);
    uECC_vli_set(Y2, Y1, num_words);

    apply_z(X1, Y1, z, curve);
    curve->double_jacobian(X1, Y1, z, curve);
    /* z was updated by the doubling; scale the copy of P by that same z. */
    apply_z(X2, Y2, z, curve);
}

/* Input P = (x1, y1, Z), Q = (x2, y2, Z)
   Output P' = (x1', y1', Z3), P + Q = (x3, y3, Z3)
   or P => P', Q => P + Q
   All four coordinate arrays are overwritten in place.
*/
static void XYcZ_add(uECC_word_t * X1,
                     uECC_word_t * Y1,
                     uECC_word_t * X2,
                     uECC_word_t * Y2,
                     uECC_Curve curve) {
    /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
    uECC_word_t t5[uECC_MAX_WORDS];
    wordcount_t num_words = curve->num_words;

    uECC_vli_modSub(t5, X2, X1, curve->p, num_words); /* t5 = x2 - x1 */
    uECC_vli_modSquare_fast(t5, t5, curve);           /* t5 = (x2 - x1)^2 = A */
    uECC_vli_modMult_fast(X1, X1, t5, curve);         /* t1 = x1*A = B */
    uECC_vli_modMult_fast(X2, X2, t5, curve);         /* t3 = x2*A = C */
    uECC_vli_modSub(Y2, Y2, Y1, curve->p, num_words); /* t4 = y2 - y1 */
    uECC_vli_modSquare_fast(t5, Y2, curve);           /* t5 = (y2 - y1)^2 = D */

    uECC_vli_modSub(t5, t5, X1, curve->p, num_words); /* t5 = D - B */
    uECC_vli_modSub(t5, t5, X2, curve->p, num_words); /* t5 = D - B - C = x3 */
    uECC_vli_modSub(X2, X2, X1, curve->p, num_words); /* t3 = C - B */
    uECC_vli_modMult_fast(Y1, Y1, X2, curve);         /* t2 = y1*(C - B) */
    uECC_vli_modSub(X2, X1, t5, curve->p, num_words); /* t3 = B - x3 */
    uECC_vli_modMult_fast(Y2, Y2, X2, curve);         /* t4 = (y2 - y1)*(B - x3) */
    uECC_vli_modSub(Y2, Y2, Y1, curve->p, num_words); /* t4 = y3 */

    uECC_vli_set(X2, t5, num_words);
}
/* Input P = (x1, y1, Z), Q = (x2, y2, Z)
   Output P + Q = (x3, y3, Z3), P - Q = (x3', y3', Z3)
   or P => P - Q, Q => P + Q
   All four coordinate arrays are overwritten in place.
*/
static void XYcZ_addC(uECC_word_t * X1,
                      uECC_word_t * Y1,
                      uECC_word_t * X2,
                      uECC_word_t * Y2,
                      uECC_Curve curve) {
    /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
    uECC_word_t t5[uECC_MAX_WORDS];
    uECC_word_t t6[uECC_MAX_WORDS];
    uECC_word_t t7[uECC_MAX_WORDS];
    wordcount_t num_words = curve->num_words;

    uECC_vli_modSub(t5, X2, X1, curve->p, num_words); /* t5 = x2 - x1 */
    uECC_vli_modSquare_fast(t5, t5, curve);           /* t5 = (x2 - x1)^2 = A */
    uECC_vli_modMult_fast(X1, X1, t5, curve);         /* t1 = x1*A = B */
    uECC_vli_modMult_fast(X2, X2, t5, curve);         /* t3 = x2*A = C */
    uECC_vli_modAdd(t5, Y2, Y1, curve->p, num_words); /* t5 = y2 + y1 */
    uECC_vli_modSub(Y2, Y2, Y1, curve->p, num_words); /* t4 = y2 - y1 */

    uECC_vli_modSub(t6, X2, X1, curve->p, num_words); /* t6 = C - B */
    uECC_vli_modMult_fast(Y1, Y1, t6, curve);         /* t2 = y1 * (C - B) = E */
    uECC_vli_modAdd(t6, X1, X2, curve->p, num_words); /* t6 = B + C */
    uECC_vli_modSquare_fast(X2, Y2, curve);           /* t3 = (y2 - y1)^2 = D */
    uECC_vli_modSub(X2, X2, t6, curve->p, num_words); /* t3 = D - (B + C) = x3 */

    uECC_vli_modSub(t7, X1, X2, curve->p, num_words); /* t7 = B - x3 */
    uECC_vli_modMult_fast(Y2, Y2, t7, curve);         /* t4 = (y2 - y1)*(B - x3) */
    uECC_vli_modSub(Y2, Y2, Y1, curve->p, num_words); /* t4 = (y2 - y1)*(B - x3) - E = y3 */

    uECC_vli_modSquare_fast(t7, t5, curve);           /* t7 = (y2 + y1)^2 = F */
    uECC_vli_modSub(t7, t7, t6, curve->p, num_words); /* t7 = F - (B + C) = x3' */
    uECC_vli_modSub(t6, t7, X1, curve->p, num_words); /* t6 = x3' - B */
    uECC_vli_modMult_fast(t6, t6, t5, curve);         /* t6 = (y2+y1)*(x3' - B) */
    uECC_vli_modSub(Y1, t6, Y1, curve->p, num_words); /* t2 = (y2+y1)*(x3' - B) - E = y3' */

    uECC_vli_set(X1, t7, num_words);
}
/* Computes result = scalar * point with a Montgomery ladder over co-Z
   coordinates.
   - point / result: x followed by y, num_words words each.
   - scalar: num_bits bits are processed; bit num_bits - 1 is not tested, the
     ladder starts from the state that corresponds to it being set.
   - initial_Z: optional (may be null) starting Z value passed on to
     XYcZ_initial_double.
   result may overlap point. */
static void EccPoint_mult(uECC_word_t * result,
                          const uECC_word_t * point,
                          const uECC_word_t * scalar,
                          const uECC_word_t * initial_Z,
                          bitcount_t num_bits,
                          uECC_Curve curve) {
    /* R0 and R1 */
    uECC_word_t Rx[2][uECC_MAX_WORDS];
    uECC_word_t Ry[2][uECC_MAX_WORDS];
    uECC_word_t z[uECC_MAX_WORDS];
    bitcount_t i;
    uECC_word_t nb;
    wordcount_t num_words = curve->num_words;

    uECC_vli_set(Rx[1], point, num_words);
    uECC_vli_set(Ry[1], point + num_words, num_words);

    /* R1 = 2P, R0 = P (sharing one Z). */
    XYcZ_initial_double(Rx[1], Ry[1], Rx[0], Ry[0], initial_Z, curve);

    /* Each step performs the same addC/add pair; the scalar bit only selects
       which register plays which role. */
    for (i = num_bits - 2; i > 0; --i) {
        nb = !uECC_vli_testBit(scalar, i);
        XYcZ_addC(Rx[1 - nb], Ry[1 - nb], Rx[nb], Ry[nb], curve);
        XYcZ_add(Rx[nb], Ry[nb], Rx[1 - nb], Ry[1 - nb], curve);
    }

    /* The last bit is split up so that 1/Z can be recovered between the
       addC and the final add. */
    nb = !uECC_vli_testBit(scalar, 0);
    XYcZ_addC(Rx[1 - nb], Ry[1 - nb], Rx[nb], Ry[nb], curve);

    /* Find final 1/Z value. */
    uECC_vli_modSub(z, Rx[1], Rx[0], curve->p, num_words); /* X1 - X0 */
    uECC_vli_modMult_fast(z, z, Ry[1 - nb], curve);               /* Yb * (X1 - X0) */
    uECC_vli_modMult_fast(z, z, point, curve);                    /* xP * Yb * (X1 - X0) */
    uECC_vli_modInv(z, z, curve->p, num_words);            /* 1 / (xP * Yb * (X1 - X0)) */
    /* yP / (xP * Yb * (X1 - X0)) */
    uECC_vli_modMult_fast(z, z, point + num_words, curve);
    uECC_vli_modMult_fast(z, z, Rx[1 - nb], curve); /* Xb * yP / (xP * Yb * (X1 - X0)) */
    /* End 1/Z calculation */

    XYcZ_add(Rx[nb], Ry[nb], Rx[1 - nb], Ry[1 - nb], curve);
    /* Scale R0 by the recovered z and emit it as the result. */
    apply_z(Rx[0], Ry[0], z, curve);

    uECC_vli_set(result, Rx[0], num_words);
    uECC_vli_set(result + num_words, Ry[0], num_words);
}
num_n_words); return carry; } static uECC_word_t EccPoint_compute_public_key(uECC_word_t *result, uECC_word_t *private, uECC_Curve curve) { uECC_word_t tmp1[uECC_MAX_WORDS]; uECC_word_t tmp2[uECC_MAX_WORDS]; uECC_word_t *p2[2] = {tmp1, tmp2}; uECC_word_t carry; /* Regularize the bitcount for the private key so that attackers cannot use a side channel attack to learn the number of leading zeros. */ carry = regularize_k(private, tmp1, tmp2, curve); EccPoint_mult(result, curve->G, p2[!carry], 0, curve->num_n_bits + 1, curve); if (EccPoint_isZero(result, curve)) { return 0; } return 1; } #if uECC_WORD_SIZE == 1 uECC_VLI_API void uECC_vli_nativeToBytes(uint8_t *bytes, int num_bytes, const uint8_t *native) { wordcount_t i; for (i = 0; i < num_bytes; ++i) { bytes[i] = native[(num_bytes - 1) - i]; } } uECC_VLI_API void uECC_vli_bytesToNative(uint8_t *native, const uint8_t *bytes, int num_bytes) { uECC_vli_nativeToBytes(native, num_bytes, bytes); } #else uECC_VLI_API void uECC_vli_nativeToBytes(uint8_t *bytes, int num_bytes, const uECC_word_t *native) { wordcount_t i; for (i = 0; i < num_bytes; ++i) { unsigned b = num_bytes - 1 - i; bytes[i] = native[b / uECC_WORD_SIZE] >> (8 * (b % uECC_WORD_SIZE)); } } uECC_VLI_API void uECC_vli_bytesToNative(uECC_word_t *native, const uint8_t *bytes, int num_bytes) { wordcount_t i; uECC_vli_clear(native, (num_bytes + (uECC_WORD_SIZE - 1)) / uECC_WORD_SIZE); for (i = 0; i < num_bytes; ++i) { unsigned b = num_bytes - 1 - i; native[b / uECC_WORD_SIZE] |= (uECC_word_t)bytes[i] << (8 * (b % uECC_WORD_SIZE)); } } #endif /* uECC_WORD_SIZE */ /* Generates a random integer in the range 0 < random < top. Both random and top have num_words words. 
*/ uECC_VLI_API int uECC_generate_random_int(uECC_word_t *random, const uECC_word_t *top, wordcount_t num_words) { uECC_word_t mask = (uECC_word_t)-1; uECC_word_t tries; bitcount_t num_bits = uECC_vli_numBits(top, num_words); if (!g_rng_function) { return 0; } for (tries = 0; tries < uECC_RNG_MAX_TRIES; ++tries) { if (!g_rng_function((uint8_t *)random, num_words * uECC_WORD_SIZE)) { return 0; } random[num_words - 1] &= mask >> ((bitcount_t)(num_words * uECC_WORD_SIZE * 8 - num_bits)); if (!uECC_vli_isZero(random, num_words) && uECC_vli_cmp(top, random, num_words) == 1) { return 1; } } return 0; } int uECC_make_key(uint8_t *public_key, uint8_t *private_key, uECC_Curve curve) { uECC_word_t private[uECC_MAX_WORDS]; uECC_word_t public[uECC_MAX_WORDS * 2]; uECC_word_t tries; for (tries = 0; tries < uECC_RNG_MAX_TRIES; ++tries) { if (!uECC_generate_random_int(private, curve->n, BITS_TO_WORDS(curve->num_n_bits))) { return 0; } if (EccPoint_compute_public_key(public, private, curve)) { uECC_vli_nativeToBytes(private_key, BITS_TO_BYTES(curve->num_n_bits), private); uECC_vli_nativeToBytes(public_key, curve->num_bytes, public); uECC_vli_nativeToBytes( public_key + curve->num_bytes, curve->num_bytes, public + curve->num_words); return 1; } } return 0; } int uECC_shared_secret(const uint8_t *public_key, const uint8_t *private_key, uint8_t *secret, uECC_Curve curve) { uECC_word_t public[uECC_MAX_WORDS * 2]; uECC_word_t private[uECC_MAX_WORDS]; uECC_word_t tmp[uECC_MAX_WORDS]; uECC_word_t *p2[2] = {private, tmp}; uECC_word_t *initial_Z = 0; uECC_word_t carry; wordcount_t num_words = curve->num_words; wordcount_t num_bytes = curve->num_bytes; uECC_vli_bytesToNative(private, private_key, BITS_TO_BYTES(curve->num_n_bits)); uECC_vli_bytesToNative(public, public_key, num_bytes); uECC_vli_bytesToNative(public + num_words, public_key + num_bytes, num_bytes); /* Regularize the bitcount for the private key so that attackers cannot use a side channel attack to learn the number of leading 
/* Computes the shared secret: the x coordinate of private_key * public_key,
   written to secret as curve->num_bytes big-endian bytes.
   Returns 1 on success; 0 if a random initial Z could not be generated or the
   product is the point at infinity. public_key is not validated here. */
int uECC_shared_secret(const uint8_t *public_key,
                       const uint8_t *private_key,
                       uint8_t *secret,
                       uECC_Curve curve) {
    uECC_word_t public[uECC_MAX_WORDS * 2];
    uECC_word_t private[uECC_MAX_WORDS];
    uECC_word_t tmp[uECC_MAX_WORDS];
    uECC_word_t *p2[2] = {private, tmp};
    uECC_word_t *initial_Z = 0;
    uECC_word_t carry;
    wordcount_t num_words = curve->num_words;
    wordcount_t num_bytes = curve->num_bytes;

    uECC_vli_bytesToNative(private, private_key, BITS_TO_BYTES(curve->num_n_bits));
    uECC_vli_bytesToNative(public, public_key, num_bytes);
    uECC_vli_bytesToNative(public + num_words, public_key + num_bytes, num_bytes);

    /* Regularize the bitcount for the private key so that attackers cannot use a side channel
       attack to learn the number of leading zeros. */
    carry = regularize_k(private, private, tmp, curve);

    /* If an RNG function was specified, try to get a random initial Z value to improve
       protection against side-channel attacks. */
    if (g_rng_function) {
        /* p2[carry] is the regularized scalar that will NOT be used below, so
           its storage is reused for the random Z. */
        if (!uECC_generate_random_int(p2[carry], curve->p, num_words)) {
            return 0;
        }
        initial_Z = p2[carry];
    }

    EccPoint_mult(public, public, p2[!carry], initial_Z, curve->num_n_bits + 1, curve);
    uECC_vli_nativeToBytes(secret, num_bytes, public);
    return !EccPoint_isZero(public, curve);
}

#if uECC_SUPPORT_COMPRESSED_POINT
/* Writes the compressed form of public_key: one prefix byte (2 or 3, taken
   from the lowest bit of y) followed by the x coordinate. */
void uECC_compress(const uint8_t *public_key, uint8_t *compressed, uECC_Curve curve) {
    wordcount_t i;
    for (i = 0; i < curve->num_bytes; ++i) {
        compressed[i+1] = public_key[i];
    }
    /* The last byte of public_key is the least significant byte of y. */
    compressed[0] = 2 + (public_key[curve->num_bytes * 2 - 1] & 0x01);
}

/* Expands a compressed point into public_key (x followed by y): y is
   recomputed from x through the curve equation and the square root whose
   lowest bit matches the prefix byte is chosen. */
void uECC_decompress(const uint8_t *compressed, uint8_t *public_key, uECC_Curve curve) {
    uECC_word_t point[uECC_MAX_WORDS * 2];
    uECC_word_t *y = point + curve->num_words;
    uECC_vli_bytesToNative(point, compressed + 1, curve->num_bytes);
    curve->x_side(y, point, curve);
    curve->mod_sqrt(y, curve);

    /* Wrong parity: take the other root, p - y. */
    if ((y[0] & 0x01) != (compressed[0] & 0x01)) {
        uECC_vli_sub(y, curve->p, y, curve->num_words);
    }

    uECC_vli_nativeToBytes(public_key, curve->num_bytes, point);
    uECC_vli_nativeToBytes(public_key + curve->num_bytes, curve->num_bytes, y);
}
#endif /* uECC_SUPPORT_COMPRESSED_POINT */
*/
    if (uECC_vli_cmp_unsafe(curve->p, point, num_words) != 1 ||
            uECC_vli_cmp_unsafe(curve->p, point + num_words, num_words) != 1) {
        return 0;
    }

    /* tmp1 = y^2, computed from the second coordinate of the point. */
    uECC_vli_modSquare_fast(tmp1, point + num_words, curve);
    curve->x_side(tmp2, point, curve); /* tmp2 = x^3 + ax + b */

    /* Make sure that y^2 == x^3 + ax + b */
    return (int)(uECC_vli_equal(tmp1, tmp2, num_words));
}

/* Byte-array front end for uECC_valid_point(): converts the two curve->num_bytes-long
   halves of public_key to native format and returns the result of the point check. */
int uECC_valid_public_key(const uint8_t *public_key, uECC_Curve curve) {
    uECC_word_t public[uECC_MAX_WORDS * 2];

    uECC_vli_bytesToNative(public, public_key, curve->num_bytes);
    uECC_vli_bytesToNative(
        public + curve->num_words, public_key + curve->num_bytes, curve->num_bytes);
    return uECC_valid_point(public, curve);
}

/* Derives the public key that belongs to private_key and writes it to public_key as two
   curve->num_bytes-long halves.
   Returns 1 on success, 0 if the private key is zero, is not below curve->n, or the public
   key computation fails. */
int uECC_compute_public_key(const uint8_t *private_key, uint8_t *public_key, uECC_Curve curve) {
    uECC_word_t private[uECC_MAX_WORDS];
    uECC_word_t public[uECC_MAX_WORDS * 2];

    uECC_vli_bytesToNative(private, private_key, BITS_TO_BYTES(curve->num_n_bits));

    /* Make sure the private key is in the range [1, n-1]. */
    if (uECC_vli_isZero(private, BITS_TO_WORDS(curve->num_n_bits))) {
        return 0;
    }

    if (uECC_vli_cmp(curve->n, private, BITS_TO_WORDS(curve->num_n_bits)) != 1) {
        return 0;
    }

    /* Compute public key.
*/
    if (!EccPoint_compute_public_key(public, private, curve)) {
        return 0;
    }

    uECC_vli_nativeToBytes(public_key, curve->num_bytes, public);
    uECC_vli_nativeToBytes(
        public_key + curve->num_bytes, curve->num_bytes, public + curve->num_words);
    return 1;
}


/* -------- ECDSA code -------- */

/* Converts a byte string (a message hash, at the call sites in this file) into a native
   integer of BITS_TO_WORDS(curve->num_n_bits) words.
   At most BITS_TO_BYTES(curve->num_n_bits) leading bytes are used. If those bytes hold more
   bits than curve->num_n_bits, the value is shifted right by the excess and then reduced by
   a single conditional subtraction of curve->n. */
static void bits2int(uECC_word_t *native,
                     const uint8_t *bits,
                     unsigned bits_size,
                     uECC_Curve curve) {
    unsigned num_n_bytes = BITS_TO_BYTES(curve->num_n_bits);
    unsigned num_n_words = BITS_TO_WORDS(curve->num_n_bits);

    /* Ignore any input bytes beyond the byte length of n. */
    if (bits_size > num_n_bytes) {
        bits_size = num_n_bytes;
    }

    uECC_vli_clear(native, num_n_words);
    uECC_vli_bytesToNative(native, bits, bits_size);
    /* Nothing more to do when the input already fits in num_n_bits bits. */
    if (bits_size * 8 <= (unsigned)curve->num_n_bits) {
        return;
    }
    /* Shift the whole integer right by the number of excess bits, walking from the last
       word down to the first and carrying the shifted-out bits into the next word. */
    int shift = bits_size * 8 - curve->num_n_bits;
    uECC_word_t carry = 0;
    uECC_word_t *ptr = native + num_n_words;
    while (ptr-- > native) {
        uECC_word_t temp = *ptr;
        *ptr = (temp >> shift) | carry;
        carry = temp << (uECC_WORD_BITS - shift);
    }

    /* Reduce mod curve_n */
    if (uECC_vli_cmp_unsafe(curve->n, native, num_n_words) != 1) {
        uECC_vli_sub(native, native, curve->n, num_n_words);
    }
}

/* Produces an ECDSA signature using the caller-supplied nonce k. The first half of
   'signature' receives r and the second half receives s, each curve->num_bytes long.
   The k buffer is overwritten during the computation.
   Returns 1 on success, 0 if k is out of range, an intermediate result is zero, random
   generation fails, or s does not fit in curve->num_bytes bytes. */
static int uECC_sign_with_k(const uint8_t *private_key,
                            const uint8_t *message_hash,
                            unsigned hash_size,
                            uECC_word_t *k,
                            uint8_t *signature,
                            uECC_Curve curve) {

    uECC_word_t tmp[uECC_MAX_WORDS];
    uECC_word_t s[uECC_MAX_WORDS];
    /* Indexed by the carry returned from regularize_k() to select the scalar buffer. */
    uECC_word_t *k2[2] = {tmp, s};
    uECC_word_t p[uECC_MAX_WORDS * 2];
    uECC_word_t carry;
    wordcount_t num_words = curve->num_words;
    wordcount_t num_n_words = BITS_TO_WORDS(curve->num_n_bits);
    bitcount_t num_n_bits = curve->num_n_bits;

    /* Make sure 0 < k < curve_n */
    if (uECC_vli_isZero(k, num_words) || uECC_vli_cmp(curve->n, k, num_n_words) != 1) {
        return 0;
    }

    carry = regularize_k(k, tmp, s, curve);
    /* p = k * G; its first coordinate becomes r. */
    EccPoint_mult(p, curve->G, k2[!carry], 0, num_n_bits + 1, curve);
    if (uECC_vli_isZero(p, num_words)) {
        return 0;
    }

    /* If an RNG function was specified, get a random number to prevent side channel analysis of k.
*/
    if (!g_rng_function) {
        /* No RNG available: use 1 as the multiplier, which leaves k unblinded. */
        uECC_vli_clear(tmp, num_n_words);
        tmp[0] = 1;
    } else if (!uECC_generate_random_int(tmp, curve->n, num_n_words)) {
        return 0;
    }

    /* Prevent side channel analysis of uECC_vli_modInv() to determine
       bits of k / the private key by premultiplying by a random number */
    uECC_vli_modMult(k, k, tmp, curve->n, num_n_words); /* k' = rand * k */
    uECC_vli_modInv(k, k, curve->n, num_n_words);       /* k = 1 / k' */
    uECC_vli_modMult(k, k, tmp, curve->n, num_n_words); /* k = 1 / k (inverse of the original k) */

    uECC_vli_nativeToBytes(signature, curve->num_bytes, p); /* store r */

    uECC_vli_bytesToNative(tmp, private_key, BITS_TO_BYTES(curve->num_n_bits)); /* tmp = d */

    /* Only num_words words of p are copied into s below, while the modular arithmetic
       works on num_n_words words, so the last word is zeroed first. */
    s[num_n_words - 1] = 0;
    uECC_vli_set(s, p, num_words);
    uECC_vli_modMult(s, tmp, s, curve->n, num_n_words); /* s = r*d */

    bits2int(tmp, message_hash, hash_size, curve);
    uECC_vli_modAdd(s, tmp, s, curve->n, num_n_words); /* s = e + r*d */
    uECC_vli_modMult(s, s, k, curve->n, num_n_words);  /* s = (e + r*d) / k */
    /* Reject an s that needs more bits than the curve->num_bytes bytes reserved for it. */
    if (uECC_vli_numBits(s, num_n_words) > (bitcount_t)curve->num_bytes * 8) {
        return 0;
    }
    uECC_vli_nativeToBytes(signature + curve->num_bytes, curve->num_bytes, s);
    return 1;
}

/* Generates an ECDSA signature with a random nonce: draws k below curve->n and signs with
   it, retrying with a fresh k (at most uECC_RNG_MAX_TRIES times) when signing is rejected.
   Returns 1 on success, 0 if random generation fails or every try was rejected. */
int uECC_sign(const uint8_t *private_key,
              const uint8_t *message_hash,
              unsigned hash_size,
              uint8_t *signature,
              uECC_Curve curve) {
    uECC_word_t k[uECC_MAX_WORDS];
    uECC_word_t tries;

    for (tries = 0; tries < uECC_RNG_MAX_TRIES; ++tries) {
        if (!uECC_generate_random_int(k, curve->n, BITS_TO_WORDS(curve->num_n_bits))) {
            return 0;
        }

        if (uECC_sign_with_k(private_key, message_hash, hash_size, k, signature, curve)) {
            return 1;
        }
    }
    return 0;
}

/* Compute an HMAC using K as a key (as in RFC 6979). Note that K is always the same size
   as the hash result size.
*/ static void HMAC_init(uECC_HashContext *hash_context, const uint8_t *K) { uint8_t *pad = hash_context->tmp + 2 * hash_context->result_size; unsigned i; for (i = 0; i < hash_context->result_size; ++i) pad[i] = K[i] ^ 0x36; for (; i < hash_context->block_size; ++i) pad[i] = 0x36; hash_context->init_hash(hash_context); hash_context->update_hash(hash_context, pad, hash_context->block_size); } static void HMAC_update(uECC_HashContext *hash_context, const uint8_t *message, unsigned message_size) { hash_context->update_hash(hash_context, message, message_size); } static void HMAC_finish(uECC_HashContext *hash_context, const uint8_t *K, uint8_t *result) { uint8_t *pad = hash_context->tmp + 2 * hash_context->result_size; unsigned i; for (i = 0; i < hash_context->result_size; ++i) pad[i] = K[i] ^ 0x5c; for (; i < hash_context->block_size; ++i) pad[i] = 0x5c; hash_context->finish_hash(hash_context, result); hash_context->init_hash(hash_context); hash_context->update_hash(hash_context, pad, hash_context->block_size); hash_context->update_hash(hash_context, result, hash_context->result_size); hash_context->finish_hash(hash_context, result); } /* V = HMAC_K(V) */ static void update_V(uECC_HashContext *hash_context, uint8_t *K, uint8_t *V) { HMAC_init(hash_context, K); HMAC_update(hash_context, V, hash_context->result_size); HMAC_finish(hash_context, K, V); } /* Deterministic signing, similar to RFC 6979. Differences are: * We just use H(m) directly rather than bits2octets(H(m)) (it is not reduced modulo curve_n). * We generate a value for k (aka T) directly rather than converting endianness. 
Layout of hash_context->tmp: | | (1 byte overlapped 0x00 or 0x01) / */
/* Buffer regions as used by the code below: K occupies the first result_size bytes of
   hash_context->tmp, V occupies the next result_size bytes, and the HMAC pad built by
   HMAC_init()/HMAC_finish() starts at tmp + 2 * result_size. The separator byte written to
   V[result_size] (0x00 or 0x01) therefore lands on the first pad byte; HMAC_init() and
   HMAC_finish() each rebuild the pad before hashing it.

   Returns 1 once uECC_sign_with_k() accepts a generated k, or 0 after uECC_RNG_MAX_TRIES
   candidates have been rejected. */
int uECC_sign_deterministic(const uint8_t *private_key,
                            const uint8_t *message_hash,
                            unsigned hash_size,
                            uECC_HashContext *hash_context,
                            uint8_t *signature,
                            uECC_Curve curve) {
    uint8_t *K = hash_context->tmp;
    uint8_t *V = K + hash_context->result_size;
    wordcount_t num_bytes = curve->num_bytes;
    wordcount_t num_n_words = BITS_TO_WORDS(curve->num_n_bits);
    bitcount_t num_n_bits = curve->num_n_bits;
    uECC_word_t tries;
    unsigned i;
    /* Initial state: V is all 0x01 bytes, K is all 0x00 bytes. */
    for (i = 0; i < hash_context->result_size; ++i) {
        V[i] = 0x01;
        K[i] = 0;
    }

    /* K = HMAC_K(V || 0x00 || int2octets(x) || h(m)) */
    HMAC_init(hash_context, K);
    V[hash_context->result_size] = 0x00;
    HMAC_update(hash_context, V, hash_context->result_size + 1);
    /* NOTE(review): only curve->num_bytes bytes of the private key are hashed here, while
       other functions in this file read BITS_TO_BYTES(curve->num_n_bits) bytes of it.
       Confirm this is intended for curves where the two lengths differ. */
    HMAC_update(hash_context, private_key, num_bytes);
    HMAC_update(hash_context, message_hash, hash_size);
    HMAC_finish(hash_context, K, K);

    update_V(hash_context, K, V);

    /* K = HMAC_K(V || 0x01 || int2octets(x) || h(m)) */
    HMAC_init(hash_context, K);
    V[hash_context->result_size] = 0x01;
    HMAC_update(hash_context, V, hash_context->result_size + 1);
    HMAC_update(hash_context, private_key, num_bytes);
    HMAC_update(hash_context, message_hash, hash_size);
    HMAC_finish(hash_context, K, K);

    update_V(hash_context, K, V);

    for (tries = 0; tries < uECC_RNG_MAX_TRIES; ++tries) {
        uECC_word_t T[uECC_MAX_WORDS];
        uint8_t *T_ptr = (uint8_t *)T;
        wordcount_t T_bytes = 0;
        /* Fill T with successive V values until num_n_words words have been written. */
        for (;;) {
            update_V(hash_context, K, V);
            for (i = 0; i < hash_context->result_size; ++i) {
                T_ptr[T_bytes++] = V[i];
                if (T_bytes >= num_n_words * uECC_WORD_SIZE) {
                    goto filled;
                }
            }
        }
    filled:
        /* Clear the bits of the last word that lie above num_n_bits. */
        if ((bitcount_t)num_n_words * uECC_WORD_SIZE * 8 > num_n_bits) {
            uECC_word_t mask = (uECC_word_t)-1;
            T[num_n_words - 1] &=
                mask >> ((bitcount_t)(num_n_words * uECC_WORD_SIZE * 8 - num_n_bits));
        }

        if (uECC_sign_with_k(private_key, message_hash, hash_size, T, signature, curve)) {
            return 1;
        }

        /* K = HMAC_K(V || 0x00) */
        HMAC_init(hash_context, K);
        V[hash_context->result_size] = 0x00;
HMAC_update(hash_context, V, hash_context->result_size + 1);
        HMAC_finish(hash_context, K, K);

        /* Refresh V under the new K before generating the next candidate. */
        update_V(hash_context, K, V);
    }
    return 0;
}

/* Returns the larger of two bit counts. */
static bitcount_t smax(bitcount_t a, bitcount_t b) {
    return (a > b ? a : b);
}

/* Verifies an ECDSA signature (r followed by s, each curve->num_bytes long) over
   message_hash with the given public key. Returns 1 if the signature is accepted,
   0 otherwise. */
int uECC_verify(const uint8_t *public_key,
                const uint8_t *message_hash,
                unsigned hash_size,
                const uint8_t *signature,
                uECC_Curve curve) {
    uECC_word_t u1[uECC_MAX_WORDS], u2[uECC_MAX_WORDS];
    uECC_word_t z[uECC_MAX_WORDS];
    uECC_word_t public[uECC_MAX_WORDS * 2];
    uECC_word_t sum[uECC_MAX_WORDS * 2];
    uECC_word_t rx[uECC_MAX_WORDS];
    uECC_word_t ry[uECC_MAX_WORDS];
    uECC_word_t tx[uECC_MAX_WORDS];
    uECC_word_t ty[uECC_MAX_WORDS];
    uECC_word_t tz[uECC_MAX_WORDS];
    const uECC_word_t *points[4];
    const uECC_word_t *point;
    bitcount_t num_bits;
    bitcount_t i;
    uECC_word_t r[uECC_MAX_WORDS], s[uECC_MAX_WORDS];
    wordcount_t num_words = curve->num_words;
    wordcount_t num_n_words = BITS_TO_WORDS(curve->num_n_bits);

    /* These values are filled with num_words-sized data but later used in num_n_words-sized
       operations, so their last word is zeroed up front. */
    rx[num_n_words - 1] = 0;
    r[num_n_words - 1] = 0;
    s[num_n_words - 1] = 0;

    uECC_vli_bytesToNative(public, public_key, curve->num_bytes);
    uECC_vli_bytesToNative(
        public + num_words, public_key + curve->num_bytes, curve->num_bytes);
    uECC_vli_bytesToNative(r, signature, curve->num_bytes);
    uECC_vli_bytesToNative(s, signature + curve->num_bytes, curve->num_bytes);

    /* r, s must not be 0. */
    if (uECC_vli_isZero(r, num_words) || uECC_vli_isZero(s, num_words)) {
        return 0;
    }

    /* r, s must be < n. */
    if (uECC_vli_cmp_unsafe(curve->n, r, num_n_words) != 1 ||
            uECC_vli_cmp_unsafe(curve->n, s, num_n_words) != 1) {
        return 0;
    }

    /* Calculate u1 and u2. */
    uECC_vli_modInv(z, s, curve->n, num_n_words); /* z = 1/s */
    u1[num_n_words - 1] = 0;
    bits2int(u1, message_hash, hash_size, curve);
    uECC_vli_modMult(u1, u1, z, curve->n, num_n_words); /* u1 = e/s */
    uECC_vli_modMult(u2, r, z, curve->n, num_n_words); /* u2 = r/s */

    /* Calculate sum = G + Q.
*/
    uECC_vli_set(sum, public, num_words);
    uECC_vli_set(sum + num_words, public + num_words, num_words);
    uECC_vli_set(tx, curve->G, num_words);
    uECC_vli_set(ty, curve->G + num_words, num_words);
    uECC_vli_modSub(z, sum, tx, curve->p, num_words); /* z = x2 - x1 */
    XYcZ_add(tx, ty, sum, sum + num_words, curve);
    uECC_vli_modInv(z, z, curve->p, num_words); /* z = 1/z */
    apply_z(sum, sum + num_words, z, curve);

    /* Use Shamir's trick to calculate u1*G + u2*Q */
    /* Table index: bit 0 comes from the current bit of u1, bit 1 from the current bit of
       u2. Entry 0 is a null pointer, meaning nothing is added for that bit position. */
    points[0] = 0;
    points[1] = curve->G;
    points[2] = public;
    points[3] = sum;
    num_bits = smax(uECC_vli_numBits(u1, num_n_words),
                    uECC_vli_numBits(u2, num_n_words));

    /* Start from the table entry selected by the highest bit position in use. */
    point = points[(!!uECC_vli_testBit(u1, num_bits - 1)) |
                   ((!!uECC_vli_testBit(u2, num_bits - 1)) << 1)];
    uECC_vli_set(rx, point, num_words);
    uECC_vli_set(ry, point + num_words, num_words);
    uECC_vli_clear(z, num_words);
    z[0] = 1;

    /* Process the remaining bit positions from high to low: double, then add the table
       entry selected by the two scalar bits (if any). */
    for (i = num_bits - 2; i >= 0; --i) {
        uECC_word_t index;
        curve->double_jacobian(rx, ry, z, curve);

        index = (!!uECC_vli_testBit(u1, i)) | ((!!uECC_vli_testBit(u2, i)) << 1);
        point = points[index];
        if (point) {
            uECC_vli_set(tx, point, num_words);
            uECC_vli_set(ty, point + num_words, num_words);
            apply_z(tx, ty, z, curve);
            uECC_vli_modSub(tz, rx, tx, curve->p, num_words); /* Z = x2 - x1 */
            XYcZ_add(tx, ty, rx, ry, curve);
            uECC_vli_modMult_fast(z, z, tz, curve);
        }
    }

    uECC_vli_modInv(z, z, curve->p, num_words); /* Z = 1/Z */
    apply_z(rx, ry, z, curve);

    /* v = x1 (mod n) */
    if (uECC_vli_cmp_unsafe(curve->n, rx, num_n_words) != 1) {
        uECC_vli_sub(rx, rx, curve->n, num_n_words);
    }

    /* Accept only if v == r.
*/ return (int)(uECC_vli_equal(rx, r, num_words)); } #if uECC_ENABLE_VLI_API unsigned uECC_curve_num_words(uECC_Curve curve) { return curve->num_words; } unsigned uECC_curve_num_bytes(uECC_Curve curve) { return curve->num_bytes; } unsigned uECC_curve_num_bits(uECC_Curve curve) { return curve->num_bytes * 8; } unsigned uECC_curve_num_n_words(uECC_Curve curve) { return BITS_TO_WORDS(curve->num_n_bits); } unsigned uECC_curve_num_n_bytes(uECC_Curve curve) { return BITS_TO_BYTES(curve->num_n_bits); } unsigned uECC_curve_num_n_bits(uECC_Curve curve) { return curve->num_n_bits; } const uECC_word_t *uECC_curve_p(uECC_Curve curve) { return curve->p; } const uECC_word_t *uECC_curve_n(uECC_Curve curve) { return curve->n; } const uECC_word_t *uECC_curve_G(uECC_Curve curve) { return curve->G; } const uECC_word_t *uECC_curve_b(uECC_Curve curve) { return curve->b; } #if uECC_SUPPORT_COMPRESSED_POINT void uECC_vli_mod_sqrt(uECC_word_t *a, uECC_Curve curve) { curve->mod_sqrt(a, curve); } #endif void uECC_vli_mmod_fast(uECC_word_t *result, uECC_word_t *product, uECC_Curve curve) { #if (uECC_OPTIMIZATION_LEVEL > 0) curve->mmod_fast(result, product); #else uECC_vli_mmod(result, product, curve->p, curve->num_words); #endif } void uECC_point_mult(uECC_word_t *result, const uECC_word_t *point, const uECC_word_t *scalar, uECC_Curve curve) { uECC_word_t tmp1[uECC_MAX_WORDS]; uECC_word_t tmp2[uECC_MAX_WORDS]; uECC_word_t *p2[2] = {tmp1, tmp2}; uECC_word_t carry = regularize_k(scalar, tmp1, tmp2, curve); EccPoint_mult(result, point, p2[!carry], 0, curve->num_n_bits + 1, curve); } #endif /* uECC_ENABLE_VLI_API */ ================================================ FILE: u2f/uECC.h ================================================ /* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */ #ifndef _UECC_H_ #define _UECC_H_ #include /* Platform selection options. If uECC_PLATFORM is not defined, the code will try to guess it based on compiler macros. 
Possible values for uECC_PLATFORM are defined below: */
#define uECC_arch_other 0
#define uECC_x86        1
#define uECC_x86_64     2
#define uECC_arm        3
#define uECC_arm_thumb  4
#define uECC_arm_thumb2 5
#define uECC_arm64      6
#define uECC_avr        7

/* If desired, you can define uECC_WORD_SIZE as appropriate for your platform (1, 4, or 8 bytes).
If uECC_WORD_SIZE is not explicitly defined then it will be automatically set based on your
platform. */

/* Optimization level; trade speed for code size.
   Larger values produce code that is faster but larger.
   Currently supported values are 0 - 3; 0 is unusably slow for most applications. */
#ifndef uECC_OPTIMIZATION_LEVEL
    #define uECC_OPTIMIZATION_LEVEL 2
#endif

/* uECC_SQUARE_FUNC - If enabled (defined as nonzero), this will cause a specific function to be
used for (scalar) squaring instead of the generic multiplication function. This can make things
somewhat faster, but increases the code size. */
#ifndef uECC_SQUARE_FUNC
    #define uECC_SQUARE_FUNC 0
#endif

/* Curve support selection. Set to 0 to remove that curve. */
#ifndef uECC_SUPPORTS_secp160r1
    #define uECC_SUPPORTS_secp160r1 1
#endif
#ifndef uECC_SUPPORTS_secp192r1
    #define uECC_SUPPORTS_secp192r1 1
#endif
#ifndef uECC_SUPPORTS_secp224r1
    #define uECC_SUPPORTS_secp224r1 1
#endif
#ifndef uECC_SUPPORTS_secp256r1
    #define uECC_SUPPORTS_secp256r1 1
#endif
#ifndef uECC_SUPPORTS_secp256k1
    #define uECC_SUPPORTS_secp256k1 1
#endif

/* Specifies whether compressed point format is supported.
   Set to 0 to disable point compression/decompression functions.
*/ #ifndef uECC_SUPPORT_COMPRESSED_POINT #define uECC_SUPPORT_COMPRESSED_POINT 0 #endif struct uECC_Curve_t; typedef const struct uECC_Curve_t * uECC_Curve; #ifdef __cplusplus extern "C" { #endif #if uECC_SUPPORTS_secp160r1 uECC_Curve uECC_secp160r1(void); #endif #if uECC_SUPPORTS_secp192r1 uECC_Curve uECC_secp192r1(void); #endif #if uECC_SUPPORTS_secp224r1 uECC_Curve uECC_secp224r1(void); #endif #if uECC_SUPPORTS_secp256r1 uECC_Curve uECC_secp256r1(void); #endif #if uECC_SUPPORTS_secp256k1 uECC_Curve uECC_secp256k1(void); #endif /* uECC_RNG_Function type The RNG function should fill 'size' random bytes into 'dest'. It should return 1 if 'dest' was filled with random data, or 0 if the random data could not be generated. The filled-in values should be either truly random, or from a cryptographically-secure PRNG. A correctly functioning RNG function must be set (using uECC_set_rng()) before calling uECC_make_key() or uECC_sign(). Setting a correctly functioning RNG function improves the resistance to side-channel attacks for uECC_shared_secret() and uECC_sign_deterministic(). A correct RNG function is set by default when building for Windows, Linux, or OS X. If you are building on another POSIX-compliant system that supports /dev/random or /dev/urandom, you can define uECC_POSIX to use the predefined RNG. For embedded platforms there is no predefined RNG function; you must provide your own. */ typedef int (*uECC_RNG_Function)(uint8_t *dest, unsigned size); /* uECC_set_rng() function. Set the function that will be used to generate random bytes. The RNG function should return 1 if the random data was generated, or 0 if the random data could not be generated. On platforms where there is no predefined RNG function (eg embedded platforms), this must be called before uECC_make_key() or uECC_sign() are used. Inputs: rng_function - The function that will be used to generate random bytes. */ void uECC_set_rng(uECC_RNG_Function rng_function); /* uECC_make_key() function. 
Create a public/private key pair. Outputs: public_key - Will be filled in with the public key. Must be at least 2 * the curve size (in bytes) long. For example, if the curve is secp256r1, public_key must be 64 bytes long. private_key - Will be filled in with the private key. Must be as long as the curve order; this is typically the same as the curve size, except for secp160r1. For example, if the curve is secp256r1, private_key must be 32 bytes long. For secp160r1, private_key must be 21 bytes long! Note that the first byte will almost always be 0 (there is about a 1 in 2^80 chance of it being non-zero). Returns 1 if the key pair was generated successfully, 0 if an error occurred. */ int uECC_make_key(uint8_t *public_key, uint8_t *private_key, uECC_Curve curve); /* uECC_shared_secret() function. Compute a shared secret given your secret key and someone else's public key. Note: It is recommended that you hash the result of uECC_shared_secret() before using it for symmetric encryption or HMAC. Inputs: public_key - The public key of the remote party. private_key - Your private key. Outputs: secret - Will be filled in with the shared secret value. Must be the same size as the curve size; for example, if the curve is secp256r1, secret must be 32 bytes long. Returns 1 if the shared secret was generated successfully, 0 if an error occurred. */ int uECC_shared_secret(const uint8_t *public_key, const uint8_t *private_key, uint8_t *secret, uECC_Curve curve); #if uECC_SUPPORT_COMPRESSED_POINT /* uECC_compress() function. Compress a public key. Inputs: public_key - The public key to compress. Outputs: compressed - Will be filled in with the compressed public key. Must be at least (curve size + 1) bytes long; for example, if the curve is secp256r1, compressed must be 33 bytes long. */ void uECC_compress(const uint8_t *public_key, uint8_t *compressed, uECC_Curve curve); /* uECC_decompress() function. Decompress a compressed public key. 
Inputs: compressed - The compressed public key. Outputs: public_key - Will be filled in with the decompressed public key. */ void uECC_decompress(const uint8_t *compressed, uint8_t *public_key, uECC_Curve curve); #endif /* uECC_SUPPORT_COMPRESSED_POINT */ /* uECC_valid_public_key() function. Check to see if a public key is valid. Note that you are not required to check for a valid public key before using any other uECC functions. However, you may wish to avoid spending CPU time computing a shared secret or verifying a signature using an invalid public key. Inputs: public_key - The public key to check. Returns 1 if the public key is valid, 0 if it is invalid. */ int uECC_valid_public_key(const uint8_t *public_key, uECC_Curve curve); /* uECC_compute_public_key() function. Compute the corresponding public key for a private key. Inputs: private_key - The private key to compute the public key for Outputs: public_key - Will be filled in with the corresponding public key Returns 1 if the key was computed successfully, 0 if an error occurred. */ int uECC_compute_public_key(const uint8_t *private_key, uint8_t *public_key, uECC_Curve curve); /* uECC_sign() function. Generate an ECDSA signature for a given hash value. Usage: Compute a hash of the data you wish to sign (SHA-2 is recommended) and pass it in to this function along with your private key. Inputs: private_key - Your private key. message_hash - The hash of the message to sign. hash_size - The size of message_hash in bytes. Outputs: signature - Will be filled in with the signature value. Must be at least 2 * curve size long. For example, if the curve is secp256r1, signature must be 64 bytes long. Returns 1 if the signature generated successfully, 0 if an error occurred. */ int uECC_sign(const uint8_t *private_key, const uint8_t *message_hash, unsigned hash_size, uint8_t *signature, uECC_Curve curve); /* uECC_HashContext structure. This is used to pass in an arbitrary hash function to uECC_sign_deterministic(). 
The structure will be used for multiple hash computations; each time a new hash
is computed, init_hash() will be called, followed by one or more calls to
update_hash(), and finally a call to finish_hash() to produce the resulting hash.

The intention is that you will create a structure that includes uECC_HashContext
followed by any hash-specific data. For example:

typedef struct SHA256_HashContext {
    uECC_HashContext uECC;
    SHA256_CTX ctx;
} SHA256_HashContext;

void init_SHA256(uECC_HashContext *base) {
    SHA256_HashContext *context = (SHA256_HashContext *)base;
    SHA256_Init(&context->ctx);
}

void update_SHA256(uECC_HashContext *base,
                   const uint8_t *message,
                   unsigned message_size) {
    SHA256_HashContext *context = (SHA256_HashContext *)base;
    SHA256_Update(&context->ctx, message, message_size);
}

void finish_SHA256(uECC_HashContext *base, uint8_t *hash_result) {
    SHA256_HashContext *context = (SHA256_HashContext *)base;
    SHA256_Final(hash_result, &context->ctx);
}

... when signing ...
{
    uint8_t tmp[32 + 32 + 64];
    SHA256_HashContext ctx = {{&init_SHA256, &update_SHA256, &finish_SHA256, 64, 32, tmp}};
    uECC_sign_deterministic(key, message_hash, sizeof(message_hash), &ctx.uECC, signature, curve);
}
*/
typedef struct uECC_HashContext {
    void (*init_hash)(struct uECC_HashContext *context);
    void (*update_hash)(struct uECC_HashContext *context,
                        const uint8_t *message,
                        unsigned message_size);
    void (*finish_hash)(struct uECC_HashContext *context, uint8_t *hash_result);
    unsigned block_size; /* Hash function block size in bytes, eg 64 for SHA-256. */
    unsigned result_size; /* Hash function result size in bytes, eg 32 for SHA-256. */
    uint8_t *tmp; /* Must point to a buffer of at least (2 * result_size + block_size) bytes. */
} uECC_HashContext;

/* uECC_sign_deterministic() function.
Generate an ECDSA signature for a given hash value, using a deterministic algorithm
(see RFC 6979).
You do not need to set the RNG using uECC_set_rng() before calling this function; however, if the RNG is defined it will improve resistance to side-channel attacks. Usage: Compute a hash of the data you wish to sign (SHA-2 is recommended) and pass it in to this function along with your private key and a hash context. Note that the message_hash does not need to be computed with the same hash function used by hash_context. Inputs: private_key - Your private key. message_hash - The hash of the message to sign. hash_size - The size of message_hash in bytes. hash_context - A hash context to use. Outputs: signature - Will be filled in with the signature value. Returns 1 if the signature generated successfully, 0 if an error occurred. */ int uECC_sign_deterministic(const uint8_t *private_key, const uint8_t *message_hash, unsigned hash_size, uECC_HashContext *hash_context, uint8_t *signature, uECC_Curve curve); /* uECC_verify() function. Verify an ECDSA signature. Usage: Compute the hash of the signed data using the same hash as the signer and pass it to this function along with the signer's public key and the signature values (r and s). Inputs: public_key - The signer's public key. message_hash - The hash of the signed data. hash_size - The size of message_hash in bytes. signature - The signature value. Returns 1 if the signature is valid, 0 if it is invalid. */ int uECC_verify(const uint8_t *private_key, const uint8_t *message_hash, unsigned hash_size, const uint8_t *signature, uECC_Curve curve); #ifdef __cplusplus } /* end of extern "C" */ #endif #endif /* _UECC_H_ */ ================================================ FILE: u2f/uECC_vli.h ================================================ /* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */ #ifndef _UECC_VLI_H_ #define _UECC_VLI_H_ #include "uECC.h" #include "types.h" /* Functions for raw large-integer manipulation. 
These are only available if uECC.c is compiled with uECC_ENABLE_VLI_API defined to 1. */ #ifndef uECC_ENABLE_VLI_API #define uECC_ENABLE_VLI_API 0 #endif #ifdef __cplusplus extern "C" { #endif #if uECC_ENABLE_VLI_API void uECC_vli_clear(uECC_word_t *vli, wordcount_t num_words); /* Constant-time comparison to zero - secure way to compare long integers */ /* Returns 1 if vli == 0, 0 otherwise. */ uECC_word_t uECC_vli_isZero(const uECC_word_t *vli, wordcount_t num_words); /* Returns nonzero if bit 'bit' of vli is set. */ uECC_word_t uECC_vli_testBit(const uECC_word_t *vli, bitcount_t bit); /* Counts the number of bits required to represent vli. */ bitcount_t uECC_vli_numBits(const uECC_word_t *vli, const wordcount_t max_words); /* Sets dest = src. */ void uECC_vli_set(uECC_word_t *dest, const uECC_word_t *src, wordcount_t num_words); /* Constant-time comparison function - secure way to compare long integers */ /* Returns one if left == right, zero otherwise */ uECC_word_t uECC_vli_equal(const uECC_word_t *left, const uECC_word_t *right, wordcount_t num_words); /* Constant-time comparison function - secure way to compare long integers */ /* Returns sign of left - right, in constant time. */ cmpresult_t uECC_vli_cmp(const uECC_word_t *left, const uECC_word_t *right, wordcount_t num_words); /* Computes vli = vli >> 1. */ void uECC_vli_rshift1(uECC_word_t *vli, wordcount_t num_words); /* Computes result = left + right, returning carry. Can modify in place. */ uECC_word_t uECC_vli_add(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *right, wordcount_t num_words); /* Computes result = left - right, returning borrow. Can modify in place. */ uECC_word_t uECC_vli_sub(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *right, wordcount_t num_words); /* Computes result = left * right. Result must be 2 * num_words long. 
*/ void uECC_vli_mult(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *right, wordcount_t num_words); /* Computes result = left^2. Result must be 2 * num_words long. */ void uECC_vli_square(uECC_word_t *result, const uECC_word_t *left, wordcount_t num_words); /* Computes result = (left + right) % mod. Assumes that left < mod and right < mod, and that result does not overlap mod. */ void uECC_vli_modAdd(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *right, const uECC_word_t *mod, wordcount_t num_words); /* Computes result = (left - right) % mod. Assumes that left < mod and right < mod, and that result does not overlap mod. */ void uECC_vli_modSub(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *right, const uECC_word_t *mod, wordcount_t num_words); /* Computes result = product % mod, where product is 2N words long. Currently only designed to work for mod == curve->p or curve_n. */ void uECC_vli_mmod(uECC_word_t *result, uECC_word_t *product, const uECC_word_t *mod, wordcount_t num_words); /* Calculates result = product (mod curve->p), where product is up to 2 * curve->num_words long. */ void uECC_vli_mmod_fast(uECC_word_t *result, uECC_word_t *product, uECC_Curve curve); /* Computes result = (left * right) % mod. Currently only designed to work for mod == curve->p or curve_n. */ void uECC_vli_modMult(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *right, const uECC_word_t *mod, wordcount_t num_words); /* Computes result = (left * right) % curve->p. */ void uECC_vli_modMult_fast(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *right, uECC_Curve curve); /* Computes result = left^2 % mod. Currently only designed to work for mod == curve->p or curve_n. */ void uECC_vli_modSquare(uECC_word_t *result, const uECC_word_t *left, const uECC_word_t *mod, wordcount_t num_words); /* Computes result = left^2 % curve->p. 
*/ void uECC_vli_modSquare_fast(uECC_word_t *result, const uECC_word_t *left, uECC_Curve curve); /* Computes result = (1 / input) % mod.*/ void uECC_vli_modInv(uECC_word_t *result, const uECC_word_t *input, const uECC_word_t *mod, wordcount_t num_words); #if uECC_SUPPORT_COMPRESSED_POINT /* Calculates a = sqrt(a) (mod curve->p) */ void uECC_vli_mod_sqrt(uECC_word_t *a, uECC_Curve curve); #endif /* Converts an integer in uECC native format to big-endian bytes. */ void uECC_vli_nativeToBytes(uint8_t *bytes, int num_bytes, const uECC_word_t *native); /* Converts big-endian bytes to an integer in uECC native format. */ void uECC_vli_bytesToNative(uECC_word_t *native, const uint8_t *bytes, int num_bytes); unsigned uECC_curve_num_words(uECC_Curve curve); unsigned uECC_curve_num_bytes(uECC_Curve curve); unsigned uECC_curve_num_bits(uECC_Curve curve); unsigned uECC_curve_num_n_words(uECC_Curve curve); unsigned uECC_curve_num_n_bytes(uECC_Curve curve); unsigned uECC_curve_num_n_bits(uECC_Curve curve); const uECC_word_t *uECC_curve_p(uECC_Curve curve); const uECC_word_t *uECC_curve_n(uECC_Curve curve); const uECC_word_t *uECC_curve_G(uECC_Curve curve); const uECC_word_t *uECC_curve_b(uECC_Curve curve); int uECC_valid_point(const uECC_word_t *point, uECC_Curve curve); /* Multiplies a point by a scalar. Points are represented by the X coordinate followed by the Y coordinate in the same array, both coordinates are curve->num_words long. Note that scalar must be curve->num_n_words long (NOT curve->num_words). */ void uECC_point_mult(uECC_word_t *result, const uECC_word_t *point, const uECC_word_t *scalar, uECC_Curve curve); /* Generates a random integer in the range 0 < random < top. Both random and top have num_words words. 
*/ int uECC_generate_random_int(uECC_word_t *random, const uECC_word_t *top, wordcount_t num_words); #endif /* uECC_ENABLE_VLI_API */ #ifdef __cplusplus } /* end of extern "C" */ #endif #endif /* _UECC_VLI_H_ */ ================================================ FILE: usb_desc.h ================================================ /* Teensyduino Core Library * http://www.pjrc.com/teensy/ * Copyright (c) 2013 PJRC.COM, LLC. * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * 1. The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * 2. If the Software is incorporated into a build system that allows * selection among a list of target devices, then similar target * devices manufactured by PJRC.COM must be included in the list of * target devices and selectable in the same manner. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef _usb_desc_h_ #define _usb_desc_h_ // This header is NOT meant to be included when compiling // user sketches in Arduino. 
The low-level functions // provided by usb_dev.c are meant to be called only by // code which provides higher-level interfaces to the user. #include #include #define ENDPOINT_UNUSED 0x00 #define ENDPOINT_TRANSIMIT_ONLY 0x15 #define ENDPOINT_RECEIVE_ONLY 0x19 #define ENDPOINT_TRANSMIT_AND_RECEIVE 0x1D /* Each group of #define lines below corresponds to one of the settings in the Tools > USB Type menu. This file defines what type of USB device is actually created for each of those menu options. Each "interface" is a set of functionality your PC or Mac will use and treat as if it is a unique device. Within each interface, the "endpoints" are the actual communication channels. Most interfaces use 1, 2 or 3 endpoints. By editing only this file, you can customize the USB Types to be any collection of interfaces. To modify a USB Type, delete the XYZ_INTERFACE lines for any interfaces you wish to remove, and copy them from another USB Type for any you want to add. Give each interface a unique number, and edit NUM_INTERFACE to reflect the total number of interfaces. Next, assign unique endpoint numbers to all the endpoints across all the interfaces your device has. You can reuse an endpoint number for transmit and receive, but the same endpoint number must not be used twice to transmit, or twice to receive. Most endpoints also require their maximum size, and some also need an interval specification (the number of milliseconds the PC will check for data from that endpoint). For existing interfaces, usually these other settings should not be changed. Edit NUM_ENDPOINTS to be at least the largest endpoint number used. Edit NUM_USB_BUFFERS to control how much memory the USB stack will allocate. At least 2 should be used for each endpoint. More memory will allow higher throughput for user programs that have high latency (eg, spending time doing things other than interacting with the USB). 
Edit the ENDPOINT*_CONFIG lines so each endpoint is configured
the proper way (transmit, receive, or both).

If you are using existing interfaces (making your own device with
a different set of interfaces) the code in all other files should
automatically adapt to the new endpoints you specify here.

If you need to create a new type of interface, you'll need to write
the code which sends and receives packets, and presents an API to
the user.  Usually, a pair of files is added for the actual code,
and code is also added in usb_dev.c for any control transfers,
interrupt-level code, or other very low-level stuff not possible
from the packet send/receive functions.  Code is also added in
usb_inst.c to create an instance of your C++ object.

You may edit the Vendor and Product ID numbers, and strings.  If
the numbers are changed, Teensyduino may not be able to automatically
find and reboot your board when you click the Upload button in
the Arduino IDE.  You will need to press the Program button on
Teensy to initiate programming.

Some operating systems, especially Windows, may cache USB device
info.  Changes to the device name may not update on the same
computer unless the vendor or product ID numbers change, or the
"bcdDevice" revision code is increased.

If these instructions are missing steps or could be improved, please
let me know?
http://forum.pjrc.com/forums/4-Suggestions-amp-Bug-Reports */ #if defined(USB_SERIAL) #define VENDOR_ID 0x16C0 #define PRODUCT_ID 0x0483 #define DEVICE_CLASS 2 // 2 = Communication Class #define MANUFACTURER_NAME {'T','e','e','n','s','y','d','u','i','n','o'} #define MANUFACTURER_NAME_LEN 11 #define PRODUCT_NAME {'U','S','B',' ','S','e','r','i','a','l'} #define PRODUCT_NAME_LEN 10 #define EP0_SIZE 64 #define NUM_ENDPOINTS 4 #define NUM_USB_BUFFERS 12 #define NUM_INTERFACE 2 #define CDC_STATUS_INTERFACE 0 #define CDC_DATA_INTERFACE 1 #define CDC_ACM_ENDPOINT 2 #define CDC_RX_ENDPOINT 3 #define CDC_TX_ENDPOINT 4 #define CDC_ACM_SIZE 16 #define CDC_RX_SIZE 64 #define CDC_TX_SIZE 64 #define ENDPOINT2_CONFIG ENDPOINT_TRANSIMIT_ONLY #define ENDPOINT3_CONFIG ENDPOINT_RECEIVE_ONLY #define ENDPOINT4_CONFIG ENDPOINT_TRANSIMIT_ONLY #elif defined(USB_HID) #define VENDOR_ID 0x16C0 #define PRODUCT_ID 0x0482 #define MANUFACTURER_NAME {'T','e','e','n','s','y','d','u','i','n','o'} #define MANUFACTURER_NAME_LEN 11 #define PRODUCT_NAME {'K','e','y','b','o','a','r','d','/','M','o','u','s','e','/','J','o','y','s','t','i','c','k'} #define PRODUCT_NAME_LEN 23 #define EP0_SIZE 64 #define NUM_ENDPOINTS 5 #define NUM_USB_BUFFERS 24 #define NUM_INTERFACE 4 #define SEREMU_INTERFACE 2 // Serial emulation #define SEREMU_TX_ENDPOINT 1 #define SEREMU_TX_SIZE 64 #define SEREMU_TX_INTERVAL 1 #define SEREMU_RX_ENDPOINT 2 #define SEREMU_RX_SIZE 32 #define SEREMU_RX_INTERVAL 2 #define KEYBOARD_INTERFACE 0 // Keyboard #define KEYBOARD_ENDPOINT 3 #define KEYBOARD_SIZE 8 #define KEYBOARD_INTERVAL 1 #define MOUSE_INTERFACE 1 // Mouse #define MOUSE_ENDPOINT 5 #define MOUSE_SIZE 8 #define MOUSE_INTERVAL 1 #define JOYSTICK_INTERFACE 3 // Joystick #define JOYSTICK_ENDPOINT 4 #define JOYSTICK_SIZE 16 #define JOYSTICK_INTERVAL 2 #define ENDPOINT1_CONFIG ENDPOINT_TRANSIMIT_ONLY #define ENDPOINT2_CONFIG ENDPOINT_RECEIVE_ONLY #define ENDPOINT3_CONFIG ENDPOINT_TRANSIMIT_ONLY #define ENDPOINT4_CONFIG 
ENDPOINT_TRANSIMIT_ONLY #define ENDPOINT5_CONFIG ENDPOINT_TRANSIMIT_ONLY #elif defined(USB_SERIAL_HID) #define VENDOR_ID 0x16C0 #define PRODUCT_ID 0x0487 #define DEVICE_CLASS 0xEF #define DEVICE_SUBCLASS 0x02 #define DEVICE_PROTOCOL 0x01 #define MANUFACTURER_NAME {'T','e','e','n','s','y','d','u','i','n','o'} #define MANUFACTURER_NAME_LEN 11 #define PRODUCT_NAME {'S','e','r','i','a','l','/','K','e','y','b','o','a','r','d','/','M','o','u','s','e','/','J','o','y','s','t','i','c','k'} #define PRODUCT_NAME_LEN 30 #define EP0_SIZE 64 #define NUM_ENDPOINTS 6 #define NUM_USB_BUFFERS 30 #define NUM_INTERFACE 5 #define CDC_IAD_DESCRIPTOR 1 #define CDC_STATUS_INTERFACE 0 #define CDC_DATA_INTERFACE 1 // Serial #define CDC_ACM_ENDPOINT 2 #define CDC_RX_ENDPOINT 3 #define CDC_TX_ENDPOINT 4 #define CDC_ACM_SIZE 16 #define CDC_RX_SIZE 64 #define CDC_TX_SIZE 64 #define KEYBOARD_INTERFACE 2 // Keyboard #define KEYBOARD_ENDPOINT 1 #define KEYBOARD_SIZE 8 #define KEYBOARD_INTERVAL 1 #define MOUSE_INTERFACE 3 // Mouse #define MOUSE_ENDPOINT 5 #define MOUSE_SIZE 8 #define MOUSE_INTERVAL 2 #define JOYSTICK_INTERFACE 4 // Joystick #define JOYSTICK_ENDPOINT 6 #define JOYSTICK_SIZE 16 #define JOYSTICK_INTERVAL 1 #define ENDPOINT1_CONFIG ENDPOINT_TRANSIMIT_ONLY #define ENDPOINT2_CONFIG ENDPOINT_TRANSIMIT_ONLY #define ENDPOINT3_CONFIG ENDPOINT_RECEIVE_ONLY #define ENDPOINT4_CONFIG ENDPOINT_TRANSIMIT_ONLY #define ENDPOINT5_CONFIG ENDPOINT_TRANSIMIT_ONLY #define ENDPOINT6_CONFIG ENDPOINT_TRANSIMIT_ONLY #elif defined(USB_MIDI) #define VENDOR_ID 0x16C0 #define PRODUCT_ID 0x0485 #define MANUFACTURER_NAME {'T','e','e','n','s','y','d','u','i','n','o'} #define MANUFACTURER_NAME_LEN 11 #define PRODUCT_NAME {'T','e','e','n','s','y',' ','M','I','D','I'} #define PRODUCT_NAME_LEN 11 #define EP0_SIZE 64 #define NUM_ENDPOINTS 4 #define NUM_USB_BUFFERS 16 #define NUM_INTERFACE 2 #define SEREMU_INTERFACE 1 // Serial emulation #define SEREMU_TX_ENDPOINT 1 #define SEREMU_TX_SIZE 64 #define SEREMU_TX_INTERVAL 1 
#define SEREMU_RX_ENDPOINT 2 #define SEREMU_RX_SIZE 32 #define SEREMU_RX_INTERVAL 2 #define MIDI_INTERFACE 0 // MIDI #define MIDI_TX_ENDPOINT 3 #define MIDI_TX_SIZE 64 #define MIDI_RX_ENDPOINT 4 #define MIDI_RX_SIZE 64 #define ENDPOINT1_CONFIG ENDPOINT_TRANSIMIT_ONLY #define ENDPOINT2_CONFIG ENDPOINT_RECEIVE_ONLY #define ENDPOINT3_CONFIG ENDPOINT_TRANSIMIT_ONLY #define ENDPOINT4_CONFIG ENDPOINT_RECEIVE_ONLY #elif defined(USB_RAWHID) #define VENDOR_ID 0x16C0 #define PRODUCT_ID 0x0486 // #define RAWHID_USAGE_PAGE 0xFFAB // recommended: 0xFF00 to 0xFFFF // #define RAWHID_USAGE 0x0200 // recommended: 0x0100 to 0xFFFF #define RAWHID_USAGE_PAGE 0xf1d0 // recommended: 0xFF00 to 0xFFFF #define RAWHID_USAGE 0x01 // recommended: 0x0100 to 0xFFFF #define MANUFACTURER_NAME {'T','e','e','n','s','y','d','u','i','n','o'} #define MANUFACTURER_NAME_LEN 11 #define PRODUCT_NAME {'T','e','e','n','s','y','d','u','i','n','o',' ','U','2','F','H','I','D'} #define PRODUCT_NAME_LEN 18 #define EP0_SIZE 64 #define NUM_ENDPOINTS 6 #define NUM_USB_BUFFERS 12 #define NUM_INTERFACE 2 #define RAWHID_INTERFACE 0 // RawHID #define RAWHID_TX_ENDPOINT 3 #define RAWHID_TX_SIZE 64 #define RAWHID_TX_INTERVAL 1 #define RAWHID_RX_ENDPOINT 4 #define RAWHID_RX_SIZE 64 #define RAWHID_RX_INTERVAL 1 #define SEREMU_INTERFACE 1 // Serial emulation #define SEREMU_TX_ENDPOINT 1 #define SEREMU_TX_SIZE 64 #define SEREMU_TX_INTERVAL 1 #define SEREMU_RX_ENDPOINT 2 #define SEREMU_RX_SIZE 32 #define SEREMU_RX_INTERVAL 2 #define ENDPOINT1_CONFIG ENDPOINT_TRANSIMIT_ONLY #define ENDPOINT2_CONFIG ENDPOINT_RECEIVE_ONLY #define ENDPOINT3_CONFIG ENDPOINT_TRANSIMIT_ONLY #define ENDPOINT4_CONFIG ENDPOINT_RECEIVE_ONLY #elif defined(USB_FLIGHTSIM) #define VENDOR_ID 0x16C0 #define PRODUCT_ID 0x0488 #define MANUFACTURER_NAME {'T','e','e','n','s','y','d','u','i','n','o'} #define MANUFACTURER_NAME_LEN 11 #define PRODUCT_NAME {'T','e','e','n','s','y',' ','F','l','i','g','h','t',' ','S','i','m',' ','C','o','n','t','r','o','l','s'} 
#define PRODUCT_NAME_LEN 26 #define EP0_SIZE 64 #define NUM_ENDPOINTS 4 #define NUM_USB_BUFFERS 20 #define NUM_INTERFACE 2 #define FLIGHTSIM_INTERFACE 0 // Flight Sim Control #define FLIGHTSIM_TX_ENDPOINT 3 #define FLIGHTSIM_TX_SIZE 64 #define FLIGHTSIM_TX_INTERVAL 1 #define FLIGHTSIM_RX_ENDPOINT 4 #define FLIGHTSIM_RX_SIZE 64 #define FLIGHTSIM_RX_INTERVAL 1 #define SEREMU_INTERFACE 1 // Serial emulation #define SEREMU_TX_ENDPOINT 1 #define SEREMU_TX_SIZE 64 #define SEREMU_TX_INTERVAL 1 #define SEREMU_RX_ENDPOINT 2 #define SEREMU_RX_SIZE 32 #define SEREMU_RX_INTERVAL 2 #define ENDPOINT1_CONFIG ENDPOINT_TRANSIMIT_ONLY #define ENDPOINT2_CONFIG ENDPOINT_RECEIVE_ONLY #define ENDPOINT3_CONFIG ENDPOINT_TRANSIMIT_ONLY #define ENDPOINT4_CONFIG ENDPOINT_RECEIVE_ONLY #endif #ifdef USB_DESC_LIST_DEFINE #if defined(NUM_ENDPOINTS) && NUM_ENDPOINTS > 0 // NUM_ENDPOINTS = number of non-zero endpoints (0 to 15) extern const uint8_t usb_endpoint_config_table[NUM_ENDPOINTS]; typedef struct { uint16_t wValue; uint16_t wIndex; const uint8_t *addr; uint16_t length; } usb_descriptor_list_t; extern const usb_descriptor_list_t usb_descriptor_list[]; #endif // NUM_ENDPOINTS #endif // USB_DESC_LIST_DEFINE #endif