forked from esp8266/Arduino
-
Notifications
You must be signed in to change notification settings - Fork 0
/
core_esp8266_wiring.c
212 lines (187 loc) · 7.39 KB
/
core_esp8266_wiring.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
/*
core_esp8266_wiring.c - implementation of Wiring API for esp8266
Copyright (c) 2014 Ivan Grokhotkov. All rights reserved.
This file is part of the esp8266 core for Arduino environment.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "wiring_private.h"
#include "ets_sys.h"
#include "osapi.h"
#include "user_interface.h"
#include "cont.h"
// Scheduler hooks defined elsewhere in the core; used by delay()/delay_end().
extern void esp_schedule();
extern void esp_yield();
// Timer used by delay() to get a delay_end() callback after the requested time.
static os_timer_t delay_timer;
// Periodic timer that drives micros_overflow_tick(); armed in init().
static os_timer_t micros_overflow_timer;
// Value of system_get_time() sampled at the most recent micros_overflow_tick().
static uint32_t micros_at_last_overflow_tick = 0;
// Number of times micros_overflow_tick() has seen system_get_time() go backwards
// (i.e. wrap) since start-up.
static uint32_t micros_overflow_count = 0;
// Values passed as the third argument of os_timer_arm() in this file.
// NOTE(review): presumably 0 = one-shot and 1 = auto-repeat - confirm against the SDK.
#define ONCE 0
#define REPEAT 1
// Timer callback installed by delay() on delay_timer.
// 'arg' is the timer's user argument; it is not used.
// Calls esp_schedule() (defined elsewhere) - presumably this makes the code
// that is parked in esp_yield() runnable again; confirm against the scheduler.
void delay_end(void* arg) {
(void) arg;
esp_schedule();
}
/*
 * Pause the caller for 'ms' milliseconds.
 *
 * ms == 0: esp_schedule() is called right away, followed by esp_yield().
 * ms != 0: delay_timer is armed (ONCE) with delay_end() as its callback,
 *          esp_yield() is called, and the timer is disarmed afterwards.
 *
 * NOTE(review): esp_schedule()/esp_yield() are defined elsewhere; presumably
 * esp_yield() returns only after esp_schedule() has been called - confirm.
 */
void delay(unsigned long ms) {
    if (ms == 0) {
        /* Nothing to wait for: request rescheduling first, then yield. */
        esp_schedule();
        esp_yield();
        return;
    }

    os_timer_setfn(&delay_timer, (os_timer_func_t*) &delay_end, 0);
    os_timer_arm(&delay_timer, ms, ONCE);

    esp_yield();

    /* Make sure the timer is no longer armed once we are running again. */
    os_timer_disarm(&delay_timer);
}
/*
 * Timer callback that tracks wrap-arounds of the microsecond counter.
 *
 * Samples system_get_time(); if the sample is smaller than the one taken on
 * the previous tick, the counter has wrapped, so micros_overflow_count is
 * incremented. The sample is then stored for the next comparison.
 *
 * 'arg' is the timer's user argument; it is not used.
 */
void micros_overflow_tick(void* arg) {
    (void) arg;

    const uint32_t now_us = system_get_time();

    if (micros_at_last_overflow_tick > now_us) {
        micros_overflow_count += 1;
    }

    micros_at_last_overflow_tick = now_us;
}
//---------------------------------------------------------------------------
// millis() 'magic multiplier' approximation
//
// This function corrects the cumulative (296us / usec overflow) drift
// seen in the original 'millis()' function.
//
// Input:
// 'm' - 32-bit usec counter, 0 <= m <= 0xFFFFFFFF
// 'c' - 32-bit usec overflow counter 0 <= c < 0x00400000
// Output:
// Returns milliseconds in modulo 0x1,0000,0000 (0 to 0xFFFFFFFF)
//
// Notes:
//
// 1) This routine approximates the 64-bit integer division,
//
// quotient = ( 2^32 c + m ) / 1000,
//
// through the use of 'magic' multipliers. A slow division is replaced by
// a faster multiply using a scaled multiplicative inverse of the divisor:
//
// quotient =~ ( 2^32 c + m ) * k, where k = Ceiling[ 2^n / 1000 ]
//
// The precision difference between multiplier and divisor sets the
// upper-bound of the dividend which can be successfully divided.
//
// For this application, n = 64, and the divisor (1000) has 10-bits of
// precision. This sets the dividend upper-bound to (64 - 10) = 54 bits,
// and that of 'c' to (54 - 32) = 22 bits. This corresponds to a value
// for 'c' = 0x0040,0000 , or +570 years of usec counter overflows.
//
// 2) A distributed multiply with offset-summing is used to find k( 2^32 c + m ):
//
// prd = (2^32 kh + kl) * ( 2^32 c + m )
// = 2^64 kh c + 2^32 kl c + 2^32 kh m + kl m
// (d) (c) (b) (a)
//
// Graphically, the offset-sums align in little endian like this:
// LS -> MS
// 32 64 96 128
// | a[-1] | a[0] | a[1] | a[2] |
// | m kl | 0 | 0 | a[-1] not needed
// | | m kh | |
// | | c kl | | a[1] holds the result
// | | | c kh | a[2] can be discarded
//
// As only the high-word of 'm kl' and low-word of 'c kh' contribute to the
// overall result, only (2) 32-bit words are needed for the accumulator.
//
// 3) As C++ does not intrinsically test for addition overflows, one must
// code specifically to detect them. This approximation skips these
// overflow checks for speed, hence the sum,
//
// highword( m kl ) + m kh + c kl < (2^64-1), MUST NOT OVERFLOW.
//
// To meet this criterion, not only do we have to pick 'k' to achieve our
// desired precision, we also have to split 'k' appropriately to avoid
// any addition overflows.
//
// 'k' should be also chosen to align the various products on byte
// boundaries to avoid any 64-bit shifts before additions, as they incur
// major time penalties. The 'k' chosen for this specific division by 1000
// was picked primarily to avoid shifts as well as for precision.
//
// For the reasons listed above, this routine is NOT a general one.
// Changing divisors could break the overflow requirement and force
// picking a 'k' split which requires shifts before additions.
//
// ** Test THOROUGHLY after making changes **
//
// 4) Results of time benchmarks run on an ESP8266 Huzzah feather are:
//
// usec x Orig Comment
// Orig: 3.18 1.00 Original code
// Corr: 13.21 4.15 64-bit reference code
// Test: 4.60 1.45 64-bit magic multiply, 4x32
//
// The magic multiplier routine runs ~3x faster than the reference. Execution
// times can vary considerably with the numbers being multiplied, so one
// should derate this factor to around 2x, worst case.
//
// Reference function: corrected millis(), 64-bit arithmetic,
// truncated to 32-bits by return
// unsigned long ICACHE_RAM_ATTR millis_corr_DEBUG( void )
// {
// // Get usec system time, usec overflow counter
// ......
// return ( (c * 4294967296 + m) / 1000 ); // 64-bit division is SLOW
// } //millis_corr
//
// 5) See this link for a good discussion on magic multipliers:
// http://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html
//
#define MAGIC_1E3_wLO 0x4bc6a7f0 // LS part
#define MAGIC_1E3_wHI 0x00418937 // MS part, magic multiplier

/*
 * Milliseconds derived from the microsecond counter, modulo 2^32.
 *
 * Approximates (2^32 * c + m) / 1000 by multiplying with the 64-bit constant
 * (MAGIC_1E3_wHI:MAGIC_1E3_wLO) and keeping bits 64..95 of the product, where
 *   m = current system_get_time() value (32-bit usec counter)
 *   c = number of wrap-arounds of that counter, including one that has
 *       happened since the last micros_overflow_tick() but is not counted yet.
 *
 * The four partial products are summed without carry checks; the chosen
 * split of the constant is what keeps the 64-bit middle sum from overflowing.
 */
unsigned long ICACHE_RAM_ATTR millis()
{
    const uint64_t k_lo = (uint64_t)MAGIC_1E3_wLO;
    const uint64_t k_hi = (uint64_t)MAGIC_1E3_wHI;

    // Get usec system time, usec overflow counter
    uint32_t m = system_get_time();
    uint32_t c = micros_overflow_count;
    if (m < micros_at_last_overflow_tick) {
        c += 1;     // wrap seen here but not yet recorded by the tick
    }

    // Bits 32..95 of the full product:
    uint64_t mid = (m * k_lo) >> 32;    // (a) high word of m * kl
    mid += m * k_hi;                    // (b) m * kh
    mid += c * k_lo;                    // (c) c * kl

    // Bits 64..95: high word of the middle sum plus the low word of c * kh.
    uint32_t result = (uint32_t)(mid >> 32);
    result += (uint32_t)(c * k_hi);     // (d) truncated, wraps modulo 2^32

    return result;
} //millis
/*
 * Current value of the system microsecond counter, as returned by
 * system_get_time().
 */
unsigned long ICACHE_RAM_ATTR micros() {
    unsigned long now_us = system_get_time();
    return now_us;
}
/*
 * 64-bit microsecond timestamp.
 *
 * Low 32 bits:  current system_get_time() value.
 * High 32 bits: micros_overflow_count, plus one if the counter is already
 *               below the value sampled at the last micros_overflow_tick()
 *               (a wrap that the tick has not recorded yet).
 */
uint64_t ICACHE_RAM_ATTR micros64() {
    const uint32_t now_us = system_get_time();

    uint32_t wraps = micros_overflow_count;
    if (now_us < micros_at_last_overflow_tick) {
        wraps += 1;
    }

    return ((uint64_t)wraps << 32) | now_us;
}
// Delay for 'us' microseconds by forwarding the value to os_delay_us().
// NOTE(review): os_delay_us() is declared elsewhere; whether it accepts the
// full 'unsigned int' range is not visible here - confirm against its prototype.
void ICACHE_RAM_ATTR delayMicroseconds(unsigned int us) {
os_delay_us(us);
}
void init() {
initPins();
timer1_isr_init();
os_timer_setfn(µs_overflow_timer, (os_timer_func_t*) µs_overflow_tick, 0);
os_timer_arm(µs_overflow_timer, 60000, REPEAT);
}