include/boost/corosio/native/detail/select/select_scheduler.hpp

87.9% Lines (145/165) 100.0% List of functions (10/10)
select_scheduler.hpp
f(x) Functions (10)
Line TLA Hits Source Code
1 //
2 // Copyright (c) 2026 Steve Gerbino
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 //
7 // Official repository: https://github.com/cppalliance/corosio
8 //
9
10 #ifndef BOOST_COROSIO_NATIVE_DETAIL_SELECT_SELECT_SCHEDULER_HPP
11 #define BOOST_COROSIO_NATIVE_DETAIL_SELECT_SELECT_SCHEDULER_HPP
12
13 #include <boost/corosio/detail/platform.hpp>
14
15 #if BOOST_COROSIO_HAS_SELECT
16
17 #include <boost/corosio/detail/config.hpp>
18 #include <boost/capy/ex/execution_context.hpp>
19
20 #include <boost/corosio/native/detail/reactor/reactor_scheduler.hpp>
21
22 #include <boost/corosio/native/detail/select/select_traits.hpp>
23 #include <boost/corosio/detail/timer_service.hpp>
24 #include <boost/corosio/native/detail/make_err.hpp>
25 #include <boost/corosio/native/detail/posix/posix_resolver_service.hpp>
26 #include <boost/corosio/native/detail/posix/posix_signal_service.hpp>
27 #include <boost/corosio/native/detail/posix/posix_stream_file_service.hpp>
28 #include <boost/corosio/native/detail/posix/posix_random_access_file_service.hpp>
29
30 #include <boost/corosio/detail/except.hpp>
31
32 #include <sys/select.h>
33 #include <unistd.h>
34 #include <errno.h>
35 #include <fcntl.h>
36
37 #include <atomic>
38 #include <chrono>
39 #include <cstdint>
40 #include <limits>
41 #include <mutex>
42 #include <unordered_map>
43
44 namespace boost::corosio::detail {
45
46 struct select_op;
47
48 /** POSIX scheduler using select() for I/O multiplexing.
49
50 This scheduler implements the scheduler interface using the POSIX select()
51 call for I/O event notification. It inherits the shared reactor threading
52 model from reactor_scheduler: signal state machine, inline completion
53 budget, work counting, and the do_one event loop.
54
55 The design mirrors epoll_scheduler for behavioral consistency:
56 - Same single-reactor thread coordination model
57 - Same deferred I/O pattern (reactor marks ready; workers do I/O)
58 - Same timer integration pattern
59
60 Known Limitations:
61 - FD_SETSIZE (~1024) limits maximum concurrent connections
62 - O(n) scanning: rebuilds fd_sets each iteration
63 - Level-triggered only (no edge-triggered mode)
64
65 @par Thread Safety
66 All public member functions are thread-safe.
67 */
68 class BOOST_COROSIO_DECL select_scheduler final : public reactor_scheduler
69 {
70 public:
71 /** Construct the scheduler.
72
73 Creates a self-pipe for reactor interruption.
74
75 @param ctx Reference to the owning execution_context.
76 @param concurrency_hint Hint for expected thread count (unused).
77 */
78 select_scheduler(capy::execution_context& ctx, int concurrency_hint = -1);
79
80 /// Destroy the scheduler.
81 ~select_scheduler() override;
82
83 select_scheduler(select_scheduler const&) = delete;
84 select_scheduler& operator=(select_scheduler const&) = delete;
85
86 /// Shut down the scheduler, draining pending operations.
87 void shutdown() override;
88
89 /** Return the maximum file descriptor value supported.
90
91 Returns FD_SETSIZE - 1, the maximum fd value that can be
92 monitored by select(). Operations with fd >= FD_SETSIZE
93 will fail with EINVAL.
94
95 @return The maximum supported file descriptor value.
96 */
97 static constexpr int max_fd() noexcept
98 {
99 return FD_SETSIZE - 1;
100 }
101
102 /** Register a descriptor for persistent monitoring.
103
104 The fd is added to the registered_descs_ map and will be
105 included in subsequent select() calls. The reactor is
106 interrupted so a blocked select() rebuilds its fd_sets.
107
108 @param fd The file descriptor to register.
109 @param desc Pointer to descriptor state for this fd.
110 */
111 void register_descriptor(int fd, reactor_descriptor_state* desc) const;
112
113 /** Deregister a persistently registered descriptor.
114
115 @param fd The file descriptor to deregister.
116 */
117 void deregister_descriptor(int fd) const;
118
119 /** Interrupt the reactor so it rebuilds its fd_sets.
120
121 Called when a write or connect op is registered after
122 the reactor's snapshot was taken. Without this, select()
123 may block not watching for writability on the fd.
124 */
125 void notify_reactor() const;
126
127 private:
128 void
129 run_task(lock_type& lock, context_type* ctx,
130 long timeout_us) override;
131 void interrupt_reactor() const override;
132 long calculate_timeout(long requested_timeout_us) const;
133
134 // Self-pipe for interrupting select()
135 int pipe_fds_[2]; // [0]=read, [1]=write
136
137 // Per-fd tracking for fd_set building
138 mutable std::unordered_map<int, reactor_descriptor_state*> registered_descs_;
139 mutable int max_fd_ = -1;
140 };
141
142 229x inline select_scheduler::select_scheduler(capy::execution_context& ctx, int)
143 229x : pipe_fds_{-1, -1}
144 229x , max_fd_(-1)
145 {
146 229x if (::pipe(pipe_fds_) < 0)
147 detail::throw_system_error(make_err(errno), "pipe");
148
149 687x for (int i = 0; i < 2; ++i)
150 {
151 458x int flags = ::fcntl(pipe_fds_[i], F_GETFL, 0);
152 458x if (flags == -1)
153 {
154 int errn = errno;
155 ::close(pipe_fds_[0]);
156 ::close(pipe_fds_[1]);
157 detail::throw_system_error(make_err(errn), "fcntl F_GETFL");
158 }
159 458x if (::fcntl(pipe_fds_[i], F_SETFL, flags | O_NONBLOCK) == -1)
160 {
161 int errn = errno;
162 ::close(pipe_fds_[0]);
163 ::close(pipe_fds_[1]);
164 detail::throw_system_error(make_err(errn), "fcntl F_SETFL");
165 }
166 458x if (::fcntl(pipe_fds_[i], F_SETFD, FD_CLOEXEC) == -1)
167 {
168 int errn = errno;
169 ::close(pipe_fds_[0]);
170 ::close(pipe_fds_[1]);
171 detail::throw_system_error(make_err(errn), "fcntl F_SETFD");
172 }
173 }
174
175 229x timer_svc_ = &get_timer_service(ctx, *this);
176 229x timer_svc_->set_on_earliest_changed(
177 3985x timer_service::callback(this, [](void* p) {
178 3756x static_cast<select_scheduler*>(p)->interrupt_reactor();
179 3756x }));
180
181 229x get_resolver_service(ctx, *this);
182 229x get_signal_service(ctx, *this);
183 229x get_stream_file_service(ctx, *this);
184 229x get_random_access_file_service(ctx, *this);
185
186 229x completed_ops_.push(&task_op_);
187 229x }
188
189 458x inline select_scheduler::~select_scheduler()
190 {
191 229x if (pipe_fds_[0] >= 0)
192 229x ::close(pipe_fds_[0]);
193 229x if (pipe_fds_[1] >= 0)
194 229x ::close(pipe_fds_[1]);
195 458x }
196
197 inline void
198 229x select_scheduler::shutdown()
199 {
200 229x shutdown_drain();
201
202 229x if (pipe_fds_[1] >= 0)
203 229x interrupt_reactor();
204 229x }
205
206 inline void
207 7212x select_scheduler::register_descriptor(
208 int fd, reactor_descriptor_state* desc) const
209 {
210 7212x if (fd < 0 || fd >= FD_SETSIZE)
211 detail::throw_system_error(make_err(EINVAL), "select: fd out of range");
212
213 7212x desc->registered_events = reactor_event_read | reactor_event_write;
214 7212x desc->fd = fd;
215 7212x desc->scheduler_ = this;
216 7212x desc->mutex.set_enabled(!single_threaded_);
217 7212x desc->ready_events_.store(0, std::memory_order_relaxed);
218
219 {
220 7212x conditionally_enabled_mutex::scoped_lock lock(desc->mutex);
221 7212x desc->impl_ref_.reset();
222 7212x desc->read_ready = false;
223 7212x desc->write_ready = false;
224 7212x }
225
226 {
227 7212x mutex_type::scoped_lock lock(mutex_);
228 7212x registered_descs_[fd] = desc;
229 7212x if (fd > max_fd_)
230 7208x max_fd_ = fd;
231 7212x }
232
233 7212x interrupt_reactor();
234 7212x }
235
236 inline void
237 7212x select_scheduler::deregister_descriptor(int fd) const
238 {
239 7212x mutex_type::scoped_lock lock(mutex_);
240
241 7212x auto it = registered_descs_.find(fd);
242 7212x if (it == registered_descs_.end())
243 return;
244
245 7212x registered_descs_.erase(it);
246
247 7212x if (fd == max_fd_)
248 {
249 7150x max_fd_ = pipe_fds_[0];
250 14191x for (auto& [registered_fd, state] : registered_descs_)
251 {
252 7041x if (registered_fd > max_fd_)
253 7031x max_fd_ = registered_fd;
254 }
255 }
256 7212x }
257
258 inline void
259 3532x select_scheduler::notify_reactor() const
260 {
261 3532x interrupt_reactor();
262 3532x }
263
264 inline void
265 14884x select_scheduler::interrupt_reactor() const
266 {
267 14884x char byte = 1;
268 14884x [[maybe_unused]] auto r = ::write(pipe_fds_[1], &byte, 1);
269 14884x }
270
271 inline long
272 127796x select_scheduler::calculate_timeout(long requested_timeout_us) const
273 {
274 127796x if (requested_timeout_us == 0)
275 return 0;
276
277 127796x auto nearest = timer_svc_->nearest_expiry();
278 127796x if (nearest == timer_service::time_point::max())
279 46x return requested_timeout_us;
280
281 127750x auto now = std::chrono::steady_clock::now();
282 127750x if (nearest <= now)
283 870x return 0;
284
285 auto timer_timeout_us =
286 126880x std::chrono::duration_cast<std::chrono::microseconds>(nearest - now)
287 126880x .count();
288
289 126880x constexpr auto long_max =
290 static_cast<long long>((std::numeric_limits<long>::max)());
291 auto capped_timer_us =
292 126880x (std::min)((std::max)(static_cast<long long>(timer_timeout_us),
293 126880x static_cast<long long>(0)),
294 126880x long_max);
295
296 126880x if (requested_timeout_us < 0)
297 126874x return static_cast<long>(capped_timer_us);
298
299 return static_cast<long>(
300 6x (std::min)(static_cast<long long>(requested_timeout_us),
301 6x capped_timer_us));
302 }
303
304 inline void
305 153991x select_scheduler::run_task(
306 lock_type& lock, context_type* ctx, long timeout_us)
307 {
308 long effective_timeout_us =
309 153991x task_interrupted_ ? 0 : calculate_timeout(timeout_us);
310
311 // Snapshot registered descriptors while holding lock.
312 // Record which fds need write monitoring to avoid a hot loop:
313 // select is level-triggered so writable sockets (nearly always
314 // writable) would cause select() to return immediately every
315 // iteration if unconditionally added to write_fds.
316 struct fd_entry
317 {
318 int fd;
319 reactor_descriptor_state* desc;
320 bool needs_write;
321 };
322 fd_entry snapshot[FD_SETSIZE];
323 153991x int snapshot_count = 0;
324
325 455963x for (auto& [fd, desc] : registered_descs_)
326 {
327 301972x if (snapshot_count < FD_SETSIZE)
328 {
329 301972x conditionally_enabled_mutex::scoped_lock desc_lock(desc->mutex);
330 301972x snapshot[snapshot_count].fd = fd;
331 301972x snapshot[snapshot_count].desc = desc;
332 301972x snapshot[snapshot_count].needs_write =
333 301972x (desc->write_op || desc->connect_op);
334 301972x ++snapshot_count;
335 301972x }
336 }
337
338 153991x if (lock.owns_lock())
339 127796x lock.unlock();
340
341 153991x task_cleanup on_exit{this, &lock, ctx};
342
343 fd_set read_fds, write_fds, except_fds;
344 2617847x FD_ZERO(&read_fds);
345 2617847x FD_ZERO(&write_fds);
346 2617847x FD_ZERO(&except_fds);
347
348 153991x FD_SET(pipe_fds_[0], &read_fds);
349 153991x int nfds = pipe_fds_[0];
350
351 455963x for (int i = 0; i < snapshot_count; ++i)
352 {
353 301972x int fd = snapshot[i].fd;
354 301972x FD_SET(fd, &read_fds);
355 301972x if (snapshot[i].needs_write)
356 3532x FD_SET(fd, &write_fds);
357 301972x FD_SET(fd, &except_fds);
358 301972x if (fd > nfds)
359 153725x nfds = fd;
360 }
361
362 struct timeval tv;
363 153991x struct timeval* tv_ptr = nullptr;
364 153991x if (effective_timeout_us >= 0)
365 {
366 153945x tv.tv_sec = effective_timeout_us / 1000000;
367 153945x tv.tv_usec = effective_timeout_us % 1000000;
368 153945x tv_ptr = &tv;
369 }
370
371 153991x int ready = ::select(nfds + 1, &read_fds, &write_fds, &except_fds, tv_ptr);
372
373 // EINTR: signal interrupted select(), just retry.
374 // EBADF: an fd was closed between snapshot and select(); retry
375 // with a fresh snapshot from registered_descs_.
376 153991x if (ready < 0)
377 {
378 if (errno == EINTR || errno == EBADF)
379 return;
380 detail::throw_system_error(make_err(errno), "select");
381 }
382
383 // Process timers outside the lock
384 153991x timer_svc_->process_expired();
385
386 153991x op_queue local_ops;
387
388 153991x if (ready > 0)
389 {
390 134289x if (FD_ISSET(pipe_fds_[0], &read_fds))
391 {
392 char buf[256];
393 14754x while (::read(pipe_fds_[0], buf, sizeof(buf)) > 0)
394 {
395 }
396 }
397
398 381715x for (int i = 0; i < snapshot_count; ++i)
399 {
400 247426x int fd = snapshot[i].fd;
401 247426x reactor_descriptor_state* desc = snapshot[i].desc;
402
403 247426x std::uint32_t flags = 0;
404 247426x if (FD_ISSET(fd, &read_fds))
405 130624x flags |= reactor_event_read;
406 247426x if (FD_ISSET(fd, &write_fds))
407 3532x flags |= reactor_event_write;
408 247426x if (FD_ISSET(fd, &except_fds))
409 flags |= reactor_event_error;
410
411 247426x if (flags == 0)
412 113272x continue;
413
414 134154x desc->add_ready_events(flags);
415
416 134154x bool expected = false;
417 134154x if (desc->is_enqueued_.compare_exchange_strong(
418 expected, true, std::memory_order_release,
419 std::memory_order_relaxed))
420 {
421 134154x local_ops.push(desc);
422 }
423 }
424 }
425
426 153991x lock.lock();
427
428 153991x if (!local_ops.empty())
429 130625x completed_ops_.splice(local_ops);
430 153991x }
431
432 } // namespace boost::corosio::detail
433
434 #endif // BOOST_COROSIO_HAS_SELECT
435
436 #endif // BOOST_COROSIO_NATIVE_DETAIL_SELECT_SELECT_SCHEDULER_HPP
437