1/* Copyright (C) 2016-2020 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <https://www.gnu.org/licenses/>. */
17
18/*
19 * Copyright (c) 1985, 1989, 1993
20 * The Regents of the University of California. All rights reserved.
21 *
22 * Redistribution and use in source and binary forms, with or without
23 * modification, are permitted provided that the following conditions
24 * are met:
25 * 1. Redistributions of source code must retain the above copyright
26 * notice, this list of conditions and the following disclaimer.
27 * 2. Redistributions in binary form must reproduce the above copyright
28 * notice, this list of conditions and the following disclaimer in the
29 * documentation and/or other materials provided with the distribution.
30 * 4. Neither the name of the University nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44 * SUCH DAMAGE.
45 */
46
47/*
48 * Portions Copyright (c) 1993 by Digital Equipment Corporation.
49 *
50 * Permission to use, copy, modify, and distribute this software for any
51 * purpose with or without fee is hereby granted, provided that the above
52 * copyright notice and this permission notice appear in all copies, and that
53 * the name of Digital Equipment Corporation not be used in advertising or
54 * publicity pertaining to distribution of the document or software without
55 * specific, written prior permission.
56 *
57 * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
58 * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
59 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
60 * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
61 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
62 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
63 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
64 * SOFTWARE.
65 */
66
67/*
68 * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
69 *
70 * Permission to use, copy, modify, and distribute this software for any
71 * purpose with or without fee is hereby granted, provided that the above
72 * copyright notice and this permission notice appear in all copies.
73 *
74 * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
75 * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
76 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
77 * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
78 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
79 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
80 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
81 * SOFTWARE.
82 */
83
84/*
85 * Send query to name server and wait for reply.
86 */
87
88#include <assert.h>
89#include <sys/types.h>
90#include <sys/param.h>
91#include <sys/time.h>
92#include <sys/socket.h>
93#include <sys/uio.h>
94#include <sys/poll.h>
95
96#include <netinet/in.h>
97#include <arpa/nameser.h>
98#include <arpa/inet.h>
99#include <sys/ioctl.h>
100
101#include <errno.h>
102#include <fcntl.h>
103#include <netdb.h>
104#include <resolv/resolv-internal.h>
105#include <resolv/resolv_context.h>
106#include <signal.h>
107#include <stdlib.h>
108#include <string.h>
109#include <unistd.h>
110#include <kernel-features.h>
111#include <libc-diag.h>
112#include <random-bits.h>
113
114#if PACKETSZ > 65536
115#define MAXPACKET PACKETSZ
116#else
117#define MAXPACKET 65536
118#endif
119
120/* From ev_streams.c. */
121
/* Make *VEC describe the CNT bytes starting at BUF.  The structure is
   first filled with the byte 0xf5 so that only the two members
   assigned below carry a meaningful value.  */
static inline void
__attribute ((always_inline))
evConsIovec (void *buf, size_t cnt, struct iovec *vec)
{
  memset (vec, 0xf5, sizeof *vec);
  vec->iov_len = cnt;
  vec->iov_base = buf;
}
129
130/* From ev_timers.c. */
131
132#define BILLION 1000000000
133
/* Store the time value of SEC seconds and NSEC nanoseconds in *RES.
   No normalization of NSEC is performed.  */
static inline void
evConsTime (struct timespec *res, time_t sec, long nsec)
{
  res->tv_nsec = nsec;
  res->tv_sec = sec;
}
139
140static inline void
141evAddTime(struct timespec *res, const struct timespec *addend1,
142 const struct timespec *addend2) {
143 res->tv_sec = addend1->tv_sec + addend2->tv_sec;
144 res->tv_nsec = addend1->tv_nsec + addend2->tv_nsec;
145 if (res->tv_nsec >= BILLION) {
146 res->tv_sec++;
147 res->tv_nsec -= BILLION;
148 }
149}
150
151static inline void
152evSubTime(struct timespec *res, const struct timespec *minuend,
153 const struct timespec *subtrahend) {
154 res->tv_sec = minuend->tv_sec - subtrahend->tv_sec;
155 if (minuend->tv_nsec >= subtrahend->tv_nsec)
156 res->tv_nsec = minuend->tv_nsec - subtrahend->tv_nsec;
157 else {
158 res->tv_nsec = (BILLION
159 - subtrahend->tv_nsec + minuend->tv_nsec);
160 res->tv_sec--;
161 }
162}
163
/* Compare the times A and B.  Return -1 if A is earlier than B, 1 if
   A is later than B, and 0 if both are equal.  The seconds are
   compared first; the nanoseconds only break ties.

   The members are compared directly instead of subtracting them into
   a long: the difference of two time_t values need not fit into a
   long (for instance with a 64-bit time_t and a 32-bit long), and the
   subtraction itself can overflow for operands of opposite sign.  */
static int
evCmpTime (struct timespec a, struct timespec b)
{
  if (a.tv_sec != b.tv_sec)
    return a.tv_sec < b.tv_sec ? -1 : 1;
  if (a.tv_nsec != b.tv_nsec)
    return a.tv_nsec < b.tv_nsec ? -1 : 1;
  return 0;
}
172
173static void
174evNowTime(struct timespec *res) {
175 __clock_gettime(CLOCK_REALTIME, res);
176}
177
178
179#define EXT(res) ((res)->_u._ext)
180
181/* Forward. */
182
183static struct sockaddr *get_nsaddr (res_state, unsigned int);
184static int send_vc(res_state, const u_char *, int,
185 const u_char *, int,
186 u_char **, int *, int *, int, u_char **,
187 u_char **, int *, int *, int *);
188static int send_dg(res_state, const u_char *, int,
189 const u_char *, int,
190 u_char **, int *, int *, int,
191 int *, int *, u_char **,
192 u_char **, int *, int *, int *);
193static int sock_eq(struct sockaddr_in6 *, struct sockaddr_in6 *);
194
195/* Public. */
196
/* Check whether the address *INP belongs to one of the name servers
   configured in STATP.  INP is declared as struct sockaddr_in6, but
   it is interpreted according to its address family: for AF_INET it
   is accessed as a struct sockaddr_in.

   A configured server matches if it has the same address family and
   port as INP, and its address is either the wildcard address
   (INADDR_ANY or in6addr_any) or equal to the address in INP.

   Return 1 if a matching server is found and 0 otherwise (including
   for address families other than AF_INET and AF_INET6).  */
int
res_ourserver_p (const res_state statp, const struct sockaddr_in6 *inp)
{
  switch (inp->sin6_family)
    {
    case AF_INET:
      {
        const struct sockaddr_in *wanted = (const struct sockaddr_in *) inp;

        for (int i = 0; i < statp->nscount; i++)
          {
            const struct sockaddr_in *cand
              = (const struct sockaddr_in *) get_nsaddr (statp, i);

            if (cand->sin_family != AF_INET
                || cand->sin_port != wanted->sin_port)
              continue;
            if (cand->sin_addr.s_addr == INADDR_ANY
                || cand->sin_addr.s_addr == wanted->sin_addr.s_addr)
              return 1;
          }
        break;
      }

    case AF_INET6:
      for (int i = 0; i < statp->nscount; i++)
        {
          const struct sockaddr_in6 *cand
            = (const struct sockaddr_in6 *) get_nsaddr (statp, i);

          if (cand->sin6_family != AF_INET6
              || cand->sin6_port != inp->sin6_port)
            continue;
          if (memcmp (&cand->sin6_addr, &in6addr_any,
                      sizeof (struct in6_addr)) == 0
              || memcmp (&cand->sin6_addr, &inp->sin6_addr,
                         sizeof (struct in6_addr)) == 0)
            return 1;
        }
      break;

    default:
      break;
    }

  return 0;
}
241
/* Variant of res_ourserver_p which consults the global resolver state
   _res.  INP is passed on reinterpreted as a pointer to struct
   sockaddr_in6.  */
int
res_isourserver (const struct sockaddr_in *inp)
{
  const struct sockaddr_in6 *generic = (const struct sockaddr_in6 *) inp;

  return res_ourserver_p (&_res, generic);
}
247
248/* int
249 * res_nameinquery(name, type, class, buf, eom)
250 * look for (name,type,class) in the query section of packet (buf,eom)
251 * requires:
252 * buf + HFIXEDSZ <= eom
253 * returns:
254 * -1 : format error
255 * 0 : not found
256 * >0 : found
257 * author:
258 * paul vixie, 29may94
259 */
260int
261res_nameinquery(const char *name, int type, int class,
262 const u_char *buf, const u_char *eom)
263{
264 const u_char *cp = buf + HFIXEDSZ;
265 int qdcount = ntohs(((HEADER*)buf)->qdcount);
266
267 while (qdcount-- > 0) {
268 char tname[MAXDNAME+1];
269 int n, ttype, tclass;
270
271 n = dn_expand(buf, eom, cp, tname, sizeof tname);
272 if (n < 0)
273 return (-1);
274 cp += n;
275 if (cp + 2 * INT16SZ > eom)
276 return (-1);
277 NS_GET16(ttype, cp);
278 NS_GET16(tclass, cp);
279 if (ttype == type && tclass == class &&
280 ns_samename(tname, name) == 1)
281 return (1);
282 }
283 return (0);
284}
285libresolv_hidden_def (res_nameinquery)
286
/* Return the shift which is applied to the name server index in
   order to implement RES_ROTATE.  The result is always smaller than
   the number of configured name servers.  */
static unsigned int
nameserver_offset (struct __res_state *statp)
{
  unsigned int nscount = statp->nscount;

  /* Without RES_ROTATE, or with at most one name server, there is
     nothing to rotate.  */
  if (!(statp->options & RES_ROTATE) || nscount <= 1)
    return 0;

  /* Counter shared by all callers.  Its lowest bit records whether
     the counter has been seeded with a random value; the remaining
     bits are the actual counter, which is why it is advanced in
     steps of 2.  Relaxed MO is sufficient because only a roughly
     sequential series of values is needed.  */
  static unsigned int global_offset;
  unsigned int raw = atomic_fetch_add_relaxed (&global_offset, 2);

  if ((raw & 1) == 0)
    {
      /* Not seeded yet.  The lowest bit of the random value is the
         most random one; the shift keeps it while making room for
         the initialization bit.  */
      raw = random_bits ();
      raw <<= 1;

      /* atomic_fetch_add_relaxed returned the old value, so store
         what the counter would contain after the increment, with the
         initialization bit set.  Concurrent initialization with
         different random values is harmless.  */
      atomic_store_relaxed (&global_offset, (raw | 1) + 2);
    }

  /* Drop the initialization bit.  */
  unsigned int counter = raw >> 1;

  /* Avoid the generic division for the most common server counts.  */
  if (nscount == 2)
    return counter & 1;
  if (nscount == 3)
    return counter % 3;
  if (nscount == 4)
    return counter & 3;
  return counter % nscount;
}
334
335/* Clear the AD bit unless the trust-ad option was specified in the
336 resolver configuration. */
337static void
338mask_ad_bit (struct resolv_context *ctx, void *buf)
339{
340 if (!(ctx->resp->options & RES_TRUSTAD))
341 ((HEADER *) buf)->ad = 0;
342}
343
344/* int
345 * res_queriesmatch(buf1, eom1, buf2, eom2)
346 * is there a 1:1 mapping of (name,type,class)
347 * in (buf1,eom1) and (buf2,eom2)?
348 * returns:
349 * -1 : format error
350 * 0 : not a 1:1 mapping
351 * >0 : is a 1:1 mapping
352 * author:
353 * paul vixie, 29may94
354 */
355int
356res_queriesmatch(const u_char *buf1, const u_char *eom1,
357 const u_char *buf2, const u_char *eom2)
358{
359 if (buf1 + HFIXEDSZ > eom1 || buf2 + HFIXEDSZ > eom2)
360 return (-1);
361
362 /*
363 * Only header section present in replies to
364 * dynamic update packets.
365 */
366 if ((((HEADER *)buf1)->opcode == ns_o_update) &&
367 (((HEADER *)buf2)->opcode == ns_o_update))
368 return (1);
369
370 /* Note that we initially do not convert QDCOUNT to the host byte
371 order. We can compare it with the second buffer's QDCOUNT
372 value without doing this. */
373 int qdcount = ((HEADER*)buf1)->qdcount;
374 if (qdcount != ((HEADER*)buf2)->qdcount)
375 return (0);
376
377 qdcount = htons (qdcount);
378 const u_char *cp = buf1 + HFIXEDSZ;
379
380 while (qdcount-- > 0) {
381 char tname[MAXDNAME+1];
382 int n, ttype, tclass;
383
384 n = dn_expand(buf1, eom1, cp, tname, sizeof tname);
385 if (n < 0)
386 return (-1);
387 cp += n;
388 if (cp + 2 * INT16SZ > eom1)
389 return (-1);
390 NS_GET16(ttype, cp);
391 NS_GET16(tclass, cp);
392 if (!res_nameinquery(tname, ttype, tclass, buf2, eom2))
393 return (0);
394 }
395 return (1);
396}
397libresolv_hidden_def (res_queriesmatch)
398
399int
400__res_context_send (struct resolv_context *ctx,
401 const unsigned char *buf, int buflen,
402 const unsigned char *buf2, int buflen2,
403 unsigned char *ans, int anssiz,
404 unsigned char **ansp, unsigned char **ansp2,
405 int *nansp2, int *resplen2, int *ansp2_malloced)
406{
407 struct __res_state *statp = ctx->resp;
408 int gotsomewhere, terrno, try, v_circuit, resplen;
409 /* On some architectures send_vc is inlined and the compiler might emit
410 a warning indicating 'resplen' may be used uninitialized. Note that
411 the warning belongs to resplen in send_vc which is used as return
412 value! There the maybe-uninitialized warning is already ignored as
413 it is a false-positive - see comment in send_vc.
414 Here the variable n is set to the return value of send_vc.
415 See below. */
416 DIAG_PUSH_NEEDS_COMMENT;
417 DIAG_IGNORE_NEEDS_COMMENT (9, "-Wmaybe-uninitialized");
418 int n;
419 DIAG_POP_NEEDS_COMMENT;
420
421 if (statp->nscount == 0) {
422 __set_errno (ESRCH);
423 return (-1);
424 }
425
426 if (anssiz < (buf2 == NULL ? 1 : 2) * HFIXEDSZ) {
427 __set_errno (EINVAL);
428 return (-1);
429 }
430
431 v_circuit = ((statp->options & RES_USEVC)
432 || buflen > PACKETSZ
433 || buflen2 > PACKETSZ);
434 gotsomewhere = 0;
435 terrno = ETIMEDOUT;
436
437 /*
438 * If the ns_addr_list in the resolver context has changed, then
439 * invalidate our cached copy and the associated timing data.
440 */
441 if (EXT(statp).nscount != 0) {
442 int needclose = 0;
443
444 if (EXT(statp).nscount != statp->nscount)
445 needclose++;
446 else
447 for (unsigned int ns = 0; ns < statp->nscount; ns++) {
448 if (statp->nsaddr_list[ns].sin_family != 0
449 && !sock_eq((struct sockaddr_in6 *)
450 &statp->nsaddr_list[ns],
451 EXT(statp).nsaddrs[ns]))
452 {
453 needclose++;
454 break;
455 }
456 }
457 if (needclose) {
458 __res_iclose(statp, false);
459 EXT(statp).nscount = 0;
460 }
461 }
462
463 /*
464 * Maybe initialize our private copy of the ns_addr_list.
465 */
466 if (EXT(statp).nscount == 0) {
467 for (unsigned int ns = 0; ns < statp->nscount; ns++) {
468 EXT(statp).nssocks[ns] = -1;
469 if (statp->nsaddr_list[ns].sin_family == 0)
470 continue;
471 if (EXT(statp).nsaddrs[ns] == NULL)
472 EXT(statp).nsaddrs[ns] =
473 malloc(sizeof (struct sockaddr_in6));
474 if (EXT(statp).nsaddrs[ns] != NULL)
475 memset (mempcpy(EXT(statp).nsaddrs[ns],
476 &statp->nsaddr_list[ns],
477 sizeof (struct sockaddr_in)),
478 '\0',
479 sizeof (struct sockaddr_in6)
480 - sizeof (struct sockaddr_in));
481 else
482 return -1;
483 }
484 EXT(statp).nscount = statp->nscount;
485 }
486
487 /* Name server index offset. Used to implement
488 RES_ROTATE. */
489 unsigned int ns_offset = nameserver_offset (statp);
490
491 /*
492 * Send request, RETRY times, or until successful.
493 */
494 for (try = 0; try < statp->retry; try++) {
495 for (unsigned ns_shift = 0; ns_shift < statp->nscount; ns_shift++)
496 {
497 /* The actual name server index. This implements
498 RES_ROTATE. */
499 unsigned int ns = ns_shift + ns_offset;
500 if (ns >= statp->nscount)
501 ns -= statp->nscount;
502
503 same_ns:
504 if (__glibc_unlikely (v_circuit)) {
505 /* Use VC; at most one attempt per server. */
506 try = statp->retry;
507 n = send_vc(statp, buf, buflen, buf2, buflen2,
508 &ans, &anssiz, &terrno,
509 ns, ansp, ansp2, nansp2, resplen2,
510 ansp2_malloced);
511 if (n < 0)
512 return (-1);
513 /* See comment at the declaration of n. */
514 DIAG_PUSH_NEEDS_COMMENT;
515 DIAG_IGNORE_NEEDS_COMMENT (9, "-Wmaybe-uninitialized");
516 if (n == 0 && (buf2 == NULL || *resplen2 == 0))
517 goto next_ns;
518 DIAG_POP_NEEDS_COMMENT;
519 } else {
520 /* Use datagrams. */
521 n = send_dg(statp, buf, buflen, buf2, buflen2,
522 &ans, &anssiz, &terrno,
523 ns, &v_circuit, &gotsomewhere, ansp,
524 ansp2, nansp2, resplen2, ansp2_malloced);
525 if (n < 0)
526 return (-1);
527 if (n == 0 && (buf2 == NULL || *resplen2 == 0))
528 goto next_ns;
529 if (v_circuit)
530 // XXX Check whether both requests failed or
531 // XXX whether one has been answered successfully
532 goto same_ns;
533 }
534
535 resplen = n;
536
537 /* See comment at the declaration of n. Note: resplen = n; */
538 DIAG_PUSH_NEEDS_COMMENT;
539 DIAG_IGNORE_NEEDS_COMMENT (9, "-Wmaybe-uninitialized");
540 /* Mask the AD bit in both responses unless it is
541 marked trusted. */
542 if (resplen > HFIXEDSZ)
543 {
544 if (ansp != NULL)
545 mask_ad_bit (ctx, *ansp);
546 else
547 mask_ad_bit (ctx, ans);
548 }
549 DIAG_POP_NEEDS_COMMENT;
550 if (resplen2 != NULL && *resplen2 > HFIXEDSZ)
551 mask_ad_bit (ctx, *ansp2);
552
553 /*
554 * If we have temporarily opened a virtual circuit,
555 * or if we haven't been asked to keep a socket open,
556 * close the socket.
557 */
558 if ((v_circuit && (statp->options & RES_USEVC) == 0) ||
559 (statp->options & RES_STAYOPEN) == 0) {
560 __res_iclose(statp, false);
561 }
562 return (resplen);
563 next_ns: ;
564 } /*foreach ns*/
565 } /*foreach retry*/
566 __res_iclose(statp, false);
567 if (!v_circuit) {
568 if (!gotsomewhere)
569 __set_errno (ECONNREFUSED); /* no nameservers found */
570 else
571 __set_errno (ETIMEDOUT); /* no answer obtained */
572 } else
573 __set_errno (terrno);
574 return (-1);
575}
576
577/* Common part of res_nsend and res_send. */
578static int
579context_send_common (struct resolv_context *ctx,
580 const unsigned char *buf, int buflen,
581 unsigned char *ans, int anssiz)
582{
583 if (ctx == NULL)
584 {
585 RES_SET_H_ERRNO (&_res, NETDB_INTERNAL);
586 return -1;
587 }
588 int result = __res_context_send (ctx, buf, buflen, NULL, 0, ans, anssiz,
589 NULL, NULL, NULL, NULL, NULL);
590 __resolv_context_put (ctx);
591 return result;
592}
593
/* Send the query BUF of BUFLEN bytes using the resolver state STATP
   and store the answer in ANS (ANSSIZ bytes).  The resolver context
   is obtained with __resolv_context_get_override; the result is that
   of context_send_common.  */
int
res_nsend (res_state statp, const unsigned char *buf, int buflen,
           unsigned char *ans, int anssiz)
{
  struct resolv_context *ctx = __resolv_context_get_override (statp);

  return context_send_common (ctx, buf, buflen, ans, anssiz);
}
601
/* Send the query BUF of BUFLEN bytes and store the answer in ANS
   (ANSSIZ bytes).  The resolver context is obtained with
   __resolv_context_get; the result is that of
   context_send_common.  */
int
res_send (const unsigned char *buf, int buflen, unsigned char *ans, int anssiz)
{
  struct resolv_context *ctx = __resolv_context_get ();

  return context_send_common (ctx, buf, buflen, ans, anssiz);
}
608
609/* Private */
610
611static struct sockaddr *
612get_nsaddr (res_state statp, unsigned int n)
613{
614 assert (n < statp->nscount);
615
616 if (statp->nsaddr_list[n].sin_family == 0 && EXT(statp).nsaddrs[n] != NULL)
617 /* EXT(statp).nsaddrs[n] holds an address that is larger than
618 struct sockaddr, and user code did not update
619 statp->nsaddr_list[n]. */
620 return (struct sockaddr *) EXT(statp).nsaddrs[n];
621 else
622 /* User code updated statp->nsaddr_list[n], or statp->nsaddr_list[n]
623 has the same content as EXT(statp).nsaddrs[n]. */
624 return (struct sockaddr *) (void *) &statp->nsaddr_list[n];
625}
626
627/* Close the resolver structure, assign zero to *RESPLEN2 if RESPLEN2
628 is not NULL, and return zero. */
629static int
630__attribute__ ((warn_unused_result))
631close_and_return_error (res_state statp, int *resplen2)
632{
633 __res_iclose(statp, false);
634 if (resplen2 != NULL)
635 *resplen2 = 0;
636 return 0;
637}
638
639/* The send_vc function is responsible for sending a DNS query over TCP
640 to the nameserver numbered NS from the res_state STATP i.e.
641 EXT(statp).nssocks[ns]. The function supports sending both IPv4 and
642 IPv6 queries at the same serially on the same socket.
643
644 Please note that for TCP there is no way to disable sending both
645 queries, unlike UDP, which honours RES_SNGLKUP and RES_SNGLKUPREOP
646 and sends the queries serially and waits for the result after each
647 sent query. This implementation should be corrected to honour these
648 options.
649
650 Please also note that for TCP we send both queries over the same
651 socket one after another. This technically violates best practice
652 since the server is allowed to read the first query, respond, and
653 then close the socket (to service another client). If the server
654 does this, then the remaining second query in the socket data buffer
655 will cause the server to send the client an RST which will arrive
656 asynchronously and the client's OS will likely tear down the socket
657 receive buffer resulting in a potentially short read and lost
658 response data. This will force the client to retry the query again,
659 and this process may repeat until all servers and connection resets
660 are exhausted and then the query will fail. It's not known if this
661 happens with any frequency in real DNS server implementations. This
662 implementation should be corrected to use two sockets by default for
663 parallel queries.
664
665 The query stored in BUF of BUFLEN length is sent first followed by
666 the query stored in BUF2 of BUFLEN2 length. Queries are sent
667 serially on the same socket.
668
669 Answers to the query are stored firstly in *ANSP up to a max of
670 *ANSSIZP bytes. If more than *ANSSIZP bytes are needed and ANSCP
671 is non-NULL (to indicate that modifying the answer buffer is allowed)
672 then malloc is used to allocate a new response buffer and ANSCP and
673 ANSP will both point to the new buffer. If more than *ANSSIZP bytes
674 are needed but ANSCP is NULL, then as much of the response as
675 possible is read into the buffer, but the results will be truncated.
676 When truncation happens because of a small answer buffer the DNS
677 packets header field TC will bet set to 1, indicating a truncated
678 message and the rest of the socket data will be read and discarded.
679
680 Answers to the query are stored secondly in *ANSP2 up to a max of
681 *ANSSIZP2 bytes, with the actual response length stored in
682 *RESPLEN2. If more than *ANSSIZP bytes are needed and ANSP2
683 is non-NULL (required for a second query) then malloc is used to
684 allocate a new response buffer, *ANSSIZP2 is set to the new buffer
685 size and *ANSP2_MALLOCED is set to 1.
686
687 The ANSP2_MALLOCED argument will eventually be removed as the
688 change in buffer pointer can be used to detect the buffer has
689 changed and that the caller should use free on the new buffer.
690
691 Note that the answers may arrive in any order from the server and
692 therefore the first and second answer buffers may not correspond to
693 the first and second queries.
694
695 It is not supported to call this function with a non-NULL ANSP2
696 but a NULL ANSCP. Put another way, you can call send_vc with a
697 single unmodifiable buffer or two modifiable buffers, but no other
698 combination is supported.
699
700 It is the caller's responsibility to free the malloc allocated
701 buffers by detecting that the pointers have changed from their
702 original values i.e. *ANSCP or *ANSP2 has changed.
703
704 If errors are encountered then *TERRNO is set to an appropriate
705 errno value and a zero result is returned for a recoverable error,
706 and a less-than zero result is returned for a non-recoverable error.
707
708 If no errors are encountered then *TERRNO is left unmodified and
709 a the length of the first response in bytes is returned. */
710static int
711send_vc(res_state statp,
712 const u_char *buf, int buflen, const u_char *buf2, int buflen2,
713 u_char **ansp, int *anssizp,
714 int *terrno, int ns, u_char **anscp, u_char **ansp2, int *anssizp2,
715 int *resplen2, int *ansp2_malloced)
716{
717 const HEADER *hp = (HEADER *) buf;
718 const HEADER *hp2 = (HEADER *) buf2;
719 HEADER *anhp = (HEADER *) *ansp;
720 struct sockaddr *nsap = get_nsaddr (statp, ns);
721 int truncating, connreset, n;
722 /* On some architectures compiler might emit a warning indicating
723 'resplen' may be used uninitialized. However if buf2 == NULL
724 then this code won't be executed; if buf2 != NULL, then first
725 time round the loop recvresp1 and recvresp2 will be 0 so this
726 code won't be executed but "thisresplenp = &resplen;" followed
727 by "*thisresplenp = rlen;" will be executed so that subsequent
728 times round the loop resplen has been initialized. So this is
729 a false-positive.
730 */
731 DIAG_PUSH_NEEDS_COMMENT;
732 DIAG_IGNORE_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
733 int resplen;
734 DIAG_POP_NEEDS_COMMENT;
735 struct iovec iov[4];
736 u_short len;
737 u_short len2;
738 u_char *cp;
739
740 connreset = 0;
741 same_ns:
742 truncating = 0;
743
744 /* Are we still talking to whom we want to talk to? */
745 if (statp->_vcsock >= 0 && (statp->_flags & RES_F_VC) != 0) {
746 struct sockaddr_in6 peer;
747 socklen_t size = sizeof peer;
748
749 if (getpeername(statp->_vcsock,
750 (struct sockaddr *)&peer, &size) < 0 ||
751 !sock_eq(&peer, (struct sockaddr_in6 *) nsap)) {
752 __res_iclose(statp, false);
753 statp->_flags &= ~RES_F_VC;
754 }
755 }
756
757 if (statp->_vcsock < 0 || (statp->_flags & RES_F_VC) == 0) {
758 if (statp->_vcsock >= 0)
759 __res_iclose(statp, false);
760
761 statp->_vcsock = socket
762 (nsap->sa_family, SOCK_STREAM | SOCK_CLOEXEC, 0);
763 if (statp->_vcsock < 0) {
764 *terrno = errno;
765 if (resplen2 != NULL)
766 *resplen2 = 0;
767 return (-1);
768 }
769 __set_errno (0);
770 if (connect(statp->_vcsock, nsap,
771 nsap->sa_family == AF_INET
772 ? sizeof (struct sockaddr_in)
773 : sizeof (struct sockaddr_in6)) < 0) {
774 *terrno = errno;
775 return close_and_return_error (statp, resplen2);
776 }
777 statp->_flags |= RES_F_VC;
778 }
779
780 /*
781 * Send length & message
782 */
783 len = htons ((u_short) buflen);
784 evConsIovec(&len, INT16SZ, &iov[0]);
785 evConsIovec((void*)buf, buflen, &iov[1]);
786 int niov = 2;
787 ssize_t explen = INT16SZ + buflen;
788 if (buf2 != NULL) {
789 len2 = htons ((u_short) buflen2);
790 evConsIovec(&len2, INT16SZ, &iov[2]);
791 evConsIovec((void*)buf2, buflen2, &iov[3]);
792 niov = 4;
793 explen += INT16SZ + buflen2;
794 }
795 if (TEMP_FAILURE_RETRY (writev(statp->_vcsock, iov, niov)) != explen) {
796 *terrno = errno;
797 return close_and_return_error (statp, resplen2);
798 }
799 /*
800 * Receive length & response
801 */
802 int recvresp1 = 0;
803 /* Skip the second response if there is no second query.
804 To do that we mark the second response as received. */
805 int recvresp2 = buf2 == NULL;
806 uint16_t rlen16;
807 read_len:
808 cp = (u_char *)&rlen16;
809 len = sizeof(rlen16);
810 while ((n = TEMP_FAILURE_RETRY (read(statp->_vcsock, cp,
811 (int)len))) > 0) {
812 cp += n;
813 if ((len -= n) <= 0)
814 break;
815 }
816 if (n <= 0) {
817 *terrno = errno;
818 /*
819 * A long running process might get its TCP
820 * connection reset if the remote server was
821 * restarted. Requery the server instead of
822 * trying a new one. When there is only one
823 * server, this means that a query might work
824 * instead of failing. We only allow one reset
825 * per query to prevent looping.
826 */
827 if (*terrno == ECONNRESET && !connreset)
828 {
829 __res_iclose (statp, false);
830 connreset = 1;
831 goto same_ns;
832 }
833 return close_and_return_error (statp, resplen2);
834 }
835 int rlen = ntohs (rlen16);
836
837 int *thisanssizp;
838 u_char **thisansp;
839 int *thisresplenp;
840 if ((recvresp1 | recvresp2) == 0 || buf2 == NULL) {
841 /* We have not received any responses
842 yet or we only have one response to
843 receive. */
844 thisanssizp = anssizp;
845 thisansp = anscp ?: ansp;
846 assert (anscp != NULL || ansp2 == NULL);
847 thisresplenp = &resplen;
848 } else {
849 thisanssizp = anssizp2;
850 thisansp = ansp2;
851 thisresplenp = resplen2;
852 }
853 anhp = (HEADER *) *thisansp;
854
855 *thisresplenp = rlen;
856 /* Is the answer buffer too small? */
857 if (*thisanssizp < rlen) {
858 /* If the current buffer is not the the static
859 user-supplied buffer then we can reallocate
860 it. */
861 if (thisansp != NULL && thisansp != ansp) {
862 /* Always allocate MAXPACKET, callers expect
863 this specific size. */
864 u_char *newp = malloc (MAXPACKET);
865 if (newp == NULL)
866 {
867 *terrno = ENOMEM;
868 return close_and_return_error (statp, resplen2);
869 }
870 *thisanssizp = MAXPACKET;
871 *thisansp = newp;
872 if (thisansp == ansp2)
873 *ansp2_malloced = 1;
874 anhp = (HEADER *) newp;
875 /* A uint16_t can't be larger than MAXPACKET
876 thus it's safe to allocate MAXPACKET but
877 read RLEN bytes instead. */
878 len = rlen;
879 } else {
880 truncating = 1;
881 len = *thisanssizp;
882 }
883 } else
884 len = rlen;
885
886 if (__glibc_unlikely (len < HFIXEDSZ)) {
887 /*
888 * Undersized message.
889 */
890 *terrno = EMSGSIZE;
891 return close_and_return_error (statp, resplen2);
892 }
893
894 cp = *thisansp;
895 while (len != 0 && (n = read(statp->_vcsock, (char *)cp, (int)len)) > 0){
896 cp += n;
897 len -= n;
898 }
899 if (__glibc_unlikely (n <= 0)) {
900 *terrno = errno;
901 return close_and_return_error (statp, resplen2);
902 }
903 if (__glibc_unlikely (truncating)) {
904 /*
905 * Flush rest of answer so connection stays in synch.
906 */
907 anhp->tc = 1;
908 len = rlen - *thisanssizp;
909 while (len != 0) {
910 char junk[PACKETSZ];
911
912 n = read(statp->_vcsock, junk,
913 (len > sizeof junk) ? sizeof junk : len);
914 if (n > 0)
915 len -= n;
916 else
917 break;
918 }
919 }
920 /*
921 * If the calling application has bailed out of
922 * a previous call and failed to arrange to have
923 * the circuit closed or the server has got
924 * itself confused, then drop the packet and
925 * wait for the correct one.
926 */
927 if ((recvresp1 || hp->id != anhp->id)
928 && (recvresp2 || hp2->id != anhp->id))
929 goto read_len;
930
931 /* Mark which reply we received. */
932 if (recvresp1 == 0 && hp->id == anhp->id)
933 recvresp1 = 1;
934 else
935 recvresp2 = 1;
936 /* Repeat waiting if we have a second answer to arrive. */
937 if ((recvresp1 & recvresp2) == 0)
938 goto read_len;
939
940 /*
941 * All is well, or the error is fatal. Signal that the
942 * next nameserver ought not be tried.
943 */
944 return resplen;
945}
946
947static int
948reopen (res_state statp, int *terrno, int ns)
949{
950 if (EXT(statp).nssocks[ns] == -1) {
951 struct sockaddr *nsap = get_nsaddr (statp, ns);
952 socklen_t slen;
953
954 /* only try IPv6 if IPv6 NS and if not failed before */
955 if (nsap->sa_family == AF_INET6 && !statp->ipv6_unavail) {
956 EXT(statp).nssocks[ns] = socket
957 (PF_INET6,
958 SOCK_DGRAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0);
959 if (EXT(statp).nssocks[ns] < 0)
960 statp->ipv6_unavail = errno == EAFNOSUPPORT;
961 slen = sizeof (struct sockaddr_in6);
962 } else if (nsap->sa_family == AF_INET) {
963 EXT(statp).nssocks[ns] = socket
964 (PF_INET,
965 SOCK_DGRAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0);
966 slen = sizeof (struct sockaddr_in);
967 }
968 if (EXT(statp).nssocks[ns] < 0) {
969 *terrno = errno;
970 return (-1);
971 }
972
973 /* Enable full ICMP error reporting for this
974 socket. */
975 if (__res_enable_icmp (nsap->sa_family,
976 EXT (statp).nssocks[ns]) < 0)
977 {
978 int saved_errno = errno;
979 __res_iclose (statp, false);
980 __set_errno (saved_errno);
981 *terrno = saved_errno;
982 return -1;
983 }
984
985 /*
986 * On a 4.3BSD+ machine (client and server,
987 * actually), sending to a nameserver datagram
988 * port with no nameserver will cause an
989 * ICMP port unreachable message to be returned.
990 * If our datagram socket is "connected" to the
991 * server, we get an ECONNREFUSED error on the next
992 * socket operation, and select returns if the
993 * error message is received. We can thus detect
994 * the absence of a nameserver without timing out.
995 */
996 /* With GCC 5.3 when compiling with -Os the compiler
997 emits a warning that slen may be used uninitialized,
998 but that is never true. Both slen and
999 EXT(statp).nssocks[ns] are initialized together or
1000 the function return -1 before control flow reaches
1001 the call to connect with slen. */
1002 DIAG_PUSH_NEEDS_COMMENT;
1003 DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
1004 if (connect(EXT(statp).nssocks[ns], nsap, slen) < 0) {
1005 DIAG_POP_NEEDS_COMMENT;
1006 __res_iclose(statp, false);
1007 return (0);
1008 }
1009 }
1010
1011 return 1;
1012}
1013
1014/* The send_dg function is responsible for sending a DNS query over UDP
1015 to the nameserver numbered NS from the res_state STATP i.e.
1016 EXT(statp).nssocks[ns]. The function supports IPv4 and IPv6 queries
1017 along with the ability to send the query in parallel for both stacks
1018 (default) or serially (RES_SNGLKUP). It also supports serial lookup
1019 with a close and reopen of the socket used to talk to the server
1020 (RES_SNGLKUPREOP) to work around broken name servers.
1021
1022 The query stored in BUF of BUFLEN length is sent first followed by
1023 the query stored in BUF2 of BUFLEN2 length. Queries are sent
1024 in parallel (default) or serially (RES_SNGLKUP or RES_SNGLKUPREOP).
1025
1026 Answers to the query are stored firstly in *ANSP up to a max of
1027 *ANSSIZP bytes. If more than *ANSSIZP bytes are needed and ANSCP
1028 is non-NULL (to indicate that modifying the answer buffer is allowed)
1029 then malloc is used to allocate a new response buffer and ANSCP and
1030 ANSP will both point to the new buffer. If more than *ANSSIZP bytes
1031 are needed but ANSCP is NULL, then as much of the response as
1032 possible is read into the buffer, but the results will be truncated.
1033 When truncation happens because of a small answer buffer the DNS
1034 packet's header field TC will be set to 1, indicating a truncated
1035 message, while the rest of the UDP packet is discarded.
1036
1037 Answers to the query are stored secondly in *ANSP2 up to a max of
1038 *ANSSIZP2 bytes, with the actual response length stored in
1039 *RESPLEN2. If more than *ANSSIZP2 bytes are needed and ANSP2
1040 is non-NULL (required for a second query) then malloc is used to
1041 allocate a new response buffer, *ANSSIZP2 is set to the new buffer
1042 size and *ANSP2_MALLOCED is set to 1.
1043
1044 The ANSP2_MALLOCED argument will eventually be removed as the
1045 change in buffer pointer can be used to detect the buffer has
1046 changed and that the caller should use free on the new buffer.
1047
1048 Note that the answers may arrive in any order from the server and
1049 therefore the first and second answer buffers may not correspond to
1050 the first and second queries.
1051
1052 It is not supported to call this function with a non-NULL ANSP2
1053 but a NULL ANSCP. Put another way, you can call send_dg with a
1054 single unmodifiable buffer or two modifiable buffers, but no other
1055 combination is supported.
1056
1057 It is the caller's responsibility to free the malloc allocated
1058 buffers by detecting that the pointers have changed from their
1059 original values i.e. *ANSCP or *ANSP2 has changed.
1060
1061 If an answer is truncated because of UDP datagram DNS limits then
1062 *V_CIRCUIT is set to 1 and the return value non-zero to indicate to
1063 the caller to retry with TCP. The value *GOTSOMEWHERE is set to 1
1064 if any progress was made reading a response from the nameserver and
1065 is used by the caller to distinguish between ECONNREFUSED and
1066 ETIMEDOUT (the latter if *GOTSOMEWHERE is 1).
1067
1068 If errors are encountered then *TERRNO is set to an appropriate
1069 errno value and a zero result is returned for a recoverable error,
1070 and a less-than zero result is returned for a non-recoverable error.
1071
1072 If no errors are encountered then *TERRNO is left unmodified and
1073 the length of the first response in bytes is returned. */
1074static int
1075send_dg(res_state statp,
1076 const u_char *buf, int buflen, const u_char *buf2, int buflen2,
1077 u_char **ansp, int *anssizp,
1078 int *terrno, int ns, int *v_circuit, int *gotsomewhere, u_char **anscp,
1079 u_char **ansp2, int *anssizp2, int *resplen2, int *ansp2_malloced)
1080{
 /* HP and HP2 view the outgoing queries as DNS headers so that the
    ids of incoming replies can be compared with them.  HP2 is only
    dereferenced while a second reply is still outstanding.  */
1081 const HEADER *hp = (HEADER *) buf;
1082 const HEADER *hp2 = (HEADER *) buf2;
1083 struct timespec now, timeout, finish;
1084 struct pollfd pfd[1];
1085 int ptimeout;
1086 struct sockaddr_in6 from;
1087 int resplen = 0;
1088 int n;
1089
1090 /*
1091 * Compute time for the total operation.
1092 */
 /* The budget in seconds is statp->retrans shifted left by NS; for
    NS > 0 it is divided by statp->nscount.  It is clamped to a
    minimum of one second.  */
1093 int seconds = (statp->retrans << ns);
1094 if (ns > 0)
1095 seconds /= statp->nscount;
1096 if (seconds <= 0)
1097 seconds = 1;
 /* RES_SNGLKUPREOP implies single-request mode as well.  */
1098 bool single_request_reopen = (statp->options & RES_SNGLKUPREOP) != 0;
1099 bool single_request = (((statp->options & RES_SNGLKUP) != 0)
1100 | single_request_reopen);
 /* Saved so that *GOTSOMEWHERE can be restored when the exchange is
    restarted in a more conservative mode after a timeout.  */
1101 int save_gotsomewhere = *gotsomewhere;
1102
 /* Make sure a socket for this server is available.  A result <= 0
    from reopen is passed straight back to the caller, with
    *RESPLEN2 cleared when RESPLEN2 is non-NULL.  */
1103 int retval;
1104 retry_reopen:
1105 retval = reopen (statp, terrno, ns);
1106 if (retval <= 0)
1107 {
1108 if (resplen2 != NULL)
1109 *resplen2 = 0;
1110 return retval;
1111 }
 /* (Re)start the exchange: set the absolute deadline FINISH and reset
    the bookkeeping for what has been sent and received.  */
1112 retry:
1113 evNowTime(&now);
1114 evConsTime(&timeout, seconds, 0);
1115 evAddTime(&finish, &now, &timeout);
 /* NEED_RECOMPUTE asks the code at "wait" to refresh TIMEOUT from
    FINISH; NWRITTEN counts the queries sent so far.  */
1116 int need_recompute = 0;
1117 int nwritten = 0;
1118 int recvresp1 = 0;
1119 /* Skip the second response if there is no second query.
1120 To do that we mark the second response as received. */
1121 int recvresp2 = buf2 == NULL;
 /* Nothing has been sent yet, so wait for writability first.  */
1122 pfd[0].fd = EXT(statp).nssocks[ns];
1123 pfd[0].events = POLLOUT;
 /* Top of the send/receive loop.  When time may have elapsed since
    TIMEOUT was last computed, derive the remaining time from FINISH
    and fail once the deadline has passed.  The labels inside this
    block are also jump targets: recompute_resend for the
    EINTR/EAGAIN paths below, poll_err_out for a failed poll.  */
1124 wait:
1125 if (need_recompute) {
1126 recompute_resend:
1127 evNowTime(&now);
1128 if (evCmpTime(finish, now) <= 0) {
1129 poll_err_out:
1130 return close_and_return_error (statp, resplen2);
1131 }
1132 evSubTime(&timeout, &finish, &now);
1133 need_recompute = 0;
1134 }
1135 /* Convert struct timespec in milliseconds. */
1136 ptimeout = timeout.tv_sec * 1000 + timeout.tv_nsec / 1000000;
1137
 /* Before the first send, probe with a zero timeout.  Only when that
    reports nothing, or once something has been sent, block for up to
    PTIMEOUT.  After a blocking poll TIMEOUT has to be recomputed on
    the next pass.  */
1138 n = 0;
1139 if (nwritten == 0)
1140 n = __poll (pfd, 1, 0);
1141 if (__glibc_unlikely (n == 0)) {
1142 n = __poll (pfd, 1, ptimeout);
1143 need_recompute = 1;
1144 }
 /* The poll timed out.  */
1145 if (n == 0) {
 /* A reply had already been accepted (and RESPLEN > 1) before the
    timeout hit.  */
1146 if (resplen > 1 && (recvresp1 || (buf2 != NULL && recvresp2)))
1147 {
1148 /* There are quite a few broken name servers out
1149 there which don't handle two outstanding
1150 requests from the same source. There are also
1151 broken firewall settings. If we time out after
1152 having received one answer switch to the mode
1153 where we send the second request only once we
1154 have received the first answer. */
1155 if (!single_request)
1156 {
1157 statp->options |= RES_SNGLKUP;
1158 single_request = true;
1159 *gotsomewhere = save_gotsomewhere;
1160 goto retry;
1161 }
1162 else if (!single_request_reopen)
1163 {
1164 statp->options |= RES_SNGLKUPREOP;
1165 single_request_reopen = true;
1166 *gotsomewhere = save_gotsomewhere;
1167 __res_iclose (statp, false);
1168 goto retry_reopen;
1169 }
1170
 /* Already in single-request-reopen mode: return RESPLEN and store 1
    in *RESPLEN2.  NOTE(review): presumably a marker telling the
    caller that the second answer is missing -- confirm against the
    caller.  */
1171 *resplen2 = 1;
1172 return resplen;
1173 }
1174
 /* Otherwise set *GOTSOMEWHERE, clear *RESPLEN2 if it is non-NULL and
    return 0.  */
1175 *gotsomewhere = 1;
1176 if (resplen2 != NULL)
1177 *resplen2 = 0;
1178 return 0;
1179 }
 /* poll failed: after EINTR retry with a freshly computed timeout,
    otherwise bail out through close_and_return_error.  */
1180 if (n < 0) {
1181 if (errno == EINTR)
1182 goto recompute_resend;
1183
1184 goto poll_err_out;
1185 }
1186 __set_errno (0);
 /* The socket is writable: send whichever query is due.  */
1187 if (pfd[0].revents & POLLOUT) {
 /* Without __ASSUME_SENDMMSG, HAVE_SENDMMSG records how the first
    unexpected sendmmsg failure turned out: it starts at 0, becomes
    -1 when the call fails with ENOSYS and 1 for any other failure.
    Once it is -1 the sendmmsg path below is no longer taken.  */
1188#ifndef __ASSUME_SENDMMSG
1189 static int have_sendmmsg;
1190#else
1191# define have_sendmmsg 1
1192#endif
 /* Both queries go out in a single sendmmsg call when this is the
    first send, a second query exists and single-request mode is
    off.  */
1193 if (have_sendmmsg >= 0 && nwritten == 0 && buf2 != NULL
1194 && !single_request)
1195 {
1196 struct iovec iov =
1197 { .iov_base = (void *) buf, .iov_len = buflen };
1198 struct iovec iov2 =
1199 { .iov_base = (void *) buf2, .iov_len = buflen2 };
1200 struct mmsghdr reqs[2] =
1201 {
1202 {
1203 .msg_hdr =
1204 {
1205 .msg_iov = &iov,
1206 .msg_iovlen = 1,
1207 },
1208 },
1209 {
1210 .msg_hdr =
1211 {
1212 .msg_iov = &iov2,
1213 .msg_iovlen = 1,
1214 }
1215 },
1216 };
1217
 /* Outcomes: both datagrams sent (each must be complete, then only
    replies are awaited); only the first sent in full (continue at
    just_one as if send had been used); EINTR/EAGAIN (retry with a
    fresh timeout); anything else is a failure, except that ENOSYS
    on the first such failure switches to plain send when sendmmsg
    is not assumed to exist.  */
1218 int ndg = __sendmmsg (pfd[0].fd, reqs, 2, MSG_NOSIGNAL);
1219 if (__glibc_likely (ndg == 2))
1220 {
1221 if (reqs[0].msg_len != buflen
1222 || reqs[1].msg_len != buflen2)
1223 goto fail_sendmmsg;
1224
1225 pfd[0].events = POLLIN;
1226 nwritten += 2;
1227 }
1228 else if (ndg == 1 && reqs[0].msg_len == buflen)
1229 goto just_one;
1230 else if (ndg < 0 && (errno == EINTR || errno == EAGAIN))
1231 goto recompute_resend;
1232 else
1233 {
1234#ifndef __ASSUME_SENDMMSG
1235 if (__glibc_unlikely (have_sendmmsg == 0))
1236 {
1237 if (ndg < 0 && errno == ENOSYS)
1238 {
1239 have_sendmmsg = -1;
1240 goto try_send;
1241 }
1242 have_sendmmsg = 1;
1243 }
1244#endif
1245
1246 fail_sendmmsg:
1247 return close_and_return_error (statp, resplen2);
1248 }
1249 }
1250 else
1251 {
 /* Send a single query: the second one if something has already been
    written, the first one otherwise.  */
1252 ssize_t sr;
1253#ifndef __ASSUME_SENDMMSG
1254 try_send:
1255#endif
1256 if (nwritten != 0)
1257 sr = send (pfd[0].fd, buf2, buflen2, MSG_NOSIGNAL);
1258 else
1259 sr = send (pfd[0].fd, buf, buflen, MSG_NOSIGNAL);
1260
 /* Anything but a complete send is an error, unless errno is EINTR or
    EAGAIN, in which case the send is retried.  */
1261 if (sr != (nwritten != 0 ? buflen2 : buflen)) {
1262 if (errno == EINTR || errno == EAGAIN)
1263 goto recompute_resend;
1264 return close_and_return_error (statp, resplen2);
1265 }
 /* Keep polling for writability as well only while the second query
    of a parallel pair is still unsent; in every other case just wait
    for input.  */
1266 just_one:
1267 if (nwritten != 0 || buf2 == NULL || single_request)
1268 pfd[0].events = POLLIN;
1269 else
1270 pfd[0].events = POLLIN | POLLOUT;
1271 ++nwritten;
1272 }
1273 goto wait;
1274 } else if (pfd[0].revents & POLLIN) {
 /* A datagram is waiting.  Pick the buffer, size and length slot it
    will be stored through.  */
1275 int *thisanssizp;
1276 u_char **thisansp;
1277 int *thisresplenp;
1278
1279 if ((recvresp1 | recvresp2) == 0 || buf2 == NULL) {
1280 /* We have not received any responses
1281 yet or we only have one response to
1282 receive. */
1283 thisanssizp = anssizp;
 /* Prefer ANSCP when it is non-NULL, since that pointer may be
    redirected to a new buffer below; otherwise use ANSP.  */
1284 thisansp = anscp ?: ansp;
1285 assert (anscp != NULL || ansp2 == NULL);
1286 thisresplenp = &resplen;
1287 } else {
 /* One reply has already been stored; this one goes to the second
    buffer.  */
1288 thisanssizp = anssizp2;
1289 thisansp = ansp2;
1290 thisresplenp = resplen2;
1291 }
1292
 /* Switch to a MAXPACKET-sized heap buffer when the current one is
    smaller than that, may be replaced and, where FIONREAD exists,
    either the pending size cannot be queried or it exceeds the
    buffer.  A failed malloc is not treated as an error: the old
    buffer stays in use.  */
1293 if (*thisanssizp < MAXPACKET
1294 /* If the current buffer is not the static
1295 user-supplied buffer then we can reallocate
1296 it. */
1297 && (thisansp != NULL && thisansp != ansp)
1298#ifdef FIONREAD
1299 /* Is the size too small? */
1300 && (ioctl (pfd[0].fd, FIONREAD, thisresplenp) < 0
1301 || *thisanssizp < *thisresplenp)
1302#endif
1303 ) {
1304 /* Always allocate MAXPACKET, callers expect
1305 this specific size. */
1306 u_char *newp = malloc (MAXPACKET);
1307 if (newp != NULL) {
1308 *thisanssizp = MAXPACKET;
1309 *thisansp = newp;
 /* Record that the second answer buffer is now heap-allocated.  */
1310 if (thisansp == ansp2)
1311 *ansp2_malloced = 1;
1312 }
1313 }
1314 /* We could end up with truncation if anscp was NULL
1315 (not allowed to change caller's buffer) and the
1316 response buffer size is too small. This isn't a
1317 reliable way to detect truncation because the ioctl
1318 may be an inaccurate report of the UDP message size.
1319 Therefore we use this only to issue debug output.
1320 To do truncation accurately with UDP we need
1321 MSG_TRUNC which is only available on Linux. We
1322 can abstract out the Linux-specific feature in the
1323 future to detect truncation. */
 /* ANHP is taken after the possible buffer switch above, so it views
    the buffer that recvfrom actually fills.  */
1324 HEADER *anhp = (HEADER *) *thisansp;
1325 socklen_t fromlen = sizeof(struct sockaddr_in6);
1326 assert (sizeof(from) <= fromlen);
1327 *thisresplenp = recvfrom(pfd[0].fd, (char*)*thisansp,
1328 *thisanssizp, 0,
1329 (struct sockaddr *)&from, &fromlen);
 /* Nothing was received: after EINTR/EAGAIN go back to waiting with a
    recomputed timeout, otherwise fail.  */
1330 if (__glibc_unlikely (*thisresplenp <= 0)) {
1331 if (errno == EINTR || errno == EAGAIN) {
1332 need_recompute = 1;
1333 goto wait;
1334 }
1335 return close_and_return_error (statp, resplen2);
1336 }
1337 *gotsomewhere = 1;
1338 if (__glibc_unlikely (*thisresplenp < HFIXEDSZ)) {
1339 /*
1340 * Undersized message.
1341 */
1342 *terrno = EMSGSIZE;
1343 return close_and_return_error (statp, resplen2);
1344 }
 /* Accept the datagram only if its id matches a query whose reply is
    still outstanding.  */
1345 if ((recvresp1 || hp->id != anhp->id)
1346 && (recvresp2 || hp2->id != anhp->id)) {
1347 /*
1348 * response from old query, ignore it.
1349 * XXX - potential security hazard could
1350 * be detected here.
1351 */
1352 goto wait;
1353 }
1354
1355 /* Paranoia check. Due to the connected UDP socket,
1356 the kernel has already filtered invalid addresses
1357 for us. */
1358 if (!res_ourserver_p(statp, &from))
1359 goto wait;
1360
1361 /* Check for the correct header layout and a matching
1362 question. */
1363 if ((recvresp1 || !res_queriesmatch(buf, buf + buflen,
1364 *thisansp,
1365 *thisansp
1366 + *thisanssizp))
1367 && (recvresp2 || !res_queriesmatch(buf2, buf2 + buflen2,
1368 *thisansp,
1369 *thisansp
1370 + *thisanssizp)))
1371 goto wait;
1372
 /* The server answered with SERVFAIL, NOTIMP or REFUSED.  */
1373 if (anhp->rcode == SERVFAIL ||
1374 anhp->rcode == NOTIMP ||
1375 anhp->rcode == REFUSED) {
 /* If a reply has already been accepted, return RESPLEN and report
    no second answer.  */
1376 next_ns:
1377 if (recvresp1 || (buf2 != NULL && recvresp2)) {
1378 *resplen2 = 0;
1379 return resplen;
1380 }
1381 if (buf2 != NULL)
1382 {
1383 /* No data from the first reply. */
1384 resplen = 0;
1385 /* We are waiting for a possible second reply. */
1386 if (hp->id == anhp->id)
1387 recvresp1 = 1;
1388 else
1389 recvresp2 = 1;
1390
1391 goto wait;
1392 }
1393
1394 /* don't retry if called from dig */
 /* With statp->pfcode set, only the socket is closed and processing
    of this reply continues with the checks below.  */
1395 if (!statp->pfcode)
1396 return close_and_return_error (statp, resplen2);
1397 __res_iclose(statp, false);
1398 }
 /* A NOERROR reply with no answer records, no additional records and
    neither the AA nor the RA bit set is handled like the error
    rcodes above.  */
1399 if (anhp->rcode == NOERROR && anhp->ancount == 0
1400 && anhp->aa == 0 && anhp->ra == 0 && anhp->arcount == 0) {
1401 goto next_ns;
1402 }
1403 if (!(statp->options & RES_IGNTC) && anhp->tc) {
1404 /*
1405 * To get the rest of answer,
1406 * use TCP with same server.
1407 */
1408 *v_circuit = 1;
1409 __res_iclose(statp, false);
1410 // XXX if we have received one reply we could
1411 // XXX use it and not repeat it over TCP...
1412 if (resplen2 != NULL)
1413 *resplen2 = 0;
1414 return (1);
1415 }
1416 /* Mark which reply we received. */
1417 if (recvresp1 == 0 && hp->id == anhp->id)
1418 recvresp1 = 1;
1419 else
1420 recvresp2 = 1;
1421 /* Repeat waiting if we have a second answer to arrive. */
1422 if ((recvresp1 & recvresp2) == 0) {
 /* In single-request mode, go back to waiting for writability so
    that the next send can happen; with RES_SNGLKUPREOP in effect the
    socket is closed and reopened first.  */
1423 if (single_request) {
1424 pfd[0].events = POLLOUT;
1425 if (single_request_reopen) {
1426 __res_iclose (statp, false);
1427 retval = reopen (statp, terrno, ns);
1428 if (retval <= 0)
1429 {
1430 if (resplen2 != NULL)
1431 *resplen2 = 0;
1432 return retval;
1433 }
1434 pfd[0].fd = EXT(statp).nssocks[ns];
1435 }
1436 }
1437 goto wait;
1438 }
1439 /* All is well. We have received both responses (if
1440 two responses were requested). */
1441 return (resplen);
1442 } else if (pfd[0].revents & (POLLERR | POLLHUP | POLLNVAL))
1443 /* Something went wrong. We can stop trying. */
1444 return close_and_return_error (statp, resplen2);
1445 else {
1446 /* poll should not have returned > 0 in this case. */
1447 abort ();
1448 }
1449}
1450
/* Return nonzero when A1 and A2 describe the same address and port.

   Both arguments are typed as struct sockaddr_in6 but either may
   really hold a struct sockaddr_in, as told by the family field.  Two
   addresses of the same family are compared field by field.  For a
   mixed pair, the IPv6 side must be an IPv4-mapped address whose last
   four bytes equal the IPv4 address.  AF_INET and AF_INET6 are
   assumed to be the only families that occur.  */
static int
sock_eq(struct sockaddr_in6 *a1, struct sockaddr_in6 *a2) {
	const int same_family = a1->sin6_family == a2->sin6_family;

	if (same_family && a1->sin6_family == AF_INET) {
		/* Two IPv4 addresses.  */
		const struct sockaddr_in *lhs = (const struct sockaddr_in *) a1;
		const struct sockaddr_in *rhs = (const struct sockaddr_in *) a2;

		return lhs->sin_port == rhs->sin_port
		       && lhs->sin_addr.s_addr == rhs->sin_addr.s_addr;
	}

	if (same_family)
		/* Same family and not IPv4: compare as IPv6.  */
		return a1->sin6_port == a2->sin6_port
		       && memcmp(&a1->sin6_addr, &a2->sin6_addr,
				 sizeof (struct in6_addr)) == 0;

	/* Mixed families: whichever side is AF_INET is the IPv4 one,
	   the other side is treated as IPv6.  */
	const struct sockaddr_in6 *v6 = a1;
	const struct sockaddr_in *v4 = (const struct sockaddr_in *) a2;
	if (a1->sin6_family == AF_INET) {
		v6 = a2;
		v4 = (const struct sockaddr_in *) a1;
	}

	/* The last four bytes of an IPv4-mapped address hold the IPv4
	   address in network byte order, the same layout as s_addr.  */
	return v6->sin6_port == v4->sin_port
	       && IN6_IS_ADDR_V4MAPPED(&v6->sin6_addr)
	       && memcmp(&v6->sin6_addr.s6_addr[12], &v4->sin_addr.s_addr,
			 sizeof v4->sin_addr.s_addr) == 0;
}
1474