自学内容网 自学内容网

UDP发送过程:copy_from_user调用路径跟踪

UDP发送过程,数据从用户空间复制到内核空间的copy_from_user调用路径跟踪

 
系统调用:sendto() ----》 __sys_sendto() ----》 __sock_sendmsg() ----》 sock_sendmsg_nosec() ----》 inet_sendmsg() ----》 udp_sendmsg() ----》 ip_make_skb() ----》 __ip_append_data() ----》 ip_generic_getfrag() ----》 csum_and_copy_from_iter_full() ----》 copy_from_user_iter_csum() ----》 csum_and_copy_from_user() ----》 copy_from_user()

net/socket.c


/*
 * sendto() system-call entry point.
 *
 * It does no work of its own: the six arguments are forwarded unchanged to
 * __sys_sendto() and that function's return value is returned to the caller.
 */
SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
                unsigned int, flags, struct sockaddr __user *, addr,
                int, addr_len)
{
        return __sys_sendto(fd, buff, len, flags, addr, addr_len);
}

------》__sys_sendto()

net/socket.c


/*
 *      Send a datagram to a given address. We move the address into kernel
 *      space and check the user space data area is readable before invoking
 *      the protocol.
 *
 *      @fd:       file descriptor resolved to a socket by sockfd_lookup_light()
 *      @buff:     user-space payload pointer; described through msg.msg_iter
 *      @len:      payload length in bytes
 *      @flags:    caller's MSG_* flags; the MSG_INTERNAL_SENDMSG_FLAGS bits are
 *                 cleared before use
 *      @addr:     optional user-space destination address (may be NULL)
 *      @addr_len: length of @addr
 *
 *      Returns whatever __sock_sendmsg() returns, or the error reported by
 *      import_ubuf(), sockfd_lookup_light() or move_addr_to_kernel() when one
 *      of those steps fails first.
 */
int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
                 struct sockaddr __user *addr,  int addr_len)
{
        struct socket *sock;
        struct sockaddr_storage address;
        int err;
        struct msghdr msg;
        int fput_needed;

        /*
         * Describe the user buffer (buff, len) as the data source in
         * msg.msg_iter; the iterator travels down the stack inside msg.
         */
        err = import_ubuf(ITER_SOURCE, buff, len, &msg.msg_iter);
        if (unlikely(err))
                return err;
        /* On failure the error code is expected to arrive through &err. */
        sock = sockfd_lookup_light(fd, &err, &fput_needed);
        if (!sock)
                goto out;

        /* Defaults: no destination address, no control data, no ubuf. */
        msg.msg_name = NULL;
        msg.msg_control = NULL;
        msg.msg_controllen = 0;
        msg.msg_namelen = 0;
        msg.msg_ubuf = NULL;
        if (addr) {
                /* Bring the destination into the on-stack kernel copy. */
                err = move_addr_to_kernel(addr, addr_len, &address);
                if (err < 0)
                        goto out_put;
                msg.msg_name = (struct sockaddr *)&address;
                msg.msg_namelen = addr_len;
        }
        /* Strip the flag bits named by MSG_INTERNAL_SENDMSG_FLAGS. */
        flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
        /* An O_NONBLOCK file makes this send MSG_DONTWAIT. */
        if (sock->file->f_flags & O_NONBLOCK)
                flags |= MSG_DONTWAIT;
        msg.msg_flags = flags;
        err = __sock_sendmsg(sock, &msg);

out_put:
        /* Pairs with sockfd_lookup_light(), which produced fput_needed. */
        fput_light(sock->file, fput_needed);
out:
        return err;
}

其中:

import_ubuf() --> iov_iter_ubuf():该函数将用户数据地址(buff)保存在 msg.msg_iter.ubuf 中,并设置 msg.msg_iter.iter_type = ITER_UBUF;随后 struct msghdr msg 被继续向下传递给 __sock_sendmsg()。

------》 __sock_sendmsg() ----》sock_sendmsg_nosec()

net/socket.c

/*
 * Call the socket's ->sendmsg operation without running the security hook
 * (the hook is run by __sock_sendmsg() below).
 *
 * The length handed to ->sendmsg is msg_data_left(msg). INDIRECT_CALL_INET
 * names inet6_sendmsg and inet_sendmsg as the expected call targets.
 *
 * Returns the value produced by ->sendmsg.
 */
static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
{
        int ret = INDIRECT_CALL_INET(READ_ONCE(sock->ops)->sendmsg, inet6_sendmsg,
                                     inet_sendmsg, sock, msg,
                                     msg_data_left(msg));
        /* -EIOCBQUEUED is treated as a fatal bug on this path. */
        BUG_ON(ret == -EIOCBQUEUED);

        /* Emit the send-length trace event only when it is enabled. */
        if (trace_sock_send_length_enabled())
                call_trace_sock_send_length(sock->sk, ret, 0);
        return ret;
}

/*
 * Run security_socket_sendmsg() first. A non-zero result from the hook is
 * returned as is; otherwise the message is passed to sock_sendmsg_nosec()
 * and its result is returned.
 */
static int __sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
        int err = security_socket_sendmsg(sock, msg,
                                          msg_data_left(msg));

        return err ?: sock_sendmsg_nosec(sock, msg);
}

------》 inet_sendmsg()

IPv4:

net/ipv4/af_inet.c

/*
 * inet_sendmsg - sendmsg handler at the inet socket layer.
 * @sock: socket being written to
 * @msg:  message header carrying destination, flags and payload iterator
 * @size: number of bytes to send
 *
 * Returns -EAGAIN when inet_send_prepare() reports a non-zero result.
 * Otherwise the call is dispatched to sk->sk_prot->sendmsg and its result is
 * returned; INDIRECT_CALL_2 names tcp_sendmsg and udp_sendmsg as the expected
 * targets.
 */
int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
        struct sock *sk = sock->sk;

        if (unlikely(inet_send_prepare(sk)))
                return -EAGAIN;

        return INDIRECT_CALL_2(sk->sk_prot->sendmsg, tcp_sendmsg, udp_sendmsg,
                               sk, msg, size);
}
EXPORT_SYMBOL(inet_sendmsg);

------》 udp_sendmsg()

net/ipv4/udp.c

/*
 * udp_sendmsg - send one UDP datagram over IPv4.
 * @sk:  sending socket
 * @msg: message header: optional destination (msg_name), control data
 *       (msg_controllen), flags (msg_flags) and the payload that the getfrag
 *       callback is handed
 * @len: payload length in bytes; anything above 0xFFFF is rejected
 *
 * Outline:
 *   1. If frames are already pending on a corked socket, append to them.
 *   2. Otherwise determine the destination (from msg_name or from the
 *      connected socket), process control messages / IP options, and obtain a
 *      route (cached on the socket for the connected case, looked up
 *      otherwise).
 *   3. Non-corking sends build the skb with ip_make_skb() and transmit it with
 *      udp_send_skb() without taking the socket lock; corking sends go through
 *      ip_append_data() under lock_sock().
 *
 * Returns @len when err ends up 0, otherwise a negative errno.
 */
int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
        struct inet_sock *inet = inet_sk(sk);
        struct udp_sock *up = udp_sk(sk);
        DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);
        struct flowi4 fl4_stack;
        struct flowi4 *fl4;
        /* Grows by sizeof(struct udphdr) below unless appending to pending data. */
        int ulen = len;
        struct ipcm_cookie ipc;
        struct rtable *rt = NULL;
        /* Set when ipc.opt came from control messages and must be kfree()d. */
        int free = 0;
        /* Set when the destination comes from the socket, not from msg_name. */
        int connected = 0;
        __be32 daddr, faddr, saddr;
        u8 tos, scope;
        __be16 dport;
        int err, is_udplite = IS_UDPLITE(sk);
        /* Corking is requested by the socket's CORK bit or by MSG_MORE. */
        int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE;
        int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
        struct sk_buff *skb;
        struct ip_options_data opt_copy;
        int uc_index;

        if (len > 0xFFFF)
                return -EMSGSIZE;

        /*
         *      Check the flags.
         */

        if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */
                return -EOPNOTSUPP;

        /*
         * Callback handed, together with msg, to ip_make_skb() and
         * ip_append_data() further down.
         */
        getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;

        fl4 = &inet->cork.fl.u.ip4;
        if (READ_ONCE(up->pending)) {
                /*
                 * There are pending frames.
                 * The socket lock must be held while it's corked.
                 */
                lock_sock(sk);
                /* Re-check now that the lock is held. */
                if (likely(up->pending)) {
                        if (unlikely(up->pending != AF_INET)) {
                                release_sock(sk);
                                return -EINVAL;
                        }
                        goto do_append_data;
                }
                release_sock(sk);
        }
        ulen += sizeof(struct udphdr);

        /*
         *      Get and verify the address.
         */
        if (usin) {
                if (msg->msg_namelen < sizeof(*usin))
                        return -EINVAL;
                /* AF_UNSPEC is tolerated; any other non-AF_INET family is not. */
                if (usin->sin_family != AF_INET) {
                        if (usin->sin_family != AF_UNSPEC)
                                return -EAFNOSUPPORT;
                }

                daddr = usin->sin_addr.s_addr;
                dport = usin->sin_port;
                if (dport == 0)
                        return -EINVAL;
        } else {
                /* No explicit destination: the socket must be connected. */
                if (sk->sk_state != TCP_ESTABLISHED)
                        return -EDESTADDRREQ;
                daddr = inet->inet_daddr;
                dport = inet->inet_dport;
                /* Open fast path for connected socket.
                   Route will not be used, if at least one option is set.
                 */
                connected = 1;
        }

        ipcm_init_sk(&ipc, inet);
        ipc.gso_size = READ_ONCE(up->gso_size);

        if (msg->msg_controllen) {
                /*
                 * A positive result from udp_cmsg_send() leads to
                 * ip_cmsg_send() being run as well, and the connected fast
                 * path is given up.
                 */
                err = udp_cmsg_send(sk, msg, &ipc.gso_size);
                if (err > 0) {
                        err = ip_cmsg_send(sk, msg, &ipc,
                                           sk->sk_family == AF_INET6);
                        connected = 0;
                }
                if (unlikely(err < 0)) {
                        kfree(ipc.opt);
                        return err;
                }
                if (ipc.opt)
                        free = 1;
        }
        if (!ipc.opt) {
                struct ip_options_rcu *inet_opt;

                /*
                 * No options so far: take a private copy of the socket's own
                 * options (if any) into opt_copy while under the RCU read lock.
                 */
                rcu_read_lock();
                inet_opt = rcu_dereference(inet->inet_opt);
                if (inet_opt) {
                        memcpy(&opt_copy, inet_opt,
                               sizeof(*inet_opt) + inet_opt->opt.optlen);
                        ipc.opt = &opt_copy.opt;
                }
                rcu_read_unlock();
        }

        /* cgroup BPF sendmsg hook; only run for unconnected sends. */
        if (cgroup_bpf_enabled(CGROUP_UDP4_SENDMSG) && !connected) {
                err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk,
                                            (struct sockaddr *)usin,
                                            &msg->msg_namelen,
                                            &ipc.addr);
                if (err)
                        goto out_free;
                if (usin) {
                        if (usin->sin_port == 0) {
                                /* BPF program set invalid port. Reject it. */
                                err = -EINVAL;
                                goto out_free;
                        }
                        /* Re-read the destination after the hook has run. */
                        daddr = usin->sin_addr.s_addr;
                        dport = usin->sin_port;
                }
        }

        /* ipc.addr held the source so far; from here on it holds the destination. */
        saddr = ipc.addr;
        ipc.addr = faddr = daddr;

        /* With the srr option set, route towards the option's faddr instead. */
        if (ipc.opt && ipc.opt->opt.srr) {
                if (!daddr) {
                        err = -EINVAL;
                        goto out_free;
                }
                faddr = ipc.opt->opt.faddr;
                connected = 0;
        }
        tos = get_rttos(&ipc, inet);
        scope = ip_sendmsg_scope(inet, &ipc, msg);
        if (scope == RT_SCOPE_LINK)
                connected = 0;

        /* Choose the output interface index. */
        uc_index = READ_ONCE(inet->uc_index);
        if (ipv4_is_multicast(daddr)) {
                if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif))
                        ipc.oif = READ_ONCE(inet->mc_index);
                if (!saddr)
                        saddr = READ_ONCE(inet->mc_addr);
                connected = 0;
        } else if (!ipc.oif) {
                ipc.oif = uc_index;
        } else if (ipv4_is_lbcast(daddr) && uc_index) {
                /* oif is set, packet is to local broadcast and
                 * uc_index is set. oif is most likely set
                 * by sk_bound_dev_if. If uc_index != oif check if the
                 * oif is an L3 master and uc_index is an L3 slave.
                 * If so, we want to allow the send using the uc_index.
                 */
                if (ipc.oif != uc_index &&
                    ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk),
                                                              uc_index)) {
                        ipc.oif = uc_index;
                }
        }

        /* Still on the connected path: try the route stored on the socket. */
        if (connected)
                rt = dst_rtable(sk_dst_check(sk, 0));

        if (!rt) {
                struct net *net = sock_net(sk);
                __u8 flow_flags = inet_sk_flowi_flags(sk);

                /* Build the flow on the stack and look the route up. */
                fl4 = &fl4_stack;

                flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, tos, scope,
                                   sk->sk_protocol, flow_flags, faddr, saddr,
                                   dport, inet->inet_sport, sk->sk_uid);

                security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
                rt = ip_route_output_flow(net, fl4, sk);
                if (IS_ERR(rt)) {
                        err = PTR_ERR(rt);
                        /* Keep ip_rt_put() at "out" from seeing an error pointer. */
                        rt = NULL;
                        if (err == -ENETUNREACH)
                                IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
                        goto out;
                }

                /* A broadcast route requires SOCK_BROADCAST on the socket. */
                err = -EACCES;
                if ((rt->rt_flags & RTCF_BROADCAST) &&
                    !sock_flag(sk, SOCK_BROADCAST))
                        goto out;
                /* Store the fresh route on the socket for later sends. */
                if (connected)
                        sk_dst_set(sk, dst_clone(&rt->dst));
        }

        if (msg->msg_flags&MSG_CONFIRM)
                goto do_confirm;
back_from_confirm:

        /* Adopt the addresses recorded in the flow. */
        saddr = fl4->saddr;
        if (!ipc.addr)
                daddr = ipc.addr = fl4->daddr;

        /* Lockless fast path for the non-corking case. */
        if (!corkreq) {
                struct inet_cork cork;

                skb = ip_make_skb(sk, fl4, getfrag, msg, ulen,
                                  sizeof(struct udphdr), &ipc, &rt,
                                  &cork, msg->msg_flags);
                /* Only a real skb is transmitted; NULL or an error pointer is not. */
                err = PTR_ERR(skb);
                if (!IS_ERR_OR_NULL(skb))
                        err = udp_send_skb(skb, fl4, &cork);
                goto out;
        }

        lock_sock(sk);
        if (unlikely(up->pending)) {
                /* The socket is already corked while preparing it. */
                /* ... which is an evident application bug. --ANK */
                release_sock(sk);

                net_dbg_ratelimited("socket already corked\n");
                err = -EINVAL;
                goto out;
        }
        /*
         *      Now cork the socket to pend data.
         */
        fl4 = &inet->cork.fl.u.ip4;
        fl4->daddr = daddr;
        fl4->saddr = saddr;
        fl4->fl4_dport = dport;
        fl4->fl4_sport = inet->inet_sport;
        WRITE_ONCE(up->pending, AF_INET);

do_append_data:
        /*
         * When reached through the pending-frames jump near the top, ulen is
         * still the bare payload length (the UDP header size was not added).
         */
        up->len += ulen;
        err = ip_append_data(sk, fl4, getfrag, msg, ulen,
                             sizeof(struct udphdr), &ipc, &rt,
                             corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
        /*
         * Error: drop what is pending. Not corking: push the frames out now.
         * Corking with an empty write queue: clear the pending marker.
         */
        if (err)
                udp_flush_pending_frames(sk);
        else if (!corkreq)
                err = udp_push_pending_frames(sk);
        else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
                WRITE_ONCE(up->pending, 0);
        release_sock(sk);

out:
        ip_rt_put(rt);
out_free:
        /* Options are freed only when they came from control messages. */
        if (free)
                kfree(ipc.opt);
        if (!err)
                return len;
        /*
         * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space.  Reporting
         * ENOBUFS might not be good (it's not tunable per se), but otherwise
         * we don't have a good statistic (IpOutDiscards but it can be too many
         * things).  We could add another new stat but at least for now that
         * seems like overkill.
         */
        if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
                UDP_INC_STATS(sock_net(sk),
                              UDP_MIB_SNDBUFERRORS, is_udplite);
        }
        return err;

do_confirm:
        /*
         * MSG_CONFIRM handling. With MSG_PROBE the neighbour is confirmed
         * here. Unless this is a MSG_PROBE call with zero payload length,
         * control returns to the normal send path; otherwise finish with
         * success.
         */
        if (msg->msg_flags & MSG_PROBE)
                dst_confirm_neigh(&rt->dst, &fl4->daddr);
        if (!(msg->msg_flags&MSG_PROBE) || len)
                goto back_from_confirm;
        err = 0;
        goto out;
}
EXPORT_SYMBOL(udp_sendmsg);
其中,getfrag 由下面这行代码选定;本文以 getfrag = ip_generic_getfrag(即 is_udplite 为 0 的情况)为例:
getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;

--------》 ip_make_skb()

net/ipv4/ip_output.c

/*
 * ip_make_skb - build one skb from caller-supplied data in a single call.
 * @sk:          sending socket
 * @fl4:         flow describing the packet
 * @getfrag:     callback handed on to __ip_append_data() together with @from
 * @from:        opaque source argument for @getfrag
 * @length:      number of bytes to append
 * @transhdrlen: transport header length passed on to __ip_append_data()
 * @ipc:         per-message control data used to set up @cork
 * @rtp:         route pointer handed to ip_setup_cork()
 * @cork:        caller-provided cork state, reinitialised here
 * @flags:       MSG_* flags
 *
 * Works on a local on-stack queue and the caller's @cork rather than on
 * state stored in the socket.
 *
 * Returns NULL when MSG_PROBE is set, an ERR_PTR() when ip_setup_cork() or
 * __ip_append_data() fails, otherwise the result of __ip_make_skb().
 */
struct sk_buff *ip_make_skb(struct sock *sk,
                            struct flowi4 *fl4,
                            int getfrag(void *from, char *to, int offset,
                                        int len, int odd, struct sk_buff *skb),
                            void *from, int length, int transhdrlen,
                            struct ipcm_cookie *ipc, struct rtable **rtp,
                            struct inet_cork *cork, unsigned int flags)
{
        struct sk_buff_head queue;
        int err;

        /* Probing only: nothing is built. */
        if (flags & MSG_PROBE)
                return NULL;

        __skb_queue_head_init(&queue);

        /* Start from a clean cork before ip_setup_cork() fills it in. */
        cork->flags = 0;
        cork->addr = 0;
        cork->opt = NULL;
        err = ip_setup_cork(sk, cork, ipc, rtp);
        if (err)
                return ERR_PTR(err);

        err = __ip_append_data(sk, fl4, &queue, cork,
                               &current->task_frag, getfrag,
                               from, length, transhdrlen, flags);
        if (err) {
                /* Discard whatever was queued before the failure. */
                __ip_flush_pending_frames(sk, &queue, cork);
                return ERR_PTR(err);
        }

        return __ip_make_skb(sk, fl4, &queue, cork);
}

--------》 __ip_append_data()

net/ipv4/ip_output.c

/*
 * __ip_append_data - append @length bytes, fetched through @getfrag, to the
 * chain of skbs on @queue, allocating further skbs when the tail is full.
 *
 * @sk:          sending socket
 * @fl4:         flow; only fl4->daddr is read here (EMSGSIZE report)
 * @queue:       list of pending skbs; its tail skb is extended first
 * @cork:        per-send state read/updated here: length, fragsize,
 *               gso_size, opt, dst, tx_flags
 * @pfrag:       page fragment used by the paged copy path
 * @getfrag:     callback that copies @len bytes into @to; a negative
 *               return is treated as failure
 * @from:        opaque cookie for @getfrag; interpreted as a
 *               struct msghdr * on the MSG_ZEROCOPY and MSG_SPLICE_PAGES
 *               paths
 * @length:      number of bytes to append, including @transhdrlen
 * @transhdrlen: transport header bytes reserved in the first skb; they are
 *               subtracted from @length but not fetched through @getfrag
 * @flags:       MSG_* flags
 *
 * Returns 0 on success. On failure returns a negative errno (-EMSGSIZE,
 * -EINVAL, -ENOBUFS, -EPERM, -EFAULT, -EIO, -ENOMEM, or the error reported
 * by sock_alloc_send_skb(), skb_splice_from_iter() or
 * skb_zerocopy_iter_dgram()). Failures that jump to the "error" label also
 * subtract the not-yet-appended bytes from cork->length and bump
 * IPSTATS_MIB_OUTDISCARDS.
 */
static int __ip_append_data(struct sock *sk,
                            struct flowi4 *fl4,
                            struct sk_buff_head *queue,
                            struct inet_cork *cork,
                            struct page_frag *pfrag,
                            int getfrag(void *from, char *to, int offset,
                                        int len, int odd, struct sk_buff *skb),
                            void *from, int length, int transhdrlen,
                            unsigned int flags)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ubuf_info *uarg = NULL;
        struct sk_buff *skb;
        struct ip_options *opt = cork->opt;
        int hh_len;
        int exthdrlen;
        int mtu;
        int copy;
        int err;
        int offset = 0;
        bool zc = false;
        unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
        int csummode = CHECKSUM_NONE;
        struct rtable *rt = dst_rtable(cork->dst);
        bool paged, hold_tskey, extra_uref = false;
        /* bytes to add to sk->sk_wmem_alloc in one go when leaving */
        unsigned int wmem_alloc_delta = 0;
        u32 tskey = 0;

        skb = skb_peek_tail(queue);

        /* dst header room is only accounted when the queue is still empty */
        exthdrlen = !skb ? rt->dst.header_len : 0;
        mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
        paged = !!cork->gso_size;

        hh_len = LL_RESERVED_SPACE(rt->dst.dev);

        fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
        /* per-fragment payload is rounded down to a multiple of 8 bytes */
        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
        maxnonfragsize = ip_sk_ignore_df(sk) ? IP_MAX_MTU : mtu;

        if (cork->length + length > maxnonfragsize - fragheaderlen) {
                ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
                               mtu - (opt ? opt->optlen : 0));
                return -EMSGSIZE;
        }

        /*
         * transhdrlen > 0 means that this is the first fragment and we wish
         * it won't be fragmented in the future.
         */
        if (transhdrlen &&
            length + fragheaderlen <= mtu &&
            rt->dst.dev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM) &&
            (!(flags & MSG_MORE) || cork->gso_size) &&
            (!exthdrlen || (rt->dst.dev->features & NETIF_F_HW_ESP_TX_CSUM)))
                csummode = CHECKSUM_PARTIAL;

        if ((flags & MSG_ZEROCOPY) && length) {
                struct msghdr *msg = from;

                if (getfrag == ip_generic_getfrag && msg->msg_ubuf) {
                        if (skb_zcopy(skb) && msg->msg_ubuf != skb_zcopy(skb))
                                return -EINVAL;

                        /* Leave uarg NULL if can't zerocopy, callers should
                         * be able to handle it.
                         */
                        if ((rt->dst.dev->features & NETIF_F_SG) &&
                            csummode == CHECKSUM_PARTIAL) {
                                paged = true;
                                zc = true;
                                uarg = msg->msg_ubuf;
                        }
                } else if (sock_flag(sk, SOCK_ZEROCOPY)) {
                        uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
                        if (!uarg)
                                return -ENOBUFS;
                        extra_uref = !skb_zcopy(skb);   /* only ref on new uarg */
                        if (rt->dst.dev->features & NETIF_F_SG &&
                            csummode == CHECKSUM_PARTIAL) {
                                paged = true;
                                zc = true;
                        } else {
                                uarg_to_msgzc(uarg)->zerocopy = 0;
                                skb_zcopy_set(skb, uarg, &extra_uref);
                        }
                }
        } else if ((flags & MSG_SPLICE_PAGES) && length) {
                if (inet_test_bit(HDRINCL, sk))
                        return -EPERM;
                if (rt->dst.dev->features & NETIF_F_SG &&
                    getfrag == ip_generic_getfrag)
                        /* We need an empty buffer to attach stuff to */
                        paged = true;
                else
                        flags &= ~MSG_SPLICE_PAGES;
        }

        /* charged up front; the error path gives back what was not appended */
        cork->length += length;

        hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
                     READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
        if (hold_tskey)
                /* key is the counter value from before the increment */
                tskey = atomic_inc_return(&sk->sk_tskey) - 1;

        /* So, what's going on in the loop below?
         *
         * We use calculated fragment length to generate chained skb,
         * each of segments is IP fragment ready for sending to network after
         * adding appropriate IP header.
         */

        /* empty queue: enter the loop body directly at the allocation step */
        if (!skb)
                goto alloc_new_skb;

        while (length > 0) {
                /* Check if the remaining data fits into current packet. */
                copy = mtu - skb->len;
                if (copy < length)
                        copy = maxfraglen - skb->len;
                if (copy <= 0) {
                        char *data;
                        unsigned int datalen;
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen, alloc_extra;
                        unsigned int pagedlen;
                        struct sk_buff *skb_prev;
alloc_new_skb:
                        skb_prev = skb;
                        /* bytes the previous skb holds beyond maxfraglen;
                         * they are moved into the new skb below
                         */
                        if (skb_prev)
                                fraggap = skb_prev->len - maxfraglen;
                        else
                                fraggap = 0;

                        /*
                         * If remaining data exceeds the mtu,
                         * we know we need more fragment(s).
                         */
                        datalen = length + fraggap;
                        if (datalen > mtu - fragheaderlen)
                                datalen = maxfraglen - fragheaderlen;
                        fraglen = datalen + fragheaderlen;
                        pagedlen = 0;

                        alloc_extra = hh_len + 15;
                        alloc_extra += exthdrlen;

                        /* The last fragment gets additional space at tail.
                         * Note, with MSG_MORE we overallocate on fragments,
                         * because we have no idea what fragment will be
                         * the last.
                         */
                        if (datalen == length + fraggap)
                                alloc_extra += rt->dst.trailer_len;

                        if ((flags & MSG_MORE) &&
                            !(rt->dst.dev->features&NETIF_F_SG))
                                alloclen = mtu;
                        else if (!paged &&
                                 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
                                  !(rt->dst.dev->features & NETIF_F_SG)))
                                alloclen = fraglen;
                        else {
                                /* linear part holds headers only; the
                                 * payload goes to page fragments
                                 */
                                alloclen = fragheaderlen + transhdrlen;
                                pagedlen = datalen - transhdrlen;
                        }

                        alloclen += alloc_extra;

                        /* The skb carrying the transport header goes through
                         * sock_alloc_send_skb(); later ones use alloc_skb(),
                         * and only while sk_wmem_alloc plus the pending
                         * delta stays within twice sk_sndbuf.
                         */
                        if (transhdrlen) {
                                skb = sock_alloc_send_skb(sk, alloclen,
                                                (flags & MSG_DONTWAIT), &err);
                        } else {
                                skb = NULL;
                                if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
                                    2 * sk->sk_sndbuf)
                                        skb = alloc_skb(alloclen,
                                                        sk->sk_allocation);
                                if (unlikely(!skb))
                                        err = -ENOBUFS;
                        }
                        if (!skb)
                                goto error;

                        /*
                         *      Fill in the control structures
                         */
                        skb->ip_summed = csummode;
                        skb->csum = 0;
                        skb_reserve(skb, hh_len);

                        /*
                         *      Find where to start putting bytes.
                         */
                        data = skb_put(skb, fraglen + exthdrlen - pagedlen);
                        skb_set_network_header(skb, exthdrlen);
                        skb->transport_header = (skb->network_header +
                                                 fragheaderlen);
                        data += fragheaderlen + exthdrlen;

                        if (fraggap) {
                                /* move the overhang from skb_prev into the
                                 * new skb and fix up both checksums
                                 */
                                skb->csum = skb_copy_and_csum_bits(
                                        skb_prev, maxfraglen,
                                        data + transhdrlen, fraggap);
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                data += fraggap;
                                pskb_trim_unique(skb_prev, maxfraglen);
                        }

                        copy = datalen - transhdrlen - fraggap - pagedlen;
                        /* [!] NOTE: copy will be negative if pagedlen>0
                         * because then the equation reduces to -fraggap.
                         */
                        if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
                                err = -EFAULT;
                                kfree_skb(skb);
                                goto error;
                        } else if (flags & MSG_SPLICE_PAGES) {
                                copy = 0;
                        }

                        offset += copy;
                        length -= copy + transhdrlen;
                        /* header room and checksum mode apply to the first
                         * skb only
                         */
                        transhdrlen = 0;
                        exthdrlen = 0;
                        csummode = CHECKSUM_NONE;

                        /* only the initial fragment is time stamped */
                        skb_shinfo(skb)->tx_flags = cork->tx_flags;
                        cork->tx_flags = 0;
                        skb_shinfo(skb)->tskey = tskey;
                        tskey = 0;
                        skb_zcopy_set(skb, uarg, &extra_uref);

                        if ((flags & MSG_CONFIRM) && !skb_prev)
                                skb_set_dst_pending_confirm(skb, 1);

                        /*
                         * Put the packet on the pending queue.
                         */
                        if (!skb->destructor) {
                                /* no destructor yet: attach sock_wfree and
                                 * account the skb's truesize
                                 */
                                skb->destructor = sock_wfree;
                                skb->sk = sk;
                                wmem_alloc_delta += skb->truesize;
                        }
                        __skb_queue_tail(queue, skb);
                        continue;
                }

                if (copy > length)
                        copy = length;

                if (!(rt->dst.dev->features&NETIF_F_SG) &&
                    skb_tailroom(skb) >= copy) {
                        /* no scatter-gather: copy into the linear tailroom */
                        unsigned int off;

                        off = skb->len;
                        if (getfrag(from, skb_put(skb, copy),
                                        offset, copy, off, skb) < 0) {
                                /* undo the skb_put() above */
                                __skb_trim(skb, off);
                                err = -EFAULT;
                                goto error;
                        }
                } else if (flags & MSG_SPLICE_PAGES) {
                        struct msghdr *msg = from;

                        err = -EIO;
                        if (WARN_ON_ONCE(copy > msg->msg_iter.count))
                                goto error;

                        err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
                                                   sk->sk_allocation);
                        if (err < 0)
                                goto error;
                        /* a non-negative result is the byte count spliced */
                        copy = err;
                        wmem_alloc_delta += copy;
                } else if (!zc) {
                        /* paged copy through the page fragment @pfrag */
                        int i = skb_shinfo(skb)->nr_frags;

                        err = -ENOMEM;
                        if (!sk_page_frag_refill(sk, pfrag))
                                goto error;

                        skb_zcopy_downgrade_managed(skb);
                        if (!skb_can_coalesce(skb, i, pfrag->page,
                                              pfrag->offset)) {
                                err = -EMSGSIZE;
                                if (i == MAX_SKB_FRAGS)
                                        goto error;

                                /* start a new, initially empty frag slot */
                                __skb_fill_page_desc(skb, i, pfrag->page,
                                                     pfrag->offset, 0);
                                skb_shinfo(skb)->nr_frags = ++i;
                                get_page(pfrag->page);
                        }
                        copy = min_t(int, copy, pfrag->size - pfrag->offset);
                        if (getfrag(from,
                                    page_address(pfrag->page) + pfrag->offset,
                                    offset, copy, skb->len, skb) < 0)
                                goto error_efault;

                        pfrag->offset += copy;
                        skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
                        skb_len_add(skb, copy);
                        wmem_alloc_delta += copy;
                } else {
                        err = skb_zerocopy_iter_dgram(skb, from, copy);
                        if (err < 0)
                                goto error;
                }
                offset += copy;
                length -= copy;
        }

        if (wmem_alloc_delta)
                refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
        return 0;

error_efault:
        err = -EFAULT;
error:
        net_zcopy_put_abort(uarg, extra_uref);
        /* length now holds only the bytes that were not appended */
        cork->length -= length;
        IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
        refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
        /* give back the timestamp key reserved above */
        if (hold_tskey)
                atomic_dec(&sk->sk_tskey);
        return err;
}

--------》 ip_generic_getfrag()

net/ipv4/ip_output.c

/*
 * ip_generic_getfrag - getfrag callback that pulls @len bytes from the
 * iterator of the struct msghdr passed as @from and stores them at @to.
 *
 * When @skb is marked CHECKSUM_PARTIAL the bytes are copied as-is.
 * Otherwise the copy also produces a checksum of the copied bytes, which
 * is folded into skb->csum with csum_block_add() using @odd as the block
 * offset.
 *
 * @offset is not used by this implementation; it is part of the getfrag
 * callback signature.
 *
 * Returns 0 on success, -EFAULT when the full @len bytes could not be
 * copied.
 */
int
ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
        struct msghdr *msg = from;

        if (skb->ip_summed == CHECKSUM_PARTIAL) {
                if (!copy_from_iter_full(to, len, &msg->msg_iter))
                        return -EFAULT;
        } else {
                __wsum csum = 0;
                if (!csum_and_copy_from_iter_full(to, len, &csum, &msg->msg_iter))
                        return -EFAULT;
                skb->csum = csum_block_add(skb->csum, csum, odd);
        }
        return 0;
}
EXPORT_SYMBOL(ip_generic_getfrag);

--------》 csum_and_copy_from_iter_full() ----》 copy_from_user_iter_csum()

net/core/skbuff.c

/*
 * copy_from_user_iter_csum - per-segment step used by
 * csum_and_copy_from_iter_full() for user-space segments.
 *
 * Copies @len bytes from @iter_from to @to + @progress while checksumming,
 * and folds the segment checksum into the running value behind @priv2 at
 * block offset @progress.
 *
 * Returns 0 when csum_and_copy_from_user() yields a non-zero checksum and
 * @len when it yields 0.
 * NOTE(review): this presumably follows the iterate_and_advance
 * convention of returning the bytes left uncopied - confirm against that
 * helper.
 */
static __always_inline
size_t copy_from_user_iter_csum(void __user *iter_from, size_t progress,
                                size_t len, void *to, void *priv2)
{
        __wsum next, *csum = priv2;

        next = csum_and_copy_from_user(iter_from, to + progress, len);
        *csum = csum_block_add(*csum, next, progress);
        return next ? 0 : len;
}

/*
 * csum_and_copy_from_iter_full - copy exactly @bytes bytes from iterator @i
 * into @addr, accumulating their checksum into *@csum.
 *
 * Returns true only when all @bytes were copied. On a short copy the
 * iterator is reverted by the amount already consumed and false is
 * returned. Also returns false (with a one-time warning) when @i is not a
 * data source.
 */
bool csum_and_copy_from_iter_full(void *addr, size_t bytes,
                                  __wsum *csum, struct iov_iter *i)
{
        size_t copied;

        if (WARN_ON_ONCE(!i->data_source))
                return false;
        copied = iterate_and_advance2(i, bytes, addr, csum,
                                      copy_from_user_iter_csum,
                                      memcpy_from_iter_csum);
        if (likely(copied == bytes))
                return true;
        /* partial copy: put the iterator back where it started */
        iov_iter_revert(i, copied);
        return false;
}
EXPORT_SYMBOL(csum_and_copy_from_iter_full);

--------》 csum_and_copy_from_user() ----》 copy_from_user()

include/net/checksum.h


#ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
/*
 * csum_and_copy_from_user - generic fallback, compiled only when
 * _HAVE_ARCH_COPY_AND_CSUM_FROM_USER is not defined.
 *
 * Copies @len bytes from user pointer @src to @dst with copy_from_user()
 * and then checksums the copied bytes with csum_partial(), seeded with ~0U.
 *
 * Returns 0 when copy_from_user() reports a failure; otherwise returns the
 * csum_partial() result.
 * NOTE(review): the ~0U seed presumably keeps a successful result from
 * being 0, so that 0 can signal a fault - confirm against csum_partial().
 */
static __always_inline
__wsum csum_and_copy_from_user (const void __user *src, void *dst,
                                      int len)
{
        if (copy_from_user(dst, src, len))
                return 0;
        return csum_partial(dst, len, ~0U);
}
#endif


原文地址:https://blog.csdn.net/xunknown/article/details/143870408

免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!