@@ -12,7 +12,7 @@
 
 struct kmem_cache *sk_msg_cachep;
 
-static bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce)
+bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce)
 {
 	if (msg->sg.end > msg->sg.start &&
 	    elem_first_coalesce < msg->sg.end)
@@ -707,6 +707,118 @@ static void sk_psock_backlog(struct work_struct *work)
 	mutex_unlock(&psock->work_mutex);
 }
 
+static bool backlog_notify(struct sk_psock *psock, bool m_sched_failed,
+			   bool ingress_empty)
+{
+	/* Notify if:
+	 * 1. We have corked enough bytes
+	 * 2. We have already delayed notification
+	 * 3. Memory allocation failed
+	 * 4. Ingress queue was empty and we're about to add data
+	 */
+	return psock->backlog_since_notify >= TCP_BPF_GSO_SIZE ||
+	       psock->backlog_work_delayed ||
+	       m_sched_failed ||
+	       ingress_empty;
+}
+
+static bool backlog_xfer_to_local(struct sk_psock *psock, struct sock *sk_from,
+				  struct list_head *local_head, u32 *tot_size)
+{
+	struct sock *sk = psock->sk;
+	struct sk_msg *msg, *tmp;
+	u32 size = 0;
+
+	list_for_each_entry_safe(msg, tmp, &psock->backlog_msg, list) {
+		if (msg->sk != sk_from)
+			break;
+
+		if (!__sk_rmem_schedule(sk, msg->sg.size, false))
+			return true;
+
+		list_move_tail(&msg->list, local_head);
+		sk_wmem_queued_add(msg->sk, -msg->sg.size);
+		sock_put(msg->sk);
+		msg->sk = NULL;
+		psock->backlog_since_notify += msg->sg.size;
+		size += msg->sg.size;
+	}
+
+	*tot_size = size;
+	return false;
+}
+
+/* This function handles the transfer of backlogged messages from the sender
+ * backlog queue to the ingress queue of the peer socket. Notification of data
+ * availability will be sent under some conditions.
+ */
+void sk_psock_backlog_msg(struct sk_psock *psock)
+{
+	bool rmem_schedule_failed = false;
+	struct sock *sk_from = NULL;
+	struct sock *sk = psock->sk;
+	LIST_HEAD(local_head);
+	struct sk_msg *msg;
+	bool should_notify;
+	u32 tot_size = 0;
+
+	if (!sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+		return;
+
+	lock_sock(sk);
+	spin_lock(&psock->backlog_msg_lock);
+
+	msg = list_first_entry_or_null(&psock->backlog_msg,
+				       struct sk_msg, list);
+	if (!msg) {
+		should_notify = !list_empty(&psock->ingress_msg);
+		spin_unlock(&psock->backlog_msg_lock);
+		goto notify;
+	}
+
+	sk_from = msg->sk;
+	sock_hold(sk_from);
+
+	rmem_schedule_failed = backlog_xfer_to_local(psock, sk_from,
+						     &local_head, &tot_size);
+	should_notify = backlog_notify(psock, rmem_schedule_failed,
+				       list_empty(&psock->ingress_msg));
+	spin_unlock(&psock->backlog_msg_lock);
+
+	spin_lock_bh(&psock->ingress_lock);
+	list_splice_tail_init(&local_head, &psock->ingress_msg);
+	spin_unlock_bh(&psock->ingress_lock);
+
+	atomic_add(tot_size, &sk->sk_rmem_alloc);
+	sk_mem_charge(sk, tot_size);
+
+notify:
+	if (should_notify) {
+		psock->backlog_since_notify = 0;
+		sk_psock_data_ready(sk, psock);
+		if (!list_empty(&psock->backlog_msg))
+			sk_psock_run_backlog_work(psock, rmem_schedule_failed);
+	} else {
+		sk_psock_run_backlog_work(psock, true);
+	}
+	release_sock(sk);
+
+	if (sk_from) {
+		bool slow = lock_sock_fast(sk_from);
+
+		sk_mem_uncharge(sk_from, tot_size);
+		unlock_sock_fast(sk_from, slow);
+		sock_put(sk_from);
+	}
+}
+
+static void sk_psock_backlog_msg_work(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+
+	sk_psock_backlog_msg(container_of(dwork, struct sk_psock, backlog_work));
+}
+
 struct sk_psock *sk_psock_init(struct sock *sk, int node)
 {
 	struct sk_psock *psock;
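The hunk above re-arms delivery through sk_psock_run_backlog_work(), which is not defined in this file's hunks and presumably comes from the skmsg header in the same series. A minimal sketch of a helper with that calling convention, inferred only from how it is used here, might look like the following; the body, the backlog_work_delayed bookkeeping, and the 1 ms delay are assumptions, not the series' actual implementation.

	/* Hypothetical sketch, not taken from this patch: re-arm the psock
	 * backlog worker, optionally after a short delay when the previous
	 * pass could not make progress.
	 */
	static inline void sk_psock_run_backlog_work(struct sk_psock *psock,
						     bool delayed)
	{
		if (!sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
			return;

		/* Recording the deferral lets backlog_notify() force a
		 * wake-up on the next successful pass.
		 */
		psock->backlog_work_delayed = delayed;
		schedule_delayed_work(&psock->backlog_work,
				      delayed ? msecs_to_jiffies(1) : 0);
	}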
@@ -744,8 +856,11 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
 
 	INIT_DELAYED_WORK(&psock->work, sk_psock_backlog);
 	mutex_init(&psock->work_mutex);
+	INIT_DELAYED_WORK(&psock->backlog_work, sk_psock_backlog_msg_work);
 	INIT_LIST_HEAD(&psock->ingress_msg);
 	spin_lock_init(&psock->ingress_lock);
+	INIT_LIST_HEAD(&psock->backlog_msg);
+	spin_lock_init(&psock->backlog_msg_lock);
 	skb_queue_head_init(&psock->ingress_skb);
 
 	sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED);
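The initializers added in this hunk, together with the fields used in sk_psock_backlog_msg(), imply new struct sk_psock members declared elsewhere (likely in include/linux/skmsg.h as part of the same series). A sketch of the assumed additions, with types inferred from usage only, not from the actual header change:

	/* Hypothetical sketch of the struct sk_psock members this series
	 * appears to add elsewhere; types are inferred from the hunks above.
	 */
	struct sk_psock {
		/* ... existing members (sk, ingress_msg, ingress_lock, work, ...) ... */
		struct delayed_work	backlog_work;		/* runs sk_psock_backlog_msg_work() */
		struct list_head	backlog_msg;		/* per-sender backlog of sk_msg */
		spinlock_t		backlog_msg_lock;	/* protects backlog_msg */
		u32			backlog_since_notify;	/* bytes moved since last data_ready */
		bool			backlog_work_delayed;	/* last re-arm used a delay */
	};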
@@ -799,6 +914,26 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock)
 	__sk_psock_purge_ingress_msg(psock);
 }
 
+static void __sk_psock_purge_backlog_msg(struct sk_psock *psock)
+{
+	struct sk_msg *msg, *tmp;
+
+	spin_lock(&psock->backlog_msg_lock);
+	list_for_each_entry_safe(msg, tmp, &psock->backlog_msg, list) {
+		struct sock *sk_from = msg->sk;
+		bool slow;
+
+		list_del(&msg->list);
+		slow = lock_sock_fast(sk_from);
+		sk_wmem_queued_add(sk_from, -msg->sg.size);
+		sock_put(sk_from);
+		sk_msg_free(sk_from, msg);
+		unlock_sock_fast(sk_from, slow);
+		kfree_sk_msg(msg);
+	}
+	spin_unlock(&psock->backlog_msg_lock);
+}
+
 static void sk_psock_link_destroy(struct sk_psock *psock)
 {
 	struct sk_psock_link *link, *tmp;
@@ -828,7 +963,9 @@ static void sk_psock_destroy(struct work_struct *work)
 	sk_psock_done_strp(psock);
 
 	cancel_delayed_work_sync(&psock->work);
+	cancel_delayed_work_sync(&psock->backlog_work);
 	__sk_psock_zap_ingress(psock);
+	__sk_psock_purge_backlog_msg(psock);
 	mutex_destroy(&psock->work_mutex);
 
 	psock_progs_drop(&psock->progs);
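Nothing in this file's hunks shows the producer side that fills psock->backlog_msg; the enqueue path presumably lives in the redirect code of the same series. What it must do can be read off the consumer and purge paths above: each queued msg pins its sending socket with sock_hold() and charges sk_wmem_queued by msg->sg.size, because sk_psock_backlog_msg() and __sk_psock_purge_backlog_msg() release exactly that. A hedged sketch of such an enqueue helper, with the name and locking as assumptions:

	/* Hypothetical producer-side sketch, reconstructed from what the
	 * consumer and purge paths above undo; not part of this diff.
	 */
	static void sk_psock_backlog_msg_enqueue(struct sk_psock *psock,
						 struct sock *sk_from,
						 struct sk_msg *msg)
	{
		/* Pin the sending socket and charge its write queue; both
		 * are released once the message reaches the peer's ingress
		 * queue or is purged. The send path is assumed to have
		 * already charged sk_from's memory accounting, which
		 * sk_psock_backlog_msg() later drops via sk_mem_uncharge().
		 */
		sock_hold(sk_from);
		msg->sk = sk_from;
		sk_wmem_queued_add(sk_from, msg->sg.size);

		spin_lock(&psock->backlog_msg_lock);
		list_add_tail(&msg->list, &psock->backlog_msg);
		spin_unlock(&psock->backlog_msg_lock);

		sk_psock_run_backlog_work(psock, false);
	}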