IPVS関連データ構造
47682 ワード
アムウェイのウェブサイトでは、カーネルのソースコードの中の構造がどのように定義されていますか?どこで引用されますか?https://elixir.bootlin.com/linux/v4.9.1/source/include/net/ip_vs.h〓〓〓L 1212はsource Insightの検索機能より便利だと感じます.
1——net
Linuxはカーネルレベルの環境隔離を提供しています.一番重要なのはnamespaceの概念です.6種類のnamespacesがあります.Mount namespaces UTS namespaces IPC namespaces PID namespaces Network namespaces User namespaces.
各network namespaceはnet構造を持っています.net定義はinclude/net/netname space.h中
net構造におけるipvsの定義は以下の通りである.
ipvsがサポートするプロトコルは、TCP、UDP、SCTP、AH、ESPを含む.プロトコル情報を格納する仕組みは、ip_vs_プロトdata定義は:include/net/ip_vs.h
同じ定義でinclude/net/ip_vs.h中
5——conn_schedule
接続の最初のメッセージが到着すると、このロジックに入り、あるrealserverにスケジュールして接続を作成します.
1——net
Linuxはカーネルレベルの環境隔離を提供しています.一番重要なのはnamespaceの概念です.6種類のnamespacesがあります.Mount namespaces UTS namespaces IPC namespaces PID namespaces Network namespaces User namespaces.
各network namespaceはnet構造を持っています.net定義はinclude/net/netname space.h中
struct net {
atomic_t passive; /* To decided when the network
* namespace should be freed.
*/
atomic_t count; /* To decided when the network
* namespace should be shut down.
*/
spinlock_t rules_mod_lock;
atomic64_t cookie_gen;
struct list_head list; /* list of network namespaces */
struct list_head cleanup_list; /* namespaces on death row */
struct list_head exit_list; /* Use only net_mutex */
struct user_namespace *user_ns; /* Owning user namespace */
struct ucounts *ucounts;
spinlock_t nsid_lock;
struct idr netns_ids;
struct ns_common ns;
struct proc_dir_entry *proc_net;
struct proc_dir_entry *proc_net_stat;
#ifdef CONFIG_SYSCTL
struct ctl_table_set sysctls;
#endif
struct sock *rtnl; /* rtnetlink socket */
struct sock *genl_sock;
struct list_head dev_base_head;
struct hlist_head *dev_name_head;
struct hlist_head *dev_index_head;
unsigned int dev_base_seq; /* protected by rtnl_mutex */
int ifindex;
unsigned int dev_unreg_count;
/* core fib_rules */
struct list_head rules_ops;
struct net_device *loopback_dev; /* The loopback */
struct netns_core core;
struct netns_mib mib;
struct netns_packet packet;
struct netns_unix unx;
struct netns_ipv4 ipv4;
#if IS_ENABLED(CONFIG_IPV6)
struct netns_ipv6 ipv6;
#endif
#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
struct netns_ieee802154_lowpan ieee802154_lowpan;
#endif
#if defined(CONFIG_IP_SCTP) || defined(CONFIG_IP_SCTP_MODULE)
struct netns_sctp sctp;
#endif
#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE)
struct netns_dccp dccp;
#endif
#ifdef CONFIG_NETFILTER
struct netns_nf nf;
struct netns_xt xt;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct netns_ct ct;
#endif
#if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE)
struct netns_nftables nft;
#endif
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
struct netns_nf_frag nf_frag;
#endif
struct sock *nfnl;
struct sock *nfnl_stash;
#if IS_ENABLED(CONFIG_NETFILTER_NETLINK_ACCT)
struct list_head nfnl_acct_list;
#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
struct list_head nfct_timeout_list;
#endif
#endif
#ifdef CONFIG_WEXT_CORE
struct sk_buff_head wext_nlevents;
#endif
struct net_generic __rcu *gen;
/* Note : following structs are cache line aligned */
#ifdef CONFIG_XFRM
struct netns_xfrm xfrm;
#endif
#if IS_ENABLED(CONFIG_IP_VS)
struct netns_ipvs *ipvs;
#endif
#if IS_ENABLED(CONFIG_MPLS)
struct netns_mpls mpls;
#endif
struct sock *diag_nlsk;
atomic_t fnhe_genid;
};
2——netns_ipvsnet構造におけるipvsの定義は以下の通りである.
#if IS_ENABLED(CONFIG_IP_VS)
struct netns_ipvs *ipvs;
#endif
つまり、ネットごとに.namespaceには、ipvsの構造があります.netnsipvsはinclude/net/ip_に定義されています.vs.h中/* IPVS in network namespace */
struct netns_ipvs {
int gen; /* Generation */
int enable; /* enable like nf_hooks do */
/* Hash table: for real service lookups */
#define IP_VS_RTAB_BITS 4
#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
struct hlist_head rs_table[IP_VS_RTAB_SIZE];
/* ip_vs_app */
struct list_head app_list;
/* ip_vs_proto */
#define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */
struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
/* ip_vs_proto_tcp */
#ifdef CONFIG_IP_VS_PROTO_TCP
#define TCP_APP_TAB_BITS 4
#define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
#define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
struct list_head tcp_apps[TCP_APP_TAB_SIZE];
#endif
/* ip_vs_proto_udp */
#ifdef CONFIG_IP_VS_PROTO_UDP
#define UDP_APP_TAB_BITS 4
#define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
#define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
struct list_head udp_apps[UDP_APP_TAB_SIZE];
#endif
/* ip_vs_proto_sctp */
#ifdef CONFIG_IP_VS_PROTO_SCTP
#define SCTP_APP_TAB_BITS 4
#define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS)
#define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1)
/* Hash table for SCTP application incarnations */
struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
#endif
/* ip_vs_conn */
atomic_t conn_count; /* connection counter */
/* ip_vs_ctl */
struct ip_vs_stats tot_stats; /* Statistics & est. */
int num_services; /* no of virtual services */
/* Trash for destinations */
struct list_head dest_trash;
spinlock_t dest_trash_lock;
struct timer_list dest_trash_timer; /* expiration timer */
/* Service counters */
atomic_t ftpsvc_counter;
atomic_t nullsvc_counter;
atomic_t conn_out_counter;
#ifdef CONFIG_SYSCTL
/* 1/rate drop and drop-entry variables */
struct delayed_work defense_work; /* Work handler */
int drop_rate;
int drop_counter;
atomic_t dropentry;
/* locks in ctl.c */
spinlock_t dropentry_lock; /* drop entry handling */
spinlock_t droppacket_lock; /* drop packet handling */
spinlock_t securetcp_lock; /* state and timeout tables */
/* sys-ctl struct */
struct ctl_table_header *sysctl_hdr;
struct ctl_table *sysctl_tbl;
#endif
/* sysctl variables */
int sysctl_amemthresh;
int sysctl_am_droprate;
int sysctl_drop_entry;
int sysctl_drop_packet;
int sysctl_secure_tcp;
#ifdef CONFIG_IP_VS_NFCT
int sysctl_conntrack;
#endif
int sysctl_snat_reroute;
int sysctl_sync_ver;
int sysctl_sync_ports;
int sysctl_sync_persist_mode;
unsigned long sysctl_sync_qlen_max;
int sysctl_sync_sock_size;
int sysctl_cache_bypass;
int sysctl_expire_nodest_conn;
int sysctl_sloppy_tcp;
int sysctl_sloppy_sctp;
int sysctl_expire_quiescent_template;
int sysctl_sync_threshold[2];
unsigned int sysctl_sync_refresh_period;
int sysctl_sync_retries;
int sysctl_nat_icmp_send;
int sysctl_pmtu_disc;
int sysctl_backup_only;
int sysctl_conn_reuse_mode;
int sysctl_schedule_icmp;
int sysctl_ignore_tunneled;
/* ip_vs_lblc */
int sysctl_lblc_expiration;
struct ctl_table_header *lblc_ctl_header;
struct ctl_table *lblc_ctl_table;
/* ip_vs_lblcr */
int sysctl_lblcr_expiration;
struct ctl_table_header *lblcr_ctl_header;
struct ctl_table *lblcr_ctl_table;
/* ip_vs_est */
struct list_head est_list; /* estimator list */
spinlock_t est_lock;
struct timer_list est_timer; /* Estimation timer */
/* ip_vs_sync */
spinlock_t sync_lock;
struct ipvs_master_sync_state *ms;
spinlock_t sync_buff_lock;
struct task_struct **backup_threads;
int threads_mask;
volatile int sync_state;
struct mutex sync_mutex;
struct ipvs_sync_daemon_cfg mcfg; /* Master Configuration */
struct ipvs_sync_daemon_cfg bcfg; /* Backup Configuration */
/* net name space ptr */
struct net *net; /* Needed by timer routines */
/* Number of heterogeneous destinations, needed becaus heterogeneous
* are not supported when synchronization is enabled.
*/
unsigned int mixed_address_family_dests;
};
3——ip_vs_プロトダタipvsがサポートするプロトコルは、TCP、UDP、SCTP、AH、ESPを含む.プロトコル情報を格納する仕組みは、ip_vs_プロトdata定義は:include/net/ip_vs.h
/* protocol data per netns */
struct ip_vs_proto_data {
struct ip_vs_proto_data *next;
struct ip_vs_protocol *pp;
int *timeout_table; /* protocol timeout table */
atomic_t appcnt; /* counter of proto app incs. */
struct tcp_states_t *tcp_state_table;
};
ip_vs_protocolはinclude/net/ip_で定義されています.vs.h:struct ip_vs_protocol {
struct ip_vs_protocol *next;
char *name;
u16 protocol;
u16 num_states;
int dont_defrag;
void (*init)(struct ip_vs_protocol *pp);
void (*exit)(struct ip_vs_protocol *pp);
int (*init_netns)(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd);
void (*exit_netns)(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd);
int (*conn_schedule)(struct netns_ipvs *ipvs,
int af, struct sk_buff *skb,
struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp,
struct ip_vs_iphdr *iph);
struct ip_vs_conn *
(*conn_in_get)(struct netns_ipvs *ipvs,
int af,
const struct sk_buff *skb,
const struct ip_vs_iphdr *iph);
struct ip_vs_conn *
(*conn_out_get)(struct netns_ipvs *ipvs,
int af,
const struct sk_buff *skb,
const struct ip_vs_iphdr *iph);
int (*snat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp,
struct ip_vs_conn *cp, struct ip_vs_iphdr *iph);
int (*dnat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp,
struct ip_vs_conn *cp, struct ip_vs_iphdr *iph);
int (*csum_check)(int af, struct sk_buff *skb,
struct ip_vs_protocol *pp);
const char *(*state_name)(int state);
void (*state_transition)(struct ip_vs_conn *cp, int direction,
const struct sk_buff *skb,
struct ip_vs_proto_data *pd);
int (*register_app)(struct netns_ipvs *ipvs, struct ip_vs_app *inc);
void (*unregister_app)(struct netns_ipvs *ipvs, struct ip_vs_app *inc);
int (*app_conn_bind)(struct ip_vs_conn *cp);
void (*debug_packet)(int af, struct ip_vs_protocol *pp,
const struct sk_buff *skb,
int offset,
const char *msg);
void (*timeout_change)(struct ip_vs_proto_data *pd, int flags);
};
一連の関数を宣言しただけで、関数が実現されていないことが分かります.関数の実装は、特定のプロトコルに対応するファイルにあります.例えば、net/netfilter/ipvs/ip_vs_プロトtcp.c.struct ip_vs_protocol ip_vs_protocol_tcp = {
.name = "TCP",
.protocol = IPPROTO_TCP,
.num_states = IP_VS_TCP_S_LAST,
.dont_defrag = 0,
.init = NULL,
.exit = NULL,
.init_netns = __ip_vs_tcp_init,
.exit_netns = __ip_vs_tcp_exit,
.register_app = tcp_register_app,
.unregister_app = tcp_unregister_app,
.conn_schedule = tcp_conn_schedule,
.conn_in_get = ip_vs_conn_in_get_proto,
.conn_out_get = ip_vs_conn_out_get_proto,
.snat_handler = tcp_snat_handler,
.dnat_handler = tcp_dnat_handler,
.csum_check = tcp_csum_check,
.state_name = tcp_state_name,
.state_transition = tcp_state_transition,
.app_conn_bind = tcp_app_conn_bind,
.debug_packet = ip_vs_tcpudp_debug_packet,
.timeout_change = tcp_timeout_change,
};
4——ip_vs_コンサート同じ定義でinclude/net/ip_vs.h中
/* IP_VS structure allocated for each dynamically scheduled connection */
struct ip_vs_conn {
struct hlist_node c_list; /* hashed list heads */
/* Protocol, addresses and port numbers */
__be16 cport;
__be16 dport;
__be16 vport;
u16 af; /* address family */
union nf_inet_addr caddr; /* client address */
union nf_inet_addr vaddr; /* virtual address */
union nf_inet_addr daddr; /* destination address */
volatile __u32 flags; /* status flags */
__u16 protocol; /* Which protocol (TCP/UDP) */
__u16 daf; /* Address family of the dest */
struct netns_ipvs *ipvs;
/* counter and timer */
atomic_t refcnt; /* reference count */
struct timer_list timer; /* Expiration timer */
volatile unsigned long timeout; /* timeout */
/* Flags and state transition */
spinlock_t lock; /* lock for state transition */
volatile __u16 state; /* state info */
volatile __u16 old_state; /* old state, to be used for
* state transition triggerd
* synchronization
*/
__u32 fwmark; /* Fire wall mark from skb */
unsigned long sync_endtime; /* jiffies + sent_retries */
/* Control members */
struct ip_vs_conn *control; /* Master control connection */
atomic_t n_control; /* Number of controlled ones */
struct ip_vs_dest *dest; /* real server */
atomic_t in_pkts; /* incoming packet counter */
/* Packet transmitter for different forwarding methods. If it
* mangles the packet, it must return NF_DROP or better NF_STOLEN,
* otherwise this must be changed to a sk_buff **.
* NF_ACCEPT can be returned when destination is local.
*/
int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
/* Note: we can group the following members into a structure,
* in order to save more space, and the following members are
* only used in VS/NAT anyway
*/
struct ip_vs_app *app; /* bound ip_vs_app object */
void *app_data; /* Application private data */
struct ip_vs_seq in_seq; /* incoming seq. struct */
struct ip_vs_seq out_seq; /* outgoing seq. struct */
const struct ip_vs_pe *pe;
char *pe_data;
__u8 pe_data_len;
struct rcu_head rcu_head;
};
中にはパッキンxmit関数の役割はデータを送ることです.DR/NAT/TUNはそれぞれ異なる関数があります.5——conn_schedule
接続の最初のメッセージが到着すると、このロジックに入り、あるrealserverにスケジュールして接続を作成します.