ip_vs分析の実現(10)
34138 ワード
本文書の著作権(Copyright)はすべてyfydzに帰属し、GPLの下で公開します。自由にコピー・転載できますが、転載の際は文書の完全性を維持してください。商業目的での利用は厳禁です。
msn:[email protected]
ソース:http://yfydz.cublog.cn
msn:[email protected]
ソース:http://yfydz.cublog.cn
13. IPVS制御
IPVS制御とは、IPVSが提供する仮想サービスや実サーバ(宛先サーバ)などの各種パラメータを設定・取得する処理である。
IPVSの制御情報はsetsockoptシステムコールを通じてカーネルに渡され、ユーザー空間側の管理ツールはipvsadmである。
IPVSの制御コードは net/ipv4/ipvs/ip_vs_ctl.c にある。
13.1 sockoptの登録
netfilterの struct nf_sockopt_ops 構造体を定義し、登録する:
// sockopt operation table for IPVS: names the handler used for "set"
// commands and the handler used for "get" commands, together with the
// command-number range each one accepts.
static struct nf_sockopt_ops ip_vs_sockopts = {
// protocol family the options belong to
.pf = PF_INET,
// range of "set" command numbers, starting at IP_VS_BASE_CTL
// NOTE(review): the +1 suggests optmax is an exclusive bound -- confirm
.set_optmin = IP_VS_BASE_CTL,
.set_optmax = IP_VS_SO_SET_MAX+1,
// handler for "set" commands
.set = do_ip_vs_set_ctl,
// range of "get" command numbers, same base as the "set" range
.get_optmin = IP_VS_BASE_CTL,
.get_optmax = IP_VS_SO_GET_MAX+1,
// handler for "get" commands
.get = do_ip_vs_get_ctl,
};
// registration of the table (excerpt; the enclosing function is not shown)
ret = nf_register_sockopt(&ip_vs_sockopts);
13.2 設定(set)制御
/*
 * Handle an IPVS "set" control command.
 *
 * @sk:   socket the option was issued on (not used here)
 * @cmd:  one of the IP_VS_SO_SET_* commands
 * @user: user-space pointer to the command argument
 * @len:  byte length of the argument; must equal set_arglen[] for @cmd
 *
 * Returns 0 on success or a negative errno: -EPERM without CAP_NET_ADMIN,
 * -EINVAL for a bad length or an unknown command, -EFAULT when the
 * argument cannot be copied in or names a protocol other than TCP/UDP,
 * -ERESTARTSYS when waiting for the mutex is interrupted, -ESRCH when the
 * addressed service does not exist, -EEXIST when adding a service that
 * already exists, otherwise whatever the ip_vs_* worker returns.
 */
static int
do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
	int ret;
	unsigned char arg[MAX_ARG_LEN];
	struct ip_vs_service_user *usvc;
	struct ip_vs_service *svc;
	struct ip_vs_dest_user *udest;

	/* only privileged callers may change the configuration */
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	/* never copy more than the on-stack buffer can hold */
	if (len > MAX_ARG_LEN)
		return -EINVAL;

	/* the argument length must match the one recorded for this command */
	if (len != set_arglen[SET_CMDID(cmd)]) {
		IP_VS_ERR("set_ctl: len %u != %u\n",
			  len, set_arglen[SET_CMDID(cmd)]);
		return -EINVAL;
	}

	/* pull the argument into kernel memory */
	if (copy_from_user(arg, user, len) != 0)
		return -EFAULT;

	/* increase the module use count */
	ip_vs_use_count_inc();

	/* serialize against other control operations */
	if (mutex_lock_interruptible(&__ip_vs_mutex)) {
		ret = -ERESTARTSYS;
		goto out_dec;
	}

	/* commands that do not address a particular service */
	if (cmd == IP_VS_SO_SET_FLUSH) {
		/* Flush the virtual service */
		ret = ip_vs_flush();
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
		/* Set timeout values for (tcp tcpfin udp) */
		ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
		/* start the sync thread */
		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;

		ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
		/* stop the sync thread */
		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;

		ret = stop_sync_thread(dm->state);
		goto out_unlock;
	}

	/* the service description comes first, the destination right after */
	usvc = (struct ip_vs_service_user *)arg;
	udest = (struct ip_vs_dest_user *)(usvc + 1);

	if (cmd == IP_VS_SO_SET_ZERO) {
		/* if no service address is set, zero counters in all */
		if (!usvc->fwmark && !usvc->addr && !usvc->port) {
			ret = ip_vs_zero_all();
			goto out_unlock;
		}
	}

	/* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
	if (usvc->protocol != IPPROTO_TCP && usvc->protocol != IPPROTO_UDP) {
		IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
			  usvc->protocol, NIPQUAD(usvc->addr),
			  ntohs(usvc->port), usvc->sched_name);
		ret = -EFAULT;
		goto out_unlock;
	}

	/* Lookup the exact service by <protocol, addr, port> or fwmark */
	if (usvc->fwmark == 0)
		svc = __ip_vs_service_get(usvc->protocol,
					  usvc->addr, usvc->port);
	else
		svc = __ip_vs_svc_fwm_get(usvc->fwmark);

	/* everything except ADD needs an existing service of that protocol */
	if (cmd != IP_VS_SO_SET_ADD
	    && (svc == NULL || svc->protocol != usvc->protocol)) {
		ret = -ESRCH;
		goto out_unlock;
	}

	switch (cmd) {
	case IP_VS_SO_SET_ADD:
		if (svc != NULL)
			ret = -EEXIST;
		else
			ret = ip_vs_add_service(usvc, &svc);
		break;
	case IP_VS_SO_SET_EDIT:
		ret = ip_vs_edit_service(svc, usvc);
		break;
	case IP_VS_SO_SET_DEL:
		ret = ip_vs_del_service(svc);
		/*
		 * On success skip the put below: the delete path may already
		 * have freed the service.
		 */
		if (!ret)
			goto out_unlock;
		break;
	case IP_VS_SO_SET_ZERO:
		ret = ip_vs_zero_service(svc);
		break;
	case IP_VS_SO_SET_ADDDEST:
		ret = ip_vs_add_dest(svc, udest);
		break;
	case IP_VS_SO_SET_EDITDEST:
		ret = ip_vs_edit_dest(svc, udest);
		break;
	case IP_VS_SO_SET_DELDEST:
		ret = ip_vs_del_dest(svc, udest);
		break;
	default:
		ret = -EINVAL;
	}

	/* drop the reference taken by the lookup (or by a successful add) */
	if (svc)
		ip_vs_service_put(svc);

out_unlock:
	mutex_unlock(&__ip_vs_mutex);
out_dec:
	/* decrease the module use count */
	ip_vs_use_count_dec();

	return ret;
}
13.3 取得(get)制御
/*
 * Handle an IPVS "get" control command.
 *
 * @sk:   socket the option was issued on (not used here)
 * @cmd:  one of the IP_VS_SO_GET_* commands
 * @user: user-space buffer; read for the request, written with the reply
 * @len:  in: size of @user; for IP_VS_SO_GET_VERSION it is set to the
 *        length of the returned string including the terminating NUL
 *
 * Returns 0 on success or a negative errno: -EPERM without CAP_NET_ADMIN,
 * -EINVAL for a bad length or unknown command, -EFAULT on a failed user
 * copy, -ERESTARTSYS when waiting for the mutex is interrupted, -ESRCH
 * when the requested service does not exist.
 */
static int
do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
	unsigned char arg[128];
	int ret = 0;

	/* only privileged callers may read the configuration */
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	/* the buffer must be at least as large as the request header */
	if (*len < get_arglen[GET_CMDID(cmd)]) {
		IP_VS_ERR("get_ctl: len %u < %u\n",
			  *len, get_arglen[GET_CMDID(cmd)]);
		return -EINVAL;
	}

	/* copy in only the fixed-size request header */
	if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
		return -EFAULT;

	if (mutex_lock_interruptible(&__ip_vs_mutex))
		return -ERESTARTSYS;

	switch (cmd) {
	case IP_VS_SO_GET_VERSION:
	{
		/* version string plus connection table size */
		char buf[64];

		sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
			NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
		if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
			ret = -EFAULT;
			goto out;
		}
		*len = strlen(buf)+1;
	}
	break;

	case IP_VS_SO_GET_INFO:
	{
		/* version code, connection table size, number of services */
		struct ip_vs_getinfo info;

		info.version = IP_VS_VERSION_CODE;
		info.size = IP_VS_CONN_TAB_SIZE;
		info.num_services = ip_vs_num_services;
		if (copy_to_user(user, &info, sizeof(info)) != 0)
			ret = -EFAULT;
	}
	break;

	case IP_VS_SO_GET_SERVICES:
	{
		/* list of services; caller states how many entries fit */
		struct ip_vs_get_services *get;
		int size;

		get = (struct ip_vs_get_services *)arg;
		size = sizeof(*get) +
			sizeof(struct ip_vs_service_entry) * get->num_services;
		/* buffer must be exactly header + num_services entries */
		if (*len != size) {
			IP_VS_ERR("length: %u != %u\n", *len, size);
			ret = -EINVAL;
			goto out;
		}
		ret = __ip_vs_get_service_entries(get, user);
	}
	break;

	case IP_VS_SO_GET_SERVICE:
	{
		/* one service, looked up by fwmark or <protocol,addr,port> */
		struct ip_vs_service_entry *entry;
		struct ip_vs_service *svc;

		entry = (struct ip_vs_service_entry *)arg;
		if (entry->fwmark)
			svc = __ip_vs_svc_fwm_get(entry->fwmark);
		else
			svc = __ip_vs_service_get(entry->protocol,
						  entry->addr, entry->port);
		if (svc) {
			/* fill the entry from the service, then hand it back */
			ip_vs_copy_service(entry, svc);
			if (copy_to_user(user, entry, sizeof(*entry)) != 0)
				ret = -EFAULT;
			ip_vs_service_put(svc);
		} else
			ret = -ESRCH;
	}
	break;

	case IP_VS_SO_GET_DESTS:
	{
		/* list of destinations; caller states how many entries fit */
		struct ip_vs_get_dests *get;
		int size;

		get = (struct ip_vs_get_dests *)arg;
		size = sizeof(*get) +
			sizeof(struct ip_vs_dest_entry) * get->num_dests;
		/* buffer must be exactly header + num_dests entries */
		if (*len != size) {
			IP_VS_ERR("length: %u != %u\n", *len, size);
			ret = -EINVAL;
			goto out;
		}
		ret = __ip_vs_get_dest_entries(get, user);
	}
	break;

	case IP_VS_SO_GET_TIMEOUT:
	{
		/* current timeout values */
		struct ip_vs_timeout_user t;

		__ip_vs_get_timeouts(&t);
		if (copy_to_user(user, &t, sizeof(t)) != 0)
			ret = -EFAULT;
	}
	break;

	case IP_VS_SO_GET_DAEMON:
	{
		/*
		 * Sync daemon info: slot 0 describes the master, slot 1 the
		 * backup; a slot stays zeroed when that state is not active.
		 */
		struct ip_vs_daemon_user d[2];

		memset(&d, 0, sizeof(d));
		if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
			d[0].state = IP_VS_STATE_MASTER;
			strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
			d[0].syncid = ip_vs_master_syncid;
		}
		if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
			d[1].state = IP_VS_STATE_BACKUP;
			strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
			d[1].syncid = ip_vs_backup_syncid;
		}
		if (copy_to_user(user, &d, sizeof(d)) != 0)
			ret = -EFAULT;
	}
	break;

	default:
		ret = -EINVAL;
	}

out:
	mutex_unlock(&__ip_vs_mutex);
	return ret;
}
13.2 サービス制御
IPVSサービスはIPVSが提供する仮想サービスであり、struct ip_vs_service 構造体で記述される。以下では、その追加・変更・削除などの処理を見る。
ipvsadm でサービスを追加するコマンド例は次のとおり:
ipvsadm -A -t/u v_srv_ip:vport -s scheduler
13.2.1
/*
* Add a service into the service hash table
*/
static int
ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
{
int ret = 0;
struct ip_vs_scheduler *sched = NULL;
struct ip_vs_service *svc = NULL;
/* increase the module use count */
// IPVS , IPVS
ip_vs_use_count_inc();
/* Lookup the scheduler by 'u->sched_name' */
//
sched = ip_vs_scheduler_get(u->sched_name);
if (sched == NULL) {
IP_VS_INFO("Scheduler module ip_vs_%s not found
",
u->sched_name);
ret = -ENOENT;
goto out_mod_dec;
}
//
svc = (struct ip_vs_service *)
kmalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
if (svc == NULL) {
IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.
");
ret = -ENOMEM;
goto out_err;
}
memset(svc, 0, sizeof(struct ip_vs_service));
/* I'm the first user of the service */
// : 1, 0
atomic_set(&svc->usecnt, 1);
atomic_set(&svc->refcnt, 0);
//
svc->protocol = u->protocol;
svc->addr = u->addr;
svc->port = u->port;
svc->fwmark = u->fwmark;
svc->flags = u->flags;
svc->timeout = u->timeout * HZ;
svc->netmask = u->netmask;
//
//
INIT_LIST_HEAD(&svc->destinations);
//
rwlock_init(&svc->sched_lock);
//
spin_lock_init(&svc->stats.lock);
/* Bind the scheduler */
//
ret = ip_vs_bind_scheduler(svc, sched);
if (ret)
goto out_err;
// sched
sched = NULL;
/* Update the virtual service counters */
// FTP 0 , netfilter helper,
// helper, ,
if (svc->port == FTPPORT)
atomic_inc(&ip_vs_ftpsvc_counter);
else if (svc->port == 0)
atomic_inc(&ip_vs_nullsvc_counter);
// , ,
ip_vs_new_estimator(&svc->stats);
// IPVS
ip_vs_num_services++;
/* Hash the service into the service table */
// HASH ,
write_lock_bh(&__ip_vs_svc_lock);
ip_vs_svc_hash(svc);
write_unlock_bh(&__ip_vs_svc_lock);
//
*svc_p = svc;
//
return 0;
out_err:
//
if (svc != NULL) {
//
if (svc->scheduler)
ip_vs_unbind_scheduler(svc);
if (svc->inc) {
//
local_bh_disable();
ip_vs_app_inc_put(svc->inc);
local_bh_enable();
}
kfree(svc);
}
//
ip_vs_scheduler_put(sched);
out_mod_dec:
/* decrease the module use count */
// IPVS
ip_vs_use_count_dec();
return ret;
}
13.2.2
/*
 *	Edit a service and bind it with a new scheduler
 *
 *	@svc: service to modify
 *	@u:   new parameters (flags, timeout in seconds, netmask, sched_name)
 *
 *	Returns 0 on success, -ENOENT when the scheduler named in
 *	u->sched_name is not found, or the error from unbinding/binding
 *	the scheduler.
 */
static int
ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u)
{
	struct ip_vs_scheduler *sched, *old_sched;
	int ret = 0;

	/*
	 * Lookup the scheduler, by 'u->sched_name'
	 */
	sched = ip_vs_scheduler_get(u->sched_name);
	if (sched == NULL) {
		IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
			   u->sched_name);
		return -ENOENT;
	}

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 * Wait until all other svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);

	/*
	 * Set the flags and timeout value
	 * (the service stays hashed, so keep the HASHED flag set)
	 */
	svc->flags = u->flags | IP_VS_SVC_F_HASHED;
	svc->timeout = u->timeout * HZ;
	svc->netmask = u->netmask;

	/*
	 * old_sched is the scheduler whose reference is dropped at "out".
	 * If the scheduler does not change, that drops the extra reference
	 * taken by the lookup above.
	 */
	old_sched = svc->scheduler;
	if (sched != old_sched) {
		/*
		 * Unbind the old scheduler
		 */
		if ((ret = ip_vs_unbind_scheduler(svc))) {
			/* nothing changed: release the new scheduler instead */
			old_sched = sched;
			goto out;
		}

		/*
		 * Bind the new scheduler
		 */
		if ((ret = ip_vs_bind_scheduler(svc, sched))) {
			/*
			 * If ip_vs_bind_scheduler fails, restore the old
			 * scheduler.
			 * The main reason of failure is out of memory.
			 *
			 * The question is if the old scheduler can be
			 * restored all the time. TODO: if it cannot be
			 * restored some time, we must delete the service,
			 * otherwise the system may crash.
			 */
			ip_vs_bind_scheduler(svc, old_sched);
			old_sched = sched;
			goto out;
		}
	}

out:
	write_unlock_bh(&__ip_vs_svc_lock);

	if (old_sched)
		ip_vs_scheduler_put(old_sched);

	return ret;
}
13.2.3
/*
 *	Remove one service: take it out of the service table, wait for
 *	the other users to finish, then tear it down.
 *
 *	Returns 0 on success, -EEXIST when @svc is NULL.
 */
static int ip_vs_del_service(struct ip_vs_service *svc)
{
	if (!svc)
		return -EEXIST;

	write_lock_bh(&__ip_vs_svc_lock);

	/* no new lookups can find the service once it is unhashed */
	ip_vs_svc_unhash(svc);

	/* spin until only one user of the service remains */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);

	/* release everything that hangs off the service */
	__ip_vs_del_service(svc);

	write_unlock_bh(&__ip_vs_svc_lock);

	return 0;
}
/*
 *	Tear down a service that has already been removed from the table.
 *	- The service must be unlinked, unlocked and not referenced!
 *	- We are called under _bh lock
 */
static void __ip_vs_del_service(struct ip_vs_service *svc)
{
	struct ip_vs_scheduler *bound_sched;
	struct ip_vs_dest *cur, *tmp;

	/* one service fewer, and its statistics are no longer estimated */
	ip_vs_num_services -= 1;
	ip_vs_kill_estimator(&svc->stats);

	/* detach the scheduler, then drop the reference held on it */
	bound_sched = svc->scheduler;
	ip_vs_unbind_scheduler(svc);
	if (bound_sched != NULL)
		ip_vs_scheduler_put(bound_sched);

	/* release the application incarnation, if one is attached */
	if (svc->inc != NULL) {
		ip_vs_app_inc_put(svc->inc);
		svc->inc = NULL;
	}

	/* unlink and delete every destination of the service */
	list_for_each_entry_safe(cur, tmp, &svc->destinations, n_list) {
		__ip_vs_unlink_dest(svc, cur, 0);
		__ip_vs_del_dest(cur);
	}

	/* undo the FTP / port-0 accounting done when the service was added */
	if (svc->port == FTPPORT)
		atomic_dec(&ip_vs_ftpsvc_counter);
	else if (svc->port == 0)
		atomic_dec(&ip_vs_nullsvc_counter);

	/* free the structure only if nothing refers to it any more */
	if (!atomic_read(&svc->refcnt))
		kfree(svc);

	/* decrease the module use count */
	ip_vs_use_count_dec();
}
13.2.4
/*
 *	Delete every virtual service, first from the table hashed by
 *	<protocol,addr,port>, then from the table hashed by fwmark.
 *	Always returns 0.
 */
static int ip_vs_flush(void)
{
	struct ip_vs_service *cur, *next;
	int bucket;

	/* pass 1: services hashed by <protocol,addr,port> */
	for (bucket = 0; bucket < IP_VS_SVC_TAB_SIZE; bucket++) {
		list_for_each_entry_safe(cur, next,
					 &ip_vs_svc_table[bucket], s_list) {
			write_lock_bh(&__ip_vs_svc_lock);
			ip_vs_svc_unhash(cur);
			/* wait for the use count to reach zero */
			IP_VS_WAIT_WHILE(atomic_read(&cur->usecnt) > 0);
			__ip_vs_del_service(cur);
			write_unlock_bh(&__ip_vs_svc_lock);
		}
	}

	/* pass 2: services hashed by fwmark, handled the same way */
	for (bucket = 0; bucket < IP_VS_SVC_TAB_SIZE; bucket++) {
		list_for_each_entry_safe(cur, next,
					 &ip_vs_svc_fwm_table[bucket], f_list) {
			write_lock_bh(&__ip_vs_svc_lock);
			ip_vs_svc_unhash(cur);
			/* wait for the use count to reach zero */
			IP_VS_WAIT_WHILE(atomic_read(&cur->usecnt) > 0);
			__ip_vs_del_service(cur);
			write_unlock_bh(&__ip_vs_svc_lock);
		}
	}

	return 0;
}
13.2.5
/*
* Zero counters in a service or all services
*/
//
static int ip_vs_zero_service(struct ip_vs_service *svc)
{
struct ip_vs_dest *dest;
write_lock_bh(&__ip_vs_svc_lock);
//
list_for_each_entry(dest, &svc->destinations, n_list) {
//
ip_vs_zero_stats(&dest->stats);
}
//
ip_vs_zero_stats(&svc->stats);
write_unlock_bh(&__ip_vs_svc_lock);
return 0;
}
//
static int ip_vs_zero_all(void)
{
int idx;
struct ip_vs_service *svc;
// HASH
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
ip_vs_zero_service(svc);
}
}
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
ip_vs_zero_service(svc);
}
}
ip_vs_zero_stats(&ip_vs_stats);
return 0;
}
13.3 実サーバ(宛先)制御
実サーバ(宛先サーバ)は struct ip_vs_dest 構造体で記述される。以下では、その追加・変更・削除の処理を見る。
ipvsadm で実サーバを追加するコマンド例は次のとおり:
ipvsadm -a -t/u v_srv_ip:vport -r dest_ip:dest_port -w weight
13.3.1
/*
* Add a destination into an existing service
*/
static int
ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
{
struct ip_vs_dest *dest;
__u32 daddr = udest->addr;
__u16 dport = udest->port;
int ret;
EnterFunction(2);
// 0
if (udest->weight < 0) {
IP_VS_ERR("ip_vs_add_dest(): server weight less than zero
");
return -ERANGE;
}
//
if (udest->l_threshold > udest->u_threshold) {
IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
"upper threshold
");
return -ERANGE;
}
/*
* Check if the dest already exists in the list
*/
//
dest = ip_vs_lookup_dest(svc, daddr, dport);
if (dest != NULL) {
//
IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists
");
return -EEXIST;
}
/*
* Check if the dest already exists in the trash and
* is from the same service
*/
//
dest = ip_vs_trash_get_dest(svc, daddr, dport);
if (dest != NULL) {
IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
"dest->refcnt=%d, service %u/%u.%u.%u.%u:%u
",
NIPQUAD(daddr), ntohs(dport),
atomic_read(&dest->refcnt),
dest->vfwmark,
NIPQUAD(dest->vaddr),
ntohs(dest->vport));
// ,
//
__ip_vs_update_dest(svc, dest, udest);
/*
* Get the destination from the trash
*/
//
list_del(&dest->n_list);
//
ip_vs_new_estimator(&dest->stats);
write_lock_bh(&__ip_vs_svc_lock);
/*
* Wait until all other svc users go away.
*/
IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
//
list_add(&dest->n_list, &svc->destinations);
svc->num_dests++;
/* call the update_service function of its scheduler */
// ,
svc->scheduler->update_service(svc);
write_unlock_bh(&__ip_vs_svc_lock);
return 0;
}
/*
* Allocate and initialize the dest structure
*/
// ,
ret = ip_vs_new_dest(svc, udest, &dest);
if (ret) {
return ret;
}
/*
* Add the dest entry into the list
*/
//
atomic_inc(&dest->refcnt);
write_lock_bh(&__ip_vs_svc_lock);
/*
* Wait until all other svc users go away.
*/
//
IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
//
list_add(&dest->n_list, &svc->destinations);
svc->num_dests++;
/* call the update_service function of its scheduler */
// ,
svc->scheduler->update_service(svc);
write_unlock_bh(&__ip_vs_svc_lock);
LeaveFunction(2);
return 0;
}
/*
* Create a destination for the given service
*/
//
static int
ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
struct ip_vs_dest **dest_p)
{
struct ip_vs_dest *dest;
unsigned atype;
EnterFunction(2);
// IP , 127.0.0.1
atype = inet_addr_type(udest->addr);
if (atype != RTN_LOCAL && atype != RTN_UNICAST)
return -EINVAL;
//
dest = kmalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
if (dest == NULL) {
IP_VS_ERR("ip_vs_new_dest: kmalloc failed.
");
return -ENOMEM;
}
memset(dest, 0, sizeof(struct ip_vs_dest));
//
dest->protocol = svc->protocol;
dest->vaddr = svc->addr;
dest->vport = svc->port;
dest->vfwmark = svc->fwmark;
dest->addr = udest->addr;
dest->port = udest->port;
//
//
atomic_set(&dest->activeconns, 0);
//
atomic_set(&dest->inactconns, 0);
// ( )
atomic_set(&dest->persistconns, 0);
//
atomic_set(&dest->refcnt, 0);
//
INIT_LIST_HEAD(&dest->d_list);
spin_lock_init(&dest->dst_lock);
spin_lock_init(&dest->stats.lock);
//
__ip_vs_update_dest(svc, dest, udest);
//
ip_vs_new_estimator(&dest->stats);
//
*dest_p = dest;
LeaveFunction(2);
return 0;
}
13.3.2
/*
* Edit a destination in the given service
*/
static int
ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
{
struct ip_vs_dest *dest;
__u32 daddr = udest->addr;
__u16 dport = udest->port;
EnterFunction(2);
// 0
if (udest->weight < 0) {
IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero
");
return -ERANGE;
}
//
if (udest->l_threshold > udest->u_threshold) {
IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
"upper threshold
");
return -ERANGE;
}
/*
* Lookup the destination list
*/
//
dest = ip_vs_lookup_dest(svc, daddr, dport);
if (dest == NULL) {
IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist
");
return -ENOENT;
}
//
__ip_vs_update_dest(svc, dest, udest);
write_lock_bh(&__ip_vs_svc_lock);
/* Wait until all other svc users go away */
// ,
while (atomic_read(&svc->usecnt) > 1) {};
/* call the update_service, because server weight may be changed */
//
svc->scheduler->update_service(svc);
write_unlock_bh(&__ip_vs_svc_lock);
LeaveFunction(2);
return 0;
}
13.3.3
/*
* Delete a destination server in the given service
*/
static int
ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest)
{
struct ip_vs_dest *dest;
__u32 daddr = udest->addr;
__u16 dport = udest->port;
EnterFunction(2);
//
dest = ip_vs_lookup_dest(svc, daddr, dport);
if (dest == NULL) {
IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!
");
return -ENOENT;
}
write_lock_bh(&__ip_vs_svc_lock);
/*
* Wait until all other svc users go away.
*/
//
IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
/*
* Unlink dest from the service
*/
//
__ip_vs_unlink_dest(svc, dest, 1);
write_unlock_bh(&__ip_vs_svc_lock);
/*
* Delete the destination
*/
//
__ip_vs_del_dest(dest);
LeaveFunction(2);
return 0;
}
/*
 *	Delete a destination (must be already unlinked from the service)
 *
 *	The structure is freed when this drops the last reference;
 *	otherwise it is parked on ip_vs_dest_trash.
 */
static void __ip_vs_del_dest(struct ip_vs_dest *dest)
{
	/* stop rate estimation for this destination */
	ip_vs_kill_estimator(&dest->stats);

	/*
	 *  Remove it from the d-linked list with the real services.
	 */
	write_lock_bh(&__ip_vs_rs_lock);
	ip_vs_rs_unhash(dest);
	write_unlock_bh(&__ip_vs_rs_lock);

	/*
	 *  Decrease the refcnt of the dest, and free the dest
	 *  if nobody refers to it (refcnt=0). Otherwise, throw
	 *  the destination into the trash.
	 */
	if (atomic_dec_and_test(&dest->refcnt)) {
		/* last reference gone: reset the dst state held by the dest */
		ip_vs_dst_reset(dest);
		/* simply decrease svc->refcnt here, let the caller check
		   and release the service if nobody refers to it.
		   Only user context can release destination and service,
		   and only one user context can update virtual service at a
		   time, so the operation here is OK */
		atomic_dec(&dest->svc->refcnt);
		kfree(dest);
	} else {
		/* still referenced: keep it in the trash list */
		IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
			  "dest->refcnt=%d\n",
			  NIPQUAD(dest->addr), ntohs(dest->port),
			  atomic_read(&dest->refcnt));
		list_add(&dest->n_list, &ip_vs_dest_trash);
		/* restore the count decremented by the test above */
		atomic_inc(&dest->refcnt);
	}
}
/*
 *	Detach a destination from its service: mark it unavailable, take
 *	it off the service's destination list and adjust the count.
 *	When @svcupd is non-zero the scheduler is told about the change.
 */
static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
				struct ip_vs_dest *dest,
				int svcupd)
{
	/* the destination may no longer be selected */
	dest->flags = dest->flags & ~IP_VS_DEST_F_AVAILABLE;

	/* drop it from the service's list of destinations */
	list_del(&dest->n_list);
	svc->num_dests -= 1;

	if (!svcupd)
		return;

	/* let the scheduler refresh its view of the service */
	svc->scheduler->update_service(svc);
}
13.4 防御レベルの定期更新
IPVSは、防御レベルを定期的に更新するためのワーク(タイマー)を次のように定義している:
/*
 *	Timer for checking the defense
 */
/*
 * Period of the defense work: one second, expressed in jiffies.
 * Parenthesized so the expansion stays intact inside larger expressions.
 */
#define DEFENSE_TIMER_PERIOD	(1*HZ)
static void defense_work_handler(void *data);
/* work item that runs defense_work_handler() */
static DECLARE_WORK(defense_work, defense_work_handler, NULL);
/*
 * Periodic defense work: refresh the defense level, drop random
 * connection entries while entry dropping is switched on, then queue
 * itself again after DEFENSE_TIMER_PERIOD.
 */
static void defense_work_handler(void *data)
{
	update_defense_level();

	if (atomic_read(&ip_vs_dropentry) != 0)
		ip_vs_random_dropentry();

	/* re-arm for the next period */
	schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
}
/*
* update_defense_level is called from keventd and from sysctl,
* so it needs to protect itself from softirqs
*/
// Recompute the three defense settings (drop_entry, drop_packet,
// secure_tcp) from their sysctl modes and the current amount of
// available memory. For each setting, modes 1 and 2 are the automatic
// pair: mode 1 switches to 2 when memory is short and mode 2 switches
// back to 1 when it is not; mode 0 turns the defense off, mode 3 turns
// it on unconditionally.
static void update_defense_level(void)
{
struct sysinfo i;
// secure_tcp mode seen on the previous call (persists across calls)
static int old_secure_tcp = 0;
int availmem;
int nomem;
// stays -1 unless the secure_tcp state crosses the on/off boundary
int to_change = -1;
/* we only count free and buffered memory (in pages) */
si_meminfo(&i);
availmem = i.freeram + i.bufferram;
/* however in linux 2.5 the i.bufferram is total page cache size,
we need adjust it */
/* si_swapinfo(&i); */
/* availmem = availmem - (i.totalswap - i.freeswap); */
// memory counts as short when it is below the configured threshold
nomem = (availmem < sysctl_ip_vs_amemthresh);
local_bh_disable();
/* drop_entry */
spin_lock(&__ip_vs_dropentry_lock);
// ip_vs_dropentry is the flag tested by defense_work_handler()
switch (sysctl_ip_vs_drop_entry) {
case 0:
// defense disabled
atomic_set(&ip_vs_dropentry, 0);
break;
case 1:
if (nomem) {
// memory short: start dropping and move to mode 2
atomic_set(&ip_vs_dropentry, 1);
sysctl_ip_vs_drop_entry = 2;
} else {
// enough memory: do not drop
atomic_set(&ip_vs_dropentry, 0);
}
break;
case 2:
if (nomem) {
// memory still short: keep dropping
atomic_set(&ip_vs_dropentry, 1);
} else {
// memory recovered: stop dropping and fall back to mode 1
atomic_set(&ip_vs_dropentry, 0);
sysctl_ip_vs_drop_entry = 1;
};
break;
case 3:
// always drop
atomic_set(&ip_vs_dropentry, 1);
break;
}
spin_unlock(&__ip_vs_dropentry_lock);
/* drop_packet */
// ip_vs_drop_rate == 0 means packet dropping is off
spin_lock(&__ip_vs_droppacket_lock);
switch (sysctl_ip_vs_drop_packet) {
case 0:
// defense disabled
ip_vs_drop_rate = 0;
break;
case 1:
if (nomem) {
// memory short: rate = amemthresh / (amemthresh - availmem);
// the divisor is positive because nomem means availmem < amemthresh
ip_vs_drop_rate = ip_vs_drop_counter
= sysctl_ip_vs_amemthresh /
(sysctl_ip_vs_amemthresh-availmem);
// move to mode 2
sysctl_ip_vs_drop_packet = 2;
} else {
// enough memory: do not drop
ip_vs_drop_rate = 0;
}
break;
case 2:
if (nomem) {
// memory still short: recompute the rate with the same formula
ip_vs_drop_rate = ip_vs_drop_counter
= sysctl_ip_vs_amemthresh /
(sysctl_ip_vs_amemthresh-availmem);
} else {
// memory recovered: stop dropping and fall back to mode 1
ip_vs_drop_rate = 0;
sysctl_ip_vs_drop_packet = 1;
}
break;
case 3:
// always drop, at the rate configured through sysctl
ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
break;
}
spin_unlock(&__ip_vs_droppacket_lock);
/* secure_tcp */
// modes 2 and 3 count as "on", modes 0 and 1 as "off"
write_lock(&__ip_vs_securetcp_lock);
// to_change is set only when the on/off state differs from last call
switch (sysctl_ip_vs_secure_tcp) {
case 0:
// now off; record a change if it was on before
if (old_secure_tcp >= 2)
to_change = 0;
break;
case 1:
if (nomem) {
// memory short: turn on (mode 2); a change if it was off before
if (old_secure_tcp < 2)
to_change = 1;
sysctl_ip_vs_secure_tcp = 2;
} else {
// enough memory: stays off; a change if it was on before
if (old_secure_tcp >= 2)
to_change = 0;
}
break;
case 2:
if (nomem) {
// memory still short: stays on; a change if it was off before
if (old_secure_tcp < 2)
to_change = 1;
} else {
// memory recovered: back to mode 1; a change if it was on before
if (old_secure_tcp >= 2)
to_change = 0;
sysctl_ip_vs_secure_tcp = 1;
}
break;
case 3:
// always on; a change if it was off before
if (old_secure_tcp < 2)
to_change = 1;
break;
}
// remember the (possibly updated) mode for the next call
old_secure_tcp = sysctl_ip_vs_secure_tcp;
// on a state change, switch the protocol timeouts; the argument is
// non-zero when secure_tcp is now on (mode > 1)
if (to_change >= 0)
ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
write_unlock(&__ip_vs_securetcp_lock);
local_bh_enable();
}
14.
,IPVS (1 ) , , 。 IPVS netfilter , ; helper ,IPVS 。 NAT IPVS netfilter ,IPVS NAT , , netfilter HASH , NAT netfilter , ; , , 。
: 2007-01-04, : 2007-01-04 08:55, 2256 , 4
: :2007-04-09 14:12:12 IP :221.122.54.★
IPVS real server ,
: yfydz :2007-04-10 12:56:36 IP :218.247.216.★
: :2007-04-11 13:45:29 IP :221.122.54.★
? ??
: yfydz :2007-04-13 09:36:14 IP :218.247.216.★