LVS-NAT one arm
LVS-NATで、real serverをいじらないで済む一本腕(one arm)な構成を考えてみました。
CONFIG_IP_VS_NFCT = y なカーネルが必要です。
real serverでは、directorサーバからのアクセスに見えます。
- 構成
client: 192.168.1.1
director: 172.16.1.2 (VIP: 172.16.1.1)
realserver: 10.1.1.1
- directorで設定
- sysctl
echo 1 > /proc/sys/net/ipv4/ip_forward
echo 1 > /proc/sys/net/ipv4/vs/conntrack
- vipの設定
ip addr add 172.16.1.1 label eth0:1 dev eth0
arping -U -c 1 -I eth0 172.16.1.1
- ipvsadm
ipvsadm -A -t 172.16.1.1:80 -s rr
ipvsadm -a -t 172.16.1.1:80 -r 10.1.1.1:80 -m
iptables -t nat -A POSTROUTING -m ipvs --vaddr 172.16.1.1 --vport 80 -j SNAT --to-source 172.16.1.2
これだけです。
どいひーな greasemonkey
// ==UserScript==
// @name 503 Auto Reloader
// @description Auto Page Reload on Busy Servers
// @author WhiteAnthrax
// @version 201201090239
// @include *
// ==/UserScript==
// Reload the current page when it looks like a 503 error page, identified
// either by a well-known document title or by a first <h1> reading
// "Error 503". Runs once, immediately, on every page the script is
// included on.
(function ()
{
    // Titles emitted by common "service unavailable" error pages.
    var busyTitles = [
        '503 Service Temporarily Unavailable',
        '503 Service Unavailable'
    ];

    // Many pages have no <h1> at all; indexing the empty collection yields
    // undefined, and reading .innerHTML from it would throw a TypeError on
    // every such page (the script is @include *). Guard before reading it.
    var firstHeading = document.getElementsByTagName('h1')[0];
    var headingSays503 = firstHeading !== undefined &&
        firstHeading.innerHTML == 'Error 503';

    if (busyTitles.indexOf(document.title) !== -1 || headingSays503) {
        // true forces a reload that bypasses the cache.
        window.location.reload(true);
    }
})();
LinuxのTCP/IPの実装について調べてみた
結論
- アプリごとにbacklogの制限があるので設定してやること
- ApacheならListenBacklogで設定できる(デフォルト511)
- nginxならlisten 80 backlog=1024;のように設定する
linux kernel 3.0.6の関係しそうな所を覗いてみた
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct tcp_extend_values tmp_ext; struct tcp_options_received tmp_opt; u8 *hash_location; struct request_sock *req; struct inet_request_sock *ireq; struct tcp_sock *tp = tcp_sk(sk); struct dst_entry *dst = NULL; __be32 saddr = ip_hdr(skb)->saddr; __be32 daddr = ip_hdr(skb)->daddr; __u32 isn = TCP_SKB_CB(skb)->when; #ifdef CONFIG_SYN_COOKIES int want_cookie = 0; #else #define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */ #endif /* Never answer to SYNs send to broadcast or multicast */ if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) goto drop; /* TW buckets are converted to open requests without * limitations, they conserve resources and peer is * evidently real one. */ if (inet_csk_reqsk_queue_is_full(sk) && !isn) { if (net_ratelimit()) syn_flood_warning(skb); #ifdef CONFIG_SYN_COOKIES if (sysctl_tcp_syncookies) { want_cookie = 1; } else #endif goto drop; }
- inet_csk_reqsk_queue_is_full()
- include/net/inet_connection_sock.h
/*
 * Report whether the request queue attached to @sk is full.
 *
 * Thin wrapper: forwards the socket's icsk_accept_queue to
 * reqsk_queue_is_full() and returns its result unchanged.
 */
static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
{
	return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue);
}
- reqsk_queue_is_full()
- include/net/request_sock.h
/*
 * Return non-zero when @queue holds at least 2^max_qlen_log requests.
 *
 * For a non-negative qlen, shifting it right by max_qlen_log yields 0 while
 * qlen < (1 << max_qlen_log) and a non-zero value once that limit is reached.
 */
static inline int reqsk_queue_is_full(const struct request_sock_queue *queue)
{
	return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log;
}
- listen_opt
- include/net/request_sock.h
/**
 * struct listen_sock - listen state
 *
 * @max_qlen_log - log_2 of maximal queued SYNs/REQUESTs
 * @qlen - current queue length; reqsk_queue_is_full() compares it against
 *         1 << @max_qlen_log
 * @hash_rnd - random value filled in by reqsk_queue_alloc()
 * @nr_table_entries - number of slots in @syn_table, as computed by
 *                     reqsk_queue_alloc()
 * @syn_table - trailing array of request_sock pointers; reqsk_queue_alloc()
 *              sizes the allocation for @nr_table_entries entries
 */
struct listen_sock {
	u8 max_qlen_log;
	/* 3 bytes hole, try to use */
	int qlen;
	int qlen_young;
	int clock_hand;
	u32 hash_rnd;
	u32 nr_table_entries;
	struct request_sock *syn_table[0];
};
/**
 * struct request_sock_queue - queue of request_socks
 *
 * @rskq_accept_head - FIFO head of established children
 * @rskq_accept_tail - FIFO tail of established children
 * @rskq_defer_accept - User waits for some data after accept()
 * @syn_wait_lock - serializer
 * @listen_opt - the struct listen_sock installed by reqsk_queue_alloc()
 *
 * %syn_wait_lock is necessary only to avoid proc interface having to grab the main
 * lock sock while browsing the listening hash (otherwise it's deadlock prone).
 *
 * This lock is acquired in read mode only from listening_get_next() seq_file
 * op and it's acquired in write mode _only_ from code that is actively
 * changing rskq_accept_head. All readers that are holding the master sock lock
 * don't need to grab this lock in read mode too as rskq_accept_head writes
 * are always protected from the main sock lock.
 */
struct request_sock_queue {
	struct request_sock *rskq_accept_head;
	struct request_sock *rskq_accept_tail;
	rwlock_t syn_wait_lock;
	u8 rskq_defer_accept;
	/* 3 bytes hole, try to pack */
	struct listen_sock *listen_opt;
};
- net/socket.c
/*
 * Perform a listen. Basically, we allow the protocol to do anything
 * necessary for a listen, and if that works, we mark the socket as
 * ready for listening.
 *
 * The requested backlog is silently capped at the per-namespace
 * sysctl_somaxconn value before it is handed to the security hook and
 * to the protocol's listen operation.
 */
SYSCALL_DEFINE2(listen, int, fd, int, backlog)
{
	struct socket *sock;
	int err, fput_needed;
	int somaxconn;

	/* NOTE(review): err is presumably filled in by the lookup when it fails. */
	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (sock) {
		somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
		/*
		 * The unsigned cast makes a negative backlog compare as a
		 * very large value, so it is capped at somaxconn as well.
		 */
		if ((unsigned)backlog > somaxconn)
			backlog = somaxconn;

		err = security_socket_listen(sock, backlog);
		if (!err)
			err = sock->ops->listen(sock, backlog);

		fput_light(sock->file, fput_needed);
	}
	return err;
}
- net/ipv4/af_inet.c
/*
 * Move a socket into listening state.
 *
 * Returns 0 on success; -EINVAL when the socket is not an unconnected
 * SOCK_STREAM socket or when its current state is not covered by
 * TCPF_CLOSE | TCPF_LISTEN; otherwise the error returned by
 * inet_csk_listen_start().
 *
 * The socket lock is held for the whole operation and released on every
 * exit path through the "out" label.
 */
int inet_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != TCP_LISTEN) {
		err = inet_csk_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	/* Applied both on the first listen() and when re-listening. */
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}
EXPORT_SYMBOL(inet_listen);
- net/ipv4/inet_connection_sock.c
/*
 * Put @sk into TCP_LISTEN: allocate its request queue, claim the local
 * port and hash the socket.
 *
 * @nr_table_entries is passed straight to reqsk_queue_alloc(); inet_listen()
 * calls this function with the listen() backlog.
 *
 * Returns 0 on success, the reqsk_queue_alloc() error if the queue cannot
 * be allocated, or -EADDRINUSE if get_port() fails (in which case the state
 * is reverted to TCP_CLOSE and the queue is destroyed).
 */
int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
{
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);

	if (rc != 0)
		return rc;

	/* Start with zeroed backlog counters; inet_listen() sets the max afterwards. */
	sk->sk_max_ack_backlog = 0;
	sk->sk_ack_backlog = 0;
	inet_csk_delack_init(sk);

	/* There is race window here: we announce ourselves listening,
	 * but this transition is still not validated by get_port().
	 * It is OK, because this socket enters to hash table only
	 * after validation is complete.
	 */
	sk->sk_state = TCP_LISTEN;
	/* get_port() returning 0 is the success path here. */
	if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
		inet->inet_sport = htons(inet->inet_num);

		sk_dst_reset(sk);
		sk->sk_prot->hash(sk);

		return 0;
	}

	/* Port could not be claimed: undo the state change and free the queue. */
	sk->sk_state = TCP_CLOSE;
	__reqsk_queue_destroy(&icsk->icsk_accept_queue);
	return -EADDRINUSE;
}
EXPORT_SYMBOL_GPL(inet_csk_listen_start);
- net/core/request_sock.c
/*
 * Maximum number of SYN_RECV sockets in queue per LISTEN socket.
 * One SYN_RECV socket costs about 80bytes on a 32bit machine.
 * It would be better to replace it with a global counter for all sockets
 * but then some measure against one socket starving all other sockets
 * would be needed.
 *
 * It was 128 by default. Experiments with real servers show, that
 * it is absolutely not enough even at 100conn/sec. 256 cures most
 * of problems. This value is adjusted to 128 for very small machines
 * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb).
 * Note : Dont forget somaxconn that may limit backlog too.
 */
int sysctl_max_syn_backlog = 256;
EXPORT_SYMBOL(sysctl_max_syn_backlog);

/*
 * Allocate a zeroed listen_sock for @queue and install it as
 * queue->listen_opt.
 *
 * @nr_table_entries is first capped at sysctl_max_syn_backlog, then raised
 * to at least 8, and finally (value + 1) is rounded up to a power of two.
 * max_qlen_log ends up as the smallest exponent >= 3 for which
 * (1 << max_qlen_log) is not below that final table size.
 *
 * Returns 0 on success or -ENOMEM if the allocation fails.
 */
int reqsk_queue_alloc(struct request_sock_queue *queue,
		      unsigned int nr_table_entries)
{
	size_t lopt_size = sizeof(struct listen_sock);
	struct listen_sock *lopt;

	nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog);
	nr_table_entries = max_t(u32, nr_table_entries, 8);
	nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
	/* Header plus one request_sock pointer per table slot. */
	lopt_size += nr_table_entries * sizeof(struct request_sock *);
	/* Allocations larger than a page go through vzalloc(), others kzalloc(). */
	if (lopt_size > PAGE_SIZE)
		lopt = vzalloc(lopt_size);
	else
		lopt = kzalloc(lopt_size, GFP_KERNEL);
	if (lopt == NULL)
		return -ENOMEM;

	/* Empty-bodied loop: only the counter in the for-header does the work. */
	for (lopt->max_qlen_log = 3;
	     (1 << lopt->max_qlen_log) < nr_table_entries;
	     lopt->max_qlen_log++);

	get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
	rwlock_init(&queue->syn_wait_lock);
	queue->rskq_accept_head = NULL;
	lopt->nr_table_entries = nr_table_entries;

	/* Publish the new table under the write side of syn_wait_lock. */
	write_lock_bh(&queue->syn_wait_lock);
	queue->listen_opt = lopt;
	write_unlock_bh(&queue->syn_wait_lock);

	return 0;
}
- roundup_pow_of_two は 引数以上で最小の2の累乗に切り上げる(include/linux/log2.h)
somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
if ((unsigned)backlog > somaxconn)
backlog = somaxconn;
- nr_table_entries = backlog (somaxconn以上ならsomaxconn)
- もし somaxconn = 128 backlog = 511 なら nr_table_entries = 128
- nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog);
- nr_table_entries = 128
- nr_table_entries = max_t(u32, nr_table_entries, 8);
- nr_table_entries = 128
- nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
- nr_table_entries = roundup_pow_of_two(128 + 1) = 256
for (lopt->max_qlen_log = 3; (1 << lopt->max_qlen_log) < nr_table_entries; lopt->max_qlen_log++);
-
- max_qlen_log = 8
- これはつまり log2(256) を計算している。nr_table_entries = max_t(u32, nr_table_entries, 8) で最小値が 8 (= 2^3) なので、3からスタートしている。
- max_qlen_log = 8
static inline int reqsk_queue_is_full(const struct request_sock_queue *queue)
{
return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log;
}
if (inet_csk_reqsk_queue_is_full(sk) && !isn) { if (net_ratelimit()) syn_flood_warning(skb);
tcp_max_syn_backlog の 値って?
/* Kill the following clause, if you dislike this way. */
else if (!sysctl_tcp_syncookies &&
(sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
(sysctl_max_syn_backlog >> 2)) &&
(!peer || !peer->tcp_ts_stamp) &&
(!dst || !dst_metric(dst, RTAX_RTT))) {
/* Without syncookies last quarter of
* backlog is filled with destinations,
* proven to be alive.
* It means that we continue to communicate
* to destinations, already remembered
* to the moment of synflood.
*/
LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
&saddr, ntohs(tcp_hdr(skb)->source));
goto drop_and_release;
- (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < (sysctl_max_syn_backlog >> 2))
- sysctl_max_syn_backlogが
- 512のとき : (512 - X) < 128 ⇒ X > 384
- 1024のとき : (1024 - X) < 256 ⇒ X > 768
- 2048のとき : (2048 - X) < 512 ⇒ X > 1536
- 4096のとき : (4096 - X) < 1024 ⇒ X > 3072
- sysctl_max_syn_backlogが
- 要するに、syncookies無効時は だいたい int(sysctl_max_syn_backlog * 0.75) が 実効値
epgdump
ちょろちょろsegfaultがでてるのでパッチをあてた
--- ts.c.orig 2011-08-21 21:00:29.000000000 +0900
+++ ts.c 2011-08-21 18:11:17.000000000 +0900
@@ -320,6 +320,9 @@
int checkcrc(SECcache *secs) {
+ if(secs->seclen > MAXSECLEN) {
+ return 0;
+ }
/* セクションの終りに置かれる4バイトのCRC32は、
CRC計算の結果0になるように設定される。
いまのところ出なくなった模様
udevが/dev/sd*を消してしまう
カーネルはちゃんと/dev/sd*を認識してるのに、udevが上がったとたん/dev/sd*を消しちゃう状態になってハマりました。
どうやら/usr/share/doc/udev-151-r4/README.bz2によると、カーネルのconfigを以下のようにする必要があるそうです
CONFIG_HOTPLUG=y
CONFIG_UEVENT_HELPER_PATH=""
CONFIG_NET=y
CONFIG_UNIX=y
CONFIG_SYSFS=y
CONFIG_SYSFS_DEPRECATED*=n
CONFIG_PROC_FS=y
CONFIG_TMPFS=y
CONFIG_INOTIFY_USER=y
CONFIG_SIGNALFD=y
CONFIG_TMPFS_POSIX_ACL=y (user ACLs for device nodes)
CONFIG_BLK_DEV_BSG=y (SCSI devices)
これでOKでした。
CONFIG_SYSFS_DEPRECATED*=n について言及してる人はいるんだけど、それ以外はあまり見かけませんでした。ググる前にちゃんとREADMEみないといけないね。