gluster-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Gluster-devel] Problem with scheduler load-balancing


From: drizzo201-cfs
Subject: [Gluster-devel] Problem with scheduler load-balancing
Date: Fri, 9 Nov 2007 08:26:15 -0800 (PST)

This is a repost. Don't think the first one made it through.

I am having trouble getting a configuration based on this profile to work 
properly.

http://www.gluster.org/docs/index.php/Advanced_Striping_with_GlusterFS

Using dd to create files with the "striped" extensions works OK. Files get 
striped across all four servers. When I use dd to create files  w/out a  
"stripe" matched extension,  they're created on a single server. My problem 
occurs when I run unstriped dd's from the three clients simultaneously --- all 
of the file creates end up on the same glusterfsd server.

I've used both the rr and alu schedulers to try and get the non-striped file 
creates spread across the four servers but it doesn't work. If I let the dd's
run to completion and start a new set of dd's, the new files are created on a 
different server, but as before, all three dd's go to the same server.

Single client, single threaded, striped writes run at ~105MB/s. Single client, 
single threaded,  non-striped  writes run at ~85MB/s. When I run three 
"unstriped" client dd's concurrently and all IO goes to the same server, total 
thruput drops to ~50MB/s with each client getting a third of the total (lots of 
disk thrashing). The dd test is "dd if=/dev/zero of=/mnt/cluster/testX bs=64k 
count=64k". I just add the .img to get the file striped.

I'm using the gluster patched fuse client (-glfs5) and gluster 1.3.7.  
Interconnect is tcp over Gbe with clients having a single connection and 
servers having a bonded dual GBe interface. Three servers are running 
SLES10SP1, 4th server is running CentOS4.5. Local file systems are XFS and ext3 
mounted with extended attributes. These file systems are on unshared, single 
partition internal 250GB SATA disks.


Glusterfsd servers are started with log level NORMAL and don't show any 
problems at this log level.
server# glusterfsd -f /etc/glusterfs/bp-server.vol.iot -L NORMAL -l 
/var/log/glusterfs/glusterfsd.log

Clients are started with log level DEBUG. 
client# glusterfs  -l /var/log/glusterfs/glusterfs.log -L DEBUG --server=demo1 
/mnt/cluster

Here is a snippet from one of the clients. The other two clients have the same 
entries.

2007-11-07 12:09:31 D [inode.c:351:__active_inode] fuse/inode: activating 
inode(97417), lru=21/1024
2007-11-07 12:09:31 D [inode.c:308:__destroy_inode] fuse/inode: destroy 
inode(0) address@hidden
2007-11-07 12:14:01 D [inode.c:381:__passive_inode] fuse/inode: passivating 
inode(97417), lru=22/1024
2007-11-07 14:33:53 D [fuse-bridge.c:422:fuse_lookup] glusterfs-fuse: LOOKUP 
1/rlx11-8 (/rlx11-8)
2007-11-07 14:33:54 D [fuse-bridge.c:377:fuse_entry_cbk] glusterfs-fuse: ERR => 
-1 (2)
2007-11-07 14:33:54 D [inode.c:308:__destroy_inode] fuse/inode: destroy 
inode(0) address@hidden
2007-11-07 14:33:54 D [inode.c:559:__create_inode] fuse/inode: create 
inode(97692)
2007-11-07 14:33:54 D [inode.c:351:__active_inode] fuse/inode: activating 
inode(97692), lru=22/1024
2007-11-07 14:33:54 D [inode.c:308:__destroy_inode] fuse/inode: destroy 
inode(0) address@hidden
2007-11-07 14:46:32 D [inode.c:381:__passive_inode] fuse/inode: passivating 
inode(97692), lru=23/1024

Any ideas?

All four servers are setup the same way. Here is the spec file from the first 
server:
########################
volume posix-unify
           type storage/posix
           option directory /gluster/unify
 end-volume

volume posix-stripe
           type storage/posix
           option directory /gluster/stripe
 end-volume

volume posix-namespace
           type storage/posix
           option directory /export/namespace
 end-volume

#volume plocks
#       type features/posix-locks
#       option mandatory on
#       subvolumes posix-unify posix-stripe
# end-volume

volume iot-posix-unify
        type performance/io-threads
        option thread-count 8
        subvolumes posix-unify
 end-volume

volume iot-posix-stripe
        type performance/io-threads
        option thread-count 8
        subvolumes posix-stripe
 end-volume

volume server
           type protocol/server
           option transport-type tcp/server
           option auth.ip.iot-posix-unify.allow 192.168.1.*
           option auth.ip.iot-posix-stripe.allow 192.168.1.*
           option auth.ip.posix-namespace.allow 192.168.1.*
           option client-volume-filename /etc/glusterfs/bp-client.vol.iot
           subvolumes iot-posix-unify iot-posix-stripe posix-namespace
 end-volume
########################

Here is the client spec;

volume client-namespace
     type protocol/client
     option transport-type tcp/client
     option remote-host 192.168.1.201
     option remote-subvolume posix-namespace
 end-volume

volume client-unify-1
     type protocol/client
     option transport-type tcp/client
     option remote-host 192.168.1.201
     option remote-subvolume iot-posix-unify
 end-volume

volume client-unify-2
     type protocol/client
     option transport-type tcp/client
     option remote-host 192.168.1.202
     option remote-subvolume iot-posix-unify
 end-volume

volume client-unify-3
     type protocol/client
     option transport-type tcp/client
     option remote-host 192.168.1.203
     option remote-subvolume iot-posix-unify
 end-volume

volume client-unify-4
     type protocol/client
     option transport-type tcp/client
     option remote-host 192.168.1.224
     option remote-subvolume iot-posix-unify
 end-volume

volume client-stripe-1
     type protocol/client
     option transport-type tcp/client
     option remote-host 192.168.1.201
     option remote-subvolume iot-posix-stripe
 end-volume

volume client-stripe-2
     type protocol/client
     option transport-type tcp/client
     option remote-host 192.168.1.202
     option remote-subvolume iot-posix-stripe
 end-volume

volume client-stripe-3
     type protocol/client
     option transport-type tcp/client
     option remote-host 192.168.1.203
     option remote-subvolume iot-posix-stripe
 end-volume

volume client-stripe-4
     type protocol/client
     option transport-type tcp/client
     option remote-host 192.168.1.224
     option remote-subvolume iot-posix-stripe
 end-volume

volume unify
     type cluster/unify
#    option scheduler rr
### ** ALU Scheduler Option **
  option scheduler alu
  option alu.limits.min-free-disk  5% #%
  option alu.limits.max-open-files 10000
  option alu.order 
disk-usage:read-usage:write-usage:open-files-usage:disk-speed-usage
  option alu.disk-usage.entry-threshold 2GB
  option alu.disk-usage.exit-threshold  128MB
  option alu.open-files-usage.entry-threshold 1024
  option alu.open-files-usage.exit-threshold 32
  option alu.read-usage.entry-threshold 20 #%
  option alu.read-usage.exit-threshold 4 #%
  option alu.write-usage.entry-threshold 20 #%
  option alu.write-usage.exit-threshold 4 #%
#  option alu.disk-speed-usage.entry-threshold 0 # DO NOT SET IT. SPEED IS 
CONSTANT!!!
#  option alu.disk-speed-usage.exit-threshold 0 # DO NOT SET IT. SPEED IS 
CONSTANT!!!
  option alu.stat-refresh.interval 10sec
  option namespace client-namespace
  subvolumes client-unify-1 client-unify-2 client-unify-3 client-unify-4
 end-volume

volume stripe
     type cluster/stripe
     option block-size *.img:2MB,*.tmp:2MB,*DUMMY*:1MB # All files ending with 
.img in name are striped with 2MB stripe block size.
     subvolumes unify client-stripe-1 client-stripe-2 client-stripe-3 
client-stripe-4
#      subvolumes client-stripe-1 client-stripe-2 client-stripe-3 
client-stripe-4
 end-volume

volume iot
    type performance/io-threads
    option thread-count 8
    subvolumes stripe
 end-volume

volume wb
    type performance/write-behind
#    option thread-count 8
    subvolumes iot
 end-volume

volume ra
    type performance/read-ahead
#    option thread-count 8
    subvolumes wb
 end-volume

#volume ioc
#    type performance/io-cache
#    option thread-count 8
#    subvolumes ra
# end-volume
####################


reply via email to

[Prev in Thread] Current Thread [Next in Thread]