On Tue, Mar 10, 2009 at 11:04 AM, Dan Parsons <address@hidden> wrote:
> I just received this error message using rc4:
> 2009-03-09 21:58:16 E [client-protocol.c:505:client_ping_timer_expired] distfs03-stripe: ping timer expired! bailing transport
> 2009-03-09 21:58:16 N [client-protocol.c:6607:notify] distfs03-stripe: disconnected
> It happened a total of 7 times across my 33 client nodes. It doesn't seem to
> be related to any particular client, but the errors did happen mostly
> (though not always) on the unify-ns server. The gluster servers are under
> pretty heavy network utilization; however, it doesn't seem to be near the
> link capacity, and in any case, I/O should just block if it's slow to
> respond, correct? Fortunately, gluster is automatically reconnecting after
> the error. I don't remember seeing this in rc2. The only corresponding
> errors in the server logs are simply showing the client disconnecting. I've
> also ruled out any interconnect faults.
> Any suggestions? My configs are below.
> Dan
>
> CLIENT CONFIG:
> volume unify-switch-ns
> type protocol/client
> option transport-type tcp
> option remote-host 10.8.101.51
> option remote-subvolume posix-unify-switch-ns
> end-volume
> #volume distfs01-ns-readahead
> # type performance/read-ahead
> # option page-size 1MB
> # option page-count 8
> # subvolumes distfs01-ns-brick
> #end-volume
> #volume unify-switch-ns
> # type performance/write-behind
> # option block-size 1MB
> # option cache-size 3MB
> # subvolumes distfs01-ns-readahead
> #end-volume
> volume distfs01-unify
> type protocol/client
> option transport-type tcp
> option remote-host 10.8.101.51
> option remote-subvolume posix-unify
> end-volume
> volume distfs02-unify
> type protocol/client
> option transport-type tcp
> option remote-host 10.8.101.52
> option remote-subvolume posix-unify
> end-volume
> volume distfs03-unify
> type protocol/client
> option transport-type tcp
> option remote-host 10.8.101.53
> option remote-subvolume posix-unify
> end-volume
> volume distfs04-unify
> type protocol/client
> option transport-type tcp
> option remote-host 10.8.101.54
> option remote-subvolume posix-unify
> end-volume
> volume distfs01-stripe
> type protocol/client
> option transport-type tcp
> option remote-host 10.8.101.51
> option remote-subvolume posix-stripe
> end-volume
> volume distfs02-stripe
> type protocol/client
> option transport-type tcp
> option remote-host 10.8.101.52
> option remote-subvolume posix-stripe
> end-volume
> volume distfs03-stripe
> type protocol/client
> option transport-type tcp
> option remote-host 10.8.101.53
> option remote-subvolume posix-stripe
> end-volume
> volume distfs04-stripe
> type protocol/client
> option transport-type tcp
> option remote-host 10.8.101.54
> option remote-subvolume posix-stripe
> end-volume
> volume stripe0
> type cluster/stripe
> option block-size *.jar,*.pin:1MB,*:2MB
> subvolumes distfs01-stripe distfs02-stripe distfs03-stripe distfs04-stripe
> end-volume
> volume dht0
> type cluster/dht
> # option lookup-unhashed yes
> subvolumes distfs01-unify distfs02-unify distfs03-unify distfs04-unify
> end-volume
> volume unify
> type cluster/unify
> option namespace unify-switch-ns
> option self-heal off
> option scheduler switch
> # send *.phr/psq/pnd etc to stripe0, send the rest to hash
> # extensions have to be *.foo* and not simply *.foo, or rsync's tmp file
> # naming will prevent files from being matched
> option scheduler.switch.case *.phr*:stripe0;*.psq*:stripe0;*.pnd*:stripe0;*.psd*:stripe0;*.pin*:stripe0;*.nsi*:stripe0;*.nin*:stripe0;*.nsd*:stripe0;*.nhr*:stripe0;*.nsq*:stripe0;*.tar*:stripe0;*.tar.gz*:stripe0;*.jar*:stripe0;*.img*:stripe0;*.perf*:stripe0;*.tgz*:stripe0;*.fasta*:stripe0;*.huge*:stripe0
> subvolumes stripe0 dht0
> end-volume
> volume ioc
> type performance/io-cache
> subvolumes unify
> option cache-size 3000MB
> option cache-timeout 3600
> end-volume
> volume filter
> type features/filter
> option fixed-uid 0
> option fixed-gid 900
> subvolumes ioc
> end-volume
>
>
>
> SERVER CONFIG:
> volume posix-unify-brick
> type storage/posix
> option directory /distfs-storage-space/glusterfs/unify
> # the below line is here to make the output of 'df' accurate, as both
> # volumes are served from the same local drive
> option export-statfs-size off
> end-volume
> volume posix-stripe-brick
> type storage/posix
> option directory /distfs-storage-space/glusterfs/stripe
> end-volume
> volume posix-unify-switch-ns-brick
> type storage/posix
> option directory /distfs-storage-space/glusterfs/unify-switch-ns
> end-volume
> volume posix-unify
> type performance/io-threads
> option thread-count 4
> subvolumes posix-unify-brick
> end-volume
> volume posix-stripe
> type performance/io-threads
> option thread-count 4
> subvolumes posix-stripe-brick
> end-volume
> volume posix-unify-switch-ns
> type performance/io-threads
> option thread-count 2
> subvolumes posix-unify-switch-ns-brick
> end-volume
> volume server
> type protocol/server
> option transport-type tcp
> option auth.addr.posix-unify.allow 10.8.101.*,10.8.15.50
> option auth.addr.posix-stripe.allow 10.8.101.*,10.8.15.50
> option auth.addr.posix-unify-switch-ns.allow 10.8.101.*,10.8.15.50
> subvolumes posix-unify posix-stripe posix-unify-switch-ns
> end-volume
>