Hi Guys -
I'm fairly clueless about ZFS and looking for some help. We have an OpenSolaris file server that has been running well for 3.5 years, but it finally has a failed disk. I'm trying to take the disk offline so I can replace it, but the zpool command is giving me an error.
Note that the 'zpool status' output below is not reporting errors because I cleared them (in frustration) while trying to make things work:
root@lincoln:~# zpool status
pool: p1
state: ONLINE
scan: scrub canceled on Wed Aug 7 20:43:52 2013
config:
NAME STATE READ WRITE CKSUM
p1 ONLINE 0 0 0
c1t2d0 ONLINE 0 0 0
c1t3d0 ONLINE 0 0 0
c1t4d0 ONLINE 0 0 0
c1t5d0 ONLINE 0 0 0
logs
c1t1d0 ONLINE 0 0 0
errors: No known data errors
pool: syspool
state: ONLINE
scan: scrub repaired 0 in 0h1m with 0 errors on Fri Apr 5 17:45:48 2013
config:
NAME STATE READ WRITE CKSUM
syspool ONLINE 0 0 0
c1t0d0s0 ONLINE 0 0 0
errors: No known data errors
The bad device is 'c1t3d0', but for some reason I cannot take it offline:
root@lincoln:~# zpool offline p1 c1t3d0
cannot offline c1t3d0: no such pool or dataset
It seems like it doesn't recognize that id as a valid device name, so I tried adding a slice number:
root@lincoln:~# zpool offline p1 c1t3d0s0
cannot offline c1t3d0s0: no such device in pool
root@lincoln:~# zpool offline p1 c1t3d0s1
cannot offline c1t3d0s1: no such device in pool
root@lincoln:~# zpool offline p1 c1t3d0s2
cannot offline c1t3d0s2: no such device in pool
I am afraid to go ahead with replacing the disk because the server is still up, and if the pool gets screwed up further with unrecognized devices, we may lose data.
'zdb' gives some interesting output, but none of it has helped me. I tried using both the GUID for the disk and the /dev/dsk name from zdb in the offline command, and neither worked:
root@lincoln:~# zdb
p1:
version: 26
name: 'p1'
state: 0
txg: 4648880
pool_guid: 4968018013301160962
hostid: 8985187
hostname: 'lincoln'
vdev_children: 5
vdev_tree:
type: 'root'
id: 0
guid: 4968018013301160962
children[0]:
type: 'disk'
id: 0
guid: 13134804042246883594
path: '/dev/dsk/c1t2d0s0'
devid: 'id1,sd@n6000c29521146953aa8896db510f7245/a'
phys_path: '/pci@0,0/pci15ad,1976@10/sd@2,0:a'
whole_disk: 1
metaslab_array: 30
metaslab_shift: 34
ashift: 9
asize: 1997146423296
is_log: 0
DTL: 65
create_txg: 4
children[1]:
type: 'disk'
id: 1
guid: 11536494763799534620
path: '/dev/dsk/c1t3d0s0'
devid: 'id1,sd@n6000c29035e47d3b4dac7f9c56d8ee85/a'
phys_path: '/pci@0,0/pci15ad,1976@10/sd@3,0:a'
whole_disk: 1
metaslab_array: 28
metaslab_shift: 34
ashift: 9
asize: 1997146423296
is_log: 0
DTL: 64
create_txg: 4
children[2]:
type: 'disk'
id: 2
guid: 14498981475727854810
path: '/dev/dsk/c1t4d0s0'
devid: 'id1,sd@n6000c291e255a1ffb0d60be9506ec2a7/a'
phys_path: '/pci@0,0/pci15ad,1976@10/sd@4,0:a'
whole_disk: 1
metaslab_array: 27
metaslab_shift: 34
ashift: 9
asize: 1997146423296
is_log: 0
DTL: 62
create_txg: 4
children[3]:
type: 'disk'
id: 3
guid: 13953919250895671919
path: '/dev/dsk/c1t5d0s0'
devid: 'id1,sd@n6000c29d8d53e0d9102b282e3fe90e29/a'
phys_path: '/pci@0,0/pci15ad,1976@10/sd@5,0:a'
whole_disk: 1
metaslab_array: 25
metaslab_shift: 34
ashift: 9
asize: 1997146423296
is_log: 0
DTL: 63
create_txg: 4
children[4]:
type: 'disk'
id: 4
guid: 12386275127038724319
path: '/dev/dsk/c1t1d0s0'
devid: 'id1,sd@n6000c294669285e7430f3a2b0d9f3da6/a'
phys_path: '/pci@0,0/pci15ad,1976@10/sd@1,0:a'
whole_disk: 1
metaslab_array: 24
metaslab_shift: 28
ashift: 9
asize: 38641336320
is_log: 1
DTL: 61
create_txg: 4
Any suggestions?
I'm fairly clueless about ZFS and looking for some help. We have an OpenSolaris file server that has been running well for 3.5 years, but it finally has a failed disk. I'm trying to take the disk offline so I can replace it, but the zpool command is giving me an error.
Note that the 'zpool status' output below is not reporting errors because I cleared them (in frustration) while trying to make things work:
root@lincoln:~# zpool status
pool: p1
state: ONLINE
scan: scrub canceled on Wed Aug 7 20:43:52 2013
config:
NAME STATE READ WRITE CKSUM
p1 ONLINE 0 0 0
c1t2d0 ONLINE 0 0 0
c1t3d0 ONLINE 0 0 0
c1t4d0 ONLINE 0 0 0
c1t5d0 ONLINE 0 0 0
logs
c1t1d0 ONLINE 0 0 0
errors: No known data errors
pool: syspool
state: ONLINE
scan: scrub repaired 0 in 0h1m with 0 errors on Fri Apr 5 17:45:48 2013
config:
NAME STATE READ WRITE CKSUM
syspool ONLINE 0 0 0
c1t0d0s0 ONLINE 0 0 0
errors: No known data errors
The bad device is 'c1t3d0', but for some reason I cannot take it offline:
root@lincoln:~# zpool offline p1 c1t3d0
cannot offline c1t3d0: no such pool or dataset
It seems like it doesn't recognize that id as a valid device name, so I tried adding a slice number:
root@lincoln:~# zpool offline p1 c1t3d0s0
cannot offline c1t3d0s0: no such device in pool
root@lincoln:~# zpool offline p1 c1t3d0s1
cannot offline c1t3d0s1: no such device in pool
root@lincoln:~# zpool offline p1 c1t3d0s2
cannot offline c1t3d0s2: no such device in pool
I am afraid to go ahead with replacing the disk because the server is still up, and if the pool gets screwed up further with unrecognized devices, we may lose data.
'zdb' gives some interesting output, but none of it has helped me. I tried using both the GUID for the disk and the /dev/dsk name from zdb in the offline command, and neither worked:
root@lincoln:~# zdb
p1:
version: 26
name: 'p1'
state: 0
txg: 4648880
pool_guid: 4968018013301160962
hostid: 8985187
hostname: 'lincoln'
vdev_children: 5
vdev_tree:
type: 'root'
id: 0
guid: 4968018013301160962
children[0]:
type: 'disk'
id: 0
guid: 13134804042246883594
path: '/dev/dsk/c1t2d0s0'
devid: 'id1,sd@n6000c29521146953aa8896db510f7245/a'
phys_path: '/pci@0,0/pci15ad,1976@10/sd@2,0:a'
whole_disk: 1
metaslab_array: 30
metaslab_shift: 34
ashift: 9
asize: 1997146423296
is_log: 0
DTL: 65
create_txg: 4
children[1]:
type: 'disk'
id: 1
guid: 11536494763799534620
path: '/dev/dsk/c1t3d0s0'
devid: 'id1,sd@n6000c29035e47d3b4dac7f9c56d8ee85/a'
phys_path: '/pci@0,0/pci15ad,1976@10/sd@3,0:a'
whole_disk: 1
metaslab_array: 28
metaslab_shift: 34
ashift: 9
asize: 1997146423296
is_log: 0
DTL: 64
create_txg: 4
children[2]:
type: 'disk'
id: 2
guid: 14498981475727854810
path: '/dev/dsk/c1t4d0s0'
devid: 'id1,sd@n6000c291e255a1ffb0d60be9506ec2a7/a'
phys_path: '/pci@0,0/pci15ad,1976@10/sd@4,0:a'
whole_disk: 1
metaslab_array: 27
metaslab_shift: 34
ashift: 9
asize: 1997146423296
is_log: 0
DTL: 62
create_txg: 4
children[3]:
type: 'disk'
id: 3
guid: 13953919250895671919
path: '/dev/dsk/c1t5d0s0'
devid: 'id1,sd@n6000c29d8d53e0d9102b282e3fe90e29/a'
phys_path: '/pci@0,0/pci15ad,1976@10/sd@5,0:a'
whole_disk: 1
metaslab_array: 25
metaslab_shift: 34
ashift: 9
asize: 1997146423296
is_log: 0
DTL: 63
create_txg: 4
children[4]:
type: 'disk'
id: 4
guid: 12386275127038724319
path: '/dev/dsk/c1t1d0s0'
devid: 'id1,sd@n6000c294669285e7430f3a2b0d9f3da6/a'
phys_path: '/pci@0,0/pci15ad,1976@10/sd@1,0:a'
whole_disk: 1
metaslab_array: 24
metaslab_shift: 28
ashift: 9
asize: 38641336320
is_log: 1
DTL: 61
create_txg: 4
Any suggestions?